summary | refs | log | tree | commit | diff
path: root/spec
diff options
context:
space:
mode:
Diffstat (limited to 'spec')
-rw-r--r-- spec/syntax.abnf 57
-rw-r--r-- spec/syntax.md 31
-rw-r--r-- spec/syntax.zbnf 54
3 files changed, 59 insertions, 83 deletions
diff --git a/spec/syntax.abnf b/spec/syntax.abnf
index 6ee024d..a083eda 100644
--- a/spec/syntax.abnf
+++ b/spec/syntax.abnf
@@ -1,23 +1,39 @@
-Unit = *Blank Datum [ Blank ]
+;This file follows strict ABNF rules and can be used with abnfgen.
+File = [Unit] *( Blank Unit ) *Blank [Trail]
+
+
+Unit = *Blank Datum
+
Blank = HTAB / LF / %x0b / %x0c / CR / Comment
-Datum = OneDatum *( [JoinChar] OneDatum )
+Trail = SkipLine / SkipUnit
+
+
+Comment = SkipLine LF / SkipUnit Blank
+
+SkipLine = ';' [ SkipLStart *AnyButLF ]
-JoinChar = '.' / ':'
+SkipUnit = ';' '~' Unit
-Comment = ';' ( SkipUnit / SkipLine [LF] )
+SkipLStart = %x00-09 / %x0b-7d / %x7f-ff
+ ; any but LF or '~'
-SkipUnit = '~' Unit
+AnyButLF = %x00-09 / %x0b-ff
-SkipLine = *( %x00-09 / %x0b-ff ) ; any but LF
+Datum = SingleDatum
+ / JoinedDatum *( [ '.' / ':' ] JoinedDatum )
-OneDatum = BareString / CladDatum
-BareString = 1*BareChar
+SingleDatum = BareString / CladDatum / DottedString
+
+JoinedDatum = BareString / CladDatum
+
+
+BareString = BareChar *( BareChar / Numeric )
CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|'
/ '"' *( QuotStrChar / '\' StringEsc ) '"'
@@ -25,28 +41,37 @@ CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|'
/ '(' List ')' / '[' List ']' / '{' List '}'
/ "'" Datum / '`' Datum / ',' Datum
+DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar )
-BareChar = ALPHA / DIGIT
- / '!' / '$' / '%' / '*' / '+' / '-' / '.' / '/'
- / '<' / '=' / '>' / '?' / '@' / '^' / '_' / '~'
+BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>'
+ / '?' / '@' / '^' / '_' / '~' / ALPHA
-PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but | or \
+Numeric = '+' / '-' / DIGIT
-QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but " or \
+
+PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff
+ ; any but '|' or '\'
+
+QuotStrChar = %x00-21 / %x23-5b / %x5d-ff
+ ; any but '"' or '\'
HashExpr = Rune [ '\' BareString / CladDatum ]
/ '\' BareString
/ '%' Label ( '%' / '=' Datum )
/ CladDatum
-List = *Unit [ '&' Unit ] *Blank
+List = [Unit] *( Blank Unit ) *Blank [Tail] [SkipUnit]
+
+Tail = '&' Unit *Blank
StringEsc = '\' / '|' / '"' / *( HTAB / SP ) LF *( HTAB / SP )
/ 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e'
- / 'x' 1*( 2*HEXDIG ) ';'
- / 'u' 1*6( HEXDIG ) ';'
+ / 'x' 1*( 2HEXDIG ) ';'
+ / 'u' 1*5HEXDIG ';'
+ / 'u' '0' 1*5HEXDIG ';'
+ / 'u' '1' '0' 1*4HEXDIG ';'
Rune = ALPHA *5( ALPHA / DIGIT )
diff --git a/spec/syntax.md b/spec/syntax.md
index affa7a1..7f3561c 100644
--- a/spec/syntax.md
+++ b/spec/syntax.md
@@ -6,7 +6,7 @@ We use a BNF notation with the following rules:
followed by `bar`.
* Expressions may be followed by `?`, `*`, `+`, `{N}`, or `{N,M}`,
- which have meanings analogous to regular expressions.
+ which have the same meanings as in regular expressions.
* The syntax `[foo]` is shorthand for `(foo)?`.
@@ -20,21 +20,24 @@ We use a BNF notation with the following rules:
* Ranges of terminal values are expressed as `x...y` (inclusive).
-* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported, with the
- addition of EOF to explicitly demarcate the end of the byte stream.
+* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported.
-* There is no ambiguity, backtracking, or look-ahead beyond one byte.
+* There is no ambiguity, and no look-ahead or backtracking beyond one byte.
Rules match left to right, depth-first, and greedy. As soon as the
input matches the first terminal of a rule, it must match that rule
to the end or it is considered a syntax error.
The last rule means that the BNF is very simple to translate to code.
+It also probably makes the BNF equivalent to a PEG (parsing expression grammar).
-The parser consumes one `unit` from an input stream every time it's
-called; it returns the `datum` therein, or EOF.
+The parser consumes one `Unit` from an input stream every time it's
+called; it returns the `Datum` therein, or EOF. The final optional
+`Blank` reflects that the parser consumes one more blank after the
+`Datum` if it finds one: a `Datum` is not self-delimiting, so the
+parser has to read ahead to check whether it continues.
```
-Unit : Blank* ( Datum [Blank] | EOF )
+Unit : Blank* [ Datum [Blank] ]
Blank : 9...13 | Comment
@@ -44,16 +47,17 @@ Datum : OneDatum ( [JoinChar] OneDatum )*
JoinChar : '.' | ':'
-Comment : ';' ( SkipUnit | SkipLine [LF] )
+Comment : ';' ( SkipUnit | SkipLine )
SkipUnit : '~' Unit
-SkipLine : ( ~LF )*
+SkipLine : ( ~LF )* [LF]
OneDatum : BareString | CladDatum
-BareString : BareChar+
+BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )*
+ | BareChar+
CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|'
| '"' ( QuotStrChar | '\' StringEsc )* '"'
@@ -63,8 +67,9 @@ CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|'
BareChar : ALPHA | DIGIT
- | '!' | '$' | '%' | '*' | '+' | '-' | '.' | '/'
- | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~'
+ | '!' | '$' | '%' | '*' | '+'
+ | '-' | '/' | '<' | '=' | '>'
+ | '?' | '@' | '^' | '_' | '~'
PipeStrChar : ~( '|' | '\' )
@@ -76,7 +81,7 @@ HashExpr : Rune [ '\' BareString | CladDatum ]
| '%' Label ( '%' | '=' Datum )
| CladDatum
-List : Unit* [ '&' Unit ] Blank*
+List : Unit* [ Blank* '&' Unit ] Blank*
StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )*
diff --git a/spec/syntax.zbnf b/spec/syntax.zbnf
deleted file mode 100644
index d920845..0000000
--- a/spec/syntax.zbnf
+++ /dev/null
@@ -1,54 +0,0 @@
-Unit : Blank* ( Datum [Blank] | EOF )
-
-
-Blank : 9...13 | Comment
-
-Datum : OneDatum ( [JoinChar] OneDatum )*
-
-JoinChar : '.' | ':'
-
-
-Comment : ';' ( SkipUnit | SkipLine [LF] )
-
-SkipUnit : '~' Unit
-
-SkipLine : ( ~LF )*
-
-
-OneDatum : BareString | CladDatum
-
-BareString : BareChar+
-
-CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|'
- | '"' ( QuotStrChar | '\' StringEsc )* '"'
- | '#' HashExpr
- | '(' List ')' | '[' List ']' | '{' List '}'
- | "'" Datum | '`' Datum | ',' Datum
-
-
-BareChar : ALPHA | DIGIT
- | '!' | '$' | '%' | '*' | '+' | '-' | '.' | '/'
- | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~'
-
-
-PipeStrChar : ~( '|' | '\' )
-
-QuotStrChar : ~( '"' | '\' )
-
-HashExpr : Rune [ '\' BareString | CladDatum ]
- | '\' BareString
- | '%' Label ( '%' | '=' Datum )
- | CladDatum
-
-List : Unit* [ '&' Unit ] Blank*
-
-
-StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )*
- | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e'
- | 'x' ( HEXDIG{2} )+ ';'
- | 'u' HEXDIG{1,6} ';'
-
-
-Rune : ALPHA ( ALPHA | DIGIT ){0,5}
-
-Label : HEXDIG{1,12}