diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2026-01-08 14:55:20 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2026-01-08 14:55:20 +0100 |
| commit | 5cd61ba424bedfa88f06463f3ba8f3900f0c7d2d (patch) | |
| tree | 468af908b7b7f2fd6182cd4ad038c155632f5b7d | |
| parent | 8012e3fe177069a709f30d2ab4a18ff11025c86f (diff) | |
Fix syntax spec in ABNF.
| -rw-r--r-- | spec/syntax.abnf | 57 | ||||
| -rw-r--r-- | spec/syntax.md | 31 | ||||
| -rw-r--r-- | spec/syntax.zbnf | 54 |
3 files changed, 59 insertions, 83 deletions
diff --git a/spec/syntax.abnf b/spec/syntax.abnf index 6ee024d..a083eda 100644 --- a/spec/syntax.abnf +++ b/spec/syntax.abnf @@ -1,23 +1,39 @@ -Unit = *Blank Datum [ Blank ] +;This file follows strict ABNF rules and can be used with abnfgen. +File = [Unit] *( Blank Unit ) *Blank [Trail] + + +Unit = *Blank Datum + Blank = HTAB / LF / %x0b / %x0c / CR / Comment -Datum = OneDatum *( [JoinChar] OneDatum ) +Trail = SkipLine / SkipUnit + + +Comment = SkipLine LF / SkipUnit Blank + +SkipLine = ';' [ SkipLStart *AnyButLF ] -JoinChar = '.' / ':' +SkipUnit = ';' '~' Unit -Comment = ';' ( SkipUnit / SkipLine [LF] ) +SkipLStart = %x00-09 / %x0b-7d / %x7f-ff + ; any but LF or '~' -SkipUnit = '~' Unit +AnyButLF = %x00-09 / %x0b-ff -SkipLine = *( %x00-09 / %x0b-ff ) ; any but LF +Datum = SingleDatum + / JoinedDatum *( [ '.' / ':' ] JoinedDatum ) -OneDatum = BareString / CladDatum -BareString = 1*BareChar +SingleDatum = BareString / CladDatum / DottedString + +JoinedDatum = BareString / CladDatum + + +BareString = BareChar *( BareChar / Numeric ) CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|' / '"' *( QuotStrChar / '\' StringEsc ) '"' @@ -25,28 +41,37 @@ CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|' / '(' List ')' / '[' List ']' / '{' List '}' / "'" Datum / '`' Datum / ',' Datum +DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar ) -BareChar = ALPHA / DIGIT - / '!' / '$' / '%' / '*' / '+' / '-' / '.' / '/' - / '<' / '=' / '>' / '?' / '@' / '^' / '_' / '~' +BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>' + / '?' / '@' / '^' / '_' / '~' / ALPHA -PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but | or \ +Numeric = '+' / '-' / DIGIT -QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but " or \ + +PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff + ; any but '|' or '\' + +QuotStrChar = %x00-21 / %x23-5b / %x5d-ff + ; any but '"' or '\' HashExpr = Rune [ '\' BareString / CladDatum ] / '\' BareString / '%' Label ( '%' / '=' Datum ) / CladDatum -List = *Unit [ '&' Unit ] *Blank +List = [Unit] *( Blank Unit ) *Blank [Tail] [SkipUnit] + +Tail = '&' Unit *Blank StringEsc = '\' / '|' / '"' / *( HTAB / SP ) LF *( HTAB / SP ) / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' - / 'x' 1*( 2*HEXDIG ) ';' - / 'u' 1*6( HEXDIG ) ';' + / 'x' 1*( 2HEXDIG ) ';' + / 'u' 1*5HEXDIG ';' + / 'u' '0' 1*5HEXDIG ';' + / 'u' '1' '0' 1*4HEXDIG ';' Rune = ALPHA *5( ALPHA / DIGIT ) diff --git a/spec/syntax.md b/spec/syntax.md index affa7a1..7f3561c 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -6,7 +6,7 @@ We use a BNF notation with the following rules: followed by `bar`. * Expressions may be followed by `?`, `*`, `+`, `{N}`, or `{N,M}`, - which have meanings analogous to regular expressions. + which have the same meanings as in regular expressions. * The syntax `[foo]` is shorthand for `(foo)?`. @@ -20,21 +20,24 @@ We use a BNF notation with the following rules: * Ranges of terminal values are expressed as `x...y` (inclusive). -* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported, with the - addition of EOF to explicitly demarcate the end of the byte stream. +* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported. -* There is no ambiguity, backtracking, or look-ahead beyond one byte. +* There is no ambiguity, or look-ahead / backtracking beyond one byte. Rules match left to right, depth-first, and greedy. As soon as the input matches the first terminal of a rule, it must match that rule to the end or it is considered a syntax error. The last rule means that the BNF is very simple to translate to code. +It also probably makes it equivalent to PEG. -The parser consumes one `unit` from an input stream every time it's -called; it returns the `datum` therein, or EOF. +The parser consumes one `Unit` from an input stream every time it's +called; it returns the `Datum` therein, or EOF. The final optional +`Blank` represents the fact that the parser will consume one more +blank at the end if it finds one; this is because `Datum` is not +self-closing so the parser has to check if it goes on. ``` -Unit : Blank* ( Datum [Blank] | EOF ) +Unit : Blank* [ Datum [Blank] ] Blank : 9...13 | Comment @@ -44,16 +47,17 @@ Datum : OneDatum ( [JoinChar] OneDatum )* JoinChar : '.' | ':' -Comment : ';' ( SkipUnit | SkipLine [LF] ) +Comment : ';' ( SkipUnit | SkipLine ) SkipUnit : '~' Unit -SkipLine : ( ~LF )* +SkipLine : ( ~LF )* [LF] OneDatum : BareString | CladDatum -BareString : BareChar+ +BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )* + | BareChar+ CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|' | '"' ( QuotStrChar | '\' StringEsc )* '"' @@ -63,8 +67,9 @@ CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|' BareChar : ALPHA | DIGIT - | '!' | '$' | '%' | '*' | '+' | '-' | '.' | '/' - | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~' + | '!' | '$' | '%' | '*' | '+' + | '-' | '/' | '<' | '=' | '>' + | '?' | '@' | '^' | '_' | '~' PipeStrChar : ~( '|' | '\' ) @@ -76,7 +81,7 @@ HashExpr : Rune [ '\' BareString | CladDatum ] | '%' Label ( '%' | '=' Datum ) | CladDatum -List : Unit* [ '&' Unit ] Blank* +List : Unit* [ Blank* '&' Unit ] Blank* StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )* diff --git a/spec/syntax.zbnf b/spec/syntax.zbnf deleted file mode 100644 index d920845..0000000 --- a/spec/syntax.zbnf +++ /dev/null @@ -1,54 +0,0 @@ -Unit : Blank* ( Datum [Blank] | EOF ) - - -Blank : 9...13 | Comment - -Datum : OneDatum ( [JoinChar] OneDatum )* - -JoinChar : '.' | ':' - - -Comment : ';' ( SkipUnit | SkipLine [LF] ) - -SkipUnit : '~' Unit - -SkipLine : ( ~LF )* - - -OneDatum : BareString | CladDatum - -BareString : BareChar+ - -CladDatum : '|' ( PipeStrChar | '\' StringEsc )* '|' - | '"' ( QuotStrChar | '\' StringEsc )* '"' - | '#' HashExpr - | '(' List ')' | '[' List ']' | '{' List '}' - | "'" Datum | '`' Datum | ',' Datum - - -BareChar : ALPHA | DIGIT - | '!' | '$' | '%' | '*' | '+' | '-' | '.' | '/' - | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~' - - -PipeStrChar : ~( '|' | '\' ) - -QuotStrChar : ~( '"' | '\' ) - -HashExpr : Rune [ '\' BareString | CladDatum ] - | '\' BareString - | '%' Label ( '%' | '=' Datum ) - | CladDatum - -List : Unit* [ '&' Unit ] Blank* - - -StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )* - | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e' - | 'x' ( HEXDIG{2} )+ ';' - | 'u' HEXDIG{1,6} ';' - - -Rune : ALPHA ( ALPHA | DIGIT ){0,5} - -Label : HEXDIG{1,12} |
