summaryrefslogtreecommitdiff
path: root/spec/syntax.abnf
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2026-01-09 18:09:59 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2026-01-09 18:09:59 +0100
commit2d72a1aa64a66c486a2329999123c14afcddeb32 (patch)
tree4eba98eb1240d3d445e2d35c61bad63d352e413b /spec/syntax.abnf
parenta2ece405cc61341122fc075d499420e894c56909 (diff)
More grammar fuckery. BNF is horrible!
Diffstat (limited to 'spec/syntax.abnf')
-rw-r--r--spec/syntax.abnf65
1 files changed, 41 insertions, 24 deletions
diff --git a/spec/syntax.abnf b/spec/syntax.abnf
index a083eda..132deeb 100644
--- a/spec/syntax.abnf
+++ b/spec/syntax.abnf
@@ -6,42 +6,52 @@ File = [Unit] *( Blank Unit ) *Blank [Trail]
Unit = *Blank Datum
-Blank = HTAB / LF / %x0b / %x0c / CR / Comment
+Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment
Trail = SkipLine / SkipUnit
+Datum = BareString
+ / DottedString
+ / CladDatum
+ / HashExpr
+ / HashDotExpr
+ / QuoteExpr
+ / JoinExpr
+
Comment = SkipLine LF / SkipUnit Blank
SkipLine = ';' [ SkipLStart *AnyButLF ]
-SkipUnit = ';' '~' Unit
-
-
SkipLStart = %x00-09 / %x0b-7d / %x7f-ff
; any but LF or '~'
AnyButLF = %x00-09 / %x0b-ff
-
-Datum = SingleDatum
- / JoinedDatum *( [ '.' / ':' ] JoinedDatum )
-
-
-SingleDatum = BareString / CladDatum / DottedString
-
-JoinedDatum = BareString / CladDatum
+SkipUnit = ';' '~' Unit
BareString = BareChar *( BareChar / Numeric )
+DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar )
+
CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|'
/ '"' *( QuotStrChar / '\' StringEsc ) '"'
- / '#' HashExpr
/ '(' List ')' / '[' List ']' / '{' List '}'
- / "'" Datum / '`' Datum / ',' Datum
-DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar )
+HashExpr = LabelExpr / RuneExpr / HashDatum
+
+HashDotExpr = RuneDotExpr / HashDotDatum
+
+QuoteExpr = "'" Datum / '`' Datum / ',' Datum
+
+JoinExpr = Datum LeftCladDatum
+ / Datum ':' Datum
+ / DotlessDatum '.' Datum
+
+LeftCladDatum = CladDatum / HashExpr / QuoteExpr
+
+DotlessDatum = BareString / CladDatum / RuneExpr / HashDatum
BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>'
@@ -49,29 +59,36 @@ BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>'
Numeric = '+' / '-' / DIGIT
-
PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff
; any but '|' or '\'
QuotStrChar = %x00-21 / %x23-5b / %x5d-ff
; any but '"' or '\'
-HashExpr = Rune [ '\' BareString / CladDatum ]
- / '\' BareString
- / '%' Label ( '%' / '=' Datum )
- / CladDatum
-
List = [Unit] *( Blank Unit ) *Blank [Tail] [SkipUnit]
Tail = '&' Unit *Blank
+LabelExpr = '#' '%' Label ( '%' / '=' Datum )
+
+RuneExpr = '#' Rune [ '\' BareString / CladDatum ]
+
+RuneDotExpr = '#' Rune '\' DottedString
+
+HashDatum = '#' ( '\' BareString / CladDatum ) ; grouped so '#' prefixes both alternatives
+
+HashDotDatum = '#' '\' DottedString
+
+; Unicode escapes must not represent surrogate code points.
+; This is difficult to express in ABNF. But we do at least
+; disallow code points greater than \u10FFFF which are also
+; invalid, since U+10FFFF is the highest allowed.
StringEsc = '\' / '|' / '"' / *( HTAB / SP ) LF *( HTAB / SP )
/ 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e'
/ 'x' 1*( 2HEXDIG ) ';'
- / 'u' 1*5HEXDIG ';'
- / 'u' '0' 1*5HEXDIG ';'
- / 'u' '1' '0' 1*4HEXDIG ';'
+ / 'u' ['0'] 1*5HEXDIG ';'
+ / 'u' '1' '0' 4HEXDIG ';'
Rune = ALPHA *5( ALPHA / DIGIT )