From 2d72a1aa64a66c486a2329999123c14afcddeb32 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Fri, 9 Jan 2026 18:09:59 +0100 Subject: More grammar fuckery. BNF is horrible! --- spec/syntax.abnf | 65 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'spec/syntax.abnf') diff --git a/spec/syntax.abnf b/spec/syntax.abnf index a083eda..132deeb 100644 --- a/spec/syntax.abnf +++ b/spec/syntax.abnf @@ -6,42 +6,52 @@ File = [Unit] *( Blank Unit ) *Blank [Trail] Unit = *Blank Datum -Blank = HTAB / LF / %x0b / %x0c / CR / Comment +Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment Trail = SkipLine / SkipUnit +Datum = BareString + / DottedString + / CladDatum + / HashExpr + / HashDotExpr + / QuoteExpr + / JoinExpr + Comment = SkipLine LF / SkipUnit Blank SkipLine = ';' [ SkipLStart *AnyButLF ] -SkipUnit = ';' '~' Unit - - SkipLStart = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or '~' AnyButLF = %x00-09 / %x0b-ff - -Datum = SingleDatum - / JoinedDatum *( [ '.' / ':' ] JoinedDatum ) - - -SingleDatum = BareString / CladDatum / DottedString - -JoinedDatum = BareString / CladDatum +SkipUnit = ';' '~' Unit BareString = BareChar *( BareChar / Numeric ) +DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar ) + CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|' / '"' *( QuotStrChar / '\' StringEsc ) '"' - / '#' HashExpr / '(' List ')' / '[' List ']' / '{' List '}' - / "'" Datum / '`' Datum / ',' Datum -DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar ) +HashExpr = LabelExpr / RuneExpr / HashDatum + +HashDotExpr = RuneDotExpr / HashDotDatum + +QuoteExpr = "'" Datum / '`' Datum / ',' Datum + +JoinExpr = Datum LeftCladDatum + / Datum ':' Datum + / DotlessDatum '.' Datum + +LeftCladDatum = CladDatum / HashExpr / QuoteExpr + +DotlessDatum = BareString / CladDatum / RuneExpr / HashDatum BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>' @@ -49,29 +59,36 @@ BareChar = '!' 
/ '$' / '%' / '*' / '/' / '<' / '=' / '>' Numeric = '+' / '-' / DIGIT - PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but '|' or '\' QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but '"' or '\' -HashExpr = Rune [ '\' BareString / CladDatum ] - / '\' BareString - / '%' Label ( '%' / '=' Datum ) - / CladDatum - List = [Unit] *( Blank Unit ) *Blank [Tail] [SkipUnit] Tail = '&' Unit *Blank +LabelExpr = '#' '%' Label ( '%' / '=' Datum ) + +RuneExpr = '#' Rune [ '\' BareString / CladDatum ] + +RuneDotExpr = '#' Rune '\' DottedString + +HashDatum = '#' ( '\' BareString / CladDatum ) + +HashDotDatum = '#' '\' DottedString + +; Unicode escapes must not represent surrogate code points. +; This is difficult to express in ABNF. But we do at least +; disallow code points greater than \u10FFFF which are also +; invalid, since U+10FFFF is the highest allowed. StringEsc = '\' / '|' / '"' / *( HTAB / SP ) LF *( HTAB / SP ) / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' / 'x' 1*( 2HEXDIG ) ';' - / 'u' 1*5HEXDIG ';' - / 'u' '0' 1*5HEXDIG ';' - / 'u' '1' '0' 1*4HEXDIG ';' + / 'u' ['0'] 1*5HEXDIG ';' + / 'u' '1' '0' 4HEXDIG ';' Rune = ALPHA *5( ALPHA / DIGIT ) -- cgit v1.2.3