diff options
Diffstat (limited to 'docs/c1/grammar')
| -rw-r--r-- | docs/c1/grammar/abnf.txt | 32 | ||||
| -rw-r--r-- | docs/c1/grammar/peg.txt | 6 | ||||
| -rw-r--r-- | docs/c1/grammar/zbnf.txt | 5 |
3 files changed, 35 insertions, 8 deletions
diff --git a/docs/c1/grammar/abnf.txt b/docs/c1/grammar/abnf.txt index a5b9eca..aa67646 100644 --- a/docs/c1/grammar/abnf.txt +++ b/docs/c1/grammar/abnf.txt @@ -2,11 +2,27 @@ ; Compatible with: https://www.quut.com/abnfgen/ -; It's unclear whether this grammar is truly complete. It has been -; verified not to produce text that is rejected by the Zisp parser -; --except for Unicode escape sequences for surrogate code points-- -; but there may be some text that is accepted by the parser despite -; not being grammatical according to these rules. +; Unlike PEG, grammar rules in BNF are non-deterministic, which makes +; it much more challenging to express our naive parse logic. Whether +; this ABNF file is truly accurate is difficult to assess. + +; The abnfgen(1) tool linked above can be used to generate arbitrary +; strings matching the grammar in this file. These can be fed into +; the Zisp parser to reveal some potential bugs; either in the parser +; itself, or this ABNF grammar. + +; Note that the tool may generate Zisp string literals with Unicode +; escape sequences corresponding to surrogate code points; the parser +; may reject these. This is expected; it's difficult to rewrite this +; ABNF grammar to exclude those Unicode values. + +; Other minor inaccuracies that aren't important include: This ABNF +; forces line comments to be terminated with an LF character, when in +; fact the end-of-file may also terminate them; the same applies to +; hash-bang parsing which doesn't actually have to end in LF. These +; discrepancies won't make abnfgen(1) generate invalid strings; they +; only make this ABNF more strict than the Zisp parser, so it won't +; generate some strings that the parser would actually accept. Stream = [ Unit *( Blank Unit ) ] *Blank [Trail] @@ -52,7 +68,7 @@ RuneDotStr = "#" RuneName "\" SpecialStr RuneClad = "#" RuneName CladDatum -HashBang = "#" "!" *( SP / HTAB ) BareString +HashBang = "#" "!" 
*( SP / HTAB ) HBLine LF LabelRef = "#" "%" Label "%" @@ -101,6 +117,10 @@ RuneName = ALPHA *5( ALPHA / DIGIT ) Label = 1*12( HEXDIG ) +HBLine = 1*HBChar [ 1*( SP / HTAB ) *HBChar ] + +HBChar = %x00-08 / %x0b-1f / %x21-ff ; any but HT, LF, SP + RJoinDatum = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr / HashClad diff --git a/docs/c1/grammar/peg.txt b/docs/c1/grammar/peg.txt index 465123f..cee9c84 100644 --- a/docs/c1/grammar/peg.txt +++ b/docs/c1/grammar/peg.txt @@ -50,11 +50,12 @@ HexByte <- HEXDIG HEXDIG UnicodeSV <- HEXDIG+ RuneExpr <- Rune ( '\' BareString / CladDatum )? -HashBang <- '!' [\t ]* BareString +HashBang <- '!' [\t ]* HBLine '\n'? LabelExpr <- '%' Label ( '%' / '=' Datum ) HashDatum <- '\' BareString / CladDatum Rune <- ALPHA ( ALPHA / DIGIT )* +HBLine <- HBChars+ [\t ]* ( HBChars+ )? Label <- HEXDIG+ ParenList <- '(' ListBody ')' @@ -63,6 +64,9 @@ BraceList <- '{' ListBody '}' ListBody <- Unit* ( Blank* '&' Unit )? Blank* +HBChars <- ![\t\n ] . + + DIGIT <- [0-9] ALPHA <- [a-zA-Z] HEXDIG <- [0-9a-fA-F] diff --git a/docs/c1/grammar/zbnf.txt b/docs/c1/grammar/zbnf.txt index 0cbceab..704db22 100644 --- a/docs/c1/grammar/zbnf.txt +++ b/docs/c1/grammar/zbnf.txt @@ -50,11 +50,12 @@ HexByte : HEXDIG HEXDIG UnicodeSV : HEXDIG+ RuneExpr : Rune [ '\' BareString | CladDatum ] -HashBang : '!' ( SP | HTAB )* BareString +HashBang : '!' ( SP | HTAB )* HBLine [ LF ] LabelExpr : '%' Label ( '%' | '=' Datum ) HashDatum : '\' BareString | CladDatum Rune : ALPHA ( ALPHA | DIGIT )* +HBLine : HBChars+ ( SP | HTAB )* [ HBChars+ ] Label : HEXDIG+ ParenList : '(' ListBody ')' @@ -63,6 +64,8 @@ BraceList : '{' ListBody '}' ListBody : Unit* [ Blank* '&' Unit ] Blank* +HBChars : ~( SP | HTAB | LF ) + ;; Local Variables: ;; eval: (flyspell-mode -1) |
