summary | refs | log | tree | commit | diff
path: root/docs/c1/grammar
diff options
context:
space:
mode:
Diffstat (limited to 'docs/c1/grammar')
-rw-r--r-- docs/c1/grammar/abnf.txt 32
-rw-r--r-- docs/c1/grammar/peg.txt 6
-rw-r--r-- docs/c1/grammar/zbnf.txt 5
3 files changed, 35 insertions, 8 deletions
diff --git a/docs/c1/grammar/abnf.txt b/docs/c1/grammar/abnf.txt
index a5b9eca..aa67646 100644
--- a/docs/c1/grammar/abnf.txt
+++ b/docs/c1/grammar/abnf.txt
@@ -2,11 +2,27 @@
; Compatible with: https://www.quut.com/abnfgen/
-; It's unclear whether this grammar is truly complete. It has been
-; verified not to produce text that is rejected by the Zisp parser
-; --except for Unicode escape sequences for surrogate code points--
-; but there may be some text that is accepted by the parser despite
-; not being grammatical according to these rules.
+; Unlike PEG, grammar rules in BNF are non-deterministic, which makes
+; it much more challenging to express our naive parse logic. Whether
+; this ABNF file is truly accurate is difficult to assess.
+
+; The abnfgen(1) tool linked above can be used to generate arbitrary
+; strings matching the grammar in this file. These can be fed into
+; the Zisp parser to reveal potential bugs, either in the parser
+; itself or in this ABNF grammar.
+
+; Note that the tool may generate Zisp string literals with Unicode
+; escape sequences corresponding to surrogate code points; the parser
+; may reject these. This is expected; it's difficult to rewrite this
+; ABNF grammar to exclude those Unicode values.
+
+; Other minor, unimportant inaccuracies: this ABNF forces line
+; comments to be terminated with an LF character, when in fact the
+; end-of-file may also terminate them; the same applies to hash-bang
+; lines, which don't actually have to end in LF either. These
+; discrepancies won't make abnfgen(1) generate invalid strings; they
+; only make this ABNF more strict than the Zisp parser, so it won't
+; generate some strings that the parser would actually accept.
Stream = [ Unit *( Blank Unit ) ] *Blank [Trail]
@@ -52,7 +68,7 @@ RuneDotStr = "#" RuneName "\" SpecialStr
RuneClad = "#" RuneName CladDatum
-HashBang = "#" "!" *( SP / HTAB ) BareString
+HashBang = "#" "!" *( SP / HTAB ) HBLine LF
LabelRef = "#" "%" Label "%"
@@ -101,6 +117,10 @@ RuneName = ALPHA *5( ALPHA / DIGIT )
Label = 1*12( HEXDIG )
+HBLine = 1*HBChar [ 1*( SP / HTAB ) *HBChar ]
+
+HBChar = %x00-08 / %x0b-1f / %x21-ff ; any but HT, LF, SP
+
RJoinDatum = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad
/ LabelRef / LabelDef / HashStr / HashDotStr / HashClad
diff --git a/docs/c1/grammar/peg.txt b/docs/c1/grammar/peg.txt
index 465123f..cee9c84 100644
--- a/docs/c1/grammar/peg.txt
+++ b/docs/c1/grammar/peg.txt
@@ -50,11 +50,12 @@ HexByte <- HEXDIG HEXDIG
UnicodeSV <- HEXDIG+
RuneExpr <- Rune ( '\' BareString / CladDatum )?
-HashBang <- '!' [\t ]* BareString
+HashBang <- '!' [\t ]* HBLine '\n'?
LabelExpr <- '%' Label ( '%' / '=' Datum )
HashDatum <- '\' BareString / CladDatum
Rune <- ALPHA ( ALPHA / DIGIT )*
+HBLine <- HBChars+ [\t ]* ( HBChars+ )?
Label <- HEXDIG+
ParenList <- '(' ListBody ')'
@@ -63,6 +64,9 @@ BraceList <- '{' ListBody '}'
ListBody <- Unit* ( Blank* '&' Unit )? Blank*
+HBChars <- ![\t\n ] .
+
+
DIGIT <- [0-9]
ALPHA <- [a-zA-Z]
HEXDIG <- [0-9a-fA-F]
diff --git a/docs/c1/grammar/zbnf.txt b/docs/c1/grammar/zbnf.txt
index 0cbceab..704db22 100644
--- a/docs/c1/grammar/zbnf.txt
+++ b/docs/c1/grammar/zbnf.txt
@@ -50,11 +50,12 @@ HexByte : HEXDIG HEXDIG
UnicodeSV : HEXDIG+
RuneExpr : Rune [ '\' BareString | CladDatum ]
-HashBang : '!' ( SP | HTAB )* BareString
+HashBang : '!' ( SP | HTAB )* HBLine [ LF ]
LabelExpr : '%' Label ( '%' | '=' Datum )
HashDatum : '\' BareString | CladDatum
Rune : ALPHA ( ALPHA | DIGIT )*
+HBLine : HBChars+ ( SP | HTAB )* [ HBChars+ ]
Label : HEXDIG+
ParenList : '(' ListBody ')'
@@ -63,6 +64,8 @@ BraceList : '{' ListBody '}'
ListBody : Unit* [ Blank* '&' Unit ] Blank*
+HBChars : ~( SP | HTAB | LF )
+
;; Local Variables:
;; eval: (flyspell-mode -1)