summaryrefslogtreecommitdiff
path: root/doc/c1/grammar/abnf.txt
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2026-06-01 21:49:37 +0200
committerTaylan Kammer <taylan.kammer@gmail.com>2026-06-01 21:49:37 +0200
commit724ac8ae394675a78c2977c6e35555b210256e01 (patch)
treed7f5574b49ec71341ea8079f18a33b9c17b60221 /doc/c1/grammar/abnf.txt
parent9ce0aa66cedc985322e06db4bac130910610c113 (diff)
docs -> doc
Diffstat (limited to 'doc/c1/grammar/abnf.txt')
-rw-r--r--doc/c1/grammar/abnf.txt141
1 files changed, 141 insertions, 0 deletions
diff --git a/doc/c1/grammar/abnf.txt b/doc/c1/grammar/abnf.txt
new file mode 100644
index 0000000..aa67646
--- /dev/null
+++ b/doc/c1/grammar/abnf.txt
@@ -0,0 +1,141 @@
+; Standards-compliant ABNF (RFC 5234, RFC 7405)
+
+; Compatible with: https://www.quut.com/abnfgen/
+
+; Unlike PEG, grammar rules in BNF are non-deterministic, which makes
+; it much more challenging to express our naive parse logic. Whether
+; this ABNF file is truly accurate is difficult to assess.
+
+; The abnfgen(1) tool linked above can be used to generate arbitrary
+; strings matching the grammar in this file. These can be fed into
+; the Zisp parser to reveal potential bugs, either in the parser
+; itself or in this ABNF grammar.
+
+; Note that the tool may generate Zisp string literals with Unicode
+; escape sequences corresponding to surrogate code points; the parser
+; may reject these. This is expected; it's difficult to rewrite this
+; ABNF grammar to exclude those Unicode values.
+
+; Other minor inaccuracies that aren't important include: This ABNF
+; forces line comments to be terminated with an LF character, when in
+; fact the end-of-file may also terminate them; the same applies to
+; hash-bang parsing which doesn't actually have to end in LF. These
+; discrepancies won't make abnfgen(1) generate invalid strings; they
+; only make this ABNF more strict than the Zisp parser, so it won't
+; generate some strings that the parser would actually accept.
+
+
+Stream = [ Unit *( Blank Unit ) ] *Blank [Trail] ; every part is optional, so the empty string matches
+
+
+Unit = *Blank Datum ; a Datum with any number of leading Blanks
+
+Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment ; %x0b is VT, %x0c is FF; a Comment also counts as a Blank
+
+Trail = SkipLine / SkipUnit / ";" "~" *Blank ; only referenced at the end of Stream; lacks the LF / Blank that Comment requires
+
+
+Datum = BareString / SpecialStr / CladDatum / Rune / RuneStr
+ / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr
+ / HashDotStr / HashClad / QuoteExpr / JoinExpr ; recursive via CladDatum, LabelDef, QuoteExpr, JoinExpr
+
+Comment = SkipLine LF / SkipUnit Blank ; both forms must be followed by a terminator (LF or a Blank)
+
+SkipLine = ";" [ SkipLStart *AnyButLF ] ; line comment body, without the terminating LF
+
+SkipUnit = ";" "~" Unit ; ";~" followed by one Unit
+
+SkipLStart = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or "~"
+
+AnyButLF = %x00-09 / %x0b-ff ; any value except LF (%x0a)
+
+
+BareString = BareChar *( BareChar / Numeric ) ; Numeric characters are allowed anywhere except first
+
+SpecialStr = SpecStrChar *( SpecStrChar / BareChar ) ; must start with ".", ":" or a Numeric character
+
+CladDatum = "|" *( PipeStrChar / "\" StringEsc ) "|"
+ / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE
+ / "(" List ")"
+ / "[" List "]"
+ / "{" List "}" ; first two forms: delimited text with "\" escapes; the three bracket pairs all wrap the same List rule
+
+Rune = "#" RuneName ; RuneName is 1 to 6 characters (see RuneName)
+
+RuneStr = "#" RuneName "\" BareString ; Rune, then "\" and a BareString
+
+RuneDotStr = "#" RuneName "\" SpecialStr ; Rune, then "\" and a SpecialStr
+
+RuneClad = "#" RuneName CladDatum ; Rune directly followed by a CladDatum, no separator
+
+HashBang = "#" "!" *( SP / HTAB ) HBLine LF ; NOTE(review): no other rule in this grammar references HashBang -- confirm where it is meant to be used
+
+LabelRef = "#" "%" Label "%" ; Label is 1 to 12 hex digits (see Label)
+
+LabelDef = "#" "%" Label "=" Datum ; like LabelRef, but "=" Datum replaces the closing "%"
+
+HashStr = "#" "\" BareString ; same shape as RuneStr, without a RuneName
+
+HashDotStr = "#" "\" SpecialStr ; same shape as RuneDotStr, without a RuneName
+
+HashClad = "#" CladDatum ; same shape as RuneClad, without a RuneName
+
+QuoteExpr = "'" Datum
+ / "`" Datum
+ / "," Datum ; each form is a one-character prefix followed directly by a Datum
+
+JoinExpr = Datum RJoinDatum
+ / LJoinDatum NoStartDot
+ / Datum ":" Datum
+ / NoEndDot "." Datum ; first two forms: direct adjacency with restricted operands; last two: joined by ":" or "."
+
+
+BareChar = "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">"
+ / "?" / "^" / "_" / "~" / ALPHA ; ALPHA (RFC 5234 core rule) covers both upper and lower case letters
+
+Numeric = "+" / "-" / DIGIT ; the sign characters count as Numeric
+
+SpecStrChar = "." / ":" / Numeric ; the characters that may begin a SpecialStr
+
+PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but "|" or "\"
+
+QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but DQUOTE or "\"
+
+StringEsc = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP )
+ / %s"a" / %s"b" / %s"t" / %s"n"
+ / %s"v" / %s"f" / %s"r" / %s"e"
+ / %s"x" *( 2HEXDIG ) ";"
+ / %s"u" ["0"] 1*5HEXDIG ";"
+ / %s"u" "1" "0" 4HEXDIG ";" ; what may follow "\"; %s = case-sensitive (RFC 7405); the two "u" forms together span 0-10FFFF, surrogates included
+
+List = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit] ; same shape as Stream, with [Tail] [SkipUnit] in place of [Trail]
+
+Tail = "&" Unit *Blank ; "&", exactly one Unit, then optional Blanks
+
+
+RuneName = ALPHA *5( ALPHA / DIGIT ) ; 1 to 6 characters, the first a letter
+
+Label = 1*12( HEXDIG ) ; 1 to 12 hex digits
+
+HBLine = 1*HBChar [ 1*( SP / HTAB ) *HBChar ] ; starts with an HBChar; at most one run of SP/HTAB inside
+
+HBChar = %x00-08 / %x0b-1f / %x21-ff ; any but HT, LF, SP
+
+
+RJoinDatum = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad
+ / LabelRef / LabelDef / HashStr / HashDotStr / HashClad
+ / QuoteExpr ; Datum minus BareString, SpecialStr and JoinExpr
+
+LJoinDatum = CladDatum / RuneClad / LabelRef / HashClad ; every alternative ends in a closing delimiter (bracket, DQUOTE, "|" or "%")
+
+NoStartDot = BareString / CladDatum / Rune / RuneStr / RuneDotStr
+ / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr
+ / HashClad / QuoteExpr ; Datum minus SpecialStr and JoinExpr
+
+NoEndDot = BareString / Rune / RuneStr / RuneClad / LabelRef
+ / HashStr / HashClad ; Datum minus SpecialStr, CladDatum, RuneDotStr, LabelDef, HashDotStr, QuoteExpr and JoinExpr
+
+
+;; Local Variables:
+;; eval: (flyspell-mode -1)
+;; End: