diff options
Diffstat (limited to 'doc/0/grammar/abnf.txt')
| -rw-r--r-- | doc/0/grammar/abnf.txt | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/doc/0/grammar/abnf.txt b/doc/0/grammar/abnf.txt
new file mode 100644
index 0000000..5ab3c89
--- /dev/null
+++ b/doc/0/grammar/abnf.txt
@@ -0,0 +1,139 @@
+; Standards-compliant ABNF (RFC 5234, RFC 7405)
+
+; Compatible with: https://www.quut.com/abnfgen/
+
+; Unlike PEG, grammar rules in BNF are non-deterministic, which makes
+; it much more challenging to express our naive parse logic. Whether
+; this ABNF file is truly accurate is difficult to assess.
+
+; The abnfgen(1) tool linked above can be used to generate arbitrary
+; strings matching the grammar in this file. These can be fed into
+; the Zisp parser to reveal some potential bugs; either in the parser
+; itself, or this ABNF grammar.
+
+; Note that the tool may generate Zisp string literals with Unicode
+; escape sequences corresponding to surrogate code points; the parser
+; may reject these. This is expected; it's difficult to rewrite this
+; ABNF grammar to exclude those Unicode values.
+
+; Other minor inaccuracies that aren't important include: This ABNF
+; forces line comments to be terminated with an LF character, when in
+; fact the end-of-file may also terminate them; the same applies to
+; hash-bang parsing which doesn't actually have to end in LF. These
+; discrepancies won't make abnfgen(1) generate invalid strings; they
+; only make this ABNF more strict than the Zisp parser, so it won't
+; generate some strings that the parser would actually accept.
+
+; Top level: Blank-separated Units, then optional trailing Blanks and an optional Trail.
+Stream      = [ Unit *( Blank Unit ) ] *Blank [Trail]
+
+; Units, blanks, and what may end the stream.
+Unit        = *Blank Datum
+
+Blank       = HTAB / LF / %x0b / %x0c / CR / SP / Comment   ; %x0b is VT, %x0c is FF
+
+Trail       = SkipLine / SkipUnit / ";" "~" *Blank   ; third form: ";~" with no Datum after it
+
+; Datum alternatives, followed by the comment forms that count as Blank.
+Datum       = BareString / SpecialStr / CladDatum / Rune / RuneStr
+            / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr
+            / HashDotStr / HashClad / QuoteExpr / JoinExpr
+
+Comment     = SkipLine LF / SkipUnit Blank
+
+SkipLine    = ";" [ SkipLStart *AnyButLF ]   ; first char after ";" is never "~"
+
+SkipUnit    = ";" "~" Unit
+
+SkipLStart  = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or "~"
+
+AnyButLF    = %x00-09 / %x0b-ff
+
+; The individual Datum forms.
+BareString  = BareChar *( BareChar / Numeric )
+
+SpecialStr  = SpecStrChar *( SpecStrChar / BareChar )
+
+CladDatum   = "|" *( PipeStrChar / "\" StringEsc ) "|"
+            / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE
+            / "(" List ")"
+            / "[" List "]"
+            / "{" List "}"
+
+Rune        = "#" RuneName
+
+RuneStr     = "#" RuneName "\" BareString
+
+RuneDotStr  = "#" RuneName "\" SpecialStr
+
+RuneClad    = "#" RuneName CladDatum
+; NOTE(review): no rule in this file references HashBang, so it is unreachable from Stream; confirm.
+HashBang    = "#" "!" *( SP / HTAB ) HBLine LF
+
+LabelRef    = "#" "%" Label "%"
+
+LabelDef    = "#" "%" Label "=" Datum
+
+HashStr     = "#" "\" BareString
+
+HashDotStr  = "#" "\" SpecialStr
+
+HashClad    = "#" CladDatum
+
+QuoteExpr   = "'" Datum
+            / "`" Datum
+            / "," Datum
+
+JoinExpr    = Datum RJoinDatum
+            / LJoinDatum NoStartDot
+            / Datum ":" Datum
+            / NoEndDot "." Datum
+
+; Character classes, string escapes, and list contents.
+BareChar    = "!" / "$" / "%" / "&" / "*" / "/" / "<" / "=" / ">"
+            / "?" / "^" / "_" / "~" / ALPHA
+
+Numeric     = "+" / "-" / DIGIT
+
+SpecStrChar = "." / ":" / Numeric
+
+PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but "|" or "\"
+
+QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but DQUOTE or "\"
+
+StringEsc   = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP )
+            / %s"a" / %s"b" / %s"t" / %s"n"   ; %s marks a case-sensitive literal (RFC 7405)
+            / %s"v" / %s"f" / %s"r" / %s"e"
+            / %s"x" *( 2HEXDIG ) ";"          ; zero or more pairs of hex digits
+            / %s"u" ["0"] 1*5HEXDIG ";"       ; values up to 0FFFFF
+            / %s"u" "1" "0" 4HEXDIG ";"       ; values 100000 to 10FFFF
+
+List        = [ Unit *( Blank Unit ) ] *Blank [SkipUnit]
+
+; Name, label, and hash-bang line components.
+RuneName    = ALPHA *5( ALPHA / DIGIT )   ; 1 to 6 characters, the first a letter
+
+Label       = 1*12( HEXDIG )   ; 1 to 12 hex digits
+
+HBLine      = 1*HBChar [ 1*( SP / HTAB ) *HBChar ]
+
+HBChar      = %x00-08 / %x0b-1f / %x21-ff ; any but HT, LF, SP
+
+; Restricted Datum subsets used as JoinExpr operands.
+RJoinDatum  = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad
+            / LabelRef / LabelDef / HashStr / HashDotStr / HashClad
+            / QuoteExpr
+
+LJoinDatum  = CladDatum / RuneClad / LabelRef / HashClad
+
+NoStartDot  = BareString / CladDatum / Rune / RuneStr / RuneDotStr
+            / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr
+            / HashClad / QuoteExpr
+
+NoEndDot    = BareString / Rune / RuneStr / RuneClad / LabelRef
+            / HashStr / HashClad
+
+
+;; Local Variables:
+;; eval: (flyspell-mode -1)
+;; End:
