summaryrefslogtreecommitdiff
path: root/docs/c1/grammar/abnf.txt
blob: aa676465a0cca1e2afe8f202c61a288add8e2747 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
; Standards-compliant ABNF (RFC 5234, RFC 7405)

; Compatible with: https://www.quut.com/abnfgen/

; Unlike PEG, grammar rules in BNF are non-deterministic, which makes
; it much more challenging to express our naive parse logic.  Whether
; this ABNF file is truly accurate is difficult to assess.

; The abnfgen(1) tool linked above can be used to generate arbitrary
; strings matching the grammar in this file.  These can be fed into
; the Zisp parser to reveal potential bugs, either in the parser
; itself or in this ABNF grammar.

; Note that the tool may generate Zisp string literals with Unicode
; escape sequences corresponding to surrogate code points; the parser
; may reject these.  This is expected; it's difficult to rewrite this
; ABNF grammar to exclude those Unicode values.

; Other minor inaccuracies that aren't important include: this ABNF
; forces line comments to be terminated with an LF character, when in
; fact the end-of-file may also terminate them; the same applies to
; hash-bang parsing, which doesn't actually have to end in LF.  These
; discrepancies won't make abnfgen(1) generate invalid strings; they
; only make this ABNF more strict than the Zisp parser, so it won't
; generate some strings that the parser would actually accept.


; Top-level rule: zero or more Units, each separated from the previous
; one by at least one Blank, then optional Blanks and an optional Trail.
Stream        = [ Unit *( Blank Unit ) ] *Blank [ Trail ]


; A Datum, optionally preceded by Blanks.
Unit          = *Blank Datum

; One whitespace octet (HT, LF, VT, FF, CR, SP in that order) or a
; terminated Comment.
Blank         = %x09 / %x0a / %x0b / %x0c / %x0d / %x20 / Comment

; What may close a Stream: a SkipLine or SkipUnit without the LF/Blank
; terminator that Comment requires, or ";~" followed only by Blanks.
Trail         = SkipLine / SkipUnit / ";~" *Blank


; Every form a single datum can take, grouped by family: plain strings
; and delimited data, rune forms, labels, hash forms, and the two
; composite forms (quote and join).
Datum         = BareString / SpecialStr / CladDatum
              / Rune / RuneStr / RuneDotStr / RuneClad
              / LabelRef / LabelDef
              / HashStr / HashDotStr / HashClad
              / QuoteExpr / JoinExpr

; A comment usable wherever a Blank is allowed: a SkipLine closed by
; LF, or a SkipUnit closed by one further Blank.
Comment       = SkipLine LF / SkipUnit Blank

; ";" on its own, or ";" followed by text that runs up to (but does
; not include) an LF and whose first octet is not "~".
SkipLine      = ";" [ SkipLStart *AnyButLF ]

; ";~" immediately followed by a Unit (presumably a datum comment
; that makes the parser discard that Unit -- confirm against parser).
SkipUnit      = ";~" Unit

; First octet of line-comment text: anything except LF (%x0a) and
; "~" (%x7e), so that ";~" is never read as a line comment.
SkipLStart    = %x00-09 / %x0b-7d / %x7f-ff

; Any octet except LF (%x0a).
AnyButLF      = %x00-09 / %x0b-ff


; Undelimited string: starts with a BareChar, so never with "+", "-"
; or a digit; later characters may also be Numeric.
BareString    = BareChar *( Numeric / BareChar )

; Undelimited string whose first character is a SpecStrChar (".", ":"
; or a Numeric); later characters may also be BareChars.
SpecialStr    = SpecStrChar *( BareChar / SpecStrChar )

; Delimited datum: a pipe-quoted or double-quoted string (both accept
; backslash escapes via StringEsc), or a List wrapped in "()", "[]"
; or "{}".
CladDatum     = "|"    *( PipeStrChar / "\" StringEsc ) "|"
              / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE
              / "(" List ")"
              / "[" List "]"
              / "{" List "}"

; "#" followed by a RuneName.
Rune          = "#" RuneName

; A Rune, a backslash, then a BareString.
RuneStr       = Rune "\" BareString

; A Rune, a backslash, then a SpecialStr.
RuneDotStr    = Rune "\" SpecialStr

; A Rune directly followed by a CladDatum (no separator).
RuneClad      = Rune CladDatum

; "#!" line: optional spaces/tabs, an HBLine, and a closing LF.
; NOTE(review): no other rule visible in this file references HashBang,
; so it is unreachable from Stream -- confirm whether that is intended.
HashBang      = "#!" *( HTAB / SP ) HBLine LF

; "#%" Label "%": presumably a reference to a labelled datum.
LabelRef      = "#%" Label "%"

; "#%" Label "=" Datum: presumably attaches the label to that Datum.
LabelDef      = "#%" Label "=" Datum

; "#", a backslash, then a BareString.
HashStr       = "#\" BareString

; "#", a backslash, then a SpecialStr.
HashDotStr    = "#\" SpecialStr

; "#" directly followed by a CladDatum.
HashClad      = "#" CladDatum

; A Datum prefixed by exactly one of the three quote characters:
; apostrophe, backtick, or comma.
QuoteExpr     = ( "'" / "`" / "," ) Datum

; Two data written back to back, with or without a joining character.
; The rule is left-recursive through Datum, so joins can chain.
;
;   1. Datum RJoinDatum       -- no separator; the right side may not be
;                                a BareString, SpecialStr or JoinExpr
;                                (RJoinDatum omits them).
;   2. LJoinDatum NoStartDot  -- no separator; the left side is limited
;                                to CladDatum, RuneClad, LabelRef and
;                                HashClad, the right side to anything
;                                but SpecialStr or JoinExpr.
;   3. Datum ":" Datum        -- joined by a colon.
;   4. NoEndDot "." Datum     -- joined by a dot; the left side is
;                                limited to the forms in NoEndDot.
JoinExpr      = Datum RJoinDatum
              / LJoinDatum NoStartDot
              / Datum ":" Datum
              / NoEndDot "." Datum


; Characters that may start or continue a BareString: letters plus a
; fixed set of punctuation.
BareChar      = ALPHA
              / "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">"
              / "?" / "^" / "_" / "~"

; Sign characters and decimal digits.
Numeric       = DIGIT / "+" / "-"

; Characters that may start a SpecialStr.
SpecStrChar   = Numeric / "." / ":"

; Unescaped content of a "|...|" string: any octet except "\" (%x5c)
; and "|" (%x7c).
PipeStrChar   = %x00-5b / %x5d-7b / %x7d-ff

; Unescaped content of a double-quoted string: any octet except
; DQUOTE (%x22) and "\" (%x5c).
QuotStrChar   = %x00-21 / %x23-5b / %x5d-ff

; What may follow the backslash inside either string form of CladDatum:
;   - a literal "\", "|" or DQUOTE;
;   - an LF with optional spaces/tabs on both sides (presumably a
;     line continuation);
;   - one of the case-sensitive letters a b t n v f r e;
;   - "x", zero or more pairs of hex digits, then ";";
;   - "u", a hex code point, then ";".  The two "u" alternatives
;     together admit at most six digits and nothing above 10FFFF.
StringEsc     = "\" / "|" / DQUOTE
              / *( SP / HTAB ) LF *( SP / HTAB )
              / %s"a" / %s"b" / %s"t" / %s"n"
              / %s"v" / %s"f" / %s"r" / %s"e"
              / %s"x" *( HEXDIG HEXDIG ) ";"
              / %s"u" [ "0" ] 1*5HEXDIG ";"
              / %s"u" "10" 4HEXDIG ";"

; Contents of a "(...)", "[...]" or "{...}" CladDatum: zero or more
; Units separated by at least one Blank, then optional Blanks, an
; optional Tail, and an optional SkipUnit.  The final SkipUnit needs no
; trailing Blank, unlike a SkipUnit used as a Comment.
List          = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit]

; "&" followed by one Unit and optional Blanks; allowed at most once,
; after the regular items of a List.
Tail          = "&" Unit *Blank


; One letter followed by up to five letters or digits (1-6 characters).
RuneName      = ALPHA *5( DIGIT / ALPHA )

; One to twelve hexadecimal digits.
Label         = 1*12HEXDIG

; Body of a hash-bang line: a run of HBChars, optionally followed by
; spaces/tabs and a further (possibly empty) run of HBChars.
HBLine        = 1*HBChar [ 1*( HTAB / SP ) *HBChar ]

; Any octet except HT (%x09), LF (%x0a) and SP (%x20).
HBChar        = %x00-08 / %x0b-1f / %x21-ff


; Right-hand side of a separator-less join (JoinExpr alternative 1):
; every Datum form except BareString, SpecialStr and JoinExpr.
RJoinDatum    = CladDatum
              / Rune / RuneStr / RuneDotStr / RuneClad
              / LabelRef / LabelDef
              / HashStr / HashDotStr / HashClad
              / QuoteExpr

; Left-hand side of a separator-less join (JoinExpr alternative 2).
LJoinDatum    = CladDatum / RuneClad / LabelRef / HashClad

; Right-hand side for LJoinDatum: every Datum form except SpecialStr
; and JoinExpr, i.e. BareString plus the members of RJoinDatum.
NoStartDot    = BareString / CladDatum
              / Rune / RuneStr / RuneDotStr / RuneClad
              / LabelRef / LabelDef
              / HashStr / HashDotStr / HashClad
              / QuoteExpr

; Left-hand side of a "." join (JoinExpr alternative 4).
NoEndDot      = BareString
              / Rune / RuneStr / RuneClad
              / LabelRef
              / HashStr / HashClad


;; Local Variables:
;; eval: (flyspell-mode -1)
;; End: