docs -> doc

author: Taylan Kammer <taylan.kammer@gmail.com> 2026-06-01 21:49:37 +0200
committer: Taylan Kammer <taylan.kammer@gmail.com> 2026-06-01 21:49:37 +0200
commit: 724ac8ae394675a78c2977c6e35555b210256e01 (patch)
tree: d7f5574b49ec71341ea8079f18a33b9c17b60221 /docs/c1/grammar
parent: 9ce0aa66cedc985322e06db4bac130910610c113 (diff)
4 files changed, 0 insertions, 426 deletions
diff --git a/docs/c1/grammar/abnf.txt b/docs/c1/grammar/abnf.txt
deleted file mode 100644
index aa67646..0000000
--- a/docs/c1/grammar/abnf.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-; Standards-compliant ABNF (RFC 5234, RFC 7405)
-
-; Compatible with: https://www.quut.com/abnfgen/
-
-; Unlike PEG, grammar rules in BNF are non-deterministic, which makes
-; it much more challenging to express our naive parse logic.  Whether
-; this ABNF file is truly accurate is difficult to assess.
-
-; The abnfgen(1) tool linked above can be used to generate arbitrary
-; strings matching the grammar in this file.  These can be fed into
-; the Zisp parser to reveal some potential bugs, either in the parser
-; itself or in this ABNF grammar.
-
-; Note that the tool may generate Zisp string literals with Unicode
-; escape sequences corresponding to surrogate code points; the parser
-; may reject these.  This is expected; it's difficult to rewrite this
-; ABNF grammar to exclude those Unicode values.
-
-; Other minor inaccuracies that aren't important include: This ABNF
-; forces line comments to be terminated with an LF character, when in
-; fact the end-of-file may also terminate them; the same applies to
-; hash-bang parsing which doesn't actually have to end in LF.  These
-; discrepancies won't make abnfgen(1) generate invalid strings; they
-; only make this ABNF more strict than the Zisp parser, so it won't
-; generate some strings that the parser would actually accept.
-
-
-Stream        = [ Unit *( Blank Unit ) ] *Blank [Trail]
-
-
-Unit          = *Blank Datum
-
-Blank         = HTAB / LF / %x0b / %x0c / CR / SP / Comment
-
-Trail         = SkipLine / SkipUnit / ";" "~" *Blank
-
-
-Datum         = BareString / SpecialStr / CladDatum / Rune / RuneStr
-              / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr
-              / HashDotStr / HashClad / QuoteExpr / JoinExpr
-
-Comment       = SkipLine LF / SkipUnit Blank
-
-SkipLine      = ";" [ SkipLStart *AnyButLF ]
-
-SkipUnit      = ";" "~" Unit
-
-SkipLStart    = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or "~"
-
-AnyButLF      = %x00-09 / %x0b-ff
-
-
-BareString    = BareChar *( BareChar / Numeric )
-
-SpecialStr    = SpecStrChar *( SpecStrChar / BareChar )
-
-CladDatum     = "|" *( PipeStrChar / "\" StringEsc ) "|"
-              / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE
-              / "(" List ")"
-              / "[" List "]"
-              / "{" List "}"
-
-Rune          = "#" RuneName
-
-RuneStr       = "#" RuneName "\" BareString
-
-RuneDotStr    = "#" RuneName "\" SpecialStr
-
-RuneClad      = "#" RuneName CladDatum
-
-HashBang      = "#" "!" *( SP / HTAB ) HBLine LF
-
-LabelRef      = "#" "%" Label "%"
-
-LabelDef      = "#" "%" Label "=" Datum
-
-HashStr       = "#" "\" BareString
-
-HashDotStr    = "#" "\" SpecialStr
-
-HashClad      = "#" CladDatum
-
-QuoteExpr     = "'" Datum
-              / "`" Datum
-              / "," Datum
-
-JoinExpr      = Datum RJoinDatum
-              / LJoinDatum NoStartDot
-              / Datum ":" Datum
-              / NoEndDot "." Datum
-
-
-BareChar      = "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">"
-              / "?" / "^" / "_" / "~" / ALPHA
-
-Numeric       = "+" / "-" / DIGIT
-
-SpecStrChar   = "." / ":" / Numeric
-
-PipeStrChar   = %x00-5b / %x5d-7b / %x7d-ff ; any but "|" or "\"
-
-QuotStrChar   = %x00-21 / %x23-5b / %x5d-ff ; any but DQUOTE or "\"
-
-StringEsc     = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP )
-              / %s"a" / %s"b" / %s"t" / %s"n"
-              / %s"v" / %s"f" / %s"r" / %s"e"
-              / %s"x" *( 2HEXDIG ) ";"
-              / %s"u" ["0"] 1*5HEXDIG ";"
-              / %s"u" "1" "0" 4HEXDIG ";"
-
-List          = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit]
-
-Tail          = "&" Unit *Blank
-
-
-RuneName      = ALPHA *5( ALPHA / DIGIT )
-
-Label         = 1*12( HEXDIG )
-
-HBLine        = 1*HBChar [ 1*( SP / HTAB ) *HBChar ]
-
-HBChar        = %x00-08 / %x0b-1f / %x21-ff ; any but HT, LF, SP
-
-
-RJoinDatum    = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad
-              / LabelRef / LabelDef / HashStr / HashDotStr / HashClad
-              / QuoteExpr
-
-LJoinDatum    = CladDatum / RuneClad / LabelRef / HashClad
-
-NoStartDot    = BareString / CladDatum / Rune / RuneStr / RuneDotStr
-              / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr
-              / HashClad / QuoteExpr
-
-NoEndDot      = BareString / Rune / RuneStr / RuneClad / LabelRef
-              / HashStr / HashClad
-
-
-;; Local Variables:
-;; eval: (flyspell-mode -1)
-;; End:
diff --git a/docs/c1/grammar/index.md b/docs/c1/grammar/index.md
deleted file mode 100644
index e3716ea..0000000
--- a/docs/c1/grammar/index.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# Zisp S-Expression Grammar
-
-The grammar is available in several different formats:
-
-* [ZBNF](zbnf.txt): See below for the rules of this notation
-* [ABNF](abnf.txt): Compatible with the `abnfgen` tool
-* [PEG](peg.txt): Compatible with the `peg/leg` tool
-
-
-## ZBNF notation
-
-The ZBNF grammar specification uses a BNF-like notation with PEG-like
-semantics:
-
-* Concatenation of expressions is implicit: `foo bar` means `foo`
-  followed by `bar`.
-
-* Parentheses are used for grouping, and the pipe symbol `|` is used
-  for alternatives.
-
-* The suffixes `?`, `*`, and `+` have the same meaning as in regular
-  expressions, although `[foo]` is used in place of `(foo)?`.
-
-* The syntax is defined in terms of bytes, not characters.  Terminals
-  `'c'` and `"c"` refer to the ASCII value of the given character `c`.
-  Standard C escape sequences are supported.
-
-* The prefix `~` means NOT.  It only applies to rules that match one
-  byte, and negates them.  For example, `~( 'a' | 'b' )` matches any
-  byte other than 'a' and 'b'.
-
-* Ranges of terminal values are expressed as `x...y` (inclusive).
-
-* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported.
-
-* There is no ambiguity, or look-ahead / backtracking beyond one byte.
-  Rules match left to right, depth-first, and greedy.  As soon as the
-  input matches the first terminal of a rule --explicit or implied by
-  recursively descending into the first non-terminal-- it must match
-  that rule to the end or a syntax error is reported.
-
-The last point makes the notation simple to translate to code.
-
-
-## Limitations outside the grammar
-
-The following limits are not represented in the grammar:
-
-* A `UnicodeSV` is the hexadecimal representation of a Unicode scalar
-  value; it must represent a value in the range 0 to D7FF, or E000 to
-  10FFFF, inclusive.  Any other value signals an error.  Valid values
-  are converted into a UTF-8 byte sequence encoding the value.
-
-* A `Rune` longer than 6 bytes is grammatical, but signals an error.
-  This is important because runes are not self-terminating; defining
-  their grammar as ending after a maximum of 6 bytes would allow
-  another datum beginning with an alphabetic character to follow a
-  rune immediately without any visual delineation, which would be
-  terribly confusing for a human reader.  Consider: `#foobarbaz`.
-  This would parse as a `Datum` joining `#foobar` and `baz`.
-
-  (The ABNF does not suffer from this issue, since it explicitly
-   enumerates the join possibilities anyway.)
-
-* A `Label` is the hexadecimal representation of a 48-bit integer,
-  meaning it allows for a maximum of 12 hexadecimal digits.  Longer
-  values are grammatical, but signal an out-of-range error, so as to
-  avoid signaling a confusing "invalid character" error on input that
-  appears grammatical.  Consider: `#%123456789abcd=foo`.  This would
-  signal an invalid character error at the letter `d` if the grammar
-  limited a `Label` to 12 hexadecimal digits.
-
-  (As above, the ABNF doesn't care about this.  You probably don't
-   want to use the ABNF to generate a parser anyway.)
-
-
-## At-quoted strings
-
-The mechanism of at-quoted strings is not represented in any of the
-grammars, since it essentially has 256 variants.  Representing it
-sanely in a grammar requires the ability to save and reference
-variables.
-
-
-## Stream-parsing strategy
-
-The parser consumes one `Unit` from the input stream every time it's
-called; it returns the `Datum` therein if found, or else it returns
-the Zisp EOF token.
-
-Since a `Datum` is not self-terminating, the parser must read beyond
-it to realize that it has ended (if not followed by the EOF).  Thus,
-it will consume one more `Blank` following the `Unit` that it parsed.
-If this `Blank` is a comment, it will be consumed entirely, ensuring
-that parsing resumes properly on a subsequent parser call on the same
-input stream, without needing to store any state in between.
-
-Since comments of type `SkipUnit` are likewise not self-terminating,
-an arbitrary number of chained `SkipUnit` comments may need to be
-consumed before the parser is finally allowed to return.
-
-The following illustration shows the positions at which the parser
-will stop consuming input when called repeatedly on the same input
-stream.  The dots represent the extent of each `Unit` being parsed,
-while the caret points at the last byte the parser will consume in
-that parse cycle.
-
-```
-foo (bar)[baz] foo;~bar foo;~bar;~baz;~bat foobar
-...^..........^...     ^...               ^......^
-```
-
-Notice how, in the fourth cycle, the parser is forced to consume all
-commented-out units before it can return, since it would otherwise
-leave the stream in an inappropriate state.
diff --git a/docs/c1/grammar/peg.txt b/docs/c1/grammar/peg.txt
deleted file mode 100644
index 7b28a99..0000000
--- a/docs/c1/grammar/peg.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-# Standard PEG notation
-
-Stream       <- Unit ( Blank Unit )* !.
-
-
-Unit         <- Blank* Datum
-
-Blank        <- [\t-\r ] / Comment
-
-
-Datum        <- OneDatum ( JoinChar? OneDatum )*
-
-JoinChar     <- '.' / ':'
-
-
-Comment      <- ';' ( SkipUnit / SkipLine )
-
-SkipUnit     <- '~' Unit
-
-SkipLine     <- (!'\n' .)* '\n'?
-
-
-OneDatum     <- BareString / CladDatum
-
-
-BareString   <- SpecBareChar ( BareChar / JoinChar )*
-              / BareChar+
-
-SpecBareChar <- '+' / '-' / JoinChar / DIGIT
-
-BareChar     <- ALPHA / DIGIT
-              / '!' / '$' / '%' / '*' / '+' / '-' / '/'
-              / '<' / '=' / '>' / '?' / '^' / '_' / '~'
-
-
-CladDatum    <- PipeStr / QuoteStr / HashExpr / QuoteExpr / List
-
-PipeStr      <- '|' ( PipeStrChar / '\' StringEsc )* '|'
-QuoteStr     <- '"' ( QuotStrChar / '\' StringEsc )* '"'
-HashExpr     <- '#' HashExprs
-QuoteExpr    <- "'" Datum / '`' Datum / ',' Datum
-List         <- ParenList / SquareList / BraceList
-
-
-PipeStrChar  <- (![|\\] .)
-QuotStrChar  <- (!["\\] .)
-
-StringEsc    <- '\' / '|' / '"' / ( HTAB / SP )* LF ( HTAB / SP )*
-              / '0' / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e'
-              / 'x' HexByte* ';'
-              / 'u' UnicodeSV ';'
-
-HexByte      <- HEXDIG HEXDIG
-UnicodeSV    <- HEXDIG+
-
-
-HashExprs    <- '!' [\t ]* HBangLine '\n'?
-              / '%' Label ( '%' / '=' Datum )
-              / '\' BareString / CladDatum
-              / Rune ( '\' BareString / CladDatum )?
-
-HBangLine    <- HBChars+ [\t ]* ( HBChars+ )?
-HBChars      <- (![\t\n ] .)
-Label        <- HEXDIG+
-Rune         <- ALPHA ( ALPHA / DIGIT )*
-
-
-ParenList    <- '(' ListBody ')'
-SquareList   <- '[' ListBody ']'
-BraceList    <- '{' ListBody '}'
-
-ListBody     <- Unit* ( Blank* '&' Unit )? Blank*
-
-
-DIGIT        <- [0-9]
-ALPHA        <- [a-zA-Z]
-HEXDIG       <- [0-9a-fA-F]
-
-
-# Keep this in sync line-for-line with the ZBNF grammar for easy
-# comparison between the two.
-
-# This file is meant to be compatible with:
-# https://piumarta.com/software/peg
-
-# Due to a quirk in the peg tool this file is used with, the grammar
-# must not allow an empty stream.  Therefore, the Unit rule has its
-# Datum declared as mandatory rather than optional.
-
-
-# Local Variables:
-# eval: (flyspell-mode -1)
-# End:
diff --git a/docs/c1/grammar/zbnf.txt b/docs/c1/grammar/zbnf.txt
deleted file mode 100644
index 923ac83..0000000
--- a/docs/c1/grammar/zbnf.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-; Custom notation with PEG semantics
-
-Stream        : Unit ( Blank Unit )*
-
-
-Unit          : Blank* [Datum]
-
-Blank         : '\t'...'\r' | SP | Comment
-
-
-Datum         : OneDatum ( [JoinChar] OneDatum )*
-
-JoinChar      : '.' | ':'
-
-
-Comment       : ';' ( SkipUnit | SkipLine )
-
-SkipUnit      : '~' Unit
-
-SkipLine      : ( ~LF )* [LF]
-
-
-OneDatum      : BareString | CladDatum
-
-
-BareString    : SpecBareChar ( BareChar | JoinChar )*
-              | BareChar+
-
-SpecBareChar  : '+' | '-' | JoinChar | DIGIT
-
-BareChar      : ALPHA | DIGIT
-              | '!' | '$' | '%' | '*' | '+' | '-' | '/'
-              | '<' | '=' | '>' | '?' | '^' | '_' | '~'
-
-
-CladDatum     : PipeStr | QuoteStr | HashExpr | QuoteExpr | List
-
-PipeStr       : '|' ( PipeStrChar | '\' StringEsc )* '|'
-QuoteStr      : '"' ( QuotStrChar | '\' StringEsc )* '"'
-HashExpr      : '#' HashExprs
-QuoteExpr     : "'" Datum | '`' Datum | ',' Datum
-List          : ParenList | SquareList | BraceList
-
-
-PipeStrChar   : ~( '|' | '\' )
-QuotStrChar   : ~( '"' | '\' )
-
-StringEsc     : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )*
-              | '0' | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e'
-              | 'x' HexByte* ';'
-              | 'u' UnicodeSV ';'
-
-HexByte       : HEXDIG HEXDIG
-UnicodeSV     : HEXDIG+
-
-
-HashExprs     : '!' ( SP | HTAB )* HBangLine [ LF ]
-              | '%' Label ( '%' | '=' Datum )
-              | '\' BareString | CladDatum
-              | Rune [ '\' BareString | CladDatum ]
-
-HBangLine     : HBChars+ ( SP | HTAB )* [ HBChars+ ]
-HBChars       : ~( SP | HTAB | LF )
-Label         : HEXDIG+
-Rune          : ALPHA ( ALPHA | DIGIT )*
-
-
-ParenList     : '(' ListBody ')'
-SquareList    : '[' ListBody ']'
-BraceList     : '{' ListBody '}'
-
-ListBody      : Unit* [ Blank* '&' Unit ] Blank*
-
-
-;; Local Variables:
-;; eval: (flyspell-mode -1)
-;; End:
author	Taylan Kammer <taylan.kammer@gmail.com>	2026-06-01 21:49:37 +0200
committer	Taylan Kammer <taylan.kammer@gmail.com>	2026-06-01 21:49:37 +0200
commit	724ac8ae394675a78c2977c6e35555b210256e01 (patch)
tree	d7f5574b49ec71341ea8079f18a33b9c17b60221 /docs/c1/grammar
parent	9ce0aa66cedc985322e06db4bac130910610c113 (diff)