From 2f77b3ceaa2989d944296c572a07b2caee39d9d4 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Mon, 12 Jan 2026 08:03:38 +0100 Subject: Update HTML stuff. --- .gitignore | 5 +-- docs/c1/1-parse.md | 2 +- docs/c1/grammar.abnf.txt | 110 ---------------------------------------------- docs/c1/grammar.md | 101 ------------------------------------------ docs/c1/grammar.peg.txt | 75 ------------------------------- docs/c1/grammar.zbnf.txt | 63 -------------------------- docs/c1/grammar/abnf.txt | 112 +++++++++++++++++++++++++++++++++++++++++++++++ docs/c1/grammar/index.md | 101 ++++++++++++++++++++++++++++++++++++++++++ docs/c1/grammar/peg.txt | 78 +++++++++++++++++++++++++++++++++ docs/c1/grammar/zbnf.txt | 63 ++++++++++++++++++++++++++ html/gen.sh | 15 ++++--- html/index.md | 2 +- html/prelude.html | 4 +- 13 files changed, 367 insertions(+), 364 deletions(-) delete mode 100644 docs/c1/grammar.abnf.txt delete mode 100644 docs/c1/grammar.md delete mode 100644 docs/c1/grammar.peg.txt delete mode 100644 docs/c1/grammar.zbnf.txt create mode 100644 docs/c1/grammar/abnf.txt create mode 100644 docs/c1/grammar/index.md create mode 100644 docs/c1/grammar/peg.txt create mode 100644 docs/c1/grammar/zbnf.txt diff --git a/.gitignore b/.gitignore index 66c4be2..32584fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,4 @@ -/html/highlightjs -/html/index.html -/html/notes -/html/docs +/html/zisp .zig-cache zig-out a.out diff --git a/docs/c1/1-parse.md b/docs/c1/1-parse.md index b01d759..73b8d8a 100644 --- a/docs/c1/1-parse.md +++ b/docs/c1/1-parse.md @@ -1,6 +1,6 @@ # Parser for Code & Data -*For an exact specification of the grammar, see [grammar](grammar.html).* +*For an exact specification of the grammar, see [grammar](grammar/).* Zisp S-Expressions represent an extremely minimal set of data types; only that which is necessary to strategically construct more complex code and data: diff --git a/docs/c1/grammar.abnf.txt b/docs/c1/grammar.abnf.txt deleted file mode 100644 index 
9279210..0000000 --- a/docs/c1/grammar.abnf.txt +++ /dev/null @@ -1,110 +0,0 @@ -; Compatible with https://www.quut.com/abnfgen/ - -; It's unclear whether this grammar is truly complete. It has been -; verified not to produce text that is rejected by the Zisp parser -; --except for Unicode escape sequences for surrogate code points-- -; but there may be some text that is accepted by the parser despite -; not being grammatical according to these rules. - - -Stream = [ Unit *( Blank Unit ) ] *Blank [Trail] - - -Unit = *Blank Datum - -Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment - -Trail = SkipLine / SkipUnit / ";" "~" *Blank - - -Datum = BareString / DottedStr / CladDatum / Rune / RuneStr - / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr - / HashDotStr / HashClad / QuoteExpr / JoinExpr - -Comment = SkipLine LF / SkipUnit Blank - -SkipLine = ";" [ SkipLStart *AnyButLF ] - -SkipUnit = ";" "~" Unit - -SkipLStart = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or "~" - -AnyButLF = %x00-09 / %x0b-ff - - -BareString = BareChar *( BareChar / Numeric ) - -DottedStr = ( "." / Numeric ) *( "." / Numeric / BareChar ) - -CladDatum = "|" *( PipeStrChar / "\" StringEsc ) "|" - / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE - / "(" List ")" - / "[" List "]" - / "{" List "}" - -Rune = "#" RuneName - -RuneStr = "#" RuneName "\" BareString - -RuneDotStr = "#" RuneName "\" DottedStr - -RuneClad = "#" RuneName CladDatum - -LabelRef = "#" "%" Label "%" - -LabelDef = "#" "%" Label "=" Datum - -HashStr = "#" "\" BareString - -HashDotStr = "#" "\" DottedStr - -HashClad = "#" CladDatum - -QuoteExpr = "'" Datum - / "`" Datum - / "," Datum - -JoinExpr = Datum RJoinDatum - / LJoinDatum NoStartDot - / Datum ":" Datum - / NoEndDot "." Datum - - -BareChar = "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">" - / "?" 
/ "@" / "^" / "_" / "~" / ALPHA - -Numeric = "+" / "-" / DIGIT - -PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but "|" or "\" - -QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but DQUOTE or "\" - -StringEsc = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP ) - / %s"a" / %s"b" / %s"t" / %s"n" - / %s"v" / %s"f" / %s"r" / %s"e" - / %s"x" 1*( 2HEXDIG ) ";" - / %s"u" ["0"] 1*5HEXDIG ";" - / %s"u" "1" "0" 4HEXDIG ";" - -List = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit] - -Tail = "&" Unit *Blank - - -RuneName = ALPHA *5( ALPHA / DIGIT ) - -Label = 1*12( HEXDIG ) - - -RJoinDatum = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad - / LabelRef / LabelDef / HashStr / HashDotStr / HashClad - / QuoteExpr - -LJoinDatum = CladDatum / RuneClad / LabelRef / HashClad - -NoStartDot = BareString / CladDatum / Rune / RuneStr / RuneDotStr - / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr - / HashClad / QuoteExpr - -NoEndDot = BareString / Rune / RuneStr / RuneClad / LabelRef - / HashStr / HashClad diff --git a/docs/c1/grammar.md b/docs/c1/grammar.md deleted file mode 100644 index 3364150..0000000 --- a/docs/c1/grammar.md +++ /dev/null @@ -1,101 +0,0 @@ -# Zisp S-Expression Grammar - -The grammar is available in several different formats: - -* [ZBNF](grammar.zbnf.txt): See below for the rules of this notation -* [ABNF](grammar.abnf.txt): Compatible with the `abnfgen` tool -* [PEG](grammar.peg.txt): Compatible with `peg/leg` tool - - -## ZBNF notation - -The ZBNF grammar specification uses a BNF-like notation with PEG-like -semantics: - -* Concatenation of expressions is implicit: `foo bar` means `foo` - followed by `bar`. - -* Parentheses are used for grouping, and the pipe symbol `|` is used - for alternatives. - -* The suffixes `?`, `*`, and `+` have the same meaning as in regular - expressions, although `[foo]` is used in place of `(foo)?`. - -* The syntax is defined in terms of bytes, not characters. 
Terminals - `'c'` and `"c"` refer to the ASCII value of the given character `c`. - Standard C escape sequences are supported. - -* The prefix `~` means NOT. It only applies to rules that match one - byte, and negates them. For example, `~( 'a' | 'b' )` matches any - byte other than 'a' and 'b'. - -* Ranges of terminal values are expressed as `x...y` (inclusive). - -* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported. - -* There is no ambiguity, or look-ahead / backtracking beyond one byte. - Rules match left to right, depth-first, and greedy. As soon as the - input matches the first terminal of a rule --explicit or implied by - recursively descending into the first non-terminal-- it must match - that rule to the end or a syntax error is reported. - -The last point makes the notation simple to translate to code. - - -## Limitations outside the grammar - -The following limits are not represented in the grammar: - -* A `UnicodeSV` is the hexadecimal representation of a Unicode scalar - value; it must represent a value in the range 0 to D7FF, or E000 to - 10FFFF, inclusive. Any other value signals an error. Valid values - are converted into a UTF-8 byte sequence encoding the value. - -* A `Rune` longer than 6 bytes is grammatical, but signals an error. - This is important because runes are not self-terminating; defining - their grammar as ending after a maximum of 6 bytes would allow - another datum beginning with an alphabetic character to follow a - rune immediately without any visual delineation, which would be - terribly confusing for a human reader. Consider: `#foobarbaz`. - This would parse as a `Datum` joining `#foobar` and `baz`. - -* A `Label` is the hexadecimal representation of a 48-bit integer, - meaning it allows for a maximum of 12 hexadecimal digits. Longer - values are grammatical, but signal an out-of-range error, so as to - avoid signaling a confusing "invalid character" error on input that - appears grammatical. 
Consider: `#%123456789abcd=foo`. This would - signal an invalid character error at the letter `d` if the grammar - limited a `Label` to 12 hexadecimal digits. - - -## Stream-parsing strategy - -The parser consumes one `Unit` from the input stream every time it's -called; it returns the `Datum` therein if found, or else it returns -the Zisp EOF token. - -Since a `Datum` is not self-terminating, the parser must read beyond -it to realize that it has ended (if not followed by the EOF). Thus, -it will consume one more `Blank` following the `Unit` that it parsed. -If this `Blank` is a comment, it will be consumed entirely, ensuring -that parsing resumes properly on a subsequent parser call on the same -input stream, without needing to store any state in between. - -Since comments of type `SkipUnit` are likewise not self-terminating, -an arbitrary number of chained `SkipUnit` comments may need to be -consumed before the parser is finally allowed to return. - -The following illustration shows the positions at which the parser -will stop consuming input when called repeatedly on the same input -stream. The dots represent the extent of each `Unit` being parsed, -while the caret points at the last byte the parser will consume in -that parse cycle. - -``` -foo (bar)[baz] foo;~bar foo;~bar;~baz;~bat foobar -...^..........^... ^... ^......^ -``` - -Notice how, in the fourth cycle, the parser is forced to consume all -commented-out units before it can return, since it would otherwise -leave the stream in an inappropriate state. diff --git a/docs/c1/grammar.peg.txt b/docs/c1/grammar.peg.txt deleted file mode 100644 index d194652..0000000 --- a/docs/c1/grammar.peg.txt +++ /dev/null @@ -1,75 +0,0 @@ -# Compatible with https://piumarta.com/software/peg - -Stream <- Unit ( Blank Unit )* !. - -Unit <- Blank* Datum - - -Blank <- [\t-\r ] / Comment - -Datum <- OneDatum ( JoinChar? OneDatum )* - -JoinChar <- '.' 
/ ':' - - -Comment <- ';' ( SkipUnit / SkipLine ) - -SkipUnit <- '~' Unit - -SkipLine <- (!'\n' .)* '\n'? - - -OneDatum <- BareString / CladDatum - - -BareString <- ( '.' / '+' / '-' / DIGIT ) ( BareChar / '.' )* - / BareChar+ - -CladDatum <- PipeStr / QuoteStr / HashExpr / QuoteExpr / List - -PipeStr <- '|' ( PipeStrChar / '\' StringEsc )* '|' -QuoteStr <- '"' ( QuotStrChar / '\' StringEsc )* '"' -HashExpr <- '#' ( RuneExpr / LabelExpr / HashDatum ) -QuoteExpr <- "'" Datum / '`' Datum / ',' Datum -List <- ParenList / SquareList / BraceList - -BareChar <- ALPHA / DIGIT - / '!' / '$' / '%' / '*' / '+' - / '-' / '/' / '<' / '=' / '>' - / '?' / '@' / '^' / '_' / '~' - -PipeStrChar <- (![|\\] .) -QuotStrChar <- (!["\\] .) - -StringEsc <- '\' / '|' / '"' / ( HTAB / SP )* LF ( HTAB / SP )* - / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' - / 'x' HexByte+ ';' - / 'u' UnicodeSV ';' - -HexByte <- HEXDIG HEXDIG -UnicodeSV <- HEXDIG+ - -RuneExpr <- Rune ( '\' BareString / CladDatum )? -LabelExpr <- '%' Label ( '%' / '=' Datum ) -HashDatum <- '\' BareString / CladDatum - -Rune <- ALPHA ( ALPHA / DIGIT )* -Label <- HEXDIG+ - -ParenList <- '(' ListBody ')' -SquareList <- '[' ListBody ']' -BraceList <- '{' ListBody '}' - -ListBody <- Unit* ( Blank* '&' Unit )? Blank* - -DIGIT <- [0-9] -ALPHA <- [a-zA-Z] -HEXDIG <- [0-9a-fA-F] - - -# This file should be kept in perfect sync with zbnf.txt for easy -# comparison between the two. - -# Due to a quirk in the peg tool this file is used with, the grammar -# must not allow an empty stream. Therefore, the Unit rule has its -# Datum declared as mandatory rather than optional. 
diff --git a/docs/c1/grammar.zbnf.txt b/docs/c1/grammar.zbnf.txt deleted file mode 100644 index a8792f0..0000000 --- a/docs/c1/grammar.zbnf.txt +++ /dev/null @@ -1,63 +0,0 @@ -# Custom notation with PEG semantics; see grammar.html - -Stream : Unit ( Blank Unit )* - -Unit : Blank* [Datum] - - -Blank : '\t'...'\r' | SP | Comment - -Datum : OneDatum ( [JoinChar] OneDatum )* - -JoinChar : '.' | ':' - - -Comment : ';' ( SkipUnit | SkipLine ) - -SkipUnit : '~' Unit - -SkipLine : ( ~LF )* [LF] - - -OneDatum : BareString | CladDatum - - -BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )* - | BareChar+ - -CladDatum : PipeStr | QuoteStr | HashExpr | QuoteExpr | List - -PipeStr : '|' ( PipeStrChar | '\' StringEsc )* '|' -QuoteStr : '"' ( QuotStrChar | '\' StringEsc )* '"' -HashExpr : '#' ( RuneExpr | LabelExpr | HashDatum ) -QuoteExpr : "'" Datum | '`' Datum | ',' Datum -List : ParenList | SquareList | BraceList - -BareChar : ALPHA | DIGIT - | '!' | '$' | '%' | '*' | '+' - | '-' | '/' | '<' | '=' | '>' - | '?' | '@' | '^' | '_' | '~' - -PipeStrChar : ~( '|' | '\' ) -QuotStrChar : ~( '"' | '\' ) - -StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )* - | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e' - | 'x' HexByte+ ';' - | 'u' UnicodeSV ';' - -HexByte : HEXDIG HEXDIG -UnicodeSV : HEXDIG+ - -RuneExpr : Rune [ '\' BareString | CladDatum ] -LabelExpr : '%' Label ( '%' | '=' Datum ) -HashDatum : '\' BareString | CladDatum - -Rune : ALPHA ( ALPHA | DIGIT )* -Label : HEXDIG+ - -ParenList : '(' ListBody ')' -SquareList : '[' ListBody ']' -BraceList : '{' ListBody '}' - -ListBody : Unit* [ Blank* '&' Unit ] Blank* diff --git a/docs/c1/grammar/abnf.txt b/docs/c1/grammar/abnf.txt new file mode 100644 index 0000000..6daaceb --- /dev/null +++ b/docs/c1/grammar/abnf.txt @@ -0,0 +1,112 @@ +; Standards-compliant ABNF (RFC 5234, RFC 7405) + +; Compatible with: https://www.quut.com/abnfgen/ + +; It's unclear whether this grammar is truly complete. 
It has been +; verified not to produce text that is rejected by the Zisp parser +; --except for Unicode escape sequences for surrogate code points-- +; but there may be some text that is accepted by the parser despite +; not being grammatical according to these rules. + + +Stream = [ Unit *( Blank Unit ) ] *Blank [Trail] + + +Unit = *Blank Datum + +Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment + +Trail = SkipLine / SkipUnit / ";" "~" *Blank + + +Datum = BareString / DottedStr / CladDatum / Rune / RuneStr + / RuneDotStr / RuneClad / LabelRef / LabelDef / HashStr + / HashDotStr / HashClad / QuoteExpr / JoinExpr + +Comment = SkipLine LF / SkipUnit Blank + +SkipLine = ";" [ SkipLStart *AnyButLF ] + +SkipUnit = ";" "~" Unit + +SkipLStart = %x00-09 / %x0b-7d / %x7f-ff ; any but LF or "~" + +AnyButLF = %x00-09 / %x0b-ff + + +BareString = BareChar *( BareChar / Numeric ) + +DottedStr = ( "." / Numeric ) *( "." / Numeric / BareChar ) + +CladDatum = "|" *( PipeStrChar / "\" StringEsc ) "|" + / DQUOTE *( QuotStrChar / "\" StringEsc ) DQUOTE + / "(" List ")" + / "[" List "]" + / "{" List "}" + +Rune = "#" RuneName + +RuneStr = "#" RuneName "\" BareString + +RuneDotStr = "#" RuneName "\" DottedStr + +RuneClad = "#" RuneName CladDatum + +LabelRef = "#" "%" Label "%" + +LabelDef = "#" "%" Label "=" Datum + +HashStr = "#" "\" BareString + +HashDotStr = "#" "\" DottedStr + +HashClad = "#" CladDatum + +QuoteExpr = "'" Datum + / "`" Datum + / "," Datum + +JoinExpr = Datum RJoinDatum + / LJoinDatum NoStartDot + / Datum ":" Datum + / NoEndDot "." Datum + + +BareChar = "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">" + / "?" 
/ "@" / "^" / "_" / "~" / ALPHA + +Numeric = "+" / "-" / DIGIT + +PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff ; any but "|" or "\" + +QuotStrChar = %x00-21 / %x23-5b / %x5d-ff ; any but DQUOTE or "\" + +StringEsc = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP ) + / %s"a" / %s"b" / %s"t" / %s"n" + / %s"v" / %s"f" / %s"r" / %s"e" + / %s"x" 1*( 2HEXDIG ) ";" + / %s"u" ["0"] 1*5HEXDIG ";" + / %s"u" "1" "0" 4HEXDIG ";" + +List = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit] + +Tail = "&" Unit *Blank + + +RuneName = ALPHA *5( ALPHA / DIGIT ) + +Label = 1*12( HEXDIG ) + + +RJoinDatum = CladDatum / Rune / RuneStr / RuneDotStr / RuneClad + / LabelRef / LabelDef / HashStr / HashDotStr / HashClad + / QuoteExpr + +LJoinDatum = CladDatum / RuneClad / LabelRef / HashClad + +NoStartDot = BareString / CladDatum / Rune / RuneStr / RuneDotStr + / RuneClad / LabelRef / LabelDef / HashStr / HashDotStr + / HashClad / QuoteExpr + +NoEndDot = BareString / Rune / RuneStr / RuneClad / LabelRef + / HashStr / HashClad diff --git a/docs/c1/grammar/index.md b/docs/c1/grammar/index.md new file mode 100644 index 0000000..5bedbfc --- /dev/null +++ b/docs/c1/grammar/index.md @@ -0,0 +1,101 @@ +# Zisp S-Expression Grammar + +The grammar is available in several different formats: + +* [ZBNF](zbnf.txt): See below for the rules of this notation +* [ABNF](abnf.txt): Compatible with the `abnfgen` tool +* [PEG](peg.txt): Compatible with the `peg/leg` tool + + +## ZBNF notation + +The ZBNF grammar specification uses a BNF-like notation with PEG-like +semantics: + +* Concatenation of expressions is implicit: `foo bar` means `foo` + followed by `bar`. + +* Parentheses are used for grouping, and the pipe symbol `|` is used + for alternatives. + +* The suffixes `?`, `*`, and `+` have the same meaning as in regular + expressions, although `[foo]` is used in place of `(foo)?`. + +* The syntax is defined in terms of bytes, not characters.
Terminals + `'c'` and `"c"` refer to the ASCII value of the given character `c`. + Standard C escape sequences are supported. + +* The prefix `~` means NOT. It only applies to rules that match one + byte, and negates them. For example, `~( 'a' | 'b' )` matches any + byte other than 'a' and 'b'. + +* Ranges of terminal values are expressed as `x...y` (inclusive). + +* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported. + +* There is no ambiguity, or look-ahead / backtracking beyond one byte. + Rules match left to right, depth-first, and greedy. As soon as the + input matches the first terminal of a rule --explicit or implied by + recursively descending into the first non-terminal-- it must match + that rule to the end or a syntax error is reported. + +The last point makes the notation simple to translate to code. + + +## Limitations outside the grammar + +The following limits are not represented in the grammar: + +* A `UnicodeSV` is the hexadecimal representation of a Unicode scalar + value; it must represent a value in the range 0 to D7FF, or E000 to + 10FFFF, inclusive. Any other value signals an error. Valid values + are converted into a UTF-8 byte sequence encoding the value. + +* A `Rune` longer than 6 bytes is grammatical, but signals an error. + This is important because runes are not self-terminating; defining + their grammar as ending after a maximum of 6 bytes would allow + another datum beginning with an alphabetic character to follow a + rune immediately without any visual delineation, which would be + terribly confusing for a human reader. Consider: `#foobarbaz`. + It would then parse as a `Datum` joining `#foobar` and `baz`. + +* A `Label` is the hexadecimal representation of a 48-bit integer, + meaning it allows for a maximum of 12 hexadecimal digits. Longer + values are grammatical, but signal an out-of-range error, so as to + avoid signaling a confusing "invalid character" error on input that + appears grammatical.
Consider: `#%123456789abcd=foo`. This would + signal an invalid character error at the letter `d` if the grammar + limited a `Label` to 12 hexadecimal digits. + + +## Stream-parsing strategy + +The parser consumes one `Unit` from the input stream every time it's +called; it returns the `Datum` therein if found, or else it returns +the Zisp EOF token. + +Since a `Datum` is not self-terminating, the parser must read beyond +it to realize that it has ended (if not followed by the EOF). Thus, +it will consume one more `Blank` following the `Unit` that it parsed. +If this `Blank` is a comment, it will be consumed entirely, ensuring +that parsing resumes properly on a subsequent parser call on the same +input stream, without needing to store any state in between. + +Since comments of type `SkipUnit` are likewise not self-terminating, +an arbitrary number of chained `SkipUnit` comments may need to be +consumed before the parser is finally allowed to return. + +The following illustration shows the positions at which the parser +will stop consuming input when called repeatedly on the same input +stream. The dots represent the extent of each `Unit` being parsed, +while the caret points at the last byte the parser will consume in +that parse cycle. + +``` +foo (bar)[baz] foo;~bar foo;~bar;~baz;~bat foobar +...^..........^... ^... ^......^ +``` + +Notice how, in the fourth cycle, the parser is forced to consume all +commented-out units before it can return, since it would otherwise +leave the stream in an inappropriate state. diff --git a/docs/c1/grammar/peg.txt b/docs/c1/grammar/peg.txt new file mode 100644 index 0000000..1e060ec --- /dev/null +++ b/docs/c1/grammar/peg.txt @@ -0,0 +1,78 @@ +# Standard PEG notation + +Stream <- Unit ( Blank Unit )* !. + +Unit <- Blank* Datum + + +Blank <- [\t-\r ] / Comment + +Datum <- OneDatum ( JoinChar? OneDatum )* + +JoinChar <- '.' / ':' + + +Comment <- ';' ( SkipUnit / SkipLine ) + +SkipUnit <- '~' Unit + +SkipLine <- (!'\n' .)* '\n'? 
+ + +OneDatum <- BareString / CladDatum + + +BareString <- ( '.' / '+' / '-' / DIGIT ) ( BareChar / '.' )* + / BareChar+ + +CladDatum <- PipeStr / QuoteStr / HashExpr / QuoteExpr / List + +PipeStr <- '|' ( PipeStrChar / '\' StringEsc )* '|' +QuoteStr <- '"' ( QuotStrChar / '\' StringEsc )* '"' +HashExpr <- '#' ( RuneExpr / LabelExpr / HashDatum ) +QuoteExpr <- "'" Datum / '`' Datum / ',' Datum +List <- ParenList / SquareList / BraceList + +BareChar <- ALPHA / DIGIT + / '!' / '$' / '%' / '*' / '+' + / '-' / '/' / '<' / '=' / '>' + / '?' / '@' / '^' / '_' / '~' + +PipeStrChar <- (![|\\] .) +QuotStrChar <- (!["\\] .) + +StringEsc <- '\' / '|' / '"' / ( HTAB / SP )* LF ( HTAB / SP )* + / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' + / 'x' HexByte+ ';' + / 'u' UnicodeSV ';' + +HexByte <- HEXDIG HEXDIG +UnicodeSV <- HEXDIG+ + +RuneExpr <- Rune ( '\' BareString / CladDatum )? +LabelExpr <- '%' Label ( '%' / '=' Datum ) +HashDatum <- '\' BareString / CladDatum + +Rune <- ALPHA ( ALPHA / DIGIT )* +Label <- HEXDIG+ + +ParenList <- '(' ListBody ')' +SquareList <- '[' ListBody ']' +BraceList <- '{' ListBody '}' + +ListBody <- Unit* ( Blank* '&' Unit )? Blank* + +DIGIT <- [0-9] +ALPHA <- [a-zA-Z] +HEXDIG <- [0-9a-fA-F] + + +# Keep this in sync line-for-line with the ZBNF grammar for easy +# comparison between the two. + +# This file is meant to be compatible with: +# https://piumarta.com/software/peg + +# Due to a quirk in the peg tool this file is used with, the grammar +# must not allow an empty stream. Therefore, the Unit rule has its +# Datum declared as mandatory rather than optional. diff --git a/docs/c1/grammar/zbnf.txt b/docs/c1/grammar/zbnf.txt new file mode 100644 index 0000000..551c319 --- /dev/null +++ b/docs/c1/grammar/zbnf.txt @@ -0,0 +1,63 @@ +# Custom notation with PEG semantics + +Stream : Unit ( Blank Unit )* + +Unit : Blank* [Datum] + + +Blank : '\t'...'\r' | SP | Comment + +Datum : OneDatum ( [JoinChar] OneDatum )* + +JoinChar : '.' 
| ':' + + +Comment : ';' ( SkipUnit | SkipLine ) + +SkipUnit : '~' Unit + +SkipLine : ( ~LF )* [LF] + + +OneDatum : BareString | CladDatum + + +BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )* + | BareChar+ + +CladDatum : PipeStr | QuoteStr | HashExpr | QuoteExpr | List + +PipeStr : '|' ( PipeStrChar | '\' StringEsc )* '|' +QuoteStr : '"' ( QuotStrChar | '\' StringEsc )* '"' +HashExpr : '#' ( RuneExpr | LabelExpr | HashDatum ) +QuoteExpr : "'" Datum | '`' Datum | ',' Datum +List : ParenList | SquareList | BraceList + +BareChar : ALPHA | DIGIT + | '!' | '$' | '%' | '*' | '+' + | '-' | '/' | '<' | '=' | '>' + | '?' | '@' | '^' | '_' | '~' + +PipeStrChar : ~( '|' | '\' ) +QuotStrChar : ~( '"' | '\' ) + +StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )* + | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e' + | 'x' HexByte+ ';' + | 'u' UnicodeSV ';' + +HexByte : HEXDIG HEXDIG +UnicodeSV : HEXDIG+ + +RuneExpr : Rune [ '\' BareString | CladDatum ] +LabelExpr : '%' Label ( '%' | '=' Datum ) +HashDatum : '\' BareString | CladDatum + +Rune : ALPHA ( ALPHA | DIGIT )* +Label : HEXDIG+ + +ParenList : '(' ListBody ')' +SquareList : '[' ListBody ']' +BraceList : '{' ListBody '}' + +ListBody : Unit* [ Blank* '&' Unit ] Blank* diff --git a/html/gen.sh b/html/gen.sh index 03ae570..ad362c7 100755 --- a/html/gen.sh +++ b/html/gen.sh @@ -26,13 +26,14 @@ md2ht() { } > "$dst" } -md2ht index.md index.html +md2ht index.md zisp/index.html +cp style.css zisp/ for file in ../notes/*.md do name=${file#../notes/} name=${name%.md} - md2ht "$file" "notes/$name.html" + md2ht "$file" "zisp/notes/$name.html" done shopt -s globstar @@ -43,17 +44,17 @@ do name=${name%.md} dir=${file#../} dir=${dir%/*} - mkdir -p "$dir" - md2ht "$file" "docs/$name.html" + mkdir -p "zisp/$dir" + md2ht "$file" "zisp/docs/$name.html" done for file in ../docs/**/*.txt do dir=${file#../} dir=${dir%/*} - mkdir -p "$dir" - dest=docs/${file#../docs/} + mkdir -p "zisp/$dir" + dest=zisp/docs/${file#../docs/} 
cp "$file" "$dest" done -rsync -a ./ tk:/var/www/html/zisp +rsync -a zisp tk:/var/www/html diff --git a/html/index.md b/html/index.md index cc9091c..f8fe67c 100644 --- a/html/index.md +++ b/html/index.md @@ -1,4 +1,4 @@ -# Zisp: A full-stack Lisp for the 21st Century +# Zisp: A full-stack Lisp for the 22nd Century Zisp is my experimental language project that first started with the idea of writing a simple Scheme implementation in Zig, just to learn diff --git a/html/prelude.html b/html/prelude.html index 14a155b..84ee345 100644 --- a/html/prelude.html +++ b/html/prelude.html @@ -4,8 +4,8 @@ __TITLE__ - - + +