summaryrefslogtreecommitdiff
path: root/spec
diff options
context:
space:
mode:
Diffstat (limited to 'spec')
-rw-r--r--spec/syntax.abnf96
-rw-r--r--spec/syntax.md117
-rw-r--r--spec/syntax.peg63
-rw-r--r--spec/syntax.zbnf59
4 files changed, 0 insertions, 335 deletions
diff --git a/spec/syntax.abnf b/spec/syntax.abnf
deleted file mode 100644
index 132deeb..0000000
--- a/spec/syntax.abnf
+++ /dev/null
@@ -1,96 +0,0 @@
-;This file follows strict ABNF rules and can be used with abnfgen.
-
-
-File = [Unit] *( Blank Unit ) *Blank [Trail]
-
-
-Unit = *Blank Datum
-
-Blank = HTAB / LF / %x0b / %x0c / CR / SP / Comment
-
-Trail = SkipLine / SkipUnit
-
-
-Datum = BareString
- / DottedString
- / CladDatum
- / HashExpr
- / HashDotExpr
- / QuoteExpr
- / JoinExpr
-
-Comment = SkipLine LF / SkipUnit Blank
-
-SkipLine = ';' [ SkipLStart *AnyButLF ]
-
-SkipLStart = %x00-09 / %x0b-7d / %x7f-ff
- ; any but LF or '~'
-
-AnyButLF = %x00-09 / %x0b-ff
-
-SkipUnit = ';' '~' Unit
-
-
-BareString = BareChar *( BareChar / Numeric )
-
-DottedString = ( '.' / Numeric ) *( '.' / Numeric / BareChar )
-
-CladDatum = '|' *( PipeStrChar / '\' StringEsc ) '|'
- / '"' *( QuotStrChar / '\' StringEsc ) '"'
- / '(' List ')' / '[' List ']' / '{' List '}'
-
-HashExpr = LabelExpr / RuneExpr / HashDatum
-
-HashDotExpr = RuneDotExpr / HashDotDatum
-
-QuoteExpr = "'" Datum / '`' Datum / ',' Datum
-
-JoinExpr = Datum LeftCladDatum
- / Datum ':' Datum
- / DotlessDatum '.' Datum
-
-LeftCladDatum = CladDatum / HashExpr / QuoteExpr
-
-DotlessDatum = BareString / CladDatum / RuneExpr / HashDatum
-
-
-BareChar = '!' / '$' / '%' / '*' / '/' / '<' / '=' / '>'
- / '?' / '@' / '^' / '_' / '~' / ALPHA
-
-Numeric = '+' / '-' / DIGIT
-
-PipeStrChar = %x00-5b / %x5d-7b / %x7d-ff
- ; any but '|' or '\'
-
-QuotStrChar = %x00-21 / %x23-5b / %x5d-ff
- ; any but '"' or '\'
-
-List = [Unit] *( Blank Unit ) *Blank [Tail] [SkipUnit]
-
-Tail = '&' Unit *Blank
-
-LabelExpr = '#' '%' Label ( '%' / '=' Datum )
-
-RuneExpr = '#' Rune [ '\' BareString / CladDatum ]
-
-RuneDotExpr = '#' Rune '\' DottedString
-
-HashDatum = '#' '\' BareString / CladDatum
-
-HashDotDatum = '#' '\' DottedString
-
-
-; Unicode escapes must not represent surrogate code points.
-; This is difficult to express in ABNF. But we do at least
-; disallow code points greater than \u10FFFF which are also
-; invalid, since U+10FFFF is the highest allowed.
-StringEsc = '\' / '|' / '"' / *( HTAB / SP ) LF *( HTAB / SP )
- / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e'
- / 'x' 1*( 2HEXDIG ) ';'
- / 'u' ['0'] 1*5HEXDIG ';'
- / 'u' '1' '0' 4HEXDIG ';'
-
-
-Rune = ALPHA *5( ALPHA / DIGIT )
-
-Label = 1*12( HEXDIG )
diff --git a/spec/syntax.md b/spec/syntax.md
deleted file mode 100644
index d1a17ad..0000000
--- a/spec/syntax.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# Zisp S-Expression Syntax
-
-We use a BNF-like grammar notation with the following rules:
-
-* Concatenation of expressions is implicit: `foo bar` means `foo`
- followed by `bar`.
-
-* The suffixes `?`, `*`, and `+` have the same meaning as in regular
- expressions, although `[foo]` is used in place of `(foo)?`.
-
-* The syntax is defined in terms of bytes, not characters. Terminals
- `'c'` and `"c"` refer to the ASCII value of the given character `c`.
- Numbers are in decimal and refer to a byte with the given value.
-
-* The prefix `~` means NOT. It only applies to rules that match one
- byte, and negates them. For example, `~( 'a' | 'b' )` matches any
- byte other than 97 and 98.
-
-* Ranges of terminal values are expressed as `x...y` (inclusive).
-
-* ABNF "core rules" like `ALPHA` and `HEXDIG` are supported.
-
-* There is no ambiguity, or look-ahead / backtracking beyond one byte.
- Rules match left to right, depth-first, and greedy. As soon as the
- input matches the first terminal of a rule (explicit or implied by
- recursively descending into the first non-terminal), it must match
- that rule to the end, or it is considered a syntax error.
-
-The last rule means that the notation is simple to translate to code.
-It ostensibly makes the notation equivalent to PEG in expressive power.
-
-The parser consumes one `Unit` from an input stream every time it's
-called; it returns the `Datum` therein, or EOF. The final optional
-`Blank` in `Unit` reflects that the parser consumes one more blank
-after the datum if it finds one; this is because `Datum` is not
-self-closing, so the parser has to check whether it continues.
-
-The following limits are not represented in the grammar:
-
-* A `UnicodeSV` is the hexadecimal representation of a Unicode scalar
- value; it must represent a value in the range 0 to D7FF, or E000 to
- 10FFFF, inclusive. Any other value signals an error. Valid values
- are converted into a UTF-8 byte sequence encoding the value.
-
-* A `Rune` longer than 6 bytes is grammatical, but signals an error.
- This is important because runes are not self-terminating; defining
- their grammar as ending after a maximum of 6 bytes would allow
- another datum beginning with an alphabetic character to follow a
- rune immediately without any visual delineation, which would be
-terribly confusing for a human reader. Consider `#foo123bar`: under
-such a grammar, it would parse as `#foo123` concatenated with `bar`.
-
-* A `Label` is the hexadecimal representation of a 48-bit integer,
- meaning it allows for a maximum of 12 hexadecimal digits. Longer
- values are grammatical, but signal an out-of-range error.
-
-```
-Unit : Blank* [ Datum [Blank] ]
-
-
-Blank : 9...13 | SP | Comment
-
-Datum : OneDatum ( [JoinChar] OneDatum )*
-
-JoinChar : '.' | ':'
-
-
-Comment : ';' ( SkipUnit | SkipLine )
-
-SkipUnit : '~' Unit
-
-SkipLine : ( ~LF )* [LF]
-
-
-OneDatum : BareString | CladDatum
-
-
-BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )*
- | BareChar+
-
-CladDatum : PipeStr | QuoteStr | HashExpr | QuoteExpr | List
-
-PipeStr : '|' ( PipeStrChar | '\' StringEsc )* '|'
-QuoteStr : '"' ( QuotStrChar | '\' StringEsc )* '"'
-HashExpr : '#' ( RuneExpr | LabelExpr | HashDatum )
-QuoteExpr : "'" Datum | '`' Datum | ',' Datum
-List : ParenList | SquareList | BraceList
-
-BareChar : ALPHA | DIGIT
- | '!' | '$' | '%' | '*' | '+'
- | '-' | '/' | '<' | '=' | '>'
- | '?' | '@' | '^' | '_' | '~'
-
-PipeStrChar : ~( '|' | '\' )
-QuotStrChar : ~( '"' | '\' )
-
-StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )*
- | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e'
- | 'x' HexByte+ ';'
- | 'u' UnicodeSV ';'
-
-HexByte : HEXDIG HEXDIG
-UnicodeSV : HEXDIG+
-
-RuneExpr : Rune [ '\' BareString | CladDatum ]
-LabelExpr : '%' Label ( '%' | '=' Datum )
-HashDatum : '\' BareString | CladDatum
-
-Rune : ALPHA ( ALPHA | DIGIT )*
-Label : HEXDIG+
-
-ParenList : '(' ListBody ')'
-SquareList : '[' ListBody ']'
-BraceList : '{' ListBody '}'
-
-ListBody : Unit* [ Blank* '&' Unit ] Blank*
-```
diff --git a/spec/syntax.peg b/spec/syntax.peg
deleted file mode 100644
index 97b9632..0000000
--- a/spec/syntax.peg
+++ /dev/null
@@ -1,63 +0,0 @@
-Unit <- Blank* ( Datum Blank? )?
-
-
-Blank <- ' ' / '\t' / '\n' / Comment
-
-Datum <- OneDatum ( JoinChar? OneDatum )*
-
-JoinChar <- '.' / ':'
-
-
-Comment <- ';' ( SkipUnit / SkipLine )
-
-SkipUnit <- '~' Unit
-
-SkipLine <- (!'\n' .)* '\n'?
-
-
-OneDatum <- BareString / CladDatum
-
-
-BareString <- ( '.' / '+' / '-' / DIGIT ) ( BareChar / '.' )*
- / BareChar+
-
-CladDatum <- PipeStr / QuoteStr / HashExpr / QuoteExpr / List
-
-PipeStr <- '|' ( PipeStrChar / '\' StringEsc )* '|'
-QuoteStr <- '"' ( QuotStrChar / '\' StringEsc )* '"'
-HashExpr <- '#' ( RuneExpr / LabelExpr / HashDatum )
-QuoteExpr <- "'" Datum / '`' Datum / ',' Datum
-List <- ParenList / SquareList / BraceList
-
-BareChar <- ALPHA / DIGIT
- / '!' / '$' / '%' / '*' / '+'
- / '-' / '/' / '<' / '=' / '>'
- / '?' / '@' / '^' / '_' / '~'
-
-PipeStrChar <- (![|\\] .)
-QuotStrChar <- (!["\\] .)
-
-StringEsc <- '\' / '|' / '"' / ( HTAB / SP )* LF ( HTAB / SP )*
- / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e'
- / 'x' HexByte+ ';'
- / 'u' UnicodeSV ';'
-
-HexByte <- HEXDIG HEXDIG
-UnicodeSV <- HEXDIG+
-
-RuneExpr <- Rune [ '\' BareString / CladDatum ]
-LabelExpr <- '%' Label ( '%' / '=' Datum )
-HashDatum <- '\' BareString / CladDatum
-
-Rune <- ALPHA ( ALPHA / DIGIT )*
-Label <- HEXDIG+
-
-ParenList <- '(' ListBody ')'
-SquareList <- '[' ListBody ']'
-BraceList <- '{' ListBody '}'
-
-ListBody <- Unit* [ Blank* '&' Unit ] Blank*
-
-DIGIT <- [0-9]
-ALPHA <- [a-zA-Z]
-HEXDIG <- [0-9a-fA-F]
diff --git a/spec/syntax.zbnf b/spec/syntax.zbnf
deleted file mode 100644
index b87efb5..0000000
--- a/spec/syntax.zbnf
+++ /dev/null
@@ -1,59 +0,0 @@
-Unit : Blank* [ Datum [Blank] ]
-
-
-Blank : 9...13 | SP | Comment
-
-Datum : OneDatum ( [JoinChar] OneDatum )*
-
-JoinChar : '.' | ':'
-
-
-Comment : ';' ( SkipUnit | SkipLine )
-
-SkipUnit : '~' Unit
-
-SkipLine : ( ~LF )* [LF]
-
-
-OneDatum : BareString | CladDatum
-
-
-BareString : ( '.' | '+' | '-' | DIGIT ) ( BareChar | '.' )*
- | BareChar+
-
-CladDatum : PipeStr | QuoteStr | HashExpr | QuoteExpr | List
-
-PipeStr : '|' ( PipeStrChar | '\' StringEsc )* '|'
-QuoteStr : '"' ( QuotStrChar | '\' StringEsc )* '"'
-HashExpr : '#' ( RuneExpr | LabelExpr | HashDatum )
-QuoteExpr : "'" Datum | '`' Datum | ',' Datum
-List : ParenList | SquareList | BraceList
-
-BareChar : ALPHA | DIGIT
- | '!' | '$' | '%' | '*' | '+'
- | '-' | '/' | '<' | '=' | '>'
- | '?' | '@' | '^' | '_' | '~'
-
-PipeStrChar : ~( '|' | '\' )
-QuotStrChar : ~( '"' | '\' )
-
-StringEsc : '\' | '|' | '"' | ( HTAB | SP )* LF ( HTAB | SP )*
- | 'a' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | 'e'
- | 'x' HexByte+ ';'
- | 'u' UnicodeSV ';'
-
-HexByte : HEXDIG HEXDIG
-UnicodeSV : HEXDIG+
-
-RuneExpr : Rune [ '\' BareString | CladDatum ]
-LabelExpr : '%' Label ( '%' | '=' Datum )
-HashDatum : '\' BareString | CladDatum
-
-Rune : ALPHA ( ALPHA | DIGIT )*
-Label : HEXDIG+
-
-ParenList : '(' ListBody ')'
-SquareList : '[' ListBody ']'
-BraceList : '{' ListBody '}'
-
-ListBody : Unit* [ Blank* '&' Unit ] Blank*