From a6040abeac8cdcba8a139a9d5b52ce28e94a14ef Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Sun, 31 May 2026 17:35:35 +0200 Subject: Allow blanks after hash-bang; update grammar files. --- docs/c1/grammar/abnf.txt | 7 +++++++ docs/c1/grammar/index.md | 4 +++- docs/c1/grammar/peg.txt | 20 +++++++++++++------- docs/c1/grammar/zbnf.txt | 11 ++++++++--- src/zisp/io/Parser.zig | 10 ++++++---- 5 files changed, 37 insertions(+), 15 deletions(-) diff --git a/docs/c1/grammar/abnf.txt b/docs/c1/grammar/abnf.txt index 77b39f3..a5b9eca 100644 --- a/docs/c1/grammar/abnf.txt +++ b/docs/c1/grammar/abnf.txt @@ -52,6 +52,8 @@ RuneDotStr = "#" RuneName "\" SpecialStr RuneClad = "#" RuneName CladDatum +HashBang = "#" "!" *( SP / HTAB ) BareString + LabelRef = "#" "%" Label "%" LabelDef = "#" "%" Label "=" Datum @@ -112,3 +114,8 @@ NoStartDot = BareString / CladDatum / Rune / RuneStr / RuneDotStr NoEndDot = BareString / Rune / RuneStr / RuneClad / LabelRef / HashStr / HashClad + + +;; Local Variables: +;; eval: (flyspell-mode -1) +;; End: diff --git a/docs/c1/grammar/index.md b/docs/c1/grammar/index.md index 8fefe0e..e3716ea 100644 --- a/docs/c1/grammar/index.md +++ b/docs/c1/grammar/index.md @@ -77,7 +77,9 @@ The following limits are not represented in the grammar: ## At-quoted strings The mechanism of at-quoted strings is not represented in any of the -grammars, since it essentially has 256 variants. +grammars, since it essentially has 256 variants. Representing it +sanely in a grammar requires the ability to save and reference +variables. ## Stream-parsing strategy diff --git a/docs/c1/grammar/peg.txt b/docs/c1/grammar/peg.txt index c391162..465123f 100644 --- a/docs/c1/grammar/peg.txt +++ b/docs/c1/grammar/peg.txt @@ -21,28 +21,28 @@ SkipLine <- (!'\n' .)* '\n'? OneDatum <- BareString / CladDatum - -BareString <- ( '.' / '+' / '-' / DIGIT ) ( BareChar / '.' )* +BareString <- SpecBareChar ( BareChar / JoinChar )* / BareChar+ CladDatum <- PipeStr / QuoteStr / HashExpr / QuoteExpr / List PipeStr <- '|' ( PipeStrChar / '\' StringEsc )* '|' QuoteStr <- '"' ( QuotStrChar / '\' StringEsc )* '"' -HashExpr <- '#' ( RuneExpr / LabelExpr / HashDatum ) +HashExpr <- '#' ( RuneExpr / HashBang / LabelExpr / HashDatum ) QuoteExpr <- "'" Datum / '`' Datum / ',' Datum List <- ParenList / SquareList / BraceList +SpecBareChar <- '+' / '-' / JoinChar / DIGIT + BareChar <- ALPHA / DIGIT - / '!' / '$' / '%' / '*' / '+' - / '-' / '/' / '<' / '=' / '>' - / '?' / '@' / '^' / '_' / '~' + / '!' / '$' / '%' / '*' / '+' / '-' / '/' + / '<' / '=' / '>' / '?' / '^' / '_' / '~' PipeStrChar <- (![|\\] .) QuotStrChar <- (!["\\] .) StringEsc <- '\' / '|' / '"' / ( HTAB / SP )* LF ( HTAB / SP )* - / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' + / '0' / 'a' / 'b' / 't' / 'n' / 'v' / 'f' / 'r' / 'e' / 'x' HexByte* ';' / 'u' UnicodeSV ';' @@ -50,6 +50,7 @@ HexByte <- HEXDIG HEXDIG UnicodeSV <- HEXDIG+ RuneExpr <- Rune ( '\' BareString / CladDatum )? +HashBang <- '!' [\t ]* BareString LabelExpr <- '%' Label ( '%' / '=' Datum ) HashDatum <- '\' BareString / CladDatum @@ -76,3 +77,8 @@ HEXDIG <- [0-9a-fA-F] # Due to a quirk in the peg tool this file is used with, the grammar # must not allow an empty stream. Therefore, the Unit rule has its # Datum declared as mandatory rather than optional. + + +# Local Variables: +# eval: (flyspell-mode -1) +# End: diff --git a/docs/c1/grammar/zbnf.txt b/docs/c1/grammar/zbnf.txt index 83a2394..0cbceab 100644 --- a/docs/c1/grammar/zbnf.txt +++ b/docs/c1/grammar/zbnf.txt @@ -1,4 +1,4 @@ -# Custom notation with PEG semantics +; Custom notation with PEG semantics Stream : Unit ( Blank Unit )* @@ -21,7 +21,6 @@ SkipLine : ( ~LF )* [LF] OneDatum : BareString | CladDatum - BareString : SpecBareChar ( BareChar | JoinChar )* | BareChar+ @@ -29,7 +28,7 @@ CladDatum : PipeStr | QuoteStr | HashExpr | QuoteExpr | List PipeStr : '|' ( PipeStrChar | '\' StringEsc )* '|' QuoteStr : '"' ( QuotStrChar | '\' StringEsc )* '"' -HashExpr : '#' ( RuneExpr | LabelExpr | HashDatum ) +HashExpr : '#' ( RuneExpr | HashBang | LabelExpr | HashDatum ) QuoteExpr : "'" Datum | '`' Datum | ',' Datum List : ParenList | SquareList | BraceList @@ -51,6 +50,7 @@ HexByte : HEXDIG HEXDIG UnicodeSV : HEXDIG+ RuneExpr : Rune [ '\' BareString | CladDatum ] +HashBang : '!' ( SP | HTAB )* BareString LabelExpr : '%' Label ( '%' | '=' Datum ) HashDatum : '\' BareString | CladDatum @@ -62,3 +62,8 @@ SquareList : '[' ListBody ']' BraceList : '{' ListBody '}' ListBody : Unit* [ Blank* '&' Unit ] Blank* + + +;; Local Variables: +;; eval: (flyspell-mode -1) +;; End: diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index a57652b..8e2908d 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -156,7 +156,7 @@ fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { return try p.read() orelse p.err(.UnexpectedEof, emsg); } -// Fake optional, for use in: while (readToEof()) |c| { } +// Fake optional, for use in: while (readNoEof2()) |c| { } fn readNoEof2(p: *Parser, comptime emsg: []const u8) !?u8 { return try p.read() orelse p.err(.UnexpectedEof, emsg); } @@ -620,9 +620,11 @@ fn endRuneDatum(p: *Parser) !void { } fn parseHashBang(p: *Parser, next: Fn) !void { - const c = try p.readNoEof("hash-bang"); - const s = try p.getBareString(c); - return p.jump(next, p.cons(SHBANG, s)); + while (try p.readNoEof2("hash-bang")) |c| { + if (c == ' ' or c == '\t') continue; + const s = try p.getBareString(c); + return p.jump(next, p.cons(SHBANG, s)); + } } fn parseLabel(p: *Parser, next: Fn) !void { -- cgit v1.2.3