From cf934006c650d3d008a4408bedbd95597f906e43 Mon Sep 17 00:00:00 2001
From: Taylan Kammer <taylan.kammer@gmail.com>
Date: Mon, 31 Mar 2025 23:46:24 +0200
Subject: parser cleanup

---
 spec/parser.bnf        | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
 spec/parser.ebnf       | 75 --------------------------------------------------
 src/zisp/io/Parser.zig | 68 ++++++++++++++++++++++-----------------------
 3 files changed, 109 insertions(+), 109 deletions(-)
 create mode 100644 spec/parser.bnf
 delete mode 100644 spec/parser.ebnf

diff --git a/spec/parser.bnf b/spec/parser.bnf
new file mode 100644
index 0000000..caa24f3
--- /dev/null
+++ b/spec/parser.bnf
@@ -0,0 +1,75 @@
+unit          : blank* ( datum blank? | EOF ) ;
+
+
+blank         : 9...13 | comment ;
+
+datum         : one_datum ( join_char? one_datum )* ;
+
+join_char     : '.' | ':' ;
+
+
+comment       : ';' ( skip_unit | skip_line ) ;
+
+skip_unit     : '~' unit ;
+
+skip_line     : ( ~LF )* LF? ;
+
+
+one_datum     : bare_string | clad_datum ;
+
+bare_string   : ( '.' | '+' | '-' | DIGIT ) ( bare_str_elt | '.' )*
+              | bare_str_elt+
+              ;
+
+clad_datum    : '\' bare_esc_str
+              | '|' pipe_str_elt* '|'
+              | '"' quot_str_elt* '"'
+              | '#' hash_expr
+              | '(' list ')'
+              | '[' list ']'
+              | '{' list '}'
+              | quote_expr
+              ;
+
+
+bare_str_elt  : bare_char | '\' bare_esc ;
+
+
+bare_esc_str  : bare_esc bare_str_elt* ;
+
+pipe_str_elt  : ~( '|' | '\' ) | '\' pipe_esc ;
+
+quot_str_elt  : ~( '"' | '\' ) | '\' quot_esc ;
+
+hash_expr     : rune clad_datum?
+              | '%' label ( '%' | '=' datum )
+              | clad_datum
+              ;
+
+list          : unit* ( '.' unit )? blank* ;
+
+quote_expr    : ( "'" | "`" | "," ) datum ;
+
+
+bare_char     : ALPHA | DIGIT | bare_punct ;
+
+bare_punct    : '!' | '$' | '%' | '&' | '*' | '+' | '-' | '/'
+              | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~'
+              ;
+
+bare_esc      : 33...126 ;
+
+
+pipe_esc      : string_esc | '|' ;
+
+quot_esc      : string_esc | '"' ;
+
+string_esc    : '\' | 'a' | 'b' | 'e' | 'f' | 'n' | 'r' | 't' | 'v'
+              | 'x' HEXDIG{2}
+              | 'u' '{' HEXDIG+ '}'
+              ;
+
+
+rune          : ALPHA ( ALPHA | DIGIT ){0,5} ;
+
+label         : HEXDIG{1,12} ;
diff --git a/spec/parser.ebnf b/spec/parser.ebnf
deleted file mode 100644
index caa24f3..0000000
--- a/spec/parser.ebnf
+++ /dev/null
@@ -1,75 +0,0 @@
-unit          : blank* ( datum blank? | EOF ) ;
-
-
-blank         : 9...13 | comment ;
-
-datum         : one_datum ( join_char? one_datum )* ;
-
-join_char     : '.' | ':' ;
-
-
-comment       : ';' ( skip_unit | skip_line ) ;
-
-skip_unit     : '~' unit ;
-
-skip_line     : ( ~LF )* LF? ;
-
-
-one_datum     : bare_string | clad_datum ;
-
-bare_string   : ( '.' | '+' | '-' | DIGIT ) ( bare_str_elt | '.' )*
-              | bare_str_elt+
-              ;
-
-clad_datum    : '\' bare_esc_str
-              | '|' pipe_str_elt* '|'
-              | '"' quot_str_elt* '"'
-              | '#' hash_expr
-              | '(' list ')'
-              | '[' list ']'
-              | '{' list '}'
-              | quote_expr
-              ;
-
-
-bare_str_elt  : bare_char | '\' bare_esc ;
-
-
-bare_esc_str  : bare_esc bare_str_elt* ;
-
-pipe_str_elt  : ~( '|' | '\' ) | '\' pipe_esc ;
-
-quot_str_elt  : ~( '"' | '\' ) | '\' quot_esc ;
-
-hash_expr     : rune clad_datum?
-              | '%' label ( '%' | '=' datum )
-              | clad_datum
-              ;
-
-list          : unit* ( '.' unit )? blank* ;
-
-quote_expr    : ( "'" | "`" | "," ) datum ;
-
-
-bare_char     : ALPHA | DIGIT | bare_punct ;
-
-bare_punct    : '!' | '$' | '%' | '&' | '*' | '+' | '-' | '/'
-              | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~'
-              ;
-
-bare_esc      : 33...126 ;
-
-
-pipe_esc      : string_esc | '|' ;
-
-quot_esc      : string_esc | '"' ;
-
-string_esc    : '\' | 'a' | 'b' | 'e' | 'f' | 'n' | 'r' | 't' | 'v'
-              | 'x' HEXDIG{2}
-              | 'u' '{' HEXDIG+ '}'
-              ;
-
-
-rune          : ALPHA ( ALPHA | DIGIT ){0,5} ;
-
-label         : HEXDIG{1,12} ;
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index df9e238..7d14808 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -344,7 +344,7 @@ fn retval(p: *Parser, val: Value) void {
 fn parseUnit(p: *Parser) !void {
     var c1 = p.getUnread() orelse try p.read();
     while (c1) |c| : (c1 = try p.read()) {
-        switch (try checkBlanks(p, c)) {
+        switch (try p.checkBlanks(c)) {
             .yes => {},
             .skip_unit => try p.push(.parseUnit),
             .no => {
@@ -358,7 +358,7 @@ fn parseUnit(p: *Parser) !void {
 
 fn endUnit(p: *Parser) !void {
     const c = p.getUnread() orelse return p.ret();
-    switch (try checkBlanks(p, c)) {
+    switch (try p.checkBlanks(c)) {
         .yes => {},
         .skip_unit => return skipUnitAndReturn(p),
         .no => p.unread(c),
@@ -376,7 +376,7 @@ fn returnContext(p: *Parser) !void {
 }
 
 fn parseDatum(p: *Parser) !void {
-    return parseOneDatum(p, p.getUnread().?, .endFirstDatum);
+    return p.parseOneDatum(p.getUnread().?, .endFirstDatum);
 }
 
 fn endFirstDatum(p: *Parser) !void {
@@ -424,9 +424,9 @@ fn endJoinDatum(p: *Parser) !void {
 
 fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void {
     if (isBareChar(c) or c == '.') {
-        return p.jump(next, try parseBareString(p, c));
+        return p.jump(next, try p.parseBareString(c));
     }
-    return parseCladDatum(p, c, next);
+    return p.parseCladDatum(c, next);
 }
 
 fn parseBareString(p: *Parser, c: u8) !Value {
@@ -435,12 +435,12 @@ fn parseBareString(p: *Parser, c: u8) !Value {
         else => false,
     };
     try p.addChar(c);
-    return parseBareStringRest(p, allow_dots);
+    return p.parseBareStringRest(allow_dots);
 }
 
 fn parseBareEscString(p: *Parser) !Value {
     try p.addChar(try parseBareEsc(p));
-    return parseBareStringRest(p, false);
+    return p.parseBareStringRest(false);
 }
 
 fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value {
@@ -471,15 +471,15 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
         return p.jump(next, try parseBareEscString(p));
     }
     if (c == '"') {
-        return p.jump(next, try parseQuotedString(p, '"'));
+        return p.jump(next, try p.parseQuotedString('"'));
     }
     if (c == '|') {
-        return p.jump(next, try parseQuotedString(p, '|'));
+        return p.jump(next, try p.parseQuotedString('|'));
     }
     return switch (c) {
-        '#' => parseHashExpression(p, next),
-        '(', '[', '{' => parseList(p, c, next),
-        '\'', '`', ',' => parseQuoteExpr(p, c, next),
+        '#' => p.parseHashExpression(next),
+        '(', '[', '{' => p.parseList(c, next),
+        '\'', '`', ',' => p.parseQuoteExpr(c, next),
         else => p.abort(next, c),
     };
 }
@@ -492,7 +492,7 @@ fn parseQuotedString(p: *Parser, close: u8) !Value {
         if (c != '\\') {
             try p.addChar(c);
         } else {
-            try parseQuotedEsc(p, close);
+            try p.parseQuotedEsc(close);
         }
     }
     return error.UnclosedString;
@@ -517,7 +517,7 @@ fn parseQuotedEsc(p: *Parser, close: u8) !void {
         'f' => 12,
         'r' => 13,
         'e' => 27,
-        'x' => try parseHexByte(p, "hex escape"),
+        'x' => try p.parseHexByte("hex escape"),
         else => return p.err(.InvalidCharacter, "quoted escape"),
     });
 }
@@ -539,7 +539,7 @@ fn parseUniHex(p: *Parser) !void {
         return p.err(.InvalidCharacter, msg);
     }
 
-    const uc = try parseHex(p, u21, msg);
+    const uc = try p.parseHex(u21, msg);
     const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
     if (c != '}') {
         return p.err(.InvalidCharacter, msg);
@@ -552,23 +552,23 @@ fn parseUniHex(p: *Parser) !void {
 
 fn parseHashExpression(p: *Parser, next: Fn) !void {
     const c = try p.readNoEof("hash expression");
-    if (try checkBlanks(p, c) != .no) {
+    if (try p.checkBlanks(c) != .no) {
         return p.err(.InvalidCharacter, "hash expression");
     }
     if (std.ascii.isAlphabetic(c)) {
-        const r = try parseRune(p, c);
-        return parseRuneEnd(p, r, next);
+        const r = try p.parseRune(c);
+        return p.parseRuneEnd(r, next);
     }
     if (c == '%') {
         const l = try parseLabel(p);
-        return parseLabelEnd(p, l, next);
+        return p.parseLabelEnd(l, next);
     }
     p.unread(c);
     return p.subr(.parseHashDatum, next);
 }
 
 fn parseHashDatum(p: *Parser) !void {
-    return parseCladDatum(p, p.getUnread().?, .endHashDatum);
+    return p.parseCladDatum(p.getUnread().?, .endHashDatum);
 }
 
 fn endHashDatum(p: *Parser) !void {
@@ -594,13 +594,13 @@ fn parseRune(p: *Parser, c1: u8) !Value {
 fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
     const c = p.getUnread() orelse return p.jump(next, r);
     if (c == '\\') {
-        return p.jump(next, p.cons(r, try parseBareString(p, c)));
+        return p.jump(next, p.cons(r, try p.parseBareString(c)));
     }
     if (c == '"') {
-        return p.jump(next, p.cons(r, try parseQuotedString(p, '"')));
+        return p.jump(next, p.cons(r, try p.parseQuotedString('"')));
     }
     if (c == '|') {
-        return p.jump(next, p.cons(r, try parseQuotedString(p, '|')));
+        return p.jump(next, p.cons(r, try p.parseQuotedString('|')));
     }
     p.unread(c);
     switch (c) {
@@ -615,7 +615,7 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
 }
 
 fn parseRuneDatum(p: *Parser) !void {
-    return parseCladDatum(p, p.getUnread().?, .endRuneDatum);
+    return p.parseCladDatum(p.getUnread().?, .endRuneDatum);
 }
 
 fn endRuneDatum(p: *Parser) !void {
@@ -626,7 +626,7 @@ fn endRuneDatum(p: *Parser) !void {
 }
 
 fn parseLabel(p: *Parser) !Value {
-    const label = try parseHex(p, u48, "datum label");
+    const label = try p.parseHex(u48, "datum label");
     return value.fixnum.pack(label);
 }
 
@@ -667,14 +667,14 @@ fn parseList(p: *Parser, open: u8, next: Fn) !void {
         if (c == close) {
             return p.jump(next, head);
         }
-        switch (try checkBlanks(p, c)) {
+        switch (try p.checkBlanks(c)) {
             .yes => {},
             .skip_unit => {
-                try listParserSetup(p, head, close, next);
+                try p.listParserSetup(head, close, next);
                 return p.subr(.parseUnit, .parseUnit);
             },
             .no => {
-                try listParserSetup(p, head, close, next);
+                try p.listParserSetup(head, close, next);
                 p.unread(c);
                 return p.jump(.parseDatum, null);
             },
@@ -712,7 +712,7 @@ fn continueList(p: *Parser) !void {
         if (c == close) {
             return endList(p);
         }
-        switch (try checkBlanks(p, c)) {
+        switch (try p.checkBlanks(c)) {
             .yes => {},
             .skip_unit => {
                 try p.pushContext(.continueList);
@@ -747,7 +747,7 @@ fn closeImproperList(p: *Parser) !void {
         if (c == close) {
             return p.retval(result);
         }
-        switch (try checkBlanks(p, c)) {
+        switch (try p.checkBlanks(c)) {
             .yes => {},
             .skip_unit => return p.subr(.parseUnit, .closeImproperList),
             .no => return p.err(.InvalidCharacter, "after list tail"),
@@ -814,7 +814,7 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
     var uc: T = undefined;
 
     const c1 = try p.readNoEof(emsg);
-    uc = try parseHexDigit(p, c1, emsg);
+    uc = try p.parseHexDigit(c1, emsg);
 
     while (try p.read()) |c| {
         if (!std.ascii.isHex(c)) {
@@ -823,7 +823,7 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
         }
         const shl = std.math.shlExact;
         uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg);
-        uc |= try parseHexDigit(p, c, emsg);
+        uc |= try p.parseHexDigit(c, emsg);
     }
     return uc;
 }
@@ -831,8 +831,8 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
 fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 {
     const h1 = try p.readNoEof(emsg);
     const h2 = try p.readNoEof(emsg);
-    const hi = try parseHexDigit(p, h1, emsg);
-    const lo = try parseHexDigit(p, h2, emsg);
+    const hi = try p.parseHexDigit(h1, emsg);
+    const lo = try p.parseHexDigit(h2, emsg);
     return hi << 4 | lo;
 }
 
-- 
cgit v1.2.3