Parser cleanup.

author: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-30 12:39:43 +0200
committer: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-30 12:39:43 +0200
commit: 49736f6748344e191077c38a49385aa3a2efb600 (patch)
tree: db072fe43808766debeb1e6dc4b5548aa52dfa8d
parent: d09bff09f8c1a4a4353cfe4a294fbaf96d6656a1 (diff)
2 files changed, 40 insertions, 42 deletions
diff --git a/src/libzisp/io/Parser.zig b/src/libzisp/io/Parser.zig
index d9eeca9..7264eaa 100644
--- a/src/libzisp/io/Parser.zig
+++ b/src/libzisp/io/Parser.zig
@@ -27,7 +27,7 @@
 // For efficiency, call the parser on an input stream with implicit buffering.
 //
 // The parser does not use its own buffer, beyond one character that may be
-// written back into the unused_char field, which is checked at the end to
+// written back into the unread_char field, which is checked at the end to
 // ensure it's nothing other than a trailing blank or comment.
 //
 // This lack of buffering is to ensure that the parser never reads more bytes
@@ -82,6 +82,10 @@ const VOID   = value.rune.packForced("");
 const LSTAIL = value.sstr.pack(".");
 // zig fmt: on
 
+// We could implement an optimization where we swap in a dummy cons when the
+// parser is handling a commented-out datum, but this would require changes to
+// the algorithm and doesn't seem very important, so it's not implemented.
+
 const Cons = *const fn (v1: Value, v2: Value) Value;
 
 fn dummyCons(v1: Value, v2: Value) Value {
@@ -126,7 +130,7 @@ stack: List(Context) = undefined,
 chars: List(u8) = undefined,
 cons: Cons = real_cons,
 result: Value = undefined,
-unused_char: ?u8 = null,
+unread_char: ?u8 = null,
 err_msg: []const u8 = undefined,
 
 pub fn init(
@@ -155,7 +159,7 @@ pub fn deinit(p: *Parser) void {
 //
 
 fn read(p: *Parser) !?u8 {
-    if (is_debug and p.unused_char != null) {
+    if (is_debug and p.unread_char != null) {
         @panic("Called read() while there was an unused character!");
     }
     const c = p.input.readByte() catch |e| switch (e) {
@@ -173,15 +177,13 @@ fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
 }
 
 fn unread(p: *Parser, c: u8) void {
-    p.unused_char = c;
+    p.unread_char = c;
 }
 
-fn getUnused(p: *Parser) ?u8 {
-    if (p.unused_char) |c| {
-        p.unused_char = null;
-        return c;
-    }
-    return null;
+fn getUnread(p: *Parser) ?u8 {
+    const c = p.unread_char orelse return null;
+    p.unread_char = null;
+    return c;
 }
 
 //
@@ -224,7 +226,7 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
         if (detailed_debug) printStack(p);
         try next(p);
     }
-    if (p.getUnused()) |_| {
+    if (p.unread_char) |_| {
         return p.err(.InvalidCharacter, "top-level");
     }
     return p.result;
@@ -232,17 +234,17 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
 
 fn printStack(p: *Parser) void {
     const stack = p.stack.items;
-    std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
+    std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{
         stack.len,
         p.context.next,
         p.context.char,
-        p.unused_char orelse '_',
+        p.unread_char orelse '_',
     });
     if (stack.len > 0) {
         var i = stack.len;
         while (i > 0) : (i -= 1) {
             const prev = stack[i - 1];
-            std.debug.print("{}:{} ctx:'{c}'\n", .{
+            std.debug.print("{}:{any} ctx:'{c}'\n", .{
                 i - 1,
                 prev.next,
                 prev.char,
@@ -290,9 +292,9 @@ fn jump(p: *Parser, next: Fn, val: ?Value) void {
     p.context.next = next;
 }
 
-fn abort(p: *Parser, next: Fn, unused_c: u8) void {
+fn abort(p: *Parser, next: Fn, unread_c: u8) void {
     p.result = VOID;
-    p.unused_char = unused_c;
+    p.unread_char = unread_c;
     p.context.next = next;
 }
 
@@ -306,7 +308,7 @@ fn retval(p: *Parser, val: Value) void {
 //
 
 fn parseUnit(p: *Parser) !void {
-    var c1 = p.getUnused() orelse try p.read();
+    var c1 = p.getUnread() orelse try p.read();
     while (c1) |c| : (c1 = try p.read()) {
         switch (try checkBlanks(p, c)) {
             .yes => {},
@@ -321,12 +323,11 @@ fn parseUnit(p: *Parser) !void {
 }
 
 fn endUnit(p: *Parser) !void {
-    if (p.getUnused()) |c| {
-        switch (try checkBlanks(p, c)) {
-            .yes => {},
-            .skip_unit => return skipUnitAndReturn(p),
-            .no => p.unread(c),
-        }
+    const c = p.getUnread() orelse return p.ret();
+    switch (try checkBlanks(p, c)) {
+        .yes => {},
+        .skip_unit => return skipUnitAndReturn(p),
+        .no => p.unread(c),
     }
     return p.ret();
 }
@@ -341,7 +342,7 @@ fn returnContext(p: *Parser) !void {
 }
 
 fn parseDatum(p: *Parser) !void {
-    return parseOneDatum(p, p.getUnused().?, &endFirstDatum);
+    return parseOneDatum(p, p.getUnread().?, &endFirstDatum);
 }
 
 fn endFirstDatum(p: *Parser) !void {
@@ -352,7 +353,7 @@ fn endFirstDatum(p: *Parser) !void {
 }
 
 fn parseJoin(p: *Parser) !void {
-    const c = p.getUnused() orelse try p.read() orelse return p.ret();
+    const c = p.getUnread() orelse try p.read() orelse return p.ret();
     switch (c) {
         '.', ':' => {
             p.context.char = c;
@@ -505,12 +506,9 @@ fn parseUniHex(p: *Parser) !void {
     }
 
     const uc = try parseHex(p, u21, msg);
-    if (p.getUnused()) |c| {
-        if (c != '}') {
-            return p.err(.InvalidCharacter, msg);
-        }
-    } else {
-        return p.err(.UnexpectedEof, msg);
+    const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
+    if (c != '}') {
+        return p.err(.InvalidCharacter, msg);
     }
 
     const n = try std.unicode.utf8CodepointSequenceLength(uc);
@@ -550,7 +548,7 @@ fn parseHashExpression(p: *Parser, next: Fn) !void {
 }
 
 fn parseHashDatum(p: *Parser) !void {
-    return parseCladDatum(p, p.getUnused().?, &endHashDatum);
+    return parseCladDatum(p, p.getUnread().?, &endHashDatum);
 }
 
 fn endHashDatum(p: *Parser) !void {
@@ -574,7 +572,7 @@ fn parseRune(p: *Parser, c1: u8) !Value {
 }
 
 fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
-    const c = p.getUnused() orelse return p.jump(next, r);
+    const c = p.getUnread() orelse return p.jump(next, r);
     if (c == '\\') {
         return p.jump(next, p.cons(r, try parseBareString(p, c)));
     }
@@ -597,7 +595,7 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
 }
 
 fn parseRuneDatum(p: *Parser) !void {
-    return parseCladDatum(p, p.getUnused().?, &endRuneDatum);
+    return parseCladDatum(p, p.getUnread().?, &endRuneDatum);
 }
 
 fn endRuneDatum(p: *Parser) !void {
@@ -613,7 +611,7 @@ fn parseLabel(p: *Parser) !Value {
 }
 
 fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void {
-    const c = p.getUnused() orelse return p.err(.UnexpectedEof, "datum label");
+    const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label");
     if (c == '%') {
         return p.jump(next, p.cons(LABEL, l));
     }
@@ -676,7 +674,7 @@ fn continueList(p: *Parser) !void {
     const close = p.context.char;
 
     if (p.result.eq(VOID)) {
-        const c = p.getUnused().?;
+        const c = p.getUnread().?;
         if (c == close) {
             return endList(p);
         }
@@ -689,7 +687,7 @@ fn continueList(p: *Parser) !void {
 
     p.context.val = p.cons(p.result, p.context.val);
 
-    var c1 = p.getUnused() orelse try p.read();
+    var c1 = p.getUnread() orelse try p.read();
     while (c1) |c| : (c1 = try p.read()) {
         if (c == close) {
             return endList(p);
@@ -724,8 +722,8 @@ fn endImproperList(p: *Parser) !void {
 fn closeImproperList(p: *Parser) !void {
     const result = p.context.val;
     const close = p.context.char;
-    var c1 = p.getUnused() orelse try p.read();
-    while (c1) |c| : (c1 = try p.readNoEof("after list tail")) {
+    var c1 = p.getUnread() orelse try p.read();
+    while (c1) |c| : (c1 = try p.read()) {
         if (c == close) {
             return p.retval(result);
         }
@@ -735,7 +733,7 @@ fn closeImproperList(p: *Parser) !void {
             .no => return p.err(.InvalidCharacter, "after list tail"),
         }
     }
-    unreachable;
+    return p.err(.UnexpectedEof, "after list tail");
 }
 
 fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 0a39c38..d004c91 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -59,9 +59,9 @@ pub fn parse(input: std.io.AnyReader) Value {
     var p = default(&fb_alloc, &stack_sfa, &chars_sfa) catch @panic("OOM");
     defer p.deinit();
     return p.run(input) catch {
-        if (p.unused_char) |c| {
+        if (p.unread_char) |c| {
             std.debug.panic(
-                "Parse error: {s}, unused_char: 0x{x}\n",
+                "Parse error: {s}, unread_char: 0x{x}\n",
                 .{ p.err_msg, c },
             );
         } else {
author	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-30 12:39:43 +0200
committer	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-30 12:39:43 +0200
commit	49736f6748344e191077c38a49385aa3a2efb600 (patch)
tree	db072fe43808766debeb1e6dc4b5548aa52dfa8d
parent	d09bff09f8c1a4a4353cfe4a294fbaf96d6656a1 (diff)