diff options
| field | value | date |
|---|---|---|
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-30 12:39:43 +0200 |
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-30 12:39:43 +0200 |
| commit | 49736f6748344e191077c38a49385aa3a2efb600 (patch) | |
| tree | db072fe43808766debeb1e6dc4b5548aa52dfa8d | |
| parent | d09bff09f8c1a4a4353cfe4a294fbaf96d6656a1 (diff) | |
Parser cleanup.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/libzisp/io/Parser.zig | 78 |
| -rw-r--r-- | src/libzisp/io/parser.zig | 4 |
2 files changed, 40 insertions, 42 deletions
diff --git a/src/libzisp/io/Parser.zig b/src/libzisp/io/Parser.zig index d9eeca9..7264eaa 100644 --- a/src/libzisp/io/Parser.zig +++ b/src/libzisp/io/Parser.zig @@ -27,7 +27,7 @@ // For efficiency, call the parser on an input stream with implicit buffering. // // The parser does not use its own buffer, beyond one character that may be -// written back into the unused_char field, which is checked at the end to +// written back into the unread_char field, which is checked at the end to // ensure it's nothing other than a trailing blank or comment. // // This lack of buffering is to ensure that the parser never reads more bytes @@ -82,6 +82,10 @@ const VOID = value.rune.packForced(""); const LSTAIL = value.sstr.pack("."); // zig fmt: on +// We could implement an optimization where we swap in a dummy cons when the +// parser is handling a commented-out datum, but this would require changes to +// the algorithm and doesn't seem very important, so it's not implemented. + const Cons = *const fn (v1: Value, v2: Value) Value; fn dummyCons(v1: Value, v2: Value) Value { @@ -126,7 +130,7 @@ stack: List(Context) = undefined, chars: List(u8) = undefined, cons: Cons = real_cons, result: Value = undefined, -unused_char: ?u8 = null, +unread_char: ?u8 = null, err_msg: []const u8 = undefined, pub fn init( @@ -155,7 +159,7 @@ pub fn deinit(p: *Parser) void { // fn read(p: *Parser) !?u8 { - if (is_debug and p.unused_char != null) { + if (is_debug and p.unread_char != null) { @panic("Called read() while there was an unused character!"); } const c = p.input.readByte() catch |e| switch (e) { @@ -173,15 +177,13 @@ fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { } fn unread(p: *Parser, c: u8) void { - p.unused_char = c; + p.unread_char = c; } -fn getUnused(p: *Parser) ?u8 { - if (p.unused_char) |c| { - p.unused_char = null; - return c; - } - return null; +fn getUnread(p: *Parser) ?u8 { + const c = p.unread_char orelse return null; + p.unread_char = null; + return c; } // @@ -224,7 
+226,7 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value { if (detailed_debug) printStack(p); try next(p); } - if (p.getUnused()) |_| { + if (p.unread_char) |_| { return p.err(.InvalidCharacter, "top-level"); } return p.result; @@ -232,17 +234,17 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value { fn printStack(p: *Parser) void { const stack = p.stack.items; - std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ + std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{ stack.len, p.context.next, p.context.char, - p.unused_char orelse '_', + p.unread_char orelse '_', }); if (stack.len > 0) { var i = stack.len; while (i > 0) : (i -= 1) { const prev = stack[i - 1]; - std.debug.print("{}:{} ctx:'{c}'\n", .{ + std.debug.print("{}:{any} ctx:'{c}'\n", .{ i - 1, prev.next, prev.char, @@ -290,9 +292,9 @@ fn jump(p: *Parser, next: Fn, val: ?Value) void { p.context.next = next; } -fn abort(p: *Parser, next: Fn, unused_c: u8) void { +fn abort(p: *Parser, next: Fn, unread_c: u8) void { p.result = VOID; - p.unused_char = unused_c; + p.unread_char = unread_c; p.context.next = next; } @@ -306,7 +308,7 @@ fn retval(p: *Parser, val: Value) void { // fn parseUnit(p: *Parser) !void { - var c1 = p.getUnused() orelse try p.read(); + var c1 = p.getUnread() orelse try p.read(); while (c1) |c| : (c1 = try p.read()) { switch (try checkBlanks(p, c)) { .yes => {}, @@ -321,12 +323,11 @@ fn parseUnit(p: *Parser) !void { } fn endUnit(p: *Parser) !void { - if (p.getUnused()) |c| { - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => return skipUnitAndReturn(p), - .no => p.unread(c), - } + const c = p.getUnread() orelse return p.ret(); + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => return skipUnitAndReturn(p), + .no => p.unread(c), } return p.ret(); } @@ -341,7 +342,7 @@ fn returnContext(p: *Parser) !void { } fn parseDatum(p: *Parser) !void { - return parseOneDatum(p, p.getUnused().?, &endFirstDatum); + return parseOneDatum(p, 
p.getUnread().?, &endFirstDatum); } fn endFirstDatum(p: *Parser) !void { @@ -352,7 +353,7 @@ fn endFirstDatum(p: *Parser) !void { } fn parseJoin(p: *Parser) !void { - const c = p.getUnused() orelse try p.read() orelse return p.ret(); + const c = p.getUnread() orelse try p.read() orelse return p.ret(); switch (c) { '.', ':' => { p.context.char = c; @@ -505,12 +506,9 @@ fn parseUniHex(p: *Parser) !void { } const uc = try parseHex(p, u21, msg); - if (p.getUnused()) |c| { - if (c != '}') { - return p.err(.InvalidCharacter, msg); - } - } else { - return p.err(.UnexpectedEof, msg); + const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); + if (c != '}') { + return p.err(.InvalidCharacter, msg); } const n = try std.unicode.utf8CodepointSequenceLength(uc); @@ -550,7 +548,7 @@ fn parseHashExpression(p: *Parser, next: Fn) !void { } fn parseHashDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnused().?, &endHashDatum); + return parseCladDatum(p, p.getUnread().?, &endHashDatum); } fn endHashDatum(p: *Parser) !void { @@ -574,7 +572,7 @@ fn parseRune(p: *Parser, c1: u8) !Value { } fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { - const c = p.getUnused() orelse return p.jump(next, r); + const c = p.getUnread() orelse return p.jump(next, r); if (c == '\\') { return p.jump(next, p.cons(r, try parseBareString(p, c))); } @@ -597,7 +595,7 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { } fn parseRuneDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnused().?, &endRuneDatum); + return parseCladDatum(p, p.getUnread().?, &endRuneDatum); } fn endRuneDatum(p: *Parser) !void { @@ -613,7 +611,7 @@ fn parseLabel(p: *Parser) !Value { } fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void { - const c = p.getUnused() orelse return p.err(.UnexpectedEof, "datum label"); + const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label"); if (c == '%') { return p.jump(next, p.cons(LABEL, l)); } @@ -676,7 +674,7 @@ fn continueList(p: 
*Parser) !void { const close = p.context.char; if (p.result.eq(VOID)) { - const c = p.getUnused().?; + const c = p.getUnread().?; if (c == close) { return endList(p); } @@ -689,7 +687,7 @@ fn continueList(p: *Parser) !void { p.context.val = p.cons(p.result, p.context.val); - var c1 = p.getUnused() orelse try p.read(); + var c1 = p.getUnread() orelse try p.read(); while (c1) |c| : (c1 = try p.read()) { if (c == close) { return endList(p); @@ -724,8 +722,8 @@ fn endImproperList(p: *Parser) !void { fn closeImproperList(p: *Parser) !void { const result = p.context.val; const close = p.context.char; - var c1 = p.getUnused() orelse try p.read(); - while (c1) |c| : (c1 = try p.readNoEof("after list tail")) { + var c1 = p.getUnread() orelse try p.read(); + while (c1) |c| : (c1 = try p.read()) { if (c == close) { return p.retval(result); } @@ -735,7 +733,7 @@ fn closeImproperList(p: *Parser) !void { .no => return p.err(.InvalidCharacter, "after list tail"), } } - unreachable; + return p.err(.UnexpectedEof, "after list tail"); } fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void { diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 0a39c38..d004c91 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -59,9 +59,9 @@ pub fn parse(input: std.io.AnyReader) Value { var p = default(&fb_alloc, &stack_sfa, &chars_sfa) catch @panic("OOM"); defer p.deinit(); return p.run(input) catch { - if (p.unused_char) |c| { + if (p.unread_char) |c| { std.debug.panic( - "Parse error: {s}, unused_char: 0x{x}\n", + "Parse error: {s}, unread_char: 0x{x}\n", .{ p.err_msg, c }, ); } else { |
