summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Taylan Kammer <taylan.kammer@gmail.com>, 2025-03-30 12:39:43 +0200
committer: Taylan Kammer <taylan.kammer@gmail.com>, 2025-03-30 12:39:43 +0200
commit: 49736f6748344e191077c38a49385aa3a2efb600 (patch)
tree: db072fe43808766debeb1e6dc4b5548aa52dfa8d
parent: d09bff09f8c1a4a4353cfe4a294fbaf96d6656a1 (diff)
Parser cleanup.
-rw-r--r--  src/libzisp/io/Parser.zig  78
-rw-r--r--  src/libzisp/io/parser.zig  4
2 files changed, 40 insertions, 42 deletions
diff --git a/src/libzisp/io/Parser.zig b/src/libzisp/io/Parser.zig
index d9eeca9..7264eaa 100644
--- a/src/libzisp/io/Parser.zig
+++ b/src/libzisp/io/Parser.zig
@@ -27,7 +27,7 @@
// For efficiency, call the parser on an input stream with implicit buffering.
//
// The parser does not use its own buffer, beyond one character that may be
-// written back into the unused_char field, which is checked at the end to
+// written back into the unread_char field, which is checked at the end to
// ensure it's nothing other than a trailing blank or comment.
//
// This lack of buffering is to ensure that the parser never reads more bytes
@@ -82,6 +82,10 @@ const VOID = value.rune.packForced("");
const LSTAIL = value.sstr.pack(".");
// zig fmt: on
+// We could implement an optimization where we swap in a dummy cons when the
+// parser is handling a commented-out datum, but this would require changes to
+// the algorithm and doesn't seem very important, so it's not implemented.
+
const Cons = *const fn (v1: Value, v2: Value) Value;
fn dummyCons(v1: Value, v2: Value) Value {
@@ -126,7 +130,7 @@ stack: List(Context) = undefined,
chars: List(u8) = undefined,
cons: Cons = real_cons,
result: Value = undefined,
-unused_char: ?u8 = null,
+unread_char: ?u8 = null,
err_msg: []const u8 = undefined,
pub fn init(
@@ -155,7 +159,7 @@ pub fn deinit(p: *Parser) void {
//
fn read(p: *Parser) !?u8 {
- if (is_debug and p.unused_char != null) {
+ if (is_debug and p.unread_char != null) {
@panic("Called read() while there was an unused character!");
}
const c = p.input.readByte() catch |e| switch (e) {
@@ -173,15 +177,13 @@ fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
}
fn unread(p: *Parser, c: u8) void {
- p.unused_char = c;
+ p.unread_char = c;
}
-fn getUnused(p: *Parser) ?u8 {
- if (p.unused_char) |c| {
- p.unused_char = null;
- return c;
- }
- return null;
+fn getUnread(p: *Parser) ?u8 {
+ const c = p.unread_char orelse return null;
+ p.unread_char = null;
+ return c;
}
//
@@ -224,7 +226,7 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
if (detailed_debug) printStack(p);
try next(p);
}
- if (p.getUnused()) |_| {
+ if (p.unread_char) |_| {
return p.err(.InvalidCharacter, "top-level");
}
return p.result;
@@ -232,17 +234,17 @@ pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
fn printStack(p: *Parser) void {
const stack = p.stack.items;
- std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
+ std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{
stack.len,
p.context.next,
p.context.char,
- p.unused_char orelse '_',
+ p.unread_char orelse '_',
});
if (stack.len > 0) {
var i = stack.len;
while (i > 0) : (i -= 1) {
const prev = stack[i - 1];
- std.debug.print("{}:{} ctx:'{c}'\n", .{
+ std.debug.print("{}:{any} ctx:'{c}'\n", .{
i - 1,
prev.next,
prev.char,
@@ -290,9 +292,9 @@ fn jump(p: *Parser, next: Fn, val: ?Value) void {
p.context.next = next;
}
-fn abort(p: *Parser, next: Fn, unused_c: u8) void {
+fn abort(p: *Parser, next: Fn, unread_c: u8) void {
p.result = VOID;
- p.unused_char = unused_c;
+ p.unread_char = unread_c;
p.context.next = next;
}
@@ -306,7 +308,7 @@ fn retval(p: *Parser, val: Value) void {
//
fn parseUnit(p: *Parser) !void {
- var c1 = p.getUnused() orelse try p.read();
+ var c1 = p.getUnread() orelse try p.read();
while (c1) |c| : (c1 = try p.read()) {
switch (try checkBlanks(p, c)) {
.yes => {},
@@ -321,12 +323,11 @@ fn parseUnit(p: *Parser) !void {
}
fn endUnit(p: *Parser) !void {
- if (p.getUnused()) |c| {
- switch (try checkBlanks(p, c)) {
- .yes => {},
- .skip_unit => return skipUnitAndReturn(p),
- .no => p.unread(c),
- }
+ const c = p.getUnread() orelse return p.ret();
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => return skipUnitAndReturn(p),
+ .no => p.unread(c),
}
return p.ret();
}
@@ -341,7 +342,7 @@ fn returnContext(p: *Parser) !void {
}
fn parseDatum(p: *Parser) !void {
- return parseOneDatum(p, p.getUnused().?, &endFirstDatum);
+ return parseOneDatum(p, p.getUnread().?, &endFirstDatum);
}
fn endFirstDatum(p: *Parser) !void {
@@ -352,7 +353,7 @@ fn endFirstDatum(p: *Parser) !void {
}
fn parseJoin(p: *Parser) !void {
- const c = p.getUnused() orelse try p.read() orelse return p.ret();
+ const c = p.getUnread() orelse try p.read() orelse return p.ret();
switch (c) {
'.', ':' => {
p.context.char = c;
@@ -505,12 +506,9 @@ fn parseUniHex(p: *Parser) !void {
}
const uc = try parseHex(p, u21, msg);
- if (p.getUnused()) |c| {
- if (c != '}') {
- return p.err(.InvalidCharacter, msg);
- }
- } else {
- return p.err(.UnexpectedEof, msg);
+ const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
+ if (c != '}') {
+ return p.err(.InvalidCharacter, msg);
}
const n = try std.unicode.utf8CodepointSequenceLength(uc);
@@ -550,7 +548,7 @@ fn parseHashExpression(p: *Parser, next: Fn) !void {
}
fn parseHashDatum(p: *Parser) !void {
- return parseCladDatum(p, p.getUnused().?, &endHashDatum);
+ return parseCladDatum(p, p.getUnread().?, &endHashDatum);
}
fn endHashDatum(p: *Parser) !void {
@@ -574,7 +572,7 @@ fn parseRune(p: *Parser, c1: u8) !Value {
}
fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
- const c = p.getUnused() orelse return p.jump(next, r);
+ const c = p.getUnread() orelse return p.jump(next, r);
if (c == '\\') {
return p.jump(next, p.cons(r, try parseBareString(p, c)));
}
@@ -597,7 +595,7 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
}
fn parseRuneDatum(p: *Parser) !void {
- return parseCladDatum(p, p.getUnused().?, &endRuneDatum);
+ return parseCladDatum(p, p.getUnread().?, &endRuneDatum);
}
fn endRuneDatum(p: *Parser) !void {
@@ -613,7 +611,7 @@ fn parseLabel(p: *Parser) !Value {
}
fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void {
- const c = p.getUnused() orelse return p.err(.UnexpectedEof, "datum label");
+ const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label");
if (c == '%') {
return p.jump(next, p.cons(LABEL, l));
}
@@ -676,7 +674,7 @@ fn continueList(p: *Parser) !void {
const close = p.context.char;
if (p.result.eq(VOID)) {
- const c = p.getUnused().?;
+ const c = p.getUnread().?;
if (c == close) {
return endList(p);
}
@@ -689,7 +687,7 @@ fn continueList(p: *Parser) !void {
p.context.val = p.cons(p.result, p.context.val);
- var c1 = p.getUnused() orelse try p.read();
+ var c1 = p.getUnread() orelse try p.read();
while (c1) |c| : (c1 = try p.read()) {
if (c == close) {
return endList(p);
@@ -724,8 +722,8 @@ fn endImproperList(p: *Parser) !void {
fn closeImproperList(p: *Parser) !void {
const result = p.context.val;
const close = p.context.char;
- var c1 = p.getUnused() orelse try p.read();
- while (c1) |c| : (c1 = try p.readNoEof("after list tail")) {
+ var c1 = p.getUnread() orelse try p.read();
+ while (c1) |c| : (c1 = try p.read()) {
if (c == close) {
return p.retval(result);
}
@@ -735,7 +733,7 @@ fn closeImproperList(p: *Parser) !void {
.no => return p.err(.InvalidCharacter, "after list tail"),
}
}
- unreachable;
+ return p.err(.UnexpectedEof, "after list tail");
}
fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 0a39c38..d004c91 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -59,9 +59,9 @@ pub fn parse(input: std.io.AnyReader) Value {
var p = default(&fb_alloc, &stack_sfa, &chars_sfa) catch @panic("OOM");
defer p.deinit();
return p.run(input) catch {
- if (p.unused_char) |c| {
+ if (p.unread_char) |c| {
std.debug.panic(
- "Parse error: {s}, unused_char: 0x{x}\n",
+ "Parse error: {s}, unread_char: 0x{x}\n",
.{ p.err_msg, c },
);
} else {