summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-03-28 12:19:54 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-03-28 12:57:15 +0100
commit00fd32b6c0d35140bdc160aa759bbac52242d7d0 (patch)
tree8c110df628c7b0e7675beb3f8e55a26c0efa451c
parentf2b18d64448ab09dd5e5e6a180d38d90d5aaf367 (diff)
blah
-rw-r--r--_tests/test.zig2
-rw-r--r--spec/parser.ebnf2
-rw-r--r--src/libzisp.zig194
-rw-r--r--src/libzisp/io/parser.zig147
4 files changed, 193 insertions, 152 deletions
diff --git a/_tests/test.zig b/_tests/test.zig
index 7b4a04c..5acb628 100644
--- a/_tests/test.zig
+++ b/_tests/test.zig
@@ -5,7 +5,7 @@ pub fn main() void {
// const x: struct { u8, u64, u8 } = y;
// @import("std").debug.print("{}\n", .{x[0] + x[1] + x[2]});
- std.debug.print("{}\n", .{@sizeOf(struct { a: u8, b: u64, c: u8, d: bool })});
+ std.debug.print("{}\n", .{@sizeOf(struct { u64, ?u8 })});
}
// const x: ?u8 = 5;
diff --git a/spec/parser.ebnf b/spec/parser.ebnf
index 9e02fba..44b1967 100644
--- a/spec/parser.ebnf
+++ b/spec/parser.ebnf
@@ -12,7 +12,7 @@ comment : ';' ( skip_unit | skip_line ) ;
skip_unit : '~' unit ;
-skip_line : ( ~LF )* LF? ;
+skip_line : ( ~10 )* 10? ;
one_datum : ( bare_str | clad_datum ) ;
diff --git a/src/libzisp.zig b/src/libzisp.zig
index e6c8ac5..df8422b 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -316,86 +316,114 @@ test "parse2" {
try std.testing.expectEqualStrings("foo", f.slice());
}
-// test "parse3" {
-// const val = parseString(
-// \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat)
-// );
-
-// const car = value.pair.car;
-// const cdr = value.pair.cdr;
-
-// const e1 = car(val);
-// const e2 = car(cdr(val));
-// const e3 = car(cdr(cdr(val)));
-// const e4 = car(cdr(cdr(cdr(val))));
-
-// try std.testing.expect(value.sstr.check(e1));
-// try std.testing.expect(value.rune.check(e2));
-// try std.testing.expect(value.pair.check(e3));
-// try std.testing.expect(value.pair.check(e4));
-// }
-
-// test "parse4" {
-// const val = parseString("(foo . ;~x bar ;~y)");
-
-// const s = value.sstr.unpack(value.pair.car(val));
-// try std.testing.expectEqualStrings("foo", s.slice());
-
-// const f = value.sstr.unpack(value.pair.cdr(val));
-// try std.testing.expectEqualStrings("bar", f.slice());
-// }
-
-// fn parseBench(path: []const u8, iters: usize) !void {
-// const file = try std.fs.cwd().openFile(path, .{});
-// defer file.close();
-
-// var timer = try std.time.Timer.start();
-// for (0..iters) |i| {
-// _ = i;
-// var br = std.io.bufferedReader(file.reader());
-// const reader = br.reader().any();
-// var v: Value = undefined;
-// while (true) {
-// v = io.parser.parse(reader);
-// if (value.eof.check(v)) {
-// break;
-// }
-// }
-// try file.seekTo(0);
-// }
-// const ns: f64 = @floatFromInt(timer.lap());
-// const secs = ns / 1_000_000_000;
-// std.debug.print(
-// "parse {s} x {}: {d:.3}s\n",
-// .{ path, iters, secs },
-// );
-// }
-
-// test "parse bench" {
-// // try parseBench("test-data/parser-test-1.scm", 200);
-// // try parseBench("test-data/parser-test-2.scm", 800);
-// try parseBench("test-data/parser-torture.scm", 1);
-// }
-
-// test "unparse" {
-// var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
-// var out: std.ArrayList(u8) = .init(gpa.allocator());
-
-// const w = out.writer();
-// const v = parseString("#foo");
-// try io.unparser.unparse(w, v);
-// try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice());
-// }
-
-// test "unparse2" {
-// var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
-// var out: std.ArrayList(u8) = .init(gpa.allocator());
-
-// const w = out.writer();
-// const v = parseString("#{foo bar['x]}");
-// try io.unparser.unparse(w, v);
-// try std.testing.expectEqualStrings(
-// "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))",
-// try out.toOwnedSlice(),
-// );
-// }
+test "parse3" {
+ const val = parseString(
+ \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat)
+ );
+
+ const car = value.pair.car;
+ const cdr = value.pair.cdr;
+
+ const e1 = car(val);
+ const e2 = car(cdr(val));
+ const e3 = car(cdr(cdr(val)));
+ const e4 = car(cdr(cdr(cdr(val))));
+
+ try std.testing.expect(value.sstr.check(e1));
+ try std.testing.expect(value.rune.check(e2));
+ try std.testing.expect(value.pair.check(e3));
+ try std.testing.expect(value.pair.check(e4));
+}
+
+test "parse4" {
+ const val = parseString("(foo . ;~x bar ;~y)");
+
+ const s = value.sstr.unpack(value.pair.car(val));
+ try std.testing.expectEqualStrings("foo", s.slice());
+
+ const f = value.sstr.unpack(value.pair.cdr(val));
+ try std.testing.expectEqualStrings("bar", f.slice());
+}
+
+fn parseBench(path: []const u8, iters: usize) !void {
+ const file = try std.fs.cwd().openFile(path, .{});
+ defer file.close();
+
+ var timer = try std.time.Timer.start();
+ for (0..iters) |i| {
+ _ = i;
+ var br = std.io.bufferedReader(file.reader());
+ const reader = br.reader().any();
+ var v: Value = undefined;
+ while (true) {
+ v = io.parser.parse(reader);
+ if (value.eof.check(v)) {
+ break;
+ }
+ }
+ try file.seekTo(0);
+ }
+ const ns: f64 = @floatFromInt(timer.lap());
+ const secs = ns / 1_000_000_000;
+ std.debug.print(
+ "parse {s} x {}: {d:.3}s\n",
+ .{ path, iters, secs },
+ );
+}
+
+test "parse bench" {
+ try parseBench("test-data/parser-test-1.scm", 1000);
+ try parseBench("test-data/parser-test-2.scm", 1000);
+ // try parseBench("test-data/parser-torture.scm", 1);
+}
+
+test "unparse" {
+ var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+ var out: std.ArrayList(u8) = .init(gpa.allocator());
+
+ const w = out.writer();
+ const v = parseString("#foo");
+ try io.unparser.unparse(w, v);
+ try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice());
+}
+
+test "unparse2" {
+ var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+ var out: std.ArrayList(u8) = .init(gpa.allocator());
+
+ const w = out.writer();
+ const v = parseString("#{foo bar['x]}");
+ try io.unparser.unparse(w, v);
+ try std.testing.expectEqualStrings(
+ "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))",
+ try out.toOwnedSlice(),
+ );
+}
+
+test "unparse3" {
+ const w = std.io.getStdErr().writer();
+ const v = parseString("#{foo bar['x](y)(z)}");
+ try io.unparser.unparse(w, v);
+ try w.writeByte('\n');
+}
+
+test "unparse4" {
+ const w = std.io.getStdErr().writer();
+ const v = parseString("(foo ;~bar)");
+ try io.unparser.unparse(w, v);
+ try w.writeByte('\n');
+}
+
+test "unparse5" {
+ const w = std.io.getStdErr().writer();
+ const v = parseString("(;~foo foo ;~bar . ;~bar bar ;~bar)");
+ try io.unparser.unparse(w, v);
+ try w.writeByte('\n');
+}
+
+test "unparse6" {
+ const w = std.io.getStdErr().writer();
+ const v = parseString("(foo .bar ... baz. bat.(qux))");
+ try io.unparser.unparse(w, v);
+ try w.writeByte('\n');
+}
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 651d124..643f7e8 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -257,11 +257,11 @@ const cons = value.pair.cons;
const is_test = builtin.is_test;
const is_debug = builtin.mode == .Debug;
-const detailed_debug = true;
+const detailed_debug = false;
// In debug, we want to see if we leak, so very small numbers.
-const init_stack_capacity = if (is_debug) 20 else 32;
-const init_chars_capacity = if (is_debug) 100 else 512;
+const init_stack_capacity = if (is_debug) 32 else 32;
+const init_chars_capacity = if (is_debug) 512 else 512;
// zig fmt: off
const DOT = value.rune.pack("DOT");
@@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE");
const BRACE = value.rune.pack("BRACE");
// zig fmt: on
+const S_DOT = value.sstr.pack(".");
+
const Context = struct {
// What to do next.
next: Fn = .parse_unit,
@@ -393,12 +395,9 @@ const State = struct {
value.istr.intern(s.chars.items, true);
}
- fn getRune(s: *State) !Value {
+ fn getRune(s: *State) Value {
defer s.chars.clearRetainingCapacity();
- return if (s.chars.items.len <= 6)
- value.rune.pack(s.chars.items)
- else
- error.RuneTooLong;
+ return value.rune.pack(s.chars.items);
}
fn push(s: *State, next: Fn) !void {
@@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value {
var s = State.init(input, stack_alloc, chars_alloc) catch @panic("");
defer s.deinit();
- while (s.context.next != .done) callNext(&s) catch |e| switch (e) {
- else => @panic(s.err_msg), // TODO
+ while (s.context.next != .done) callNext(&s) catch {
+ if (s.unused_char) |c| {
+ std.debug.panic(
+ "Parse error: {} at: {s}, char: {c}\n",
+ .{ s.err_code, s.err_msg, c },
+ );
+ } else {
+ std.debug.panic(
+ "Parse error: {} at: {s}\n",
+ .{ s.err_code, s.err_msg },
+ );
+ }
};
- if (s.unused_char) |_| {
- @panic("invalid character");
+ if (s.unused_char) |c| {
+ std.debug.panic("Invalid character: {c}\n", .{c});
}
return s.result;
}
@@ -489,7 +498,6 @@ const Fn = enum {
end_label_datum,
parse_list_element,
continue_list,
- parse_list_tail,
end_improper_list,
close_improper_list,
end_quote_expr,
@@ -498,12 +506,24 @@ const Fn = enum {
fn callNext(s: *State) !void {
if (detailed_debug) {
- std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
- s.stack.items.len,
+ const stack = s.stack.items;
+ std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
+ stack.len,
s.context.next,
s.context.char,
s.unused_char orelse '_',
});
+ if (stack.len > 0) {
+ var i = stack.len;
+ while (i > 0) : (i -= 1) {
+ const prev = stack[i - 1];
+ std.debug.print("{}:{} ctx:'{c}'\n", .{
+ i - 1,
+ prev.next,
+ prev.char,
+ });
+ }
+ }
}
try switch (s.context.next) {
.parse_unit => parseUnit(s),
@@ -519,9 +539,8 @@ fn callNext(s: *State) !void {
.end_label_datum => endLabelDatum(s),
.parse_list_element => parseListElement(s),
.continue_list => continueList(s),
- .parse_list_tail => parseListTail(s),
.end_improper_list => endImproperList(s),
- .close_improper_list => endImproperList(s),
+ .close_improper_list => closeImproperList(s),
.end_quote_expr => endQuoteExpr(s),
.done => unreachable,
};
@@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void {
while (c1) |c| : (c1 = try s.read()) {
switch (try checkBlank(s, c)) {
.yes => {},
- .skip_unit => {
- // Simply push another parse_unit onto the stack, which will
- // ignore the result of the current one and start anew; then
- // keep looping to read the datum that will be ignored.
- try s.push(.parse_unit);
- },
+ .skip_unit => try s.push(.parse_unit),
.skip_line => try s.skipLine(),
.no => return parseDatum(s, c),
}
@@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void {
}
fn endOneDatum(s: *State) !void {
- const d = s.result;
- if (d.eq(value.undef)) {
- return s.retval(d);
+ if (s.result.eq(value.undef)) {
+ return s.retval(value.undef);
}
+ const d = s.result;
const c1 = s.getUnused() orelse try s.read();
if (c1) |c| {
switch (try checkBlank(s, c)) {
@@ -590,10 +604,16 @@ fn returnContext(s: *State) !void {
fn parseJoin(s: *State, d: Value, c: u8) !void {
s.context.val = d;
s.context.char = c;
- s.unused_char = switch (c) {
- '.', ':', '|' => try s.readNoEof("start of joined datum"),
- else => c,
- };
+ switch (c) {
+ '.', ':', '|' => {
+ s.context.char = c;
+ s.unused_char = try s.readNoEof("join datum");
+ },
+ else => {
+ s.context.char = 0;
+ s.unused_char = c;
+ },
+ }
return s.subr(.parse_join_datum, .join_data);
}
@@ -610,16 +630,21 @@ fn joinData(s: *State) !void {
const join = s.context.char;
const tail = s.result;
if (tail.eq(value.undef)) {
- return s.retval(head);
+ if (join == 0) {
+ return s.retval(head);
+ } else {
+ return s.err(error.InvalidCharacter, "join datum");
+ }
}
const rune = switch (join) {
+ 0 => JOIN,
'.' => DOT,
':' => COLON,
'|' => PIPE,
- else => JOIN,
+ else => unreachable,
};
- const result = cons(rune, cons(head, tail));
- return s.jump(.end_one_datum, result);
+ const data = cons(head, tail);
+ return s.jump(.end_one_datum, cons(rune, data));
}
fn parseOneDatum(s: *State, c: u8, next: Fn) !void {
@@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool {
'a'...'z' , 'A'...'Z' , '0'...'9',
'!' , '$' , '%' , '&' , '*' , '+',
'-' , '/' , '<' , '=' , '>' , '?',
- '@' , '^' , '_' , '~' => true,
+ '@' , '^' , '_' , '~' , '.' => true,
// zig fmt: on
else => false,
};
@@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } {
var len: usize = 1;
while (try s.read()) |c| : (len += 1) {
if (len == 6 or !std.ascii.isAlphanumeric(c)) {
- return .{ try s.getRune(), c };
+ return .{ s.getRune(), c };
}
try s.addChar(c);
}
- return .{ try s.getRune(), null };
+ return .{ s.getRune(), null };
}
fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void {
@@ -933,22 +958,20 @@ fn continueList(s: *State) !void {
if (c == close) {
return endList(s);
}
- if (c == '.') {
- return s.jump(.parse_list_tail, null);
- }
return s.err(error.InvalidCharacter, "list");
}
+ if (s.result.eq(S_DOT)) {
+ return s.subr(.parse_unit, .end_improper_list);
+ }
+
s.context.val = cons(s.result, s.context.val);
- var c1 = s.unused_char orelse try s.read();
+ var c1 = s.getUnused() orelse try s.read();
while (c1) |c| : (c1 = try s.read()) {
if (c == close) {
return endList(s);
}
- if (c == '.') {
- return s.jump(.parse_list_tail, null);
- }
switch (try checkBlank(s, c)) {
.yes => {},
.skip_unit => {
@@ -958,7 +981,7 @@ fn continueList(s: *State) !void {
.skip_line => try s.skipLine(),
.no => {
s.unused_char = c;
- return s.jump(.parse_list_element, null);
+ return s.subr(.parse_list_element, .continue_list);
},
}
}
@@ -969,19 +992,6 @@ fn endList(s: *State) !void {
return s.retval(lib.list.reverse(s.context.val));
}
-fn parseListTail(s: *State) !void {
- const c = try s.readNoEof("list tail");
- try s.pushContext(.end_improper_list);
- switch (try checkBlank(s, c)) {
- .yes => {},
- .skip_unit => return s.subr(.parse_unit, .parse_unit),
- .skip_line => try s.skipLine(),
- // One blank mandatory here.
- .no => return s.err(error.InvalidCharacter, "list tail"),
- }
- return s.jump(.parse_unit, null);
-}
-
fn endImproperList(s: *State) !void {
const tail = s.result;
if (tail.eq(value.undef)) {
@@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void {
}
fn closeImproperList(s: *State) !void {
+ const result = s.context.val;
const close = s.context.char;
var c1 = s.getUnused() orelse try s.read();
- while (c1) |c| : (c1 = try s.read()) {
+ while (c1) |c| : (c1 = try s.readNoEof("after list tail")) {
+ if (c == close) {
+ return s.retval(result);
+ }
switch (try checkBlank(s, c)) {
.yes => {},
.skip_unit => return s.subr(.parse_unit, .close_improper_list),
.skip_line => try s.skipLine(),
- .no => {
- if (c == close) {
- return s.retval(s.context.val);
- }
- return s.err(error.InvalidCharacter, "after list tail");
- },
+ .no => return s.err(error.InvalidCharacter, "after list tail"),
}
}
- return s.err(error.UnexpectedEof, "after list tail");
+ unreachable;
}
fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
@@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
}
s.context.val = q;
- return s.subr(.parse_unit, .end_quote_expr);
+ s.unused_char = c;
+ return s.subr(.parse_list_element, .end_quote_expr);
}
fn endQuoteExpr(s: *State) !void {
+ if (s.result.eq(value.undef)) {
+ return s.err(error.InvalidCharacter, "quote expression datum");
+ }
const q = s.context.val;
const d = s.result;
return s.retval(cons(q, d));