diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 12:19:54 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 12:57:15 +0100 |
| commit | 00fd32b6c0d35140bdc160aa759bbac52242d7d0 (patch) | |
| tree | 8c110df628c7b0e7675beb3f8e55a26c0efa451c | |
| parent | f2b18d64448ab09dd5e5e6a180d38d90d5aaf367 (diff) | |
blah
| -rw-r--r-- | _tests/test.zig | 2 | ||||
| -rw-r--r-- | spec/parser.ebnf | 2 | ||||
| -rw-r--r-- | src/libzisp.zig | 194 | ||||
| -rw-r--r-- | src/libzisp/io/parser.zig | 147 |
4 files changed, 193 insertions, 152 deletions
diff --git a/_tests/test.zig b/_tests/test.zig index 7b4a04c..5acb628 100644 --- a/_tests/test.zig +++ b/_tests/test.zig @@ -5,7 +5,7 @@ pub fn main() void { // const x: struct { u8, u64, u8 } = y; // @import("std").debug.print("{}\n", .{x[0] + x[1] + x[2]}); - std.debug.print("{}\n", .{@sizeOf(struct { a: u8, b: u64, c: u8, d: bool })}); + std.debug.print("{}\n", .{@sizeOf(struct { u64, ?u8 })}); } // const x: ?u8 = 5; diff --git a/spec/parser.ebnf b/spec/parser.ebnf index 9e02fba..44b1967 100644 --- a/spec/parser.ebnf +++ b/spec/parser.ebnf @@ -12,7 +12,7 @@ comment : ';' ( skip_unit | skip_line ) ; skip_unit : '~' unit ; -skip_line : ( ~LF )* LF? ; +skip_line : ( ~10 )* 10? ; one_datum : ( bare_str | clad_datum ) ; diff --git a/src/libzisp.zig b/src/libzisp.zig index e6c8ac5..df8422b 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -316,86 +316,114 @@ test "parse2" { try std.testing.expectEqualStrings("foo", f.slice()); } -// test "parse3" { -// const val = parseString( -// \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) -// ); - -// const car = value.pair.car; -// const cdr = value.pair.cdr; - -// const e1 = car(val); -// const e2 = car(cdr(val)); -// const e3 = car(cdr(cdr(val))); -// const e4 = car(cdr(cdr(cdr(val)))); - -// try std.testing.expect(value.sstr.check(e1)); -// try std.testing.expect(value.rune.check(e2)); -// try std.testing.expect(value.pair.check(e3)); -// try std.testing.expect(value.pair.check(e4)); -// } - -// test "parse4" { -// const val = parseString("(foo . ;~x bar ;~y)"); - -// const s = value.sstr.unpack(value.pair.car(val)); -// try std.testing.expectEqualStrings("foo", s.slice()); - -// const f = value.sstr.unpack(value.pair.cdr(val)); -// try std.testing.expectEqualStrings("bar", f.slice()); -// } - -// fn parseBench(path: []const u8, iters: usize) !void { -// const file = try std.fs.cwd().openFile(path, .{}); -// defer file.close(); - -// var timer = try std.time.Timer.start(); -// for (0..iters) |i| { -// _ = i; -// var br = std.io.bufferedReader(file.reader()); -// const reader = br.reader().any(); -// var v: Value = undefined; -// while (true) { -// v = io.parser.parse(reader); -// if (value.eof.check(v)) { -// break; -// } -// } -// try file.seekTo(0); -// } -// const ns: f64 = @floatFromInt(timer.lap()); -// const secs = ns / 1_000_000_000; -// std.debug.print( -// "parse {s} x {}: {d:.3}s\n", -// .{ path, iters, secs }, -// ); -// } - -// test "parse bench" { -// // try parseBench("test-data/parser-test-1.scm", 200); -// // try parseBench("test-data/parser-test-2.scm", 800); -// try parseBench("test-data/parser-torture.scm", 1); -// } - -// test "unparse" { -// var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; -// var out: std.ArrayList(u8) = .init(gpa.allocator()); - -// const w = out.writer(); -// const v = parseString("#foo"); -// try io.unparser.unparse(w, v); -// try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); -// } - -// test "unparse2" { -// var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; -// var out: std.ArrayList(u8) = .init(gpa.allocator()); - -// const w = out.writer(); -// const v = parseString("#{foo bar['x]}"); -// try io.unparser.unparse(w, v); -// try std.testing.expectEqualStrings( -// "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", -// try out.toOwnedSlice(), -// ); -// } +test "parse3" { + const val = parseString( + \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) + ); + + const car = value.pair.car; + const cdr = value.pair.cdr; + + const e1 = car(val); + const e2 = car(cdr(val)); + const e3 = car(cdr(cdr(val))); + const e4 = car(cdr(cdr(cdr(val)))); + + try std.testing.expect(value.sstr.check(e1)); + try std.testing.expect(value.rune.check(e2)); + try std.testing.expect(value.pair.check(e3)); + try std.testing.expect(value.pair.check(e4)); +} + +test "parse4" { + const val = parseString("(foo . ;~x bar ;~y)"); + + const s = value.sstr.unpack(value.pair.car(val)); + try std.testing.expectEqualStrings("foo", s.slice()); + + const f = value.sstr.unpack(value.pair.cdr(val)); + try std.testing.expectEqualStrings("bar", f.slice()); +} + +fn parseBench(path: []const u8, iters: usize) !void { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + + var timer = try std.time.Timer.start(); + for (0..iters) |i| { + _ = i; + var br = std.io.bufferedReader(file.reader()); + const reader = br.reader().any(); + var v: Value = undefined; + while (true) { + v = io.parser.parse(reader); + if (value.eof.check(v)) { + break; + } + } + try file.seekTo(0); + } + const ns: f64 = @floatFromInt(timer.lap()); + const secs = ns / 1_000_000_000; + std.debug.print( + "parse {s} x {}: {d:.3}s\n", + .{ path, iters, secs }, + ); +} + +test "parse bench" { + try parseBench("test-data/parser-test-1.scm", 1000); + try parseBench("test-data/parser-test-2.scm", 1000); + // try parseBench("test-data/parser-torture.scm", 1); +} + +test "unparse" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#foo"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); +} + +test "unparse2" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#{foo bar['x]}"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings( + "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", + try out.toOwnedSlice(), + ); +} + +test "unparse3" { + const w = std.io.getStdErr().writer(); + const v = parseString("#{foo bar['x](y)(z)}"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse4" { + const w = std.io.getStdErr().writer(); + const v = parseString("(foo ;~bar)"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse5" { + const w = std.io.getStdErr().writer(); + const v = parseString("(;~foo foo ;~bar . ;~bar bar ;~bar)"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse6" { + const w = std.io.getStdErr().writer(); + const v = parseString("(foo .bar ... baz. bat.(qux))"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 651d124..643f7e8 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -257,11 +257,11 @@ const cons = value.pair.cons; const is_test = builtin.is_test; const is_debug = builtin.mode == .Debug; -const detailed_debug = true; +const detailed_debug = false; // In debug, we want to see if we leak, so very small numbers. -const init_stack_capacity = if (is_debug) 20 else 32; -const init_chars_capacity = if (is_debug) 100 else 512; +const init_stack_capacity = if (is_debug) 32 else 32; +const init_chars_capacity = if (is_debug) 512 else 512; // zig fmt: off const DOT = value.rune.pack("DOT"); @@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE"); const BRACE = value.rune.pack("BRACE"); // zig fmt: on +const S_DOT = value.sstr.pack("."); + const Context = struct { // What to do next. next: Fn = .parse_unit, @@ -393,12 +395,9 @@ const State = struct { value.istr.intern(s.chars.items, true); } - fn getRune(s: *State) !Value { + fn getRune(s: *State) Value { defer s.chars.clearRetainingCapacity(); - return if (s.chars.items.len <= 6) - value.rune.pack(s.chars.items) - else - error.RuneTooLong; + return value.rune.pack(s.chars.items); } fn push(s: *State, next: Fn) !void { @@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value { var s = State.init(input, stack_alloc, chars_alloc) catch @panic(""); defer s.deinit(); - while (s.context.next != .done) callNext(&s) catch |e| switch (e) { - else => @panic(s.err_msg), // TODO + while (s.context.next != .done) callNext(&s) catch { + if (s.unused_char) |c| { + std.debug.panic( + "Parse error: {} at: {s}, char: {c}\n", + .{ s.err_code, s.err_msg, c }, + ); + } else { + std.debug.panic( + "Parse error: {} at: {s}\n", + .{ s.err_code, s.err_msg }, + ); + } }; - if (s.unused_char) |_| { - @panic("invalid character"); + if (s.unused_char) |c| { + std.debug.panic("Invalid character: {c}\n", .{c}); } return s.result; } @@ -489,7 +498,6 @@ const Fn = enum { end_label_datum, parse_list_element, continue_list, - parse_list_tail, end_improper_list, close_improper_list, end_quote_expr, @@ -498,12 +506,24 @@ const Fn = enum { fn callNext(s: *State) !void { if (detailed_debug) { - std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ - s.stack.items.len, + const stack = s.stack.items; + std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ + stack.len, s.context.next, s.context.char, s.unused_char orelse '_', }); + if (stack.len > 0) { + var i = stack.len; + while (i > 0) : (i -= 1) { + const prev = stack[i - 1]; + std.debug.print("{}:{} ctx:'{c}'\n", .{ + i - 1, + prev.next, + prev.char, + }); + } + } } try switch (s.context.next) { .parse_unit => parseUnit(s), @@ -519,9 +539,8 @@ fn callNext(s: *State) !void { .end_label_datum => endLabelDatum(s), .parse_list_element => parseListElement(s), .continue_list => continueList(s), - .parse_list_tail => parseListTail(s), .end_improper_list => endImproperList(s), - .close_improper_list => endImproperList(s), + .close_improper_list => closeImproperList(s), .end_quote_expr => endQuoteExpr(s), .done => unreachable, }; @@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void { while (c1) |c| : (c1 = try s.read()) { switch (try checkBlank(s, c)) { .yes => {}, - .skip_unit => { - // Simply push another parse_unit onto the stack, which will - // ignore the result of the current one and start anew; then - // keep looping to read the datum that will be ignored. - try s.push(.parse_unit); - }, + .skip_unit => try s.push(.parse_unit), .skip_line => try s.skipLine(), .no => return parseDatum(s, c), } @@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void { } fn endOneDatum(s: *State) !void { - const d = s.result; - if (d.eq(value.undef)) { - return s.retval(d); + if (s.result.eq(value.undef)) { + return s.retval(value.undef); } + const d = s.result; const c1 = s.getUnused() orelse try s.read(); if (c1) |c| { switch (try checkBlank(s, c)) { @@ -590,10 +604,16 @@ fn returnContext(s: *State) !void { fn parseJoin(s: *State, d: Value, c: u8) !void { s.context.val = d; s.context.char = c; - s.unused_char = switch (c) { - '.', ':', '|' => try s.readNoEof("start of joined datum"), - else => c, - }; + switch (c) { + '.', ':', '|' => { + s.context.char = c; + s.unused_char = try s.readNoEof("join datum"); + }, + else => { + s.context.char = 0; + s.unused_char = c; + }, + } return s.subr(.parse_join_datum, .join_data); } @@ -610,16 +630,21 @@ fn joinData(s: *State) !void { const join = s.context.char; const tail = s.result; if (tail.eq(value.undef)) { - return s.retval(head); + if (join == 0) { + return s.retval(head); + } else { + return s.err(error.InvalidCharacter, "join datum"); + } } const rune = switch (join) { + 0 => JOIN, '.' => DOT, ':' => COLON, '|' => PIPE, - else => JOIN, + else => unreachable, }; - const result = cons(rune, cons(head, tail)); - return s.jump(.end_one_datum, result); + const data = cons(head, tail); + return s.jump(.end_one_datum, cons(rune, data)); } fn parseOneDatum(s: *State, c: u8, next: Fn) !void { @@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool { 'a'...'z' , 'A'...'Z' , '0'...'9', '!' , '$' , '%' , '&' , '*' , '+', '-' , '/' , '<' , '=' , '>' , '?', - '@' , '^' , '_' , '~' => true, + '@' , '^' , '_' , '~' , '.' => true, // zig fmt: on else => false, }; @@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } { var len: usize = 1; while (try s.read()) |c| : (len += 1) { if (len == 6 or !std.ascii.isAlphanumeric(c)) { - return .{ try s.getRune(), c }; + return .{ s.getRune(), c }; } try s.addChar(c); } - return .{ try s.getRune(), null }; + return .{ s.getRune(), null }; } fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void { @@ -933,22 +958,20 @@ fn continueList(s: *State) !void { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } return s.err(error.InvalidCharacter, "list"); } + if (s.result.eq(S_DOT)) { + return s.subr(.parse_unit, .end_improper_list); + } + s.context.val = cons(s.result, s.context.val); - var c1 = s.unused_char orelse try s.read(); + var c1 = s.getUnused() orelse try s.read(); while (c1) |c| : (c1 = try s.read()) { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => { @@ -958,7 +981,7 @@ fn continueList(s: *State) !void { .skip_line => try s.skipLine(), .no => { s.unused_char = c; - return s.jump(.parse_list_element, null); + return s.subr(.parse_list_element, .continue_list); }, } } @@ -969,19 +992,6 @@ fn endList(s: *State) !void { return s.retval(lib.list.reverse(s.context.val)); } -fn parseListTail(s: *State) !void { - const c = try s.readNoEof("list tail"); - try s.pushContext(.end_improper_list); - switch (try checkBlank(s, c)) { - .yes => {}, - .skip_unit => return s.subr(.parse_unit, .parse_unit), - .skip_line => try s.skipLine(), - // One blank mandatory here. - .no => return s.err(error.InvalidCharacter, "list tail"), - } - return s.jump(.parse_unit, null); -} - fn endImproperList(s: *State) !void { const tail = s.result; if (tail.eq(value.undef)) { @@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void { } fn closeImproperList(s: *State) !void { + const result = s.context.val; const close = s.context.char; var c1 = s.getUnused() orelse try s.read(); - while (c1) |c| : (c1 = try s.read()) { + while (c1) |c| : (c1 = try s.readNoEof("after list tail")) { + if (c == close) { + return s.retval(result); + } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => return s.subr(.parse_unit, .close_improper_list), .skip_line => try s.skipLine(), - .no => { - if (c == close) { - return s.retval(s.context.val); - } - return s.err(error.InvalidCharacter, "after list tail"); - }, + .no => return s.err(error.InvalidCharacter, "after list tail"), } } - return s.err(error.UnexpectedEof, "after list tail"); + unreachable; } fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { @@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { } s.context.val = q; - return s.subr(.parse_unit, .end_quote_expr); + s.unused_char = c; + return s.subr(.parse_list_element, .end_quote_expr); } fn endQuoteExpr(s: *State) !void { + if (s.result.eq(value.undef)) { + return s.err(error.InvalidCharacter, "quote expression datum"); + } const q = s.context.val; const d = s.result; return s.retval(cons(q, d)); |
