diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 12:19:54 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 12:57:15 +0100 |
| commit | 00fd32b6c0d35140bdc160aa759bbac52242d7d0 (patch) | |
| tree | 8c110df628c7b0e7675beb3f8e55a26c0efa451c /src/libzisp | |
| parent | f2b18d64448ab09dd5e5e6a180d38d90d5aaf367 (diff) | |
blah
Diffstat (limited to 'src/libzisp')
| -rw-r--r-- | src/libzisp/io/parser.zig | 147 |
1 files changed, 80 insertions, 67 deletions
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 651d124..643f7e8 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -257,11 +257,11 @@ const cons = value.pair.cons; const is_test = builtin.is_test; const is_debug = builtin.mode == .Debug; -const detailed_debug = true; +const detailed_debug = false; // In debug, we want to see if we leak, so very small numbers. -const init_stack_capacity = if (is_debug) 20 else 32; -const init_chars_capacity = if (is_debug) 100 else 512; +const init_stack_capacity = if (is_debug) 32 else 32; +const init_chars_capacity = if (is_debug) 512 else 512; // zig fmt: off const DOT = value.rune.pack("DOT"); @@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE"); const BRACE = value.rune.pack("BRACE"); // zig fmt: on +const S_DOT = value.sstr.pack("."); + const Context = struct { // What to do next. next: Fn = .parse_unit, @@ -393,12 +395,9 @@ const State = struct { value.istr.intern(s.chars.items, true); } - fn getRune(s: *State) !Value { + fn getRune(s: *State) Value { defer s.chars.clearRetainingCapacity(); - return if (s.chars.items.len <= 6) - value.rune.pack(s.chars.items) - else - error.RuneTooLong; + return value.rune.pack(s.chars.items); } fn push(s: *State, next: Fn) !void { @@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value { var s = State.init(input, stack_alloc, chars_alloc) catch @panic(""); defer s.deinit(); - while (s.context.next != .done) callNext(&s) catch |e| switch (e) { - else => @panic(s.err_msg), // TODO + while (s.context.next != .done) callNext(&s) catch { + if (s.unused_char) |c| { + std.debug.panic( + "Parse error: {} at: {s}, char: {c}\n", + .{ s.err_code, s.err_msg, c }, + ); + } else { + std.debug.panic( + "Parse error: {} at: {s}\n", + .{ s.err_code, s.err_msg }, + ); + } }; - if (s.unused_char) |_| { - @panic("invalid character"); + if (s.unused_char) |c| { + std.debug.panic("Invalid character: {c}\n", .{c}); } return s.result; } @@ -489,7 +498,6 @@ const Fn = enum { end_label_datum, parse_list_element, continue_list, - parse_list_tail, end_improper_list, close_improper_list, end_quote_expr, @@ -498,12 +506,24 @@ const Fn = enum { fn callNext(s: *State) !void { if (detailed_debug) { - std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ - s.stack.items.len, + const stack = s.stack.items; + std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ + stack.len, s.context.next, s.context.char, s.unused_char orelse '_', }); + if (stack.len > 0) { + var i = stack.len; + while (i > 0) : (i -= 1) { + const prev = stack[i - 1]; + std.debug.print("{}:{} ctx:'{c}'\n", .{ + i - 1, + prev.next, + prev.char, + }); + } + } } try switch (s.context.next) { .parse_unit => parseUnit(s), @@ -519,9 +539,8 @@ fn callNext(s: *State) !void { .end_label_datum => endLabelDatum(s), .parse_list_element => parseListElement(s), .continue_list => continueList(s), - .parse_list_tail => parseListTail(s), .end_improper_list => endImproperList(s), - .close_improper_list => endImproperList(s), + .close_improper_list => closeImproperList(s), .end_quote_expr => endQuoteExpr(s), .done => unreachable, }; @@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void { while (c1) |c| : (c1 = try s.read()) { switch (try checkBlank(s, c)) { .yes => {}, - .skip_unit => { - // Simply push another parse_unit onto the stack, which will - // ignore the result of the current one and start anew; then - // keep looping to read the datum that will be ignored. - try s.push(.parse_unit); - }, + .skip_unit => try s.push(.parse_unit), .skip_line => try s.skipLine(), .no => return parseDatum(s, c), } @@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void { } fn endOneDatum(s: *State) !void { - const d = s.result; - if (d.eq(value.undef)) { - return s.retval(d); + if (s.result.eq(value.undef)) { + return s.retval(value.undef); } + const d = s.result; const c1 = s.getUnused() orelse try s.read(); if (c1) |c| { switch (try checkBlank(s, c)) { @@ -590,10 +604,16 @@ fn returnContext(s: *State) !void { fn parseJoin(s: *State, d: Value, c: u8) !void { s.context.val = d; s.context.char = c; - s.unused_char = switch (c) { - '.', ':', '|' => try s.readNoEof("start of joined datum"), - else => c, - }; + switch (c) { + '.', ':', '|' => { + s.context.char = c; + s.unused_char = try s.readNoEof("join datum"); + }, + else => { + s.context.char = 0; + s.unused_char = c; + }, + } return s.subr(.parse_join_datum, .join_data); } @@ -610,16 +630,21 @@ fn joinData(s: *State) !void { const join = s.context.char; const tail = s.result; if (tail.eq(value.undef)) { - return s.retval(head); + if (join == 0) { + return s.retval(head); + } else { + return s.err(error.InvalidCharacter, "join datum"); + } } const rune = switch (join) { + 0 => JOIN, '.' => DOT, ':' => COLON, '|' => PIPE, - else => JOIN, + else => unreachable, }; - const result = cons(rune, cons(head, tail)); - return s.jump(.end_one_datum, result); + const data = cons(head, tail); + return s.jump(.end_one_datum, cons(rune, data)); } fn parseOneDatum(s: *State, c: u8, next: Fn) !void { @@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool { 'a'...'z' , 'A'...'Z' , '0'...'9', '!' , '$' , '%' , '&' , '*' , '+', '-' , '/' , '<' , '=' , '>' , '?', - '@' , '^' , '_' , '~' => true, + '@' , '^' , '_' , '~' , '.' => true, // zig fmt: on else => false, }; @@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } { var len: usize = 1; while (try s.read()) |c| : (len += 1) { if (len == 6 or !std.ascii.isAlphanumeric(c)) { - return .{ try s.getRune(), c }; + return .{ s.getRune(), c }; } try s.addChar(c); } - return .{ try s.getRune(), null }; + return .{ s.getRune(), null }; } fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void { @@ -933,22 +958,20 @@ fn continueList(s: *State) !void { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } return s.err(error.InvalidCharacter, "list"); } + if (s.result.eq(S_DOT)) { + return s.subr(.parse_unit, .end_improper_list); + } + s.context.val = cons(s.result, s.context.val); - var c1 = s.unused_char orelse try s.read(); + var c1 = s.getUnused() orelse try s.read(); while (c1) |c| : (c1 = try s.read()) { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => { @@ -958,7 +981,7 @@ fn continueList(s: *State) !void { .skip_line => try s.skipLine(), .no => { s.unused_char = c; - return s.jump(.parse_list_element, null); + return s.subr(.parse_list_element, .continue_list); }, } } @@ -969,19 +992,6 @@ fn endList(s: *State) !void { return s.retval(lib.list.reverse(s.context.val)); } -fn parseListTail(s: *State) !void { - const c = try s.readNoEof("list tail"); - try s.pushContext(.end_improper_list); - switch (try checkBlank(s, c)) { - .yes => {}, - .skip_unit => return s.subr(.parse_unit, .parse_unit), - .skip_line => try s.skipLine(), - // One blank mandatory here. - .no => return s.err(error.InvalidCharacter, "list tail"), - } - return s.jump(.parse_unit, null); -} - fn endImproperList(s: *State) !void { const tail = s.result; if (tail.eq(value.undef)) { @@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void { } fn closeImproperList(s: *State) !void { + const result = s.context.val; const close = s.context.char; var c1 = s.getUnused() orelse try s.read(); - while (c1) |c| : (c1 = try s.read()) { + while (c1) |c| : (c1 = try s.readNoEof("after list tail")) { + if (c == close) { + return s.retval(result); + } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => return s.subr(.parse_unit, .close_improper_list), .skip_line => try s.skipLine(), - .no => { - if (c == close) { - return s.retval(s.context.val); - } - return s.err(error.InvalidCharacter, "after list tail"); - }, + .no => return s.err(error.InvalidCharacter, "after list tail"), } } - return s.err(error.UnexpectedEof, "after list tail"); + unreachable; } fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { @@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { } s.context.val = q; - return s.subr(.parse_unit, .end_quote_expr); + s.unused_char = c; + return s.subr(.parse_list_element, .end_quote_expr); } fn endQuoteExpr(s: *State) !void { + if (s.result.eq(value.undef)) { + return s.err(error.InvalidCharacter, "quote expression datum"); + } const q = s.context.val; const d = s.result; return s.retval(cons(q, d)); |
