1 files changed, 80 insertions, 67 deletions
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 651d124..643f7e8 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -257,11 +257,11 @@ const cons = value.pair.cons;
 const is_test = builtin.is_test;
 const is_debug = builtin.mode == .Debug;
 
-const detailed_debug = true;
+const detailed_debug = false;
 
 // In debug, we want to see if we leak, so very small numbers.
-const init_stack_capacity = if (is_debug) 20 else 32;
-const init_chars_capacity = if (is_debug) 100 else 512;
+const init_stack_capacity = if (is_debug) 32 else 32;
+const init_chars_capacity = if (is_debug) 512 else 512;
 
 // zig fmt: off
 const DOT    = value.rune.pack("DOT");
@@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE");
 const BRACE  = value.rune.pack("BRACE");
 // zig fmt: on
 
+const S_DOT = value.sstr.pack(".");
+
 const Context = struct {
     // What to do next.
     next: Fn = .parse_unit,
@@ -393,12 +395,9 @@ const State = struct {
             value.istr.intern(s.chars.items, true);
     }
 
-    fn getRune(s: *State) !Value {
+    fn getRune(s: *State) Value {
         defer s.chars.clearRetainingCapacity();
-        return if (s.chars.items.len <= 6)
-            value.rune.pack(s.chars.items)
-        else
-            error.RuneTooLong;
+        return value.rune.pack(s.chars.items); // presumably <= 6 chars, see parseRune
     }
 
     fn push(s: *State, next: Fn) !void {
@@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value {
     var s = State.init(input, stack_alloc, chars_alloc) catch @panic("");
     defer s.deinit();
 
-    while (s.context.next != .done) callNext(&s) catch |e| switch (e) {
-        else => @panic(s.err_msg), // TODO
+    while (s.context.next != .done) callNext(&s) catch {
+        if (s.unused_char) |c| {
+            std.debug.panic(
+                "Parse error: {} at: {s}, char: {c}\n",
+                .{ s.err_code, s.err_msg, c },
+            );
+        } else {
+            std.debug.panic(
+                "Parse error: {} at: {s}\n",
+                .{ s.err_code, s.err_msg },
+            );
+        }
     };
-    if (s.unused_char) |_| {
-        @panic("invalid character");
+    if (s.unused_char) |c| {
+        std.debug.panic("Invalid character: {c}\n", .{c});
     }
     return s.result;
 }
@@ -489,7 +498,6 @@ const Fn = enum {
     end_label_datum,
     parse_list_element,
     continue_list,
-    parse_list_tail,
     end_improper_list,
     close_improper_list,
     end_quote_expr,
@@ -498,12 +506,24 @@ const Fn = enum {
 
 fn callNext(s: *State) !void {
     if (detailed_debug) {
-        std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
-            s.stack.items.len,
+        const stack = s.stack.items;
+        std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
+            stack.len,
             s.context.next,
             s.context.char,
             s.unused_char orelse '_',
         });
+        if (stack.len > 0) {
+            var i = stack.len;
+            while (i > 0) : (i -= 1) {
+                const prev = stack[i - 1];
+                std.debug.print("{}:{} ctx:'{c}'\n", .{
+                    i - 1,
+                    prev.next,
+                    prev.char,
+                });
+            }
+        }
     }
     try switch (s.context.next) {
         .parse_unit => parseUnit(s),
@@ -519,9 +539,8 @@ fn callNext(s: *State) !void {
         .end_label_datum => endLabelDatum(s),
         .parse_list_element => parseListElement(s),
         .continue_list => continueList(s),
-        .parse_list_tail => parseListTail(s),
         .end_improper_list => endImproperList(s),
-        .close_improper_list => endImproperList(s),
+        .close_improper_list => closeImproperList(s),
         .end_quote_expr => endQuoteExpr(s),
         .done => unreachable,
     };
@@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void {
     while (c1) |c| : (c1 = try s.read()) {
         switch (try checkBlank(s, c)) {
             .yes => {},
-            .skip_unit => {
-                // Simply push another parse_unit onto the stack, which will
-                // ignore the result of the current one and start anew; then
-                // keep looping to read the datum that will be ignored.
-                try s.push(.parse_unit);
-            },
+            .skip_unit => try s.push(.parse_unit),
             .skip_line => try s.skipLine(),
             .no => return parseDatum(s, c),
         }
@@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void {
 }
 
 fn endOneDatum(s: *State) !void {
-    const d = s.result;
-    if (d.eq(value.undef)) {
-        return s.retval(d);
+    if (s.result.eq(value.undef)) {
+        return s.retval(value.undef);
     }
+    const d = s.result;
     const c1 = s.getUnused() orelse try s.read();
     if (c1) |c| {
         switch (try checkBlank(s, c)) {
@@ -590,10 +604,16 @@ fn returnContext(s: *State) !void {
 fn parseJoin(s: *State, d: Value, c: u8) !void {
     s.context.val = d;
     s.context.char = c;
-    s.unused_char = switch (c) {
-        '.', ':', '|' => try s.readNoEof("start of joined datum"),
-        else => c,
-    };
+    switch (c) {
+        '.', ':', '|' => {
+            s.context.char = c;
+            s.unused_char = try s.readNoEof("join datum");
+        },
+        else => {
+            s.context.char = 0;
+            s.unused_char = c;
+        },
+    }
     return s.subr(.parse_join_datum, .join_data);
 }
 
@@ -610,16 +630,21 @@ fn joinData(s: *State) !void {
     const join = s.context.char;
     const tail = s.result;
     if (tail.eq(value.undef)) {
-        return s.retval(head);
+        if (join == 0) {
+            return s.retval(head);
+        } else {
+            return s.err(error.InvalidCharacter, "join datum");
+        }
     }
     const rune = switch (join) {
+        0 => JOIN,
         '.' => DOT,
         ':' => COLON,
         '|' => PIPE,
-        else => JOIN,
+        else => unreachable,
     };
-    const result = cons(rune, cons(head, tail));
-    return s.jump(.end_one_datum, result);
+    const data = cons(head, tail);
+    return s.jump(.end_one_datum, cons(rune, data));
 }
 
 fn parseOneDatum(s: *State, c: u8, next: Fn) !void {
@@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool {
         'a'...'z' , 'A'...'Z' , '0'...'9',
         '!' , '$' , '%' , '&' , '*' , '+',
         '-' , '/' , '<' , '=' , '>' , '?',
-        '@' , '^' , '_' , '~' => true,
+        '@' , '^' , '_' , '~' , '.' => true,
         // zig fmt: on
         else => false,
     };
@@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } {
     var len: usize = 1;
     while (try s.read()) |c| : (len += 1) {
         if (len == 6 or !std.ascii.isAlphanumeric(c)) {
-            return .{ try s.getRune(), c };
+            return .{ s.getRune(), c };
         }
         try s.addChar(c);
     }
-    return .{ try s.getRune(), null };
+    return .{ s.getRune(), null };
 }
 
 fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void {
@@ -933,22 +958,20 @@ fn continueList(s: *State) !void {
         if (c == close) {
             return endList(s);
         }
-        if (c == '.') {
-            return s.jump(.parse_list_tail, null);
-        }
         return s.err(error.InvalidCharacter, "list");
     }
 
+    if (s.result.eq(S_DOT)) {
+        return s.subr(.parse_unit, .end_improper_list);
+    }
+
     s.context.val = cons(s.result, s.context.val);
 
-    var c1 = s.unused_char orelse try s.read();
+    var c1 = s.getUnused() orelse try s.read();
     while (c1) |c| : (c1 = try s.read()) {
         if (c == close) {
             return endList(s);
         }
-        if (c == '.') {
-            return s.jump(.parse_list_tail, null);
-        }
         switch (try checkBlank(s, c)) {
             .yes => {},
             .skip_unit => {
@@ -958,7 +981,7 @@ fn continueList(s: *State) !void {
             .skip_line => try s.skipLine(),
             .no => {
                 s.unused_char = c;
-                return s.jump(.parse_list_element, null);
+                return s.subr(.parse_list_element, .continue_list);
             },
         }
     }
@@ -969,19 +992,6 @@ fn endList(s: *State) !void {
     return s.retval(lib.list.reverse(s.context.val));
 }
 
-fn parseListTail(s: *State) !void {
-    const c = try s.readNoEof("list tail");
-    try s.pushContext(.end_improper_list);
-    switch (try checkBlank(s, c)) {
-        .yes => {},
-        .skip_unit => return s.subr(.parse_unit, .parse_unit),
-        .skip_line => try s.skipLine(),
-        // One blank mandatory here.
-        .no => return s.err(error.InvalidCharacter, "list tail"),
-    }
-    return s.jump(.parse_unit, null);
-}
-
 fn endImproperList(s: *State) !void {
     const tail = s.result;
     if (tail.eq(value.undef)) {
@@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void {
 }
 
 fn closeImproperList(s: *State) !void {
+    const result = s.context.val;
     const close = s.context.char;
     var c1 = s.getUnused() orelse try s.read();
-    while (c1) |c| : (c1 = try s.read()) {
+    while (c1) |c| : (c1 = try s.readNoEof("after list tail")) {
+        if (c == close) {
+            return s.retval(result);
+        }
         switch (try checkBlank(s, c)) {
             .yes => {},
             .skip_unit => return s.subr(.parse_unit, .close_improper_list),
             .skip_line => try s.skipLine(),
-            .no => {
-                if (c == close) {
-                    return s.retval(s.context.val);
-                }
-                return s.err(error.InvalidCharacter, "after list tail");
-            },
+            .no => return s.err(error.InvalidCharacter, "after list tail"),
         }
     }
-    return s.err(error.UnexpectedEof, "after list tail");
+    unreachable;
 }
 
 fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
@@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
     }
 
     s.context.val = q;
-    return s.subr(.parse_unit, .end_quote_expr);
+    s.unused_char = c;
+    return s.subr(.parse_list_element, .end_quote_expr);
 }
 
 fn endQuoteExpr(s: *State) !void {
+    if (s.result.eq(value.undef)) {
+        return s.err(error.InvalidCharacter, "quote expression datum");
+    }
     const q = s.context.val;
     const d = s.result;
     return s.retval(cons(q, d));