summary | refs | log | tree | commit | diff
path: root/src/libzisp/io/parser.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/libzisp/io/parser.zig')
-rw-r--r-- src/libzisp/io/parser.zig 147
1 files changed, 80 insertions, 67 deletions
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 651d124..643f7e8 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -257,11 +257,11 @@ const cons = value.pair.cons;
const is_test = builtin.is_test;
const is_debug = builtin.mode == .Debug;
-const detailed_debug = true;
+const detailed_debug = false;
// In debug, we want to see if we leak, so very small numbers.
-const init_stack_capacity = if (is_debug) 20 else 32;
-const init_chars_capacity = if (is_debug) 100 else 512;
+const init_stack_capacity = if (is_debug) 32 else 32;
+const init_chars_capacity = if (is_debug) 512 else 512;
// zig fmt: off
const DOT = value.rune.pack("DOT");
@@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE");
const BRACE = value.rune.pack("BRACE");
// zig fmt: on
+const S_DOT = value.sstr.pack(".");
+
const Context = struct {
// What to do next.
next: Fn = .parse_unit,
@@ -393,12 +395,9 @@ const State = struct {
value.istr.intern(s.chars.items, true);
}
- fn getRune(s: *State) !Value {
+ fn getRune(s: *State) Value {
defer s.chars.clearRetainingCapacity();
- return if (s.chars.items.len <= 6)
- value.rune.pack(s.chars.items)
- else
- error.RuneTooLong;
+ return value.rune.pack(s.chars.items);
}
fn push(s: *State, next: Fn) !void {
@@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value {
var s = State.init(input, stack_alloc, chars_alloc) catch @panic("");
defer s.deinit();
- while (s.context.next != .done) callNext(&s) catch |e| switch (e) {
- else => @panic(s.err_msg), // TODO
+ while (s.context.next != .done) callNext(&s) catch {
+ if (s.unused_char) |c| {
+ std.debug.panic(
+ "Parse error: {} at: {s}, char: {c}\n",
+ .{ s.err_code, s.err_msg, c },
+ );
+ } else {
+ std.debug.panic(
+ "Parse error: {} at: {s}\n",
+ .{ s.err_code, s.err_msg },
+ );
+ }
};
- if (s.unused_char) |_| {
- @panic("invalid character");
+ if (s.unused_char) |c| {
+ std.debug.panic("Invalid character: {c}\n", .{c});
}
return s.result;
}
@@ -489,7 +498,6 @@ const Fn = enum {
end_label_datum,
parse_list_element,
continue_list,
- parse_list_tail,
end_improper_list,
close_improper_list,
end_quote_expr,
@@ -498,12 +506,24 @@ const Fn = enum {
fn callNext(s: *State) !void {
if (detailed_debug) {
- std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
- s.stack.items.len,
+ const stack = s.stack.items;
+ std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{
+ stack.len,
s.context.next,
s.context.char,
s.unused_char orelse '_',
});
+ if (stack.len > 0) {
+ var i = stack.len;
+ while (i > 0) : (i -= 1) {
+ const prev = stack[i - 1];
+ std.debug.print("{}:{} ctx:'{c}'\n", .{
+ i - 1,
+ prev.next,
+ prev.char,
+ });
+ }
+ }
}
try switch (s.context.next) {
.parse_unit => parseUnit(s),
@@ -519,9 +539,8 @@ fn callNext(s: *State) !void {
.end_label_datum => endLabelDatum(s),
.parse_list_element => parseListElement(s),
.continue_list => continueList(s),
- .parse_list_tail => parseListTail(s),
.end_improper_list => endImproperList(s),
- .close_improper_list => endImproperList(s),
+ .close_improper_list => closeImproperList(s),
.end_quote_expr => endQuoteExpr(s),
.done => unreachable,
};
@@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void {
while (c1) |c| : (c1 = try s.read()) {
switch (try checkBlank(s, c)) {
.yes => {},
- .skip_unit => {
- // Simply push another parse_unit onto the stack, which will
- // ignore the result of the current one and start anew; then
- // keep looping to read the datum that will be ignored.
- try s.push(.parse_unit);
- },
+ .skip_unit => try s.push(.parse_unit),
.skip_line => try s.skipLine(),
.no => return parseDatum(s, c),
}
@@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void {
}
fn endOneDatum(s: *State) !void {
- const d = s.result;
- if (d.eq(value.undef)) {
- return s.retval(d);
+ if (s.result.eq(value.undef)) {
+ return s.retval(value.undef);
}
+ const d = s.result;
const c1 = s.getUnused() orelse try s.read();
if (c1) |c| {
switch (try checkBlank(s, c)) {
@@ -590,10 +604,16 @@ fn returnContext(s: *State) !void {
fn parseJoin(s: *State, d: Value, c: u8) !void {
s.context.val = d;
s.context.char = c;
- s.unused_char = switch (c) {
- '.', ':', '|' => try s.readNoEof("start of joined datum"),
- else => c,
- };
+ switch (c) {
+ '.', ':', '|' => {
+ s.context.char = c;
+ s.unused_char = try s.readNoEof("join datum");
+ },
+ else => {
+ s.context.char = 0;
+ s.unused_char = c;
+ },
+ }
return s.subr(.parse_join_datum, .join_data);
}
@@ -610,16 +630,21 @@ fn joinData(s: *State) !void {
const join = s.context.char;
const tail = s.result;
if (tail.eq(value.undef)) {
- return s.retval(head);
+ if (join == 0) {
+ return s.retval(head);
+ } else {
+ return s.err(error.InvalidCharacter, "join datum");
+ }
}
const rune = switch (join) {
+ 0 => JOIN,
'.' => DOT,
':' => COLON,
'|' => PIPE,
- else => JOIN,
+ else => unreachable,
};
- const result = cons(rune, cons(head, tail));
- return s.jump(.end_one_datum, result);
+ const data = cons(head, tail);
+ return s.jump(.end_one_datum, cons(rune, data));
}
fn parseOneDatum(s: *State, c: u8, next: Fn) !void {
@@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool {
'a'...'z' , 'A'...'Z' , '0'...'9',
'!' , '$' , '%' , '&' , '*' , '+',
'-' , '/' , '<' , '=' , '>' , '?',
- '@' , '^' , '_' , '~' => true,
+ '@' , '^' , '_' , '~' , '.' => true,
// zig fmt: on
else => false,
};
@@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } {
var len: usize = 1;
while (try s.read()) |c| : (len += 1) {
if (len == 6 or !std.ascii.isAlphanumeric(c)) {
- return .{ try s.getRune(), c };
+ return .{ s.getRune(), c };
}
try s.addChar(c);
}
- return .{ try s.getRune(), null };
+ return .{ s.getRune(), null };
}
fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void {
@@ -933,22 +958,20 @@ fn continueList(s: *State) !void {
if (c == close) {
return endList(s);
}
- if (c == '.') {
- return s.jump(.parse_list_tail, null);
- }
return s.err(error.InvalidCharacter, "list");
}
+ if (s.result.eq(S_DOT)) {
+ return s.subr(.parse_unit, .end_improper_list);
+ }
+
s.context.val = cons(s.result, s.context.val);
- var c1 = s.unused_char orelse try s.read();
+ var c1 = s.getUnused() orelse try s.read();
while (c1) |c| : (c1 = try s.read()) {
if (c == close) {
return endList(s);
}
- if (c == '.') {
- return s.jump(.parse_list_tail, null);
- }
switch (try checkBlank(s, c)) {
.yes => {},
.skip_unit => {
@@ -958,7 +981,7 @@ fn continueList(s: *State) !void {
.skip_line => try s.skipLine(),
.no => {
s.unused_char = c;
- return s.jump(.parse_list_element, null);
+ return s.subr(.parse_list_element, .continue_list);
},
}
}
@@ -969,19 +992,6 @@ fn endList(s: *State) !void {
return s.retval(lib.list.reverse(s.context.val));
}
-fn parseListTail(s: *State) !void {
- const c = try s.readNoEof("list tail");
- try s.pushContext(.end_improper_list);
- switch (try checkBlank(s, c)) {
- .yes => {},
- .skip_unit => return s.subr(.parse_unit, .parse_unit),
- .skip_line => try s.skipLine(),
- // One blank mandatory here.
- .no => return s.err(error.InvalidCharacter, "list tail"),
- }
- return s.jump(.parse_unit, null);
-}
-
fn endImproperList(s: *State) !void {
const tail = s.result;
if (tail.eq(value.undef)) {
@@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void {
}
fn closeImproperList(s: *State) !void {
+ const result = s.context.val;
const close = s.context.char;
var c1 = s.getUnused() orelse try s.read();
- while (c1) |c| : (c1 = try s.read()) {
+ while (c1) |c| : (c1 = try s.readNoEof("after list tail")) {
+ if (c == close) {
+ return s.retval(result);
+ }
switch (try checkBlank(s, c)) {
.yes => {},
.skip_unit => return s.subr(.parse_unit, .close_improper_list),
.skip_line => try s.skipLine(),
- .no => {
- if (c == close) {
- return s.retval(s.context.val);
- }
- return s.err(error.InvalidCharacter, "after list tail");
- },
+ .no => return s.err(error.InvalidCharacter, "after list tail"),
}
}
- return s.err(error.UnexpectedEof, "after list tail");
+ unreachable;
}
fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
@@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
}
s.context.val = q;
- return s.subr(.parse_unit, .end_quote_expr);
+ s.unused_char = c;
+ return s.subr(.parse_list_element, .end_quote_expr);
}
fn endQuoteExpr(s: *State) !void {
+ if (s.result.eq(value.undef)) {
+ return s.err(error.InvalidCharacter, "quote expression datum");
+ }
const q = s.context.val;
const d = s.result;
return s.retval(cons(q, d));