From f2b18d64448ab09dd5e5e6a180d38d90d5aaf367 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Thu, 27 Mar 2025 21:18:09 +0100 Subject: new parser --- _tests/antlr.bnf | 99 ++++++++++++++++++++++++++++++++++++++++ _tests/string | 1 + _tests/switchtable.zig | 93 ++++++++++++++++++++++++++++++++++++++ _tests/test.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++ _tests/test.scm | 89 ++++++++++++++++++++++++++++++++++++ _tests/test.zig | 84 ++++++++++++++++++++++++++++++++++ 6 files changed, 485 insertions(+) create mode 100644 _tests/antlr.bnf create mode 100644 _tests/string create mode 100644 _tests/switchtable.zig create mode 100644 _tests/test.c create mode 100644 _tests/test.scm create mode 100644 _tests/test.zig (limited to '_tests') diff --git a/_tests/antlr.bnf b/_tests/antlr.bnf new file mode 100644 index 0000000..7b0cf83 --- /dev/null +++ b/_tests/antlr.bnf @@ -0,0 +1,99 @@ +grammar ExprLexer; + +LF : '\n' ; +SP : ' ' ; +WS_CC : [\t\r\f] ; +SEMI : ';' ; +DOT : '.' ; +COLON : ':' ; +PIPE : '|' ; +TILDE : '~' ; +BSLASH : '\\' ; +DQUOTE : '"' ; +HASH : '#' ; +LPAREN : '(' ; +RPAREN : ')' ; +LSQBR : '[' ; +RSQBR : ']' ; +LCURLY : '{' ; +RCURLY : '}' ; +EQUAL : '=' ; +APOS : '\'' ; +GRAVE : '`' ; +COMMA : ',' ; + +ANY_BUT_LF : [\u0000-\u0009\u000b-\u00ff] ; + +BARE_CHAR : [a-zA-Z0-9!$%&*+/<=>?@^_~-] | '\\' BARE_ESC ; + +BARE_ESC : [\u0021-\u007e] ; + +QUOTED_CHAR : [^"\\] | '\\' QUOTED_ESC ; + +QUOTED_ESC : [\\"abefnrtv] + | 'x' HEX_DIGIT HEX_DIGIT + | 'u' '{' HEX_DIGIT+ '}' + ; + + +HEX_DIGIT : [0-9a-fA-F] ; +ALPHA : [a-zA-Z] ; +ALNUM : [a-zA-Z0-9] ; + + + +parse_unit : blank* datum blank? ; + + +blank : LF | SP | WS_CC | comment ; + +datum : datum_one ( join_char? datum )? ; + + +comment : ';' ( skip_datum | skip_line ) ; + + +datum_one : bare_string | fancy_datum ; + +join_char : '.' | ':' | '|' ; + + +skip_datum : '~' parse_unit ; + +skip_line : ANY_BUT_LF* LF? 
; + + +bare_string : BARE_CHAR+ ; + +fancy_datum : '\\' bare_esc_str + | '"' quoted_str '"' + | '#' hash_expr + | '(' list_body ')' + | '[' list_body ']' + | '{' list_body '}' + | quote_expr + ; + + +bare_esc_str : '\\' BARE_ESC BARE_CHAR* ; + +quoted_str : QUOTED_CHAR* ; + +hash_expr : rune fancy_datum? + | label ( '=' fancy_datum )? + | fancy_datum + ; + +list_body : blank* ( list_head list_tail? )? ; + +quote_expr : ( '\'' | '`' | ',' ) datum ; + + +rune : ALPHA ALNUM* ; + +label : '%' HEX_DIGIT+ ; + + +list_head : datum blank* list_head? ; + +list_tail : blank+ '.' blank+ datum blank* ; diff --git a/_tests/string b/_tests/string new file mode 100644 index 0000000..c365d58 --- /dev/null +++ b/_tests/string @@ -0,0 +1 @@ +\0\a\b\t\n\v\f\r\e\e\r\f\v\n\t\b\a\0 \ No newline at end of file diff --git a/_tests/switchtable.zig b/_tests/switchtable.zig new file mode 100644 index 0000000..722ecdd --- /dev/null +++ b/_tests/switchtable.zig @@ -0,0 +1,93 @@ +const std = @import("std"); + +const Reader = std.io.AnyReader; + +pub fn main() !u8 { + return f(); +} + +fn f() !u8 { + const file = try std.fs.cwd().openFile("string", .{}); + defer file.close(); + + var br = std.io.bufferedReader(file.reader()); + const r = br.reader().any(); + + var n: u8 = 0; + for (0..1_000_000) |i| { + _ = i; + while (r.readByte() catch null) |c| { + n +%= try f1(r, c); + } + br.start = 0; + br.end = 0; + try file.seekTo(0); + } + return n; +} + +fn f1(r: Reader, c1: u8) !u8 { + if (c1 != '\\') return c1; + const c = try r.readByte(); + if (c == 'u') return unknown1(); + return switch (c) { + '\\', '"' => c, + '0' => 0, + 'a' => 7, + 'b' => 8, + 't' => 9, + 'n' => 10, + 'v' => 11, + 'f' => 12, + 'r' => 13, + 'e' => 27, + 'x' => unknown2(), + else => unknown3(), + }; +} + +fn f2(r: Reader, c1: u8) !u8 { + if (c1 != '\\') return c1; + const c = try r.readByte(); + if (c == 'u') return unknown1(); + if (c == 'x') return unknown2(); + if (c == '\\' or c == '"') return c; + const itable = .{ '0', 
'a', 'b', 't', 'n', 'v', 'f', 'r', 'e' }; + const ctable: []const u8 = &.{ 0, 7, 8, 9, 10, 11, 12, 13, 27 }; + const i = std.mem.indexOfScalar(u8, &itable, c) orelse return unknown3(); + return ctable[i]; +} + +fn f3(r: Reader, c1: u8) !u8 { + if (c1 != '\\') return c1; + const c = try r.readByte(); + if (c == 'u') return unknown1(); + if (c == 'x') return unknown2(); + if (c == '\\' or c == '"') return c; + if (c == '0') return 0; + const table = comptime t: { + var table: [26]u8 = .{0} ** 26; + table['a' - 'a'] = 7; + table['b' - 'a'] = 8; + table['t' - 'a'] = 9; + table['n' - 'a'] = 10; + table['v' - 'a'] = 11; + table['f' - 'a'] = 12; + table['r' - 'a'] = 13; + table['e' - 'a'] = 27; + break :t table; + }; + if (c < 'a') return unknown3(); + const result = table[c - 'a']; + return if (result != 0) result else unknown3(); +} + +fn unknown1() u8 { + return 0; +} +fn unknown2() u8 { + return 0; +} +fn unknown3() u8 { + return 0; +} diff --git a/_tests/test.c b/_tests/test.c new file mode 100644 index 0000000..0b0917e --- /dev/null +++ b/_tests/test.c @@ -0,0 +1,119 @@ +#include <math.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +union test { + double d; + uint64_t u; +}; + +int main(int argc, char** argv) { + + volatile uint64_t mask; + volatile uint64_t min; + volatile uint64_t max; + + volatile double d1; + volatile double d2; + + volatile uint64_t pd1; + volatile uint64_t pd2; + + + // 0 .. 2^51 + + // SE__________QP__ + mask = 0b1111111111110111111111111111111111111111111111111111111111111111; + min = 0b1111111111110111111111111111111111111111111111111111111111111111; + max = 0b1111111111110000000000000000000000000000000000000000000000000001; + + memcpy(&d1, &min, 8); + memcpy(&d2, &max, 8); + + printf("%lf\n", d1); + printf("%lf\n", d2); + + memcpy(&pd1, &d1, 8); + memcpy(&pd2, &d2, 8); + pd1 ^= mask; + pd2 ^= mask; + + printf("%ld\n", pd1); + printf("%ld\n", pd2); + + printf("\n"); + + // -2^51 + 1 .. 
-1 + + // SE__________QP__ + min = 0b1111111111111000000000000000000000000000000000000000000000000001; + max = 0b1111111111111111111111111111111111111111111111111111111111111111; + + memcpy(&d1, &min, 8); + memcpy(&d2, &max, 8); + + printf("%lf\n", d1); + printf("%lf\n", d2); + + memcpy(&pd1, &d1, 8); + memcpy(&pd2, &d2, 8); + + printf("%ld\n", pd1); + printf("%ld\n", pd2); + + printf("\n"); + + return 0; + + // -2^50 + 1 .. -1 + + // SE__________QFP__ + mask = 0b1111111111111100000000000000000000000000000000000000000000000000; + min = 0b0000000000000000000000000000000000000000000000000000000000000001; + max = 0b0000000000000011111111111111111111111111111111111111111111111111; + + printf("%ld\n", (int64_t) (min | mask)); + printf("%ld\n", (int64_t) (max | mask)); + + + // 0 .. 2^50 + + // SE__________QFP__ + mask = 0b0000000000000011111111111111111111111111111111111111111111111111; + min = 0b0000000000000100000000000000000000000000000000000000000000000000; + max = 0b0000000000000111111111111111111111111111111111111111111111111111; + + printf("%ld\n", (int64_t) (min & mask)); + printf("%ld\n", (int64_t) (max & mask)); + + + + + /* volatile union test x; */ + /* volatile double f1; */ + /* volatile double f2; */ + + /* f1 = 0.0; */ + /* f2 = 0.0; */ + /* x.d = f1 / f2; */ + /* printf(" 0/0: %lx\n", x.u); */ + + /* f1 = -0.0; */ + /* f2 = +0.0; */ + /* x.d = f1 / f2; */ + /* printf("-0/0: %lx\n", x.u); */ + + /* x.d = sqrt(-1); */ + /* printf("sqrt(-1): %lx\n", x.u); */ + + + /* double nan_value = fabs(sqrt(-1.0)); // Standard way to generate NaN */ + + /* uint64_t nan_bits; */ + /* memcpy(&nan_bits, &nan_value, sizeof(nan_bits)); */ + + /* printf("NaN in hex: 0x%016lx\n", nan_bits); */ + + /* return 0; */ +} diff --git a/_tests/test.scm b/_tests/test.scm new file mode 100644 index 0000000..d893c9f --- /dev/null +++ b/_tests/test.scm @@ -0,0 +1,89 @@ +(import + (rnrs eval) + (rnrs hashtables)) + +(define-syntax eval-when-compile + (lambda (stx) + (syntax-case stx () + 
((_ imports body ...) + (eval + (syntax->datum #'(begin body ...)) + (apply environment (syntax->datum #'imports))))))) + +(eval-when-compile + ((rnrs)) + (display "foo\n")) + +(define-syntax process-data + (lambda (stx) + (syntax-case stx () + ((_ file) + (let ((ht (make-eqv-hashtable))) + (hashtable-set! ht 1 2) + ht))))) + +(define lookup-table (process-data "lookup-table.dat")) + +(define seconds-per-day (number->string (* 24 60 60))) + +(define (foo arg1:Num arg2:Record) + (do-stuff arg2.field )) + + + +(define-class Person + (fields + name date-of-birth sex + (age person-age set-person-age!)) + (methods + ((jump height) + (if (string=? name "lebron") + (perform-jump height))))) + + + +(define-record (r1 a b)) + +(define-record (r1 a b) + (fields a b) + (set-r1-a! a) + (set-r1-b! b)) + +(define my-r1 (r1 1 2)) + + +(define-record (r2 a b c d) + (parent (r1 a b))) + +(define-record (r2 a b c d) + (parent (r1 a b)) + (fields c d) + (set-r2-c! c) + (set-r2-d! d)) + +(define-record (r2 a b c d) + (parent (r1 (* 2 c) (* 4 d))) + (fields x y z) + (set-r2-x! a) + (set-r2-y! b) + (set-r2-z! (/ a b))) + +(define my-r2 (r2 1 2 3 4)) + + +(define-record (r3 a b c d e f) + (parent r2)) + +(define-record r3 + (parent r2) + (fields e f)) + +(define (init-r3! r a b c d e f) + (init-r2! r a b c d) + (set-r3-e! e) + (set-r3-f! f)) + + +(define r (make-r3)) +(init-r3! 
r 1 2 3 4 5 6) + diff --git a/_tests/test.zig b/_tests/test.zig new file mode 100644 index 0000000..7b4a04c --- /dev/null +++ b/_tests/test.zig @@ -0,0 +1,84 @@ +const std = @import("std"); + +pub fn main() void { + // const y: [3]u64 = .{ 1, 2, 3 }; + // const x: struct { u8, u64, u8 } = y; + // @import("std").debug.print("{}\n", .{x[0] + x[1] + x[2]}); + + std.debug.print("{}\n", .{@sizeOf(struct { a: u8, b: u64, c: u8, d: bool })}); +} + +// const x: ?u8 = 5; +// if (x == null) { +// return 1; +// } else |val| { +// return val; +// } +// var list = std.ArrayList(u8).init(std.heap.smp_allocator); +// try parseUniHex("1f4a9", &list); +// std.debug.print("{s}\n", .{list.items}); + +// fn parseUniHex( +// str: []const u8, +// s: *std.ArrayList(u8), +// ) !void { +// var uc: u21 = parseHexDigit1(str[0]); +// for (str[1..]) |c| { +// uc = try std.math.shlExact(u21, uc, 4); +// uc |= parseHexDigit1(c); +// } + +// std.debug.print("{u}\n", .{uc}); + +// const n = try std.unicode.utf8CodepointSequenceLength(uc); +// const buf = try s.addManyAsSlice(n); +// _ = try std.unicode.utf8Encode(uc, buf); +// } + +// fn parseHexByte1(h1: u8, h2: u8) u8 { +// const hi = parseHexDigit1(h1); +// const lo = parseHexDigit1(h2); +// return hi << 4 | lo; +// } + +// fn parseHexDigit1(c: u8) u8 { +// return switch (c) { +// '0'...'9' => c - '0', +// 'A'...'F' => c - 'A' + 10, +// 'a'...'f' => c - 'a' + 10, +// else => @panic(""), +// }; +// } + +// fn parseHexByte2(h1: u8, h2: u8) u8 { +// const hi: u8 = parseHexDigit2(h1); +// const lo = parseHexDigit2(h2); +// return hi << 4 | lo; +// } + +// fn parseHexDigit2(c: u8) u4 { +// return @intCast(switch (c) { +// '0'...'9' => c - '0', +// 'A'...'F' => c - 'A' + 10, +// 'a'...'f' => c - 'a' + 10, +// else => @panic(""), +// }); +// } + +// fn parseUniHex1(str: []const u8) !u21 { +// var uc: u21 = parseHexDigit1(str[0]); +// for (str[1..]) |c| { +// uc = try std.math.shlExact(u21, uc, 4); +// uc |= parseHexDigit1(c); +// } +// return uc; +// 
} + +// fn parseUniHex2(str: []const u8) !u21 { +// var uc: u21 = parseHexDigit2(str[0]); +// for (str[1..]) |c| { +// uc = try std.math.shlExact(u21, uc, 4); +// uc |= parseHexDigit2(c); +// } +// return uc; +// } -- cgit v1.2.3 From 00fd32b6c0d35140bdc160aa759bbac52242d7d0 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Fri, 28 Mar 2025 12:19:54 +0100 Subject: blah --- _tests/test.zig | 2 +- spec/parser.ebnf | 2 +- src/libzisp.zig | 194 ++++++++++++++++++++++++++-------------------- src/libzisp/io/parser.zig | 147 +++++++++++++++++++---------------- 4 files changed, 193 insertions(+), 152 deletions(-) (limited to '_tests') diff --git a/_tests/test.zig b/_tests/test.zig index 7b4a04c..5acb628 100644 --- a/_tests/test.zig +++ b/_tests/test.zig @@ -5,7 +5,7 @@ pub fn main() void { // const x: struct { u8, u64, u8 } = y; // @import("std").debug.print("{}\n", .{x[0] + x[1] + x[2]}); - std.debug.print("{}\n", .{@sizeOf(struct { a: u8, b: u64, c: u8, d: bool })}); + std.debug.print("{}\n", .{@sizeOf(struct { u64, ?u8 })}); } // const x: ?u8 = 5; diff --git a/spec/parser.ebnf b/spec/parser.ebnf index 9e02fba..44b1967 100644 --- a/spec/parser.ebnf +++ b/spec/parser.ebnf @@ -12,7 +12,7 @@ comment : ';' ( skip_unit | skip_line ) ; skip_unit : '~' unit ; -skip_line : ( ~LF )* LF? ; +skip_line : ( ~10 )* 10? 
; one_datum : ( bare_str | clad_datum ) ; diff --git a/src/libzisp.zig b/src/libzisp.zig index e6c8ac5..df8422b 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -316,86 +316,114 @@ test "parse2" { try std.testing.expectEqualStrings("foo", f.slice()); } -// test "parse3" { -// const val = parseString( -// \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) -// ); - -// const car = value.pair.car; -// const cdr = value.pair.cdr; - -// const e1 = car(val); -// const e2 = car(cdr(val)); -// const e3 = car(cdr(cdr(val))); -// const e4 = car(cdr(cdr(cdr(val)))); - -// try std.testing.expect(value.sstr.check(e1)); -// try std.testing.expect(value.rune.check(e2)); -// try std.testing.expect(value.pair.check(e3)); -// try std.testing.expect(value.pair.check(e4)); -// } - -// test "parse4" { -// const val = parseString("(foo . ;~x bar ;~y)"); - -// const s = value.sstr.unpack(value.pair.car(val)); -// try std.testing.expectEqualStrings("foo", s.slice()); - -// const f = value.sstr.unpack(value.pair.cdr(val)); -// try std.testing.expectEqualStrings("bar", f.slice()); -// } - -// fn parseBench(path: []const u8, iters: usize) !void { -// const file = try std.fs.cwd().openFile(path, .{}); -// defer file.close(); - -// var timer = try std.time.Timer.start(); -// for (0..iters) |i| { -// _ = i; -// var br = std.io.bufferedReader(file.reader()); -// const reader = br.reader().any(); -// var v: Value = undefined; -// while (true) { -// v = io.parser.parse(reader); -// if (value.eof.check(v)) { -// break; -// } -// } -// try file.seekTo(0); -// } -// const ns: f64 = @floatFromInt(timer.lap()); -// const secs = ns / 1_000_000_000; -// std.debug.print( -// "parse {s} x {}: {d:.3}s\n", -// .{ path, iters, secs }, -// ); -// } - -// test "parse bench" { -// // try parseBench("test-data/parser-test-1.scm", 200); -// // try parseBench("test-data/parser-test-2.scm", 800); -// try parseBench("test-data/parser-torture.scm", 1); -// } - -// test "unparse" { -// var gpa: 
std.heap.GeneralPurposeAllocator(.{}) = .init; -// var out: std.ArrayList(u8) = .init(gpa.allocator()); - -// const w = out.writer(); -// const v = parseString("#foo"); -// try io.unparser.unparse(w, v); -// try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); -// } - -// test "unparse2" { -// var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; -// var out: std.ArrayList(u8) = .init(gpa.allocator()); - -// const w = out.writer(); -// const v = parseString("#{foo bar['x]}"); -// try io.unparser.unparse(w, v); -// try std.testing.expectEqualStrings( -// "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", -// try out.toOwnedSlice(), -// ); -// } +test "parse3" { + const val = parseString( + \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) + ); + + const car = value.pair.car; + const cdr = value.pair.cdr; + + const e1 = car(val); + const e2 = car(cdr(val)); + const e3 = car(cdr(cdr(val))); + const e4 = car(cdr(cdr(cdr(val)))); + + try std.testing.expect(value.sstr.check(e1)); + try std.testing.expect(value.rune.check(e2)); + try std.testing.expect(value.pair.check(e3)); + try std.testing.expect(value.pair.check(e4)); +} + +test "parse4" { + const val = parseString("(foo . 
;~x bar ;~y)"); + + const s = value.sstr.unpack(value.pair.car(val)); + try std.testing.expectEqualStrings("foo", s.slice()); + + const f = value.sstr.unpack(value.pair.cdr(val)); + try std.testing.expectEqualStrings("bar", f.slice()); +} + +fn parseBench(path: []const u8, iters: usize) !void { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + + var timer = try std.time.Timer.start(); + for (0..iters) |i| { + _ = i; + var br = std.io.bufferedReader(file.reader()); + const reader = br.reader().any(); + var v: Value = undefined; + while (true) { + v = io.parser.parse(reader); + if (value.eof.check(v)) { + break; + } + } + try file.seekTo(0); + } + const ns: f64 = @floatFromInt(timer.lap()); + const secs = ns / 1_000_000_000; + std.debug.print( + "parse {s} x {}: {d:.3}s\n", + .{ path, iters, secs }, + ); +} + +test "parse bench" { + try parseBench("test-data/parser-test-1.scm", 1000); + try parseBench("test-data/parser-test-2.scm", 1000); + // try parseBench("test-data/parser-torture.scm", 1); +} + +test "unparse" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#foo"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); +} + +test "unparse2" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#{foo bar['x]}"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings( + "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . 
x)))", + try out.toOwnedSlice(), + ); +} + +test "unparse3" { + const w = std.io.getStdErr().writer(); + const v = parseString("#{foo bar['x](y)(z)}"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse4" { + const w = std.io.getStdErr().writer(); + const v = parseString("(foo ;~bar)"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse5" { + const w = std.io.getStdErr().writer(); + const v = parseString("(;~foo foo ;~bar . ;~bar bar ;~bar)"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse6" { + const w = std.io.getStdErr().writer(); + const v = parseString("(foo .bar ... baz. bat.(qux))"); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 651d124..643f7e8 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -257,11 +257,11 @@ const cons = value.pair.cons; const is_test = builtin.is_test; const is_debug = builtin.mode == .Debug; -const detailed_debug = true; +const detailed_debug = false; // In debug, we want to see if we leak, so very small numbers. -const init_stack_capacity = if (is_debug) 20 else 32; -const init_chars_capacity = if (is_debug) 100 else 512; +const init_stack_capacity = if (is_debug) 32 else 32; +const init_chars_capacity = if (is_debug) 512 else 512; // zig fmt: off const DOT = value.rune.pack("DOT"); @@ -277,6 +277,8 @@ const SQUARE = value.rune.pack("SQUARE"); const BRACE = value.rune.pack("BRACE"); // zig fmt: on +const S_DOT = value.sstr.pack("."); + const Context = struct { // What to do next. 
next: Fn = .parse_unit, @@ -393,12 +395,9 @@ const State = struct { value.istr.intern(s.chars.items, true); } - fn getRune(s: *State) !Value { + fn getRune(s: *State) Value { defer s.chars.clearRetainingCapacity(); - return if (s.chars.items.len <= 6) - value.rune.pack(s.chars.items) - else - error.RuneTooLong; + return value.rune.pack(s.chars.items); } fn push(s: *State, next: Fn) !void { @@ -466,11 +465,21 @@ pub fn parse(input: std.io.AnyReader) Value { var s = State.init(input, stack_alloc, chars_alloc) catch @panic(""); defer s.deinit(); - while (s.context.next != .done) callNext(&s) catch |e| switch (e) { - else => @panic(s.err_msg), // TODO + while (s.context.next != .done) callNext(&s) catch { + if (s.unused_char) |c| { + std.debug.panic( + "Parse error: {} at: {s}, char: {c}\n", + .{ s.err_code, s.err_msg, c }, + ); + } else { + std.debug.panic( + "Parse error: {} at: {s}\n", + .{ s.err_code, s.err_msg }, + ); + } }; - if (s.unused_char) |_| { - @panic("invalid character"); + if (s.unused_char) |c| { + std.debug.panic("Invalid character: {c}\n", .{c}); } return s.result; } @@ -489,7 +498,6 @@ const Fn = enum { end_label_datum, parse_list_element, continue_list, - parse_list_tail, end_improper_list, close_improper_list, end_quote_expr, @@ -498,12 +506,24 @@ const Fn = enum { fn callNext(s: *State) !void { if (detailed_debug) { - std.debug.print("\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ - s.stack.items.len, + const stack = s.stack.items; + std.debug.print("\n\n{}:{} ctx:'{c}' unused:'{c}' \n", .{ + stack.len, s.context.next, s.context.char, s.unused_char orelse '_', }); + if (stack.len > 0) { + var i = stack.len; + while (i > 0) : (i -= 1) { + const prev = stack[i - 1]; + std.debug.print("{}:{} ctx:'{c}'\n", .{ + i - 1, + prev.next, + prev.char, + }); + } + } } try switch (s.context.next) { .parse_unit => parseUnit(s), @@ -519,9 +539,8 @@ fn callNext(s: *State) !void { .end_label_datum => endLabelDatum(s), .parse_list_element => parseListElement(s), 
.continue_list => continueList(s), - .parse_list_tail => parseListTail(s), .end_improper_list => endImproperList(s), - .close_improper_list => endImproperList(s), + .close_improper_list => closeImproperList(s), .end_quote_expr => endQuoteExpr(s), .done => unreachable, }; @@ -532,12 +551,7 @@ fn parseUnit(s: *State) !void { while (c1) |c| : (c1 = try s.read()) { switch (try checkBlank(s, c)) { .yes => {}, - .skip_unit => { - // Simply push another parse_unit onto the stack, which will - // ignore the result of the current one and start anew; then - // keep looping to read the datum that will be ignored. - try s.push(.parse_unit); - }, + .skip_unit => try s.push(.parse_unit), .skip_line => try s.skipLine(), .no => return parseDatum(s, c), } @@ -562,10 +576,10 @@ fn parseDatum(s: *State, c: u8) !void { } fn endOneDatum(s: *State) !void { - const d = s.result; - if (d.eq(value.undef)) { - return s.retval(d); + if (s.result.eq(value.undef)) { + return s.retval(value.undef); } + const d = s.result; const c1 = s.getUnused() orelse try s.read(); if (c1) |c| { switch (try checkBlank(s, c)) { @@ -590,10 +604,16 @@ fn returnContext(s: *State) !void { fn parseJoin(s: *State, d: Value, c: u8) !void { s.context.val = d; s.context.char = c; - s.unused_char = switch (c) { - '.', ':', '|' => try s.readNoEof("start of joined datum"), - else => c, - }; + switch (c) { + '.', ':', '|' => { + s.context.char = c; + s.unused_char = try s.readNoEof("join datum"); + }, + else => { + s.context.char = 0; + s.unused_char = c; + }, + } return s.subr(.parse_join_datum, .join_data); } @@ -610,16 +630,21 @@ fn joinData(s: *State) !void { const join = s.context.char; const tail = s.result; if (tail.eq(value.undef)) { - return s.retval(head); + if (join == 0) { + return s.retval(head); + } else { + return s.err(error.InvalidCharacter, "join datum"); + } } const rune = switch (join) { + 0 => JOIN, '.' 
=> DOT, ':' => COLON, '|' => PIPE, - else => JOIN, + else => unreachable, }; - const result = cons(rune, cons(head, tail)); - return s.jump(.end_one_datum, result); + const data = cons(head, tail); + return s.jump(.end_one_datum, cons(rune, data)); } fn parseOneDatum(s: *State, c: u8, next: Fn) !void { @@ -653,7 +678,7 @@ fn isBareChar(c: u8) bool { 'a'...'z' , 'A'...'Z' , '0'...'9', '!' , '$' , '%' , '&' , '*' , '+', '-' , '/' , '<' , '=' , '>' , '?', - '@' , '^' , '_' , '~' => true, + '@' , '^' , '_' , '~' , '.' => true, // zig fmt: on else => false, }; @@ -811,11 +836,11 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } { var len: usize = 1; while (try s.read()) |c| : (len += 1) { if (len == 6 or !std.ascii.isAlphanumeric(c)) { - return .{ try s.getRune(), c }; + return .{ s.getRune(), c }; } try s.addChar(c); } - return .{ try s.getRune(), null }; + return .{ s.getRune(), null }; } fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void { @@ -933,22 +958,20 @@ fn continueList(s: *State) !void { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } return s.err(error.InvalidCharacter, "list"); } + if (s.result.eq(S_DOT)) { + return s.subr(.parse_unit, .end_improper_list); + } + s.context.val = cons(s.result, s.context.val); - var c1 = s.unused_char orelse try s.read(); + var c1 = s.getUnused() orelse try s.read(); while (c1) |c| : (c1 = try s.read()) { if (c == close) { return endList(s); } - if (c == '.') { - return s.jump(.parse_list_tail, null); - } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => { @@ -958,7 +981,7 @@ fn continueList(s: *State) !void { .skip_line => try s.skipLine(), .no => { s.unused_char = c; - return s.jump(.parse_list_element, null); + return s.subr(.parse_list_element, .continue_list); }, } } @@ -969,19 +992,6 @@ fn endList(s: *State) !void { return s.retval(lib.list.reverse(s.context.val)); } -fn parseListTail(s: *State) !void { - const c = try s.readNoEof("list 
tail"); - try s.pushContext(.end_improper_list); - switch (try checkBlank(s, c)) { - .yes => {}, - .skip_unit => return s.subr(.parse_unit, .parse_unit), - .skip_line => try s.skipLine(), - // One blank mandatory here. - .no => return s.err(error.InvalidCharacter, "list tail"), - } - return s.jump(.parse_unit, null); -} - fn endImproperList(s: *State) !void { const tail = s.result; if (tail.eq(value.undef)) { @@ -992,22 +1002,21 @@ fn endImproperList(s: *State) !void { } fn closeImproperList(s: *State) !void { + const result = s.context.val; const close = s.context.char; var c1 = s.getUnused() orelse try s.read(); - while (c1) |c| : (c1 = try s.read()) { + while (c1) |c| : (c1 = try s.readNoEof("after list tail")) { + if (c == close) { + return s.retval(result); + } switch (try checkBlank(s, c)) { .yes => {}, .skip_unit => return s.subr(.parse_unit, .close_improper_list), .skip_line => try s.skipLine(), - .no => { - if (c == close) { - return s.retval(s.context.val); - } - return s.err(error.InvalidCharacter, "after list tail"); - }, + .no => return s.err(error.InvalidCharacter, "after list tail"), } } - return s.err(error.UnexpectedEof, "after list tail"); + unreachable; } fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { @@ -1026,10 +1035,14 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { } s.context.val = q; - return s.subr(.parse_unit, .end_quote_expr); + s.unused_char = c; + return s.subr(.parse_list_element, .end_quote_expr); } fn endQuoteExpr(s: *State) !void { + if (s.result.eq(value.undef)) { + return s.err(error.InvalidCharacter, "quote expression datum"); + } const q = s.context.val; const d = s.result; return s.retval(cons(q, d)); -- cgit v1.2.3 From 6eedf5394997b91467a392732cdb7fbb80a790b8 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Fri, 28 Mar 2025 18:02:38 +0100 Subject: blub --- _tests/test.zig | 4 +- spec/parser.ebnf | 30 ++++-- src/libzisp.zig | 16 ++- src/libzisp/io/parser.zig | 250 
+++++++++++++++++++++++---------------------- src/libzisp/value/rune.zig | 4 + 5 files changed, 170 insertions(+), 134 deletions(-) (limited to '_tests') diff --git a/_tests/test.zig b/_tests/test.zig index 5acb628..e746851 100644 --- a/_tests/test.zig +++ b/_tests/test.zig @@ -1,11 +1,13 @@ const std = @import("std"); -pub fn main() void { +pub fn main() u8 { // const y: [3]u64 = .{ 1, 2, 3 }; // const x: struct { u8, u64, u8 } = y; // @import("std").debug.print("{}\n", .{x[0] + x[1] + x[2]}); std.debug.print("{}\n", .{@sizeOf(struct { u64, ?u8 })}); + + return while (true) if (true) break 1; } // const x: ?u8 = 5; diff --git a/spec/parser.ebnf b/spec/parser.ebnf index 44b1967..60f7890 100644 --- a/spec/parser.ebnf +++ b/spec/parser.ebnf @@ -1,11 +1,14 @@ -unit : blank* ( datum blank? | EOF ) ; +unit : empty_unit | datum_unit ; -blank : 9...13 | comment ; +empty_unit : blank* EOF ; -datum : one_datum ( join_char? one_datum )* ; +datum_unit : blank* datum blank? ; -join_char : '.' | ':' | '|' ; + +blank : 9...13 | comment ; + +datum : join_data | dot_string ; comment : ';' ( skip_unit | skip_line ) ; @@ -15,9 +18,18 @@ skip_unit : '~' unit ; skip_line : ( ~10 )* 10? ; -one_datum : ( bare_str | clad_datum ) ; +join_data : one_datum ( join_char? one_datum )* + +join_char : '.' | ':' | '|' ; + +dot_string : '.'{2,} + -bare_str : bare_str_elt+ ; +one_datum : ( num_string | bare_string | clad_datum ) ; + +num_string : ( '+' | '-' )? digit ( bare_str_elt | '.' )* ; + +bare_string : bare_str_elt+ ; clad_datum : '\' bare_esc_str | '"' quoted_str '"' @@ -37,11 +49,13 @@ bare_esc_str : bare_esc bare_str_elt* ; quoted_str : ( quoted_char | '\' quoted_esc )* ; hash_expr : rune clad_datum? - | '%' label ( '%' | '=' unit ) + | '%' label ( '%' | '=' datum_unit ) | clad_datum ; -list : unit+ ( '.' blank+ unit )? blank* ; +list : datum_unit+ list_tail? blank* ; + +list_tail : '.' 
blank+ datum_unit quote_expr : ( "'" | "`" | "," ) datum ; diff --git a/src/libzisp.zig b/src/libzisp.zig index df8422b..ceee3f6 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -352,11 +352,17 @@ fn parseBench(path: []const u8, iters: usize) !void { var timer = try std.time.Timer.start(); for (0..iters) |i| { _ = i; - var br = std.io.bufferedReader(file.reader()); - const reader = br.reader().any(); + // var br = std.io.bufferedReader(file.reader()); + // const reader = br.reader().any(); + const reader = file.reader().any(); var v: Value = undefined; while (true) { - v = io.parser.parse(reader); + v = io.parser._parse(reader) catch |e| { + std.debug.print("\nfile pos: {}\n", .{ + try file.getPos(), + }); + return e; + }; if (value.eof.check(v)) { break; } @@ -374,7 +380,7 @@ fn parseBench(path: []const u8, iters: usize) !void { test "parse bench" { try parseBench("test-data/parser-test-1.scm", 1000); try parseBench("test-data/parser-test-2.scm", 1000); - // try parseBench("test-data/parser-torture.scm", 1); + try parseBench("test-data/parser-torture.scm", 1); } test "unparse" { @@ -423,7 +429,7 @@ test "unparse5" { test "unparse6" { const w = std.io.getStdErr().writer(); - const v = parseString("(foo .bar ... baz. bat.(qux))"); + const v = parseString("(foo bar ... baz bat.(qux))"); try io.unparser.unparse(w, v); try w.writeByte('\n'); } diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 643f7e8..8093ffe 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -275,10 +275,10 @@ const GRAVE = value.rune.pack("GRAVE"); const COMMA = value.rune.pack("COMMA"); const SQUARE = value.rune.pack("SQUARE"); const BRACE = value.rune.pack("BRACE"); +const VOID = value.rune.packForced(""); +const LSTAIL = value.rune.packForced("."); // zig fmt: on -const S_DOT = value.sstr.pack("."); - const Context = struct { // What to do next. 
next: Fn = .parse_unit, @@ -288,7 +288,7 @@ const Context = struct { char: u8 = undefined, }; -const ParseError = error{ +const ParseError = enum { InvalidCharacter, UnclosedString, UnexpectedEof, @@ -314,7 +314,6 @@ const State = struct { result: Value = undefined, unused_char: ?u8 = null, - err_code: anyerror = undefined, err_msg: []const u8 = undefined, fn init( @@ -337,9 +336,13 @@ const State = struct { s.chars.deinit(s.chars_alloc); } - fn err(s: *State, e: ParseError, msg: []const u8) ParseError { - s.err_msg = msg; - return e; + fn err( + s: *State, + comptime e: ParseError, + comptime msg: []const u8, + ) error{ParseError} { + s.err_msg = @tagName(e) ++ " at: " ++ msg; + return error.ParseError; } fn read(s: *State) !?u8 { @@ -348,10 +351,7 @@ const State = struct { } const c = s.input.readByte() catch |e| switch (e) { error.EndOfStream => return null, - else => { - s.err_code = e; - return error.ReadError; - }, + else => return s.err(.ReadError, "???"), }; if (detailed_debug) { std.debug.print("{c}", .{c}); @@ -359,8 +359,8 @@ const State = struct { return c; } - fn readNoEof(s: *State, emsg: []const u8) !u8 { - return if (try s.read()) |c| c else s.err(error.UnexpectedEof, emsg); + fn readNoEof(s: *State, comptime emsg: []const u8) !u8 { + return if (try s.read()) |c| c else s.err(.UnexpectedEof, emsg); } fn getUnused(s: *State) ?u8 { @@ -371,10 +371,6 @@ const State = struct { return null; } - fn skipLine(s: *State) !void { - while (try s.read()) |c| if (c == '\n') break; - } - fn addChar(s: *State, c: u8) !void { try s.chars.append(s.chars_alloc, c); } @@ -423,7 +419,7 @@ const State = struct { } fn abort(s: *State, next: Fn, unused_c: u8) void { - s.result = value.undef; + s.result = VOID; s.unused_char = unused_c; s.context.next = next; } @@ -438,7 +434,7 @@ const State = struct { } }; -pub fn parse(input: std.io.AnyReader) Value { +pub fn _parse(input: std.io.AnyReader) !Value { var debug_alloc: std.heap.DebugAllocator(.{}) = undefined; if (!is_test 
and is_debug) { debug_alloc = .init; @@ -465,25 +461,28 @@ pub fn parse(input: std.io.AnyReader) Value { var s = State.init(input, stack_alloc, chars_alloc) catch @panic(""); defer s.deinit(); - while (s.context.next != .done) callNext(&s) catch { - if (s.unused_char) |c| { - std.debug.panic( - "Parse error: {} at: {s}, char: {c}\n", - .{ s.err_code, s.err_msg, c }, - ); - } else { - std.debug.panic( - "Parse error: {} at: {s}\n", - .{ s.err_code, s.err_msg }, - ); - } + while (s.context.next != .done) callNext(&s) catch |e| { + // _ = e; + // if (s.unused_char) |c| { + // std.debug.panic( + // "Parse error: {s}, unused_char: 0x{x}\n", + // .{ s.err_msg, c }, + // ); + // } else { + // std.debug.panic("Parse error: {s}\n", .{s.err_msg}); + // } + return e; }; if (s.unused_char) |c| { - std.debug.panic("Invalid character: {c}\n", .{c}); + std.debug.panic("Invalid trailing character: {c}\n", .{c}); } return s.result; } +pub fn parse(input: std.io.AnyReader) Value { + return _parse(input) catch @panic(""); +} + const Fn = enum { parse_unit, return_context, @@ -549,43 +548,74 @@ fn callNext(s: *State) !void { fn parseUnit(s: *State) !void { var c1 = s.getUnused() orelse try s.read(); while (c1) |c| : (c1 = try s.read()) { - switch (try checkBlank(s, c)) { + switch (try checkBlanks(s, c)) { .yes => {}, .skip_unit => try s.push(.parse_unit), - .skip_line => try s.skipLine(), .no => return parseDatum(s, c), } } return s.retval(value.eof.eof); } -fn checkBlank(s: *State, c: u8) !enum { yes, skip_unit, skip_line, no } { +fn checkBlanks(s: *State, c: u8) !enum { yes, skip_unit, no } { return switch (c) { '\t'...'\r', ' ' => .yes, ';' => switch (try s.read() orelse '\n') { '\n' => .yes, '~' => .skip_unit, - else => .skip_line, + else => while (try s.read() != '\n') {} else .yes, }, else => .no, }; } fn parseDatum(s: *State, c: u8) !void { + if (c == '.') { + return parseDotString(s); + } return parseOneDatum(s, c, .end_one_datum); } +fn parseDotString(s: *State) !void { + try 
s.addChar('.'); + while (try s.read()) |c| { + switch (try checkBlanks(s, c)) { + .yes => return dotString(s, false), + .skip_unit => return dotString(s, true), + .no => switch (c) { + '.' => try s.addChar('.'), + ')', ']', '}' => { + s.unused_char = c; + return dotString(s, false); + }, + else => return s.err(.InvalidCharacter, "dot string"), + }, + } + } + unreachable; +} + +fn dotString(s: *State, skip_unit: bool) !void { + const lstail = s.chars.items.len == 1; + const result = if (lstail) LSTAIL else s.getBareString(); + if (skip_unit) { + s.context.val = result; + return s.subr(.parse_unit, .return_context); + } else { + return s.retval(result); + } +} + fn endOneDatum(s: *State) !void { - if (s.result.eq(value.undef)) { - return s.retval(value.undef); + if (s.result.eq(VOID)) { + return s.retval(VOID); } const d = s.result; const c1 = s.getUnused() orelse try s.read(); if (c1) |c| { - switch (try checkBlank(s, c)) { + switch (try checkBlanks(s, c)) { .yes => {}, .skip_unit => return skipUnitAndReturn(s, d), - .skip_line => try s.skipLine(), .no => return parseJoin(s, d, c), } } @@ -629,11 +659,11 @@ fn joinData(s: *State) !void { const head = s.context.val; const join = s.context.char; const tail = s.result; - if (tail.eq(value.undef)) { + if (tail.eq(VOID)) { if (join == 0) { return s.retval(head); } else { - return s.err(error.InvalidCharacter, "join datum"); + return s.err(.InvalidCharacter, "join datum"); } } const rune = switch (join) { @@ -649,20 +679,17 @@ fn joinData(s: *State) !void { fn parseOneDatum(s: *State, c: u8, next: Fn) !void { if (isBareChar(c)) { - const d, s.unused_char = try parseBareString(s, c); - return s.jump(next, d); + return s.jump(next, try parseBareString(s, c)); } return parseCladDatum(s, c, next); } fn parseCladDatum(s: *State, c: u8, next: Fn) !void { if (c == '\\') { - const bs, s.unused_char = try parseBareEscString(s); - return s.jump(next, bs); + return s.jump(next, try parseBareEscString(s)); } if (c == '"') { - const 
qs = try parseQuotedString(s); - return s.jump(next, qs); + return s.jump(next, try parseQuotedString(s)); } return switch (c) { '#' => parseHashExpression(s, next), @@ -675,10 +702,8 @@ fn parseCladDatum(s: *State, c: u8, next: Fn) !void { fn isBareChar(c: u8) bool { return switch (c) { // zig fmt: off - 'a'...'z' , 'A'...'Z' , '0'...'9', - '!' , '$' , '%' , '&' , '*' , '+', - '-' , '/' , '<' , '=' , '>' , '?', - '@' , '^' , '_' , '~' , '.' => true, + 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' , + '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true, // zig fmt: on else => false, }; @@ -691,27 +716,28 @@ fn isBareEsc(c: u8) bool { }; } -fn parseBareString(s: *State, c: u8) !struct { Value, ?u8 } { +fn parseBareString(s: *State, c: u8) !Value { try s.addChar(c); return parseBareStringRest(s); } -fn parseBareEscString(s: *State) !struct { Value, ?u8 } { +fn parseBareEscString(s: *State) !Value { try s.addChar(try parseBareEsc(s)); return parseBareStringRest(s); } -fn parseBareStringRest(s: *State) !struct { Value, ?u8 } { +fn parseBareStringRest(s: *State) !Value { while (try s.read()) |c| { if (isBareChar(c)) { try s.addChar(c); } else if (c == '\\') { try s.addChar(try parseBareEsc(s)); } else { - return .{ s.getBareString(), c }; + s.unused_char = c; + break; } } - return .{ s.getBareString(), null }; + return s.getBareString(); } fn parseBareEsc(s: *State) !u8 { @@ -719,7 +745,7 @@ fn parseBareEsc(s: *State) !u8 { if (isBareEsc(c)) { return c; } else { - return s.err(error.InvalidCharacter, "bare escape"); + return s.err(.InvalidCharacter, "bare escape"); } } @@ -754,17 +780,16 @@ fn parseQuotedEsc(s: *State) !void { 'r' => 13, 'e' => 27, 'x' => try parseHexByte(s, "hex escape"), - else => return s.err(error.InvalidCharacter, "quoted escape"), + else => return s.err(.InvalidCharacter, "quoted escape"), }); } fn parseUniHexHandleErrors(s: *State) !void { return parseUniHex(s) catch |err| switch (err) { - 
error.Utf8CannotEncodeSurrogateHalf => e: { - s.err_code = err; - s.err_msg = "unicode escape"; - break :e error.UnicodeError; - }, + error.Utf8CannotEncodeSurrogateHalf => s.err( + .UnicodeError, + "unicode escape", + ), else => |e| e, }; } @@ -773,16 +798,16 @@ fn parseUniHex(s: *State) !void { const msg = "unicode escape"; if (try s.readNoEof(msg) != '{') { - return s.err(error.InvalidCharacter, msg); + return s.err(.InvalidCharacter, msg); } const uc, const unused_c = try parseHex(s, u21, msg); if (unused_c) |c| { if (c != '}') { - return s.err(error.InvalidCharacter, msg); + return s.err(.InvalidCharacter, msg); } } else { - return s.err(error.UnexpectedEof, msg); + return s.err(.UnexpectedEof, msg); } const n = try std.unicode.utf8CodepointSequenceLength(uc); @@ -792,8 +817,8 @@ fn parseUniHex(s: *State) !void { fn parseHashExpression(s: *State, next: Fn) !void { const c = try s.readNoEof("hash expression"); - if (try checkBlank(s, c) != .no) { - return s.err(error.InvalidCharacter, "hash expression"); + if (try checkBlanks(s, c) != .no) { + return s.err(.InvalidCharacter, "hash expression"); } if (std.ascii.isAlphabetic(c)) { const r, const unused_c = try parseRune(s, c); @@ -805,16 +830,14 @@ fn parseHashExpression(s: *State, next: Fn) !void { } if (isBareChar(c)) { // Reserved for future extensions to syntax sugar. 
- return s.err(error.InvalidCharacter, "hash expression"); + return s.err(.InvalidCharacter, "hash expression"); } // fast-path to avoid subr if (c == '\\') { - const bs, s.unused_char = try parseBareEscString(s); - return s.jump(next, cons(HASH, bs)); + return s.jump(next, cons(HASH, try parseBareEscString(s))); } if (c == '"') { - const qs = try parseQuotedString(s); - return s.jump(next, cons(HASH, qs)); + return s.jump(next, cons(HASH, try parseQuotedString(s))); } s.unused_char = c; return s.subr(.parse_hash_datum, next); @@ -825,8 +848,8 @@ fn parseHashDatum(s: *State) !void { } fn endHashDatum(s: *State) !void { - if (s.result.eq(value.undef)) { - return s.err(error.InvalidCharacter, "hash datum"); + if (s.result.eq(VOID)) { + return s.err(.InvalidCharacter, "hash datum"); } return s.retval(cons(HASH, s.result)); } @@ -846,12 +869,10 @@ fn parseRune(s: *State, c1: u8) !struct { Value, ?u8 } { fn parseRuneEnd(s: *State, r: Value, c1: ?u8, next: Fn) !void { const c = c1 orelse return s.jump(next, r); if (c == '\\') { - const bs, s.unused_char = try parseBareString(s, c); - return s.jump(next, cons(r, bs)); + return s.jump(next, cons(r, try parseBareString(s, c))); } if (c == '"') { - const qs = try parseQuotedString(s); - return s.jump(next, cons(r, qs)); + return s.jump(next, cons(r, try parseQuotedString(s))); } s.unused_char = c; switch (c) { @@ -869,12 +890,10 @@ fn parseRuneDatum(s: *State) !void { } fn endRuneDatum(s: *State) !void { - const r = s.context.val; - const d = s.result; - if (d.eq(value.undef)) { - s.retval(r); + if (s.result.eq(VOID)) { + s.retval(s.context.val); } - return s.retval(cons(r, d)); + return s.retval(cons(s.context.val, s.result)); } fn parseLabel(s: *State) !struct { Value, ?u8 } { @@ -883,7 +902,7 @@ fn parseLabel(s: *State) !struct { Value, ?u8 } { } fn parseLabelEnd(s: *State, l: Value, c1: ?u8, next: Fn) !void { - const c = c1 orelse return s.err(error.UnexpectedEof, "datum label"); + const c = c1 orelse return 
s.err(.UnexpectedEof, "datum label"); if (c == '%') { return s.jump(next, cons(LABEL, l)); } @@ -892,16 +911,14 @@ fn parseLabelEnd(s: *State, l: Value, c1: ?u8, next: Fn) !void { s.context.val = l; return s.subr(.parse_unit, .end_label_datum); } - return s.err(error.InvalidCharacter, "datum label"); + return s.err(.InvalidCharacter, "datum label"); } fn endLabelDatum(s: *State) !void { - const l = s.context.val; - const d = s.result; - if (d.eq(value.undef)) { - return s.err(error.InvalidCharacter, "label datum"); + if (s.result.eq(VOID)) { + return s.err(.InvalidCharacter, "label datum"); } - return s.retval(cons(LABEL, cons(l, d))); + return s.retval(cons(LABEL, cons(s.context.val, s.result))); } fn parseList(s: *State, open: u8, next: Fn) !void { @@ -921,14 +938,13 @@ fn parseList(s: *State, open: u8, next: Fn) !void { if (c == close) { return s.jump(next, head); } - switch (try checkBlank(s, c)) { + switch (try checkBlanks(s, c)) { .yes => {}, .skip_unit => { try listParserSetup(s, head, close, next); // Parse twice in a row, ignoring the first result. 
return s.subr(.parse_unit, .parse_unit); }, - .skip_line => try s.skipLine(), .no => { try listParserSetup(s, head, close, next); s.unused_char = c; @@ -936,7 +952,7 @@ fn parseList(s: *State, open: u8, next: Fn) !void { }, } } - return s.err(error.UnexpectedEof, "list"); + return s.err(.UnexpectedEof, "list"); } fn listParserSetup(s: *State, head: Value, close: u8, next: Fn) !void { @@ -953,15 +969,15 @@ fn parseListElement(s: *State) !void { fn continueList(s: *State) !void { const close = s.context.char; - if (s.result.eq(value.undef)) { + if (s.result.eq(VOID)) { const c = s.getUnused().?; if (c == close) { return endList(s); } - return s.err(error.InvalidCharacter, "list"); + return s.err(.InvalidCharacter, "list"); } - if (s.result.eq(S_DOT)) { + if (s.result.eq(LSTAIL)) { return s.subr(.parse_unit, .end_improper_list); } @@ -972,20 +988,19 @@ fn continueList(s: *State) !void { if (c == close) { return endList(s); } - switch (try checkBlank(s, c)) { + switch (try checkBlanks(s, c)) { .yes => {}, .skip_unit => { try s.pushContext(.continue_list); return s.subr(.parse_unit, .parse_unit); }, - .skip_line => try s.skipLine(), .no => { s.unused_char = c; return s.subr(.parse_list_element, .continue_list); }, } } - return s.err(error.UnexpectedEof, "list"); + return s.err(.UnexpectedEof, "list"); } fn endList(s: *State) !void { @@ -993,11 +1008,10 @@ fn endList(s: *State) !void { } fn endImproperList(s: *State) !void { - const tail = s.result; - if (tail.eq(value.undef)) { - return s.err(error.InvalidCharacter, "list tail"); + if (s.result.eq(VOID)) { + return s.err(.InvalidCharacter, "list tail"); } - s.context.val = lib.list.reverseWithTail(s.context.val, tail); + s.context.val = lib.list.reverseWithTail(s.context.val, s.result); return closeImproperList(s); } @@ -1009,11 +1023,10 @@ fn closeImproperList(s: *State) !void { if (c == close) { return s.retval(result); } - switch (try checkBlank(s, c)) { + switch (try checkBlanks(s, c)) { .yes => {}, .skip_unit => 
return s.subr(.parse_unit, .close_improper_list), - .skip_line => try s.skipLine(), - .no => return s.err(error.InvalidCharacter, "after list tail"), + .no => return s.err(.InvalidCharacter, "after list tail"), } } unreachable; @@ -1030,8 +1043,7 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { // fast-path to avoid subr const c = try s.readNoEof("quote expression"); if (isBareChar(c) or c == '\\') { - const bs, s.unused_char = try parseBareString(s, c); - return s.jump(next, cons(q, bs)); + return s.jump(next, cons(q, try parseBareString(s, c))); } s.context.val = q; @@ -1040,12 +1052,10 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { } fn endQuoteExpr(s: *State) !void { - if (s.result.eq(value.undef)) { - return s.err(error.InvalidCharacter, "quote expression datum"); + if (s.result.eq(VOID)) { + return s.err(.InvalidCharacter, "quote expression datum"); } - const q = s.context.val; - const d = s.result; - return s.retval(cons(q, d)); + return s.retval(cons(s.context.val, s.result)); } // Helpers @@ -1053,7 +1063,7 @@ fn endQuoteExpr(s: *State) !void { fn parseHex( s: *State, u_type: type, - emsg: []const u8, + comptime emsg: []const u8, ) !struct { u_type, ?u8 } { var uc: u_type = undefined; @@ -1065,13 +1075,13 @@ fn parseHex( return .{ uc, c }; } const shl = std.math.shlExact; - uc = shl(u_type, uc, 4) catch return s.err(error.OutOfRange, emsg); + uc = shl(u_type, uc, 4) catch return s.err(.OutOfRange, emsg); uc |= try parseHexDigit(s, c, emsg); } return .{ uc, null }; } -fn parseHexByte(s: *State, emsg: []const u8) !u8 { +fn parseHexByte(s: *State, comptime emsg: []const u8) !u8 { const h1 = try s.readNoEof(emsg); const h2 = try s.readNoEof(emsg); const hi = try parseHexDigit(s, h1, emsg); @@ -1079,11 +1089,11 @@ fn parseHexByte(s: *State, emsg: []const u8) !u8 { return hi << 4 | lo; } -fn parseHexDigit(s: *State, c: u8, emsg: []const u8) !u8 { +fn parseHexDigit(s: *State, c: u8, comptime emsg: []const u8) !u8 { return switch (c) { 
'0'...'9' => c - '0', 'A'...'F' => c - 'A' + 10, 'a'...'f' => c - 'a' + 10, - else => s.err(error.InvalidCharacter, emsg), + else => s.err(.InvalidCharacter, emsg), }; } diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig index 154ec13..195210e 100644 --- a/src/libzisp/value/rune.zig +++ b/src/libzisp/value/rune.zig @@ -44,6 +44,10 @@ fn assertValidRune(s: []const u8) void { pub fn pack(s: []const u8) Value { assertValidRune(s); + return packForced(s); +} + +pub fn packForced(s: []const u8) Value { var v = Value{ .rune = .{ .name = 0 } }; const dest: [*]u8 = @ptrCast(&v.rune.name); @memcpy(dest, s); -- cgit v1.2.3