diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 20:44:01 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-28 20:44:01 +0100 |
| commit | d714cf3b57e39979b208369f9369b526409172b3 (patch) | |
| tree | ebcdcbbf6f034e8afce33e673a20a71cc03a52f9 | |
| parent | 6eedf5394997b91467a392732cdb7fbb80a790b8 (diff) | |
blip
| -rw-r--r-- | spec/parser.ebnf | 4 | ||||
| -rw-r--r-- | src/libzisp.zig | 101 | ||||
| -rw-r--r-- | src/libzisp/io/parser.zig | 65 | ||||
| -rw-r--r-- | src/libzisp/io/unparser.zig | 23 | ||||
| -rw-r--r-- | src/libzisp/value/istr.zig | 4 |
5 files changed, 126 insertions, 71 deletions
diff --git a/spec/parser.ebnf b/spec/parser.ebnf index 60f7890..ce7fa83 100644 --- a/spec/parser.ebnf +++ b/spec/parser.ebnf @@ -49,7 +49,7 @@ bare_esc_str : bare_esc bare_str_elt* ; quoted_str : ( quoted_char | '\' quoted_esc )* ; hash_expr : rune clad_datum? - | '%' label ( '%' | '=' datum_unit ) + | '%' label ( '%' | '=' blank* datum ) | clad_datum ; @@ -57,7 +57,7 @@ list : datum_unit+ list_tail? blank* ; list_tail : '.' blank+ datum_unit -quote_expr : ( "'" | "`" | "," ) datum ; +quote_expr : ( "'" | "`" | "," ) blank* datum ; bare_char : letter | digit diff --git a/src/libzisp.zig b/src/libzisp.zig index ceee3f6..de3f2e6 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -345,6 +345,56 @@ test "parse4" { try std.testing.expectEqualStrings("bar", f.slice()); } +test "unparse" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#foo"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); +} + +test "unparse2" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#{foo bar['x]}"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings( + "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", + try out.toOwnedSlice(), + ); +} + +fn writeParseResult(str: []const u8) !void { + const w = std.io.getStdErr().writer(); + const v = parseString(str); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse3" { + try writeParseResult("#{foo bar['x](y)(z)}"); +} + +test "unparse4" { + try writeParseResult("(foo ;~bar)"); +} + +test "unparse5" { + try writeParseResult("(;~foo foo ;~bar . ;~bar bar ;~bar)"); +} + +test "unparse6" { + try writeParseResult("(foo bar ... baz bat.(qux))"); +} + +test "unparse7" { + try writeParseResult("#`(#,(->keyword (syntax->datum #'sym)) . in)"); +} + fn parseBench(path: []const u8, iters: usize) !void { const file = try std.fs.cwd().openFile(path, .{}); defer file.close(); @@ -382,54 +432,3 @@ test "parse bench" { try parseBench("test-data/parser-test-2.scm", 1000); try parseBench("test-data/parser-torture.scm", 1); } - -test "unparse" { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - var out: std.ArrayList(u8) = .init(gpa.allocator()); - - const w = out.writer(); - const v = parseString("#foo"); - try io.unparser.unparse(w, v); - try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); -} - -test "unparse2" { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - var out: std.ArrayList(u8) = .init(gpa.allocator()); - - const w = out.writer(); - const v = parseString("#{foo bar['x]}"); - try io.unparser.unparse(w, v); - try std.testing.expectEqualStrings( - "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", - try out.toOwnedSlice(), - ); -} - -test "unparse3" { - const w = std.io.getStdErr().writer(); - const v = parseString("#{foo bar['x](y)(z)}"); - try io.unparser.unparse(w, v); - try w.writeByte('\n'); -} - -test "unparse4" { - const w = std.io.getStdErr().writer(); - const v = parseString("(foo ;~bar)"); - try io.unparser.unparse(w, v); - try w.writeByte('\n'); -} - -test "unparse5" { - const w = std.io.getStdErr().writer(); - const v = parseString("(;~foo foo ;~bar . ;~bar bar ;~bar)"); - try io.unparser.unparse(w, v); - try w.writeByte('\n'); -} - -test "unparse6" { - const w = std.io.getStdErr().writer(); - const v = parseString("(foo bar ... baz bat.(qux))"); - try io.unparser.unparse(w, v); - try w.writeByte('\n'); -} diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 8093ffe..209c548 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -257,7 +257,7 @@ const cons = value.pair.cons; const is_test = builtin.is_test; const is_debug = builtin.mode == .Debug; -const detailed_debug = false; +pub var detailed_debug = false; // In debug, we want to see if we leak, so very small numbers. const init_stack_capacity = if (is_debug) 32 else 32; @@ -474,7 +474,9 @@ pub fn _parse(input: std.io.AnyReader) !Value { return e; }; if (s.unused_char) |c| { - std.debug.panic("Invalid trailing character: {c}\n", .{c}); + if (c != ' ') { + std.debug.panic("Invalid trailing character: {c}\n", .{c}); + } } return s.result; } @@ -577,16 +579,16 @@ fn parseDatum(s: *State, c: u8) !void { } fn parseDotString(s: *State) !void { - try s.addChar('.'); - while (try s.read()) |c| { + var n: u48 = 1; + while (try s.read()) |c| : (n += 1) { switch (try checkBlanks(s, c)) { - .yes => return dotString(s, false), - .skip_unit => return dotString(s, true), + .yes => return dotString(s, n, false), + .skip_unit => return dotString(s, n, true), .no => switch (c) { - '.' => try s.addChar('.'), + '.' => {}, ')', ']', '}' => { s.unused_char = c; - return dotString(s, false); + return dotString(s, n, false); }, else => return s.err(.InvalidCharacter, "dot string"), }, @@ -595,9 +597,12 @@ fn parseDotString(s: *State) !void { unreachable; } -fn dotString(s: *State, skip_unit: bool) !void { - const lstail = s.chars.items.len == 1; - const result = if (lstail) LSTAIL else s.getBareString(); +fn dotString(s: *State, n: u48, skip_unit: bool) !void { + const result = if (n == 1) LSTAIL else r: { + const buf = try s.chars.addManyAsSlice(s.chars_alloc, n); + @memset(buf, '.'); + break :r s.getBareString(); + }; if (skip_unit) { s.context.val = result; return s.subr(.parse_unit, .return_context); @@ -619,6 +624,7 @@ fn endOneDatum(s: *State) !void { .no => return parseJoin(s, d, c), } } + s.unused_char = ' '; return s.retval(d); } @@ -628,13 +634,17 @@ fn skipUnitAndReturn(s: *State, d: Value) !void { } fn returnContext(s: *State) !void { + s.unused_char = ' '; return s.retval(s.context.val); } fn parseJoin(s: *State, d: Value, c: u8) !void { - s.context.val = d; - s.context.char = c; switch (c) { + ')', ']', '}' => { + // shortcut + s.unused_char = c; + return s.retval(d); + }, '.', ':', '|' => { s.context.char = c; s.unused_char = try s.readNoEof("join datum"); @@ -644,6 +654,7 @@ fn parseJoin(s: *State, d: Value, c: u8) !void { s.unused_char = c; }, } + s.context.val = d; return s.subr(.parse_join_datum, .join_data); } @@ -718,17 +729,34 @@ fn isBareEsc(c: u8) bool { fn parseBareString(s: *State, c: u8) !Value { try s.addChar(c); - return parseBareStringRest(s); + var is_num = false; + if (std.ascii.isDigit(c)) { + is_num = true; + } else if (c == '+' or c == '-') { + const c2 = try s.read() orelse return s.getBareString(); + if (std.ascii.isDigit(c2)) { + try s.addChar(c2); + is_num = true; + } else if (isBareChar(c2)) { + try s.addChar(c2); + } else if (c2 == '\\') { + try s.addChar(try parseBareEsc(s)); + } else { + s.unused_char = c2; + return s.getBareString(); + } + } + return parseBareStringRest(s, is_num); } fn parseBareEscString(s: *State) !Value { try s.addChar(try parseBareEsc(s)); - return parseBareStringRest(s); + return parseBareStringRest(s, false); } -fn parseBareStringRest(s: *State) !Value { +fn parseBareStringRest(s: *State, is_num: bool) !Value { while (try s.read()) |c| { - if (isBareChar(c)) { + if (isBareChar(c) or (is_num and c == '.')) { try s.addChar(c); } else if (c == '\\') { try s.addChar(try parseBareEsc(s)); @@ -1046,9 +1074,10 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void { return s.jump(next, cons(q, try parseBareString(s, c))); } + try s.push(next); s.context.val = q; s.unused_char = c; - return s.subr(.parse_list_element, .end_quote_expr); + return s.subr(.parse_unit, .end_quote_expr); } fn endQuoteExpr(s: *State) !void { diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig index d65ffb0..d703182 100644 --- a/src/libzisp/io/unparser.zig +++ b/src/libzisp/io/unparser.zig @@ -2,6 +2,9 @@ const std = @import("std"); const value = @import("../value.zig"); +const istr = value.istr; +const seq = value.seq; + const ShortString = value.ShortString; const OtherTag = value.OtherTag; const Value = value.Value; @@ -33,6 +36,7 @@ fn unparseHeap(w: anytype, v: Value) !void { const p, const t = value.ptr.unpack(v); try switch (t) { .pair => unparsePair(w, @ptrCast(p)), + .seq => unparseSeq(w, @ptrCast(p)), else => @panic("not implemented"), }; } @@ -97,3 +101,22 @@ fn unparsePair(w: anytype, p: *[2]Value) !void { } try w.writeByte(')'); } + +fn unparseSeq(w: anytype, p: *seq.Header) !void { + const h = istr.getHeaderFromPtr(@ptrCast(p)); + switch (h.type) { + .string => try unparseString(w, h), + else => @panic("not implemented"), + } +} + +fn unparseString(w: anytype, h: *seq.Header) !void { + const info = h.info.string; + if (info.quoted) { + try w.writeByte('"'); + } + try w.writeAll(h.bytes()); + if (info.quoted) { + try w.writeByte('"'); + } +} diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig index 9834716..abd0447 100644 --- a/src/libzisp/value/istr.zig +++ b/src/libzisp/value/istr.zig @@ -46,6 +46,10 @@ pub fn getHeader(v: Value) *seq.Header { return gc.istrHeader(header_ptr); } +pub fn getHeaderFromPtr(p: *Hval) *seq.Header { + return gc.istrHeader(p); +} + // Zisp API pub fn pred(v: Value) Value { |
