summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-03-28 20:44:01 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-03-28 20:44:01 +0100
commitd714cf3b57e39979b208369f9369b526409172b3 (patch)
treeebcdcbbf6f034e8afce33e673a20a71cc03a52f9
parent6eedf5394997b91467a392732cdb7fbb80a790b8 (diff)
blip
-rw-r--r--spec/parser.ebnf4
-rw-r--r--src/libzisp.zig101
-rw-r--r--src/libzisp/io/parser.zig65
-rw-r--r--src/libzisp/io/unparser.zig23
-rw-r--r--src/libzisp/value/istr.zig4
5 files changed, 126 insertions, 71 deletions
diff --git a/spec/parser.ebnf b/spec/parser.ebnf
index 60f7890..ce7fa83 100644
--- a/spec/parser.ebnf
+++ b/spec/parser.ebnf
@@ -49,7 +49,7 @@ bare_esc_str : bare_esc bare_str_elt* ;
quoted_str : ( quoted_char | '\' quoted_esc )* ;
hash_expr : rune clad_datum?
- | '%' label ( '%' | '=' datum_unit )
+ | '%' label ( '%' | '=' blank* datum )
| clad_datum
;
@@ -57,7 +57,7 @@ list : datum_unit+ list_tail? blank* ;
list_tail : '.' blank+ datum_unit
-quote_expr : ( "'" | "`" | "," ) datum ;
+quote_expr : ( "'" | "`" | "," ) blank* datum ;
bare_char : letter | digit
diff --git a/src/libzisp.zig b/src/libzisp.zig
index ceee3f6..de3f2e6 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -345,6 +345,56 @@ test "parse4" {
try std.testing.expectEqualStrings("bar", f.slice());
}
+test "unparse" {
+ var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+ var out: std.ArrayList(u8) = .init(gpa.allocator());
+
+ const w = out.writer();
+ const v = parseString("#foo");
+ try io.unparser.unparse(w, v);
+ try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice());
+}
+
+test "unparse2" {
+ var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+ var out: std.ArrayList(u8) = .init(gpa.allocator());
+
+ const w = out.writer();
+ const v = parseString("#{foo bar['x]}");
+ try io.unparser.unparse(w, v);
+ try std.testing.expectEqualStrings(
+ "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))",
+ try out.toOwnedSlice(),
+ );
+}
+
+fn writeParseResult(str: []const u8) !void {
+ const w = std.io.getStdErr().writer();
+ const v = parseString(str);
+ try io.unparser.unparse(w, v);
+ try w.writeByte('\n');
+}
+
+test "unparse3" {
+ try writeParseResult("#{foo bar['x](y)(z)}");
+}
+
+test "unparse4" {
+ try writeParseResult("(foo ;~bar)");
+}
+
+test "unparse5" {
+ try writeParseResult("(;~foo foo ;~bar . ;~bar bar ;~bar)");
+}
+
+test "unparse6" {
+ try writeParseResult("(foo bar ... baz bat.(qux))");
+}
+
+test "unparse7" {
+ try writeParseResult("#`(#,(->keyword (syntax->datum #'sym)) . in)");
+}
+
fn parseBench(path: []const u8, iters: usize) !void {
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
@@ -382,54 +432,3 @@ test "parse bench" {
try parseBench("test-data/parser-test-2.scm", 1000);
try parseBench("test-data/parser-torture.scm", 1);
}
-
-test "unparse" {
- var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
- var out: std.ArrayList(u8) = .init(gpa.allocator());
-
- const w = out.writer();
- const v = parseString("#foo");
- try io.unparser.unparse(w, v);
- try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice());
-}
-
-test "unparse2" {
- var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
- var out: std.ArrayList(u8) = .init(gpa.allocator());
-
- const w = out.writer();
- const v = parseString("#{foo bar['x]}");
- try io.unparser.unparse(w, v);
- try std.testing.expectEqualStrings(
- "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))",
- try out.toOwnedSlice(),
- );
-}
-
-test "unparse3" {
- const w = std.io.getStdErr().writer();
- const v = parseString("#{foo bar['x](y)(z)}");
- try io.unparser.unparse(w, v);
- try w.writeByte('\n');
-}
-
-test "unparse4" {
- const w = std.io.getStdErr().writer();
- const v = parseString("(foo ;~bar)");
- try io.unparser.unparse(w, v);
- try w.writeByte('\n');
-}
-
-test "unparse5" {
- const w = std.io.getStdErr().writer();
- const v = parseString("(;~foo foo ;~bar . ;~bar bar ;~bar)");
- try io.unparser.unparse(w, v);
- try w.writeByte('\n');
-}
-
-test "unparse6" {
- const w = std.io.getStdErr().writer();
- const v = parseString("(foo bar ... baz bat.(qux))");
- try io.unparser.unparse(w, v);
- try w.writeByte('\n');
-}
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 8093ffe..209c548 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -257,7 +257,7 @@ const cons = value.pair.cons;
const is_test = builtin.is_test;
const is_debug = builtin.mode == .Debug;
-const detailed_debug = false;
+pub var detailed_debug = false;
// In debug, we want to see if we leak, so very small numbers.
const init_stack_capacity = if (is_debug) 32 else 32;
@@ -474,7 +474,9 @@ pub fn _parse(input: std.io.AnyReader) !Value {
return e;
};
if (s.unused_char) |c| {
- std.debug.panic("Invalid trailing character: {c}\n", .{c});
+ if (c != ' ') {
+ std.debug.panic("Invalid trailing character: {c}\n", .{c});
+ }
}
return s.result;
}
@@ -577,16 +579,16 @@ fn parseDatum(s: *State, c: u8) !void {
}
fn parseDotString(s: *State) !void {
- try s.addChar('.');
- while (try s.read()) |c| {
+ var n: u48 = 1;
+ while (try s.read()) |c| : (n += 1) {
switch (try checkBlanks(s, c)) {
- .yes => return dotString(s, false),
- .skip_unit => return dotString(s, true),
+ .yes => return dotString(s, n, false),
+ .skip_unit => return dotString(s, n, true),
.no => switch (c) {
- '.' => try s.addChar('.'),
+ '.' => {},
')', ']', '}' => {
s.unused_char = c;
- return dotString(s, false);
+ return dotString(s, n, false);
},
else => return s.err(.InvalidCharacter, "dot string"),
},
@@ -595,9 +597,12 @@ fn parseDotString(s: *State) !void {
unreachable;
}
-fn dotString(s: *State, skip_unit: bool) !void {
- const lstail = s.chars.items.len == 1;
- const result = if (lstail) LSTAIL else s.getBareString();
+fn dotString(s: *State, n: u48, skip_unit: bool) !void {
+ const result = if (n == 1) LSTAIL else r: {
+ const buf = try s.chars.addManyAsSlice(s.chars_alloc, n);
+ @memset(buf, '.');
+ break :r s.getBareString();
+ };
if (skip_unit) {
s.context.val = result;
return s.subr(.parse_unit, .return_context);
@@ -619,6 +624,7 @@ fn endOneDatum(s: *State) !void {
.no => return parseJoin(s, d, c),
}
}
+ s.unused_char = ' ';
return s.retval(d);
}
@@ -628,13 +634,17 @@ fn skipUnitAndReturn(s: *State, d: Value) !void {
}
fn returnContext(s: *State) !void {
+ s.unused_char = ' ';
return s.retval(s.context.val);
}
fn parseJoin(s: *State, d: Value, c: u8) !void {
- s.context.val = d;
- s.context.char = c;
switch (c) {
+ ')', ']', '}' => {
+ // shortcut
+ s.unused_char = c;
+ return s.retval(d);
+ },
'.', ':', '|' => {
s.context.char = c;
s.unused_char = try s.readNoEof("join datum");
@@ -644,6 +654,7 @@ fn parseJoin(s: *State, d: Value, c: u8) !void {
s.unused_char = c;
},
}
+ s.context.val = d;
return s.subr(.parse_join_datum, .join_data);
}
@@ -718,17 +729,34 @@ fn isBareEsc(c: u8) bool {
fn parseBareString(s: *State, c: u8) !Value {
try s.addChar(c);
- return parseBareStringRest(s);
+ var is_num = false;
+ if (std.ascii.isDigit(c)) {
+ is_num = true;
+ } else if (c == '+' or c == '-') {
+ const c2 = try s.read() orelse return s.getBareString();
+ if (std.ascii.isDigit(c2)) {
+ try s.addChar(c2);
+ is_num = true;
+ } else if (isBareChar(c2)) {
+ try s.addChar(c2);
+ } else if (c2 == '\\') {
+ try s.addChar(try parseBareEsc(s));
+ } else {
+ s.unused_char = c2;
+ return s.getBareString();
+ }
+ }
+ return parseBareStringRest(s, is_num);
}
fn parseBareEscString(s: *State) !Value {
try s.addChar(try parseBareEsc(s));
- return parseBareStringRest(s);
+ return parseBareStringRest(s, false);
}
-fn parseBareStringRest(s: *State) !Value {
+fn parseBareStringRest(s: *State, is_num: bool) !Value {
while (try s.read()) |c| {
- if (isBareChar(c)) {
+ if (isBareChar(c) or (is_num and c == '.')) {
try s.addChar(c);
} else if (c == '\\') {
try s.addChar(try parseBareEsc(s));
@@ -1046,9 +1074,10 @@ fn parseQuoteExpr(s: *State, c1: u8, next: Fn) !void {
return s.jump(next, cons(q, try parseBareString(s, c)));
}
+ try s.push(next);
s.context.val = q;
s.unused_char = c;
- return s.subr(.parse_list_element, .end_quote_expr);
+ return s.subr(.parse_unit, .end_quote_expr);
}
fn endQuoteExpr(s: *State) !void {
diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig
index d65ffb0..d703182 100644
--- a/src/libzisp/io/unparser.zig
+++ b/src/libzisp/io/unparser.zig
@@ -2,6 +2,9 @@ const std = @import("std");
const value = @import("../value.zig");
+const istr = value.istr;
+const seq = value.seq;
+
const ShortString = value.ShortString;
const OtherTag = value.OtherTag;
const Value = value.Value;
@@ -33,6 +36,7 @@ fn unparseHeap(w: anytype, v: Value) !void {
const p, const t = value.ptr.unpack(v);
try switch (t) {
.pair => unparsePair(w, @ptrCast(p)),
+ .seq => unparseSeq(w, @ptrCast(p)),
else => @panic("not implemented"),
};
}
@@ -97,3 +101,22 @@ fn unparsePair(w: anytype, p: *[2]Value) !void {
}
try w.writeByte(')');
}
+
+fn unparseSeq(w: anytype, p: *seq.Header) !void {
+ const h = istr.getHeaderFromPtr(@ptrCast(p));
+ switch (h.type) {
+ .string => try unparseString(w, h),
+ else => @panic("not implemented"),
+ }
+}
+
+fn unparseString(w: anytype, h: *seq.Header) !void {
+ const info = h.info.string;
+ if (info.quoted) {
+ try w.writeByte('"');
+ }
+ try w.writeAll(h.bytes());
+ if (info.quoted) {
+ try w.writeByte('"');
+ }
+}
diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig
index 9834716..abd0447 100644
--- a/src/libzisp/value/istr.zig
+++ b/src/libzisp/value/istr.zig
@@ -46,6 +46,10 @@ pub fn getHeader(v: Value) *seq.Header {
return gc.istrHeader(header_ptr);
}
+pub fn getHeaderFromPtr(p: *Hval) *seq.Header {
+ return gc.istrHeader(p);
+}
+
// Zisp API
pub fn pred(v: Value) Value {