diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-25 20:49:49 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-25 20:49:49 +0100 |
| commit | ca8de6eb6bd0fe1ee3ef22c659cf416d41bc7a2f (patch) | |
| tree | 4fc98874a5aafecca3aeb95efccb5d95eb386459 | |
| parent | 0f432b2c76813f2c0f9e508f10227df491712837 (diff) | |
update
| -rw-r--r-- | src/libzisp.zig | 420 | ||||
| -rw-r--r-- | src/libzisp/io/parser.zig | 194 | ||||
| -rw-r--r-- | src/libzisp/io/unparser.zig | 9 | ||||
| -rw-r--r-- | src/libzisp/value.zig | 48 | ||||
| -rw-r--r-- | src/libzisp/value/char.zig | 4 | ||||
| -rw-r--r-- | src/libzisp/value/rune.zig | 13 | ||||
| -rw-r--r-- | src/libzisp/value/sstr.zig | 27 |
7 files changed, 400 insertions, 315 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig index 4bf8b08..17264a8 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -10,99 +10,100 @@ pub const io = @import("libzisp/io.zig"); pub const lib = @import("libzisp/lib.zig"); pub const value = @import("libzisp/value.zig"); +pub const ShortString = value.ShortString; pub const Value = value.Value; -test "double" { - const d1: f64 = 0.123456789; - const d2: f64 = -0.987654321; - const v1 = value.double.pack(d1); - const v2 = value.double.pack(d2); - const v3 = value.double.add(v1, v2); - const result = value.double.unpack(v3); - - try std.testing.expect(value.double.check(v1)); - try std.testing.expect(value.double.check(v2)); - try std.testing.expect(value.double.check(v3)); - - try std.testing.expectEqual(d1 + d2, result); -} - -test "fixnum" { - const int1: i64 = 123456789; - const int2: i64 = -987654321; - const v1 = value.fixnum.pack(int1); - const v2 = value.fixnum.pack(int2); - const v3 = value.fixnum.add(v1, v2); - const result = value.fixnum.unpack(v3); - - try std.testing.expect(value.fixnum.check(v1)); - try std.testing.expect(value.fixnum.check(v2)); - try std.testing.expect(value.fixnum.check(v3)); - - try std.testing.expectEqual(int1 + int2, result); -} - -test "ptr" { - const ptr = value.ptr; - - const val: [*]gc.Bucket = @ptrFromInt(256); - const tag = ptr.Tag.string; - - const p = ptr.pack(val, tag); - try std.testing.expect(ptr.check(p)); - try std.testing.expect(ptr.checkZisp(p, tag)); - try std.testing.expect(ptr.checkStrong(p)); - - const pv, const pt = ptr.unpack(p); - try std.testing.expectEqual(val, pv); - try std.testing.expectEqual(tag, pt); - - var w = ptr.makeWeak(p); - try std.testing.expect(ptr.check(w)); - try std.testing.expect(ptr.checkZisp(w, tag)); - try std.testing.expect(ptr.checkWeak(w)); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); - try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w))); - - const wv, const wt = ptr.unpack(w); 
- try std.testing.expectEqual(val, wv); - try std.testing.expectEqual(tag, wt); - - const wv2, const wt2 = ptr.unpack(ptr.getWeak(w)); - try std.testing.expectEqual(val, wv2); - try std.testing.expectEqual(tag, wt2); - - ptr.setWeakNull(&w); - try std.testing.expect(ptr.check(w)); - try std.testing.expect(ptr.checkWeak(w)); - try std.testing.expect(ptr.isWeakNull(w)); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w))); - try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w))); -} - -test "fptr" { - const ptr = value.ptr; - - const int1: u50 = 0; - const int2: u50 = std.math.maxInt(u50); - - const f1 = ptr.packForeign(int1); - try std.testing.expect(ptr.checkForeign(f1)); - try std.testing.expectEqual(int1, ptr.unpackForeign(f1)); - - const f2 = ptr.packForeign(int2); - try std.testing.expect(ptr.checkForeign(f2)); - try std.testing.expectEqual(int2, ptr.unpackForeign(f2)); -} - -test "rune" { - const r1 = value.rune.pack("test"); - try std.testing.expect(value.rune.check(r1)); - - const s1, const l1 = value.rune.unpack(r1); - try std.testing.expectEqualStrings("test", s1[0..l1]); -} +// test "double" { +// const d1: f64 = 0.123456789; +// const d2: f64 = -0.987654321; +// const v1 = value.double.pack(d1); +// const v2 = value.double.pack(d2); +// const v3 = value.double.add(v1, v2); +// const result = value.double.unpack(v3); + +// try std.testing.expect(value.double.check(v1)); +// try std.testing.expect(value.double.check(v2)); +// try std.testing.expect(value.double.check(v3)); + +// try std.testing.expectEqual(d1 + d2, result); +// } + +// test "fixnum" { +// const int1: i64 = 123456789; +// const int2: i64 = -987654321; +// const v1 = value.fixnum.pack(int1); +// const v2 = value.fixnum.pack(int2); +// const v3 = value.fixnum.add(v1, v2); +// const result = value.fixnum.unpack(v3); + +// try std.testing.expect(value.fixnum.check(v1)); +// try 
std.testing.expect(value.fixnum.check(v2)); +// try std.testing.expect(value.fixnum.check(v3)); + +// try std.testing.expectEqual(int1 + int2, result); +// } + +// test "ptr" { +// const ptr = value.ptr; + +// const val: [*]gc.Bucket = @ptrFromInt(256); +// const tag = ptr.Tag.string; + +// const p = ptr.pack(val, tag); +// try std.testing.expect(ptr.check(p)); +// try std.testing.expect(ptr.checkZisp(p, tag)); +// try std.testing.expect(ptr.checkStrong(p)); + +// const pv, const pt = ptr.unpack(p); +// try std.testing.expectEqual(val, pv); +// try std.testing.expectEqual(tag, pt); + +// var w = ptr.makeWeak(p); +// try std.testing.expect(ptr.check(w)); +// try std.testing.expect(ptr.checkZisp(w, tag)); +// try std.testing.expect(ptr.checkWeak(w)); +// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); +// try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w))); + +// const wv, const wt = ptr.unpack(w); +// try std.testing.expectEqual(val, wv); +// try std.testing.expectEqual(tag, wt); + +// const wv2, const wt2 = ptr.unpack(ptr.getWeak(w)); +// try std.testing.expectEqual(val, wv2); +// try std.testing.expectEqual(tag, wt2); + +// ptr.setWeakNull(&w); +// try std.testing.expect(ptr.check(w)); +// try std.testing.expect(ptr.checkWeak(w)); +// try std.testing.expect(ptr.isWeakNull(w)); +// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); +// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w))); +// try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w))); +// } + +// test "fptr" { +// const ptr = value.ptr; + +// const int1: u50 = 0; +// const int2: u50 = std.math.maxInt(u50); + +// const f1 = ptr.packForeign(int1); +// try std.testing.expect(ptr.checkForeign(f1)); +// try std.testing.expectEqual(int1, ptr.unpackForeign(f1)); + +// const f2 = ptr.packForeign(int2); +// try std.testing.expect(ptr.checkForeign(f2)); +// try std.testing.expectEqual(int2, 
ptr.unpackForeign(f2)); +// } + +// test "rune" { +// const r = value.rune.pack("test"); +// try std.testing.expect(value.rune.check(r)); + +// const s = value.rune.unpack(r); +// try std.testing.expectEqualStrings("test", s.slice()); +// } const SstrImpl = struct { SstrPack, SstrUnpack }; const SstrPack = *const fn ([]const u8) Value; @@ -111,8 +112,8 @@ const SstrUnpack = *const fn (Value) struct { [6]u8, u3 }; test "sstr" { const impls = [_]SstrImpl{ .{ value.sstr.pack, value.sstr.unpack }, - // .{ value.sstr.pack1, value.sstr.unpack1 }, - // .{ value.sstr.pack2, value.sstr.unpack2 }, + .{ value.sstr.pack, value.sstr.unpack1 }, + // .{ value.sstr.pack, value.sstr.unpack2 }, // .{ value.sstr.pack3, value.sstr.unpack3 }, // .{ value.sstr.pack4, value.sstr.unpack4 }, }; @@ -125,7 +126,7 @@ test "sstr" { const iters = switch (@import("builtin").mode) { .Debug, .ReleaseSmall => 10_000_000, .ReleaseSafe => 100_000_000, - .ReleaseFast => 1_000_000_000, + .ReleaseFast => 100_000_000, }; std.debug.print("Benchmarking with {} iters.\n", .{iters}); inline for (impls, 0..) 
|impl, i| { @@ -141,14 +142,14 @@ fn testSstr(impl: SstrImpl) !void { const ss2 = pack("123"); const ss3 = pack("123456"); - const s1, const l1 = unpack(ss1); - const s2, const l2 = unpack(ss2); - const s3, const l3 = unpack(ss3); - try std.testing.expect(value.sstr.check(ss1)); try std.testing.expect(value.sstr.check(ss2)); try std.testing.expect(value.sstr.check(ss3)); + const s1, const l1 = unpack(ss1); + const s2, const l2 = unpack(ss2); + const s3, const l3 = unpack(ss3); + try std.testing.expectEqual(1, l1); try std.testing.expectEqualStrings("1", s1[0..l1]); @@ -195,117 +196,116 @@ fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void { std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs }); } -test "char" { - const c1 = value.char.pack('\x00'); - try std.testing.expect(value.char.check(c1)); - try std.testing.expectEqual('\x00', value.char.unpack(c1)); - - const c2 = value.char.pack('😀'); - try std.testing.expect(value.char.check(c2)); - try std.testing.expectEqual('😀', value.char.unpack(c2)); -} - -test "misc" { - const f = value.boole.pack(false); - try std.testing.expect(value.boole.check(f)); - try std.testing.expectEqual(false, value.boole.unpack(f)); - try std.testing.expect(value.boole.unpack(value.boole.pred(f))); +// test "char" { +// const c1 = value.char.pack('\x00'); +// try std.testing.expect(value.char.check(c1)); +// try std.testing.expectEqual('\x00', value.char.unpack(c1)); - const t = value.boole.pack(true); - try std.testing.expect(value.boole.check(t)); - try std.testing.expectEqual(true, value.boole.unpack(t)); - try std.testing.expect(value.boole.unpack(value.boole.pred(t))); +// const c2 = value.char.pack('😀'); +// try std.testing.expect(value.char.check(c2)); +// try std.testing.expectEqual('😀', value.char.unpack(c2)); +// } - const nil = value.nil.get(); - try std.testing.expect(value.nil.check(nil)); - try std.testing.expect(value.boole.unpack(value.nil.pred(nil))); - - const eof = value.eof.get(); - try 
std.testing.expect(value.eof.check(eof)); - try std.testing.expect(value.boole.unpack(value.eof.pred(eof))); -} - -test "pair" { - const v1 = value.fixnum.pack(1); - const v2 = value.fixnum.pack(2); - - const v3 = value.fixnum.pack(3); - const v4 = value.fixnum.pack(4); - - const p = value.pair.cons(v1, v2); - try std.testing.expect(value.pair.check(p)); - try std.testing.expect(value.boole.unpack(value.pair.pred(p))); - - const car = value.pair.car(p); - const cdr = value.pair.cdr(p); - try std.testing.expectEqual(1, value.fixnum.unpack(car)); - try std.testing.expectEqual(2, value.fixnum.unpack(cdr)); - - value.pair.setcar(p, v3); - value.pair.setcdr(p, v4); - - const car2 = value.pair.car(p); - const cdr2 = value.pair.cdr(p); - try std.testing.expectEqual(3, value.fixnum.unpack(car2)); - try std.testing.expectEqual(4, value.fixnum.unpack(cdr2)); -} - -test "parse" { - const val = io.parser.parseCode("\"foo\""); - const r, const rl = value.rune.unpack(value.pair.car(val)); - const s, const sl = value.sstr.unpack(value.pair.cdr(val)); - try std.testing.expectEqualStrings("STRING", r[0..rl]); - try std.testing.expectEqualStrings("foo", s[0..sl]); -} - -test "parse2" { - const val = io.parser.parseCode( - \\ ;; Testing some crazy datum comments - \\ ##;"bar"#;([x #"y"]{##`,'z})"foo" - \\ ;; end - ); - - const r, const rl = value.rune.unpack(value.pair.car(val)); - try std.testing.expectEqualStrings("HASH", r[0..rl]); - - const cdr = value.pair.cdr(val); - - const s, const sl = value.rune.unpack(value.pair.car(cdr)); - try std.testing.expectEqualStrings("STRING", s[0..sl]); - - const f, const fl = value.sstr.unpack(value.pair.cdr(cdr)); - try std.testing.expectEqualStrings("foo", f[0..fl]); -} - -test "parse3" { - const val = io.parser.parseCode( - \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat) - ); - - const car = value.pair.car; - const cdr = value.pair.cdr; - - // const e1 = car(val); - const e2 = car(cdr(val)); - // const e3 = car(cdr(cdr(val))); - // const e4 = 
car(cdr(cdr(cdr(val)))); - - try std.testing.expect(value.rune.check(e2)); -} - -test "parse4" { - const val = io.parser.parseCode("(foo . #;x bar #;y)"); - - const s, const sl = value.sstr.unpack(value.pair.car(val)); - try std.testing.expectEqualStrings("foo", s[0..sl]); - - const f, const fl = value.sstr.unpack(value.pair.cdr(val)); - try std.testing.expectEqualStrings("bar", f[0..fl]); -} - -test "unparse" { - try std.testing.expectEqualStrings( - "#foo", - io.unparser.unparse(io.parser.parseCode("#foo")), - ); -} +// test "misc" { +// const f = value.boole.pack(false); +// try std.testing.expect(value.boole.check(f)); +// try std.testing.expectEqual(false, value.boole.unpack(f)); +// try std.testing.expect(value.boole.unpack(value.boole.pred(f))); + +// const t = value.boole.pack(true); +// try std.testing.expect(value.boole.check(t)); +// try std.testing.expectEqual(true, value.boole.unpack(t)); +// try std.testing.expect(value.boole.unpack(value.boole.pred(t))); + +// const nil = value.nil.get(); +// try std.testing.expect(value.nil.check(nil)); +// try std.testing.expect(value.boole.unpack(value.nil.pred(nil))); + +// const eof = value.eof.get(); +// try std.testing.expect(value.eof.check(eof)); +// try std.testing.expect(value.boole.unpack(value.eof.pred(eof))); +// } + +// test "pair" { +// const v1 = value.fixnum.pack(1); +// const v2 = value.fixnum.pack(2); + +// const v3 = value.fixnum.pack(3); +// const v4 = value.fixnum.pack(4); + +// const p = value.pair.cons(v1, v2); +// try std.testing.expect(value.pair.check(p)); +// try std.testing.expect(value.boole.unpack(value.pair.pred(p))); + +// const car = value.pair.car(p); +// const cdr = value.pair.cdr(p); +// try std.testing.expectEqual(1, value.fixnum.unpack(car)); +// try std.testing.expectEqual(2, value.fixnum.unpack(cdr)); + +// value.pair.setcar(p, v3); +// value.pair.setcdr(p, v4); + +// const car2 = value.pair.car(p); +// const cdr2 = value.pair.cdr(p); +// try std.testing.expectEqual(3, 
value.fixnum.unpack(car2)); +// try std.testing.expectEqual(4, value.fixnum.unpack(cdr2)); +// } + +// test "parse" { +// const val = io.parser.parseCode("\"foo\""); + +// try std.testing.expect(value.sstr.check(val)); + +// const s = value.sstr.unpack(val); +// try std.testing.expectEqualStrings("foo", s.slice()); +// } + +// test "parse2" { +// const val = io.parser.parseCode( +// \\ ;; Testing some crazy datum comments +// \\ ##;"bar"#;([x #"y"]{##`,'z})"foo" +// \\ ;; end +// ); + +// const r = value.rune.unpack(value.pair.car(val)); +// try std.testing.expectEqualStrings("HASH", r.slice()); + +// const s = value.pair.cdr(val); +// try std.testing.expect(value.sstr.check(s)); + +// const f = value.sstr.unpack(s); +// try std.testing.expectEqualStrings("foo", f.slice()); +// } + +// test "parse3" { +// const val = io.parser.parseCode( +// \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat) +// ); + +// const car = value.pair.car; +// const cdr = value.pair.cdr; + +// // const e1 = car(val); +// const e2 = car(cdr(val)); +// // const e3 = car(cdr(cdr(val))); +// // const e4 = car(cdr(cdr(cdr(val)))); + +// try std.testing.expect(value.rune.check(e2)); +// } + +// test "parse4" { +// const val = io.parser.parseCode("(foo . #;x bar #;y)"); + +// const s = value.sstr.unpack(value.pair.car(val)); +// try std.testing.expectEqualStrings("foo", s.slice()); + +// const f = value.sstr.unpack(value.pair.cdr(val)); +// try std.testing.expectEqualStrings("bar", f.slice()); +// } + +// test "unparse" { +// try std.testing.expectEqualStrings( +// "#foo", +// io.unparser.unparse(io.parser.parseCode("#foo")), +// ); +// } diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 1359dcc..45a752e 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -32,45 +32,66 @@ // switching between modes. 
// // When the code parser encounters syntax sugar, it always transforms it into a -// list starting with a rune, like in the following examples: +// list starting with a rune. The list of all such transformations follows. // -// #(...) -> (#HASH ...) +// #datum -> (#HASH . datum) #name(...) -> (#name ...) // -// [...] -> (#SQUARE ...) +// [...] -> (#SQUARE ...) dat1dat2 -> (#JOIN dat1 . dat2) // -// 'foo -> (#QUOTE . foo) +// {...} -> (#BRACE ...) dat1.dat2 -> (#DOT dat1 . dat2) // -// These can combine arbitrarily: +// 'datum -> (#QUOTE . datum) #n#=datum -> (#LABEL n . datum) // -// #{...} -> (#HASH #BRACE ...) +// `datum -> (#GRAVE . datum) #n# -> (#LABEL . n) // -// #'foo -> (#HASH #QUOTE . foo) +// ,datum -> (#COMMA . datum) // -// ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...) +// (The "#datum" form refers to expressions that cannot be mistaken for a rune, +// such as for example: #(...) or #"..." etc.) // -// As a specialty, double-quoted strings are actually considered sugar by the -// code parser, and are transformed as follows into data: +// The terms "datum", "dat1", and "dat2" refer to an arbitrary datum; "name" is +// a rune name; an ellipsis means zero or more data; "n" is a non-negative integer. // -// "..." -> (#STRING . "...") +// Though not represented in the table above due to notational difficulty, the +// format "#name(...)" doesn't require a list in the second position; any datum +// works, so long as there's no ambiguity: // -// (Otherwise, all string literals would be identifiers, or all identifiers -// would be string literals, because Zisp doesn't differentiate strings and -// symbols like traditional lisps. Also, note that although we could reuse -// #QUOTE here, instead of using #STRING, this would make it impossible to -// differentiate between the code expressions #'foo and #"foo".) +// #name1#name2 -> (#name1 . #name2) +// +// #name"text" -> (#name . 
"text") +// +// As a counter-example, following a rune immediately with a bare string is not +// possible, since it's ambiguous: +// +// #abcdefgh ;Could be (#abcdef . gh) or (#abcde . fgh) or ... +// +// The parser will see this as an attempt to use an 8-letter rune name, and +// raise an error, since rune names are limited to 6 characters. +// +// Syntax sugar can combine arbitrarily: +// +// #{...} -> (#HASH #BRACE ...) +// +// #'foo -> (#HASH #QUOTE . foo) +// +// ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...) +// +// {x y}[i j] -> (#JOIN (#BRACE x y) #SQUARE i j) +// +// foo.bar.baz{x y} -> (#JOIN (#DOT (#DOT foo . bar) . baz) #BRACE x y) // // Runes are case-sensitive, and the code parser only emits runes using // upper-case letters, so lower-case runes are free for user extensions. +// Exceptions are runes used directly in code, like #true and #false. +// +// Although strings and symbols aren't disjoint types in Zisp, the parser flags +// double-quoted string literals to allow distinguishing them from bare strings. +// Otherwise, it would not be possible for the compiler to tell the difference +// between an identifier and a string literal. // // You may be wondering about numbers. As far as the parser is concerned, // numbers are strings. It's the decoder (see below) that will turn bare -// strings (those not marked with #STRING) into numbers where appropriate. -// -// Datum labels are also handled by the decoder; they desugar like so: -// -// #n# -> (#LABEL . n) -// -// #n#=DATUM -> (#LABEL n . DATUM) +// strings into numbers where appropriate. // // Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp. // The operand of #QUOTE is the entire cdr. The same principle is used when @@ -84,6 +105,8 @@ // // #{x} -> (#HASH (#BRACE (x))) #{x} -> (#HASH #BRACE x) // +// foo(x y) -> (#JOIN foo (x y)) foo(x y) -> (#JOIN foo x y) +// // // === Decoder === // @@ -94,11 +117,12 @@ // expect a vector literal like #(...) to work in Scheme. 
// // Runes may be decoded in isolation as well, rather than transforming a list -// whose head they appear in. This is how #true and #false are implemented. +// whose head they appear in. This can implement #true and #false. (These +// would be used verbatim in code, rather than emitted by the parser.) // // The decoder may also perform arbitrary transforms on any type; for example, -// it may turn bare strings (those not marked with #STRING) into numbers when -// it's decoding data representing code. This is how number literals are +// it may turn bare strings (those not flagged as double-quoted) into numbers +// when it's decoding data representing code. This is how number literals are // implemented in Zisp. // // The decoder recognizes (#QUOTE ...) to implement the traditional quoting @@ -217,7 +241,7 @@ const Value = value.Value; pub const Mode = enum { code, data }; const TopState = struct { - alloc: std.heap.MemoryPool(State), + alloc: std.mem.Allocator, input: []const u8, pos: usize = 0, mode: Mode = undefined, @@ -295,10 +319,6 @@ const State = struct { }; } - fn isFinalNull(s: *State) bool { - return s.peek() == 0 and s.top.pos == s.top.input.len - 1; - } - fn recurParse(s: *State, start_from: Fn, return_to: Fn) *State { const newState = s.top.alloc.create(State) catch @panic("OOM"); newState.* = .{ @@ -349,9 +369,10 @@ fn readShortString( const Fn = enum { start_parse, start_datum, + end_dotted_datum, + end_joined_datum, end_datum_label, end_hash_datum, - end_rune_datum, end_quote, continue_list, finish_improper_list, @@ -367,17 +388,18 @@ pub fn parse(input: []const u8, mode: Mode) Value { var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; defer if (gpa.deinit() == .leak) @panic("leak"); const alloc = gpa.allocator(); - var pool: std.heap.MemoryPool(State) = .init(alloc); - defer pool.deinit(); - var top = TopState{ .alloc = pool, .input = input, .mode = mode }; + // var pool: std.heap.MemoryPool(State) = .init(alloc); + // defer pool.deinit(); + 
var top = TopState{ .alloc = alloc, .input = input, .mode = mode }; var s0 = State{ .top = &top }; var s = &s0; while (true) s = switch (s.next) { .start_parse => startParse(s), .start_datum => startDatum(s), + .end_dotted_datum => endDottedDatum(s), + .end_joined_datum => endJoinedDatum(s), .end_datum_label => endDatumLabel(s), .end_hash_datum => endHashDatum(s), - .end_rune_datum => endRuneDatum(s), .end_quote => endQuote(s), .continue_list => continueList(s), .finish_improper_list => finishImproperList(s), @@ -424,17 +446,62 @@ fn startDatum(s: *State) *State { '(', '[', '{' => startList(s), - // Periods are only allowed in the middle of a string, or to express - // improper lists, because the following look too much like typos: - // - // (foo. bar) (foo .bar) (123. 456) (123 .456) - // '.' => err(s, "misplaced period"), else => startBareString(s), }; } +fn endDatum(s: *State, d: Value) *State { + // + // We're at the end of a datum; check for dot and join notations: + // + // DATUM|.DATUM2 + // + // DATUM|DATUM2 + // + + if (isEndOfDatum(s)) { + // Nope, end it. + return s.returnDatum(d); + } + + // These are only allowed in code mode. 
+ if (s.mode() == .data) { + return err(s, "invalid use of hash in data mode"); + } + + s.context = d; + + if (s.peek() == '.') { + s.skip(); + return s.recurParse(.start_datum, .end_dotted_datum); + } + + return s.recurParse(.start_datum, .end_joined_datum); +} + +fn endDottedDatum(s: *State) *State { + const rune = value.rune.pack("DOT"); + const first = s.context; + const second = s.retval; + return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second))); +} + +fn endJoinedDatum(s: *State) *State { + const rune = value.rune.pack("JOIN"); + const first = s.context; + const second = s.retval; + return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second))); +} + +fn isEndOfDatum(s: *State) bool { + return s.eof() or switch (s.peek()) { + '\t', '\n', ' ', ';', ')', ']', '}' => true, + else => false, + }; +} + fn handleHash(s: *State) *State { s.skip(); // @@ -496,47 +563,37 @@ fn handleRune(s: *State) *State { // #foo|(...) // - if (isEndOfRune(s)) { + if (isEndOfDatum(s)) { // Nope, just a stand-alone rune. return s.returnDatum(rune); } // Otherwise, it's followed by a datum, like: #foo(...) - // Which is only allowed in code mode. 
- if (s.mode() == .data) { - return err(s, "invalid use of hash in data mode"); - } - - s.context = rune; - return s.recurParse(.start_datum, .end_rune_datum); + return endDatum(s, rune); } fn readRune(s: *State) ?Value { return readShortString(s, std.ascii.isAlphanumeric, value.rune.pack); } -fn isEndOfRune(s: *State) bool { - return s.eof() or switch (s.peek()) { - '\t', '\n', ' ', ')', ']', '}' => true, - else => false, - }; -} - -fn endRuneDatum(s: *State) *State { - return s.returnDatum(value.pair.cons(s.context, s.retval)); -} - fn handleDatumLabel(s: *State) *State { const n = readDatumLabel(s) orelse return err(s, "datum label too long"); // // We're at the end of the numeric label now; possibilities are: // - // #n#| + // #n|# // - // #n#|=DATUM + // #n|#=DATUM // + if (s.eof()) { + return err(s, "unexpected EOF while reading datum label"); + } + if (s.getc() != '#') { + return err(s, "invalid character while reading datum label"); + } + if (s.eof() or s.isWhitespace()) { const rune = value.rune.pack("LABEL"); return s.returnDatum(value.pair.cons(rune, n)); @@ -570,14 +627,7 @@ fn startQuotedString(s: *State) *State { s.skip(); const str = readQuotedString(s) catch return err(s, "unclosed string"); - if (s.mode() == .code) { - // "foo bar" => (#STRING . "foo bar") - const rune = value.rune.pack("STRING"); - const pair = value.pair.cons(rune, str); - return s.returnDatum(pair); - } else { - return s.returnDatum(str); - } + return s.returnDatum(str); } // RQS = Read Quoted String @@ -588,16 +638,16 @@ fn readQuotedString(s: *State) !Value { } fn readQuotedSstr(s: *State) !?Value { - // We will reset to this position if we fail. const start_pos = s.pos(); + // TODO: Handle escapes. 
var buf: [6]u8 = undefined; var i: u8 = 0; while (!s.eof()) { const c = s.getc(); if (c == '"') { // ok, return what we accumulated - return value.sstr.pack(buf[0..i]); + return value.sstr.packLiteral(buf[0..i]); } if (i == 6) { // failed; reset and bail out diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig index 83186c2..d835924 100644 --- a/src/libzisp/io/unparser.zig +++ b/src/libzisp/io/unparser.zig @@ -2,15 +2,20 @@ const std = @import("std"); const value = @import("../value.zig"); +const ShortString = value.ShortString; const Value = value.Value; +// const State = struct { + +// } + pub fn unparse(v: Value) []u8 { var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; var out: std.ArrayList(u8) = .init(gpa.allocator()); if (value.rune.check(v)) { - const name, const len = value.rune.unpack(v); + const name = value.rune.unpack(v); out.append('#') catch @panic(""); - out.appendSlice(name[0..len]) catch @panic(""); + out.appendSlice(name.slice()) catch @panic(""); } return out.toOwnedSlice() catch @panic(""); } diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig index c026cb2..273c659 100644 --- a/src/libzisp/value.zig +++ b/src/libzisp/value.zig @@ -87,41 +87,45 @@ // // This 51-bit range is divided as follows, based on the high bits: // -// 000 :: Runes +// 000 :: Rune // // 001 :: Short string // -// 010 :: Unicode code point +// 010 :: Short string literal // -// 011 :: Singleton values +// 011 :: Unicode code point // -// 1.. :: Undefined +// 100 :: Singleton values // -// Runes are symbols of 1 to 6 ASCII letters, used to implement reader syntax; -// both built-in and extensions. +// 101, 110, 111 :: Undefined +// +// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax. // // Zisp strings are immutable. Any string fitting into 6 bytes or less will be // stored as an immediate value, not requiring any heap allocation or interning. // It's implicitly interned, so to speak. This includes the empty string. 
// -// The null byte serves as a terminator and cannot appear in these strings; a -// string that short but actually containing a null byte will need to be heap -// allocated like other strings. +// The null byte serves as a terminator for strings shorter than 6 bytes, and +// therefore cannot appear in these strings; a string that short but actually +// containing a null byte will need to be heap allocated like other strings. +// +// There may also be strings that are this short, but ended up on the heap due +// to being uninterned. Interning them will return the equivalent short string +// as an immediate. // -// There may also be uninterned strings on the heap that are also as short but -// ended up on the heap due to being uninterned. Calling intern on them will -// return the equivalent short string as an immediate. +// The separate type for a short string *literal* is for an efficiency hack in +// the parser; see commentary there. // // Unicode code points need a maximum of 21 bits, yet we have 48 available. // This may be exploited for a future extension. // -// Similarly, it's extremely unlikely that we will ever need more than a few -// dozen singleton values (false, true, null, and so on). As such, this range -// of bit patterns may be subdivided in the future. Right now, only the lowest -// 8 bits are allowed to be set, with the other 40 being reserved, so there's a -// limit of 256 singleton values that can be defined. +// Similarly, it's very unlikely that we will ever need more than a handful of +// singleton values (false, true, nil, and so on). As such, this range of bit +// patterns may be subdivided in the future. Right now, only the lowest 8 bits +// are allowed to be set, with the other 40 being reserved, so there's a limit +// of 256 singleton values that can be defined. // -// And on top of all that we still have a 50-bit range left! +// And on top of that, we have three more 48-bit value ranges that are unused! 
// // The forbidden value 4, Positive Infinity, would be the "empty string rune" // but that isn't allowed anyway, so all is fine. @@ -161,6 +165,8 @@ const FILL = 0x7ff; // Used when dealing with runes and short strings. pub const ShortString = std.BoundedArray(u8, 6); +pub const OtherTag = enum(u3) { rune, sstr, sstr_lit, char, misc }; + /// Represents a Zisp value/object. pub const Value = packed union { /// To get the value as a regular double. @@ -242,7 +248,7 @@ pub const Value = packed union { sstr: packed struct { // actually [6]u8 but packed struct cannot contain arrays string: u48, - _tag: OtherTag = .sstr, + tag: OtherTag, _is_ptr: bool = false, _: u11 = FILL, _is_fixnum: bool = false, @@ -250,7 +256,7 @@ pub const Value = packed union { /// For initializing and reading characters. char: packed struct { - char: u21, + value: u21, _reserved: u27 = 0, _tag: OtherTag = .char, _is_ptr: bool = false, @@ -268,8 +274,6 @@ pub const Value = packed union { _is_fixnum: bool = false, }, - const OtherTag = enum(u3) { rune, sstr, char, misc }; - const Self = @This(); /// Hexdumps the value. 
diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig index 98bb26f..eb4bbc9 100644 --- a/src/libzisp/value/char.zig +++ b/src/libzisp/value/char.zig @@ -16,12 +16,12 @@ pub fn assert(v: Value) void { } pub fn pack(c: u21) Value { - return .{ .char = .{ .char = c } }; + return .{ .char = .{ .value = c } }; } pub fn unpack(v: Value) u21 { assert(v); - return @truncate(v.char.char); + return @truncate(v.char.value); } // Zisp API diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig index ab251b4..3a4dc61 100644 --- a/src/libzisp/value/rune.zig +++ b/src/libzisp/value/rune.zig @@ -2,6 +2,7 @@ const std = @import("std"); const value = @import("../value.zig"); +const ShortString = value.ShortString; const Value = value.Value; // Zig API @@ -48,12 +49,16 @@ pub fn pack(s: []const u8) Value { return v; } -pub fn unpack(v: Value) struct { [6]u8, u3 } { - const s: [6]u8 = @bitCast(v.rune.name); +pub fn unpack(v: Value) ShortString { + var s = ShortString{ .buffer = @bitCast(v.sstr.string) }; inline for (0..6) |i| { - if (s[i] == 0) return .{ s, i }; + if (s.buffer[i] == 0) { + s.len = i; + return s; + } } - return .{ s, 6 }; + s.len = 6; + return s; } // Zisp API diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig index 2be2647..a2f6bf8 100644 --- a/src/libzisp/value/sstr.zig +++ b/src/libzisp/value/sstr.zig @@ -1,11 +1,15 @@ const std = @import("std"); -const Value = @import("../value.zig").Value; +const value = @import("../value.zig"); + +const ShortString = value.ShortString; +const OtherTag = value.OtherTag; +const Value = value.Value; // Zig API pub fn check(v: Value) bool { - return v.isOther(.sstr); + return v.isOther(.sstr) or v.isOther(.sstr_lit); } pub fn assert(v: Value) void { @@ -43,8 +47,16 @@ fn assertValidSstr(s: []const u8) void { // Note: rune.zig uses equivalent code; probably good to keep in sync. 
pub fn pack(s: []const u8) Value { + return _pack(s, .sstr); +} + +pub fn packLiteral(s: []const u8) Value { + return _pack(s, .sstr_lit); +} + +fn _pack(s: []const u8, tag: OtherTag) Value { assertValidSstr(s); - var v = Value{ .sstr = .{ .string = 0 } }; + var v = Value{ .sstr = .{ .string = 0, .tag = tag } }; const dest: [*]u8 = @ptrCast(&v.sstr.string); @memcpy(dest, s); return v; @@ -59,4 +71,13 @@ pub fn unpack(v: Value) struct { [6]u8, u3 } { return .{ s, 6 }; } +pub fn unpack1(v: Value) struct { [6]u8, u3 } { + assert(v); + const s: [6]u8 = @bitCast(v.sstr.string); + for (0..6) |i| { + if (s[i] == 0) return .{ s, @intCast(i) }; + } + return .{ s, 6 }; +} + // No Zisp API for sstr specifically, since it's a string. See string.zig. |
