update

author: Taylan Kammer <taylan.kammer@gmail.com> 2025-02-25 20:49:49 +0100
committer: Taylan Kammer <taylan.kammer@gmail.com> 2025-02-25 20:49:49 +0100
commit: ca8de6eb6bd0fe1ee3ef22c659cf416d41bc7a2f (patch)
tree: 4fc98874a5aafecca3aeb95efccb5d95eb386459
parent: 0f432b2c76813f2c0f9e508f10227df491712837 (diff)
7 files changed, 400 insertions, 315 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig
index 4bf8b08..17264a8 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -10,99 +10,100 @@ pub const io = @import("libzisp/io.zig");
 pub const lib = @import("libzisp/lib.zig");
 pub const value = @import("libzisp/value.zig");
 
+pub const ShortString = value.ShortString;
 pub const Value = value.Value;
 
-test "double" {
-    const d1: f64 = 0.123456789;
-    const d2: f64 = -0.987654321;
-    const v1 = value.double.pack(d1);
-    const v2 = value.double.pack(d2);
-    const v3 = value.double.add(v1, v2);
-    const result = value.double.unpack(v3);
-
-    try std.testing.expect(value.double.check(v1));
-    try std.testing.expect(value.double.check(v2));
-    try std.testing.expect(value.double.check(v3));
-
-    try std.testing.expectEqual(d1 + d2, result);
-}
-
-test "fixnum" {
-    const int1: i64 = 123456789;
-    const int2: i64 = -987654321;
-    const v1 = value.fixnum.pack(int1);
-    const v2 = value.fixnum.pack(int2);
-    const v3 = value.fixnum.add(v1, v2);
-    const result = value.fixnum.unpack(v3);
-
-    try std.testing.expect(value.fixnum.check(v1));
-    try std.testing.expect(value.fixnum.check(v2));
-    try std.testing.expect(value.fixnum.check(v3));
-
-    try std.testing.expectEqual(int1 + int2, result);
-}
-
-test "ptr" {
-    const ptr = value.ptr;
-
-    const val: [*]gc.Bucket = @ptrFromInt(256);
-    const tag = ptr.Tag.string;
-
-    const p = ptr.pack(val, tag);
-    try std.testing.expect(ptr.check(p));
-    try std.testing.expect(ptr.checkZisp(p, tag));
-    try std.testing.expect(ptr.checkStrong(p));
-
-    const pv, const pt = ptr.unpack(p);
-    try std.testing.expectEqual(val, pv);
-    try std.testing.expectEqual(tag, pt);
-
-    var w = ptr.makeWeak(p);
-    try std.testing.expect(ptr.check(w));
-    try std.testing.expect(ptr.checkZisp(w, tag));
-    try std.testing.expect(ptr.checkWeak(w));
-    try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
-    try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w)));
-
-    const wv, const wt = ptr.unpack(w);
-    try std.testing.expectEqual(val, wv);
-    try std.testing.expectEqual(tag, wt);
-
-    const wv2, const wt2 = ptr.unpack(ptr.getWeak(w));
-    try std.testing.expectEqual(val, wv2);
-    try std.testing.expectEqual(tag, wt2);
-
-    ptr.setWeakNull(&w);
-    try std.testing.expect(ptr.check(w));
-    try std.testing.expect(ptr.checkWeak(w));
-    try std.testing.expect(ptr.isWeakNull(w));
-    try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
-    try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w)));
-    try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
-}
-
-test "fptr" {
-    const ptr = value.ptr;
-
-    const int1: u50 = 0;
-    const int2: u50 = std.math.maxInt(u50);
-
-    const f1 = ptr.packForeign(int1);
-    try std.testing.expect(ptr.checkForeign(f1));
-    try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
-
-    const f2 = ptr.packForeign(int2);
-    try std.testing.expect(ptr.checkForeign(f2));
-    try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
-}
-
-test "rune" {
-    const r1 = value.rune.pack("test");
-    try std.testing.expect(value.rune.check(r1));
-
-    const s1, const l1 = value.rune.unpack(r1);
-    try std.testing.expectEqualStrings("test", s1[0..l1]);
-}
+// test "double" {
+//     const d1: f64 = 0.123456789;
+//     const d2: f64 = -0.987654321;
+//     const v1 = value.double.pack(d1);
+//     const v2 = value.double.pack(d2);
+//     const v3 = value.double.add(v1, v2);
+//     const result = value.double.unpack(v3);
+
+//     try std.testing.expect(value.double.check(v1));
+//     try std.testing.expect(value.double.check(v2));
+//     try std.testing.expect(value.double.check(v3));
+
+//     try std.testing.expectEqual(d1 + d2, result);
+// }
+
+// test "fixnum" {
+//     const int1: i64 = 123456789;
+//     const int2: i64 = -987654321;
+//     const v1 = value.fixnum.pack(int1);
+//     const v2 = value.fixnum.pack(int2);
+//     const v3 = value.fixnum.add(v1, v2);
+//     const result = value.fixnum.unpack(v3);
+
+//     try std.testing.expect(value.fixnum.check(v1));
+//     try std.testing.expect(value.fixnum.check(v2));
+//     try std.testing.expect(value.fixnum.check(v3));
+
+//     try std.testing.expectEqual(int1 + int2, result);
+// }
+
+// test "ptr" {
+//     const ptr = value.ptr;
+
+//     const val: [*]gc.Bucket = @ptrFromInt(256);
+//     const tag = ptr.Tag.string;
+
+//     const p = ptr.pack(val, tag);
+//     try std.testing.expect(ptr.check(p));
+//     try std.testing.expect(ptr.checkZisp(p, tag));
+//     try std.testing.expect(ptr.checkStrong(p));
+
+//     const pv, const pt = ptr.unpack(p);
+//     try std.testing.expectEqual(val, pv);
+//     try std.testing.expectEqual(tag, pt);
+
+//     var w = ptr.makeWeak(p);
+//     try std.testing.expect(ptr.check(w));
+//     try std.testing.expect(ptr.checkZisp(w, tag));
+//     try std.testing.expect(ptr.checkWeak(w));
+//     try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
+//     try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w)));
+
+//     const wv, const wt = ptr.unpack(w);
+//     try std.testing.expectEqual(val, wv);
+//     try std.testing.expectEqual(tag, wt);
+
+//     const wv2, const wt2 = ptr.unpack(ptr.getWeak(w));
+//     try std.testing.expectEqual(val, wv2);
+//     try std.testing.expectEqual(tag, wt2);
+
+//     ptr.setWeakNull(&w);
+//     try std.testing.expect(ptr.check(w));
+//     try std.testing.expect(ptr.checkWeak(w));
+//     try std.testing.expect(ptr.isWeakNull(w));
+//     try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
+//     try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w)));
+//     try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
+// }
+
+// test "fptr" {
+//     const ptr = value.ptr;
+
+//     const int1: u50 = 0;
+//     const int2: u50 = std.math.maxInt(u50);
+
+//     const f1 = ptr.packForeign(int1);
+//     try std.testing.expect(ptr.checkForeign(f1));
+//     try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
+
+//     const f2 = ptr.packForeign(int2);
+//     try std.testing.expect(ptr.checkForeign(f2));
+//     try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
+// }
+
+// test "rune" {
+//     const r = value.rune.pack("test");
+//     try std.testing.expect(value.rune.check(r));
+
+//     const s = value.rune.unpack(r);
+//     try std.testing.expectEqualStrings("test", s.slice());
+// }
 
 const SstrImpl = struct { SstrPack, SstrUnpack };
 const SstrPack = *const fn ([]const u8) Value;
@@ -111,8 +112,8 @@ const SstrUnpack = *const fn (Value) struct { [6]u8, u3 };
 test "sstr" {
     const impls = [_]SstrImpl{
         .{ value.sstr.pack, value.sstr.unpack },
-        // .{ value.sstr.pack1, value.sstr.unpack1 },
-        // .{ value.sstr.pack2, value.sstr.unpack2 },
+        .{ value.sstr.pack, value.sstr.unpack1 },
+        // .{ value.sstr.pack, value.sstr.unpack2 },
         // .{ value.sstr.pack3, value.sstr.unpack3 },
         // .{ value.sstr.pack4, value.sstr.unpack4 },
     };
@@ -125,7 +126,7 @@ test "sstr" {
         const iters = switch (@import("builtin").mode) {
             .Debug, .ReleaseSmall => 10_000_000,
             .ReleaseSafe => 100_000_000,
-            .ReleaseFast => 1_000_000_000,
+            .ReleaseFast => 100_000_000,
         };
         std.debug.print("Benchmarking with {} iters.\n", .{iters});
         inline for (impls, 0..) |impl, i| {
@@ -141,14 +142,14 @@ fn testSstr(impl: SstrImpl) !void {
     const ss2 = pack("123");
     const ss3 = pack("123456");
 
-    const s1, const l1 = unpack(ss1);
-    const s2, const l2 = unpack(ss2);
-    const s3, const l3 = unpack(ss3);
-
     try std.testing.expect(value.sstr.check(ss1));
     try std.testing.expect(value.sstr.check(ss2));
     try std.testing.expect(value.sstr.check(ss3));
 
+    const s1, const l1 = unpack(ss1);
+    const s2, const l2 = unpack(ss2);
+    const s3, const l3 = unpack(ss3);
+
     try std.testing.expectEqual(1, l1);
     try std.testing.expectEqualStrings("1", s1[0..l1]);
 
@@ -195,117 +196,116 @@ fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void {
     std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs });
 }
 
-test "char" {
-    const c1 = value.char.pack('\x00');
-    try std.testing.expect(value.char.check(c1));
-    try std.testing.expectEqual('\x00', value.char.unpack(c1));
-
-    const c2 = value.char.pack('😀');
-    try std.testing.expect(value.char.check(c2));
-    try std.testing.expectEqual('😀', value.char.unpack(c2));
-}
-
-test "misc" {
-    const f = value.boole.pack(false);
-    try std.testing.expect(value.boole.check(f));
-    try std.testing.expectEqual(false, value.boole.unpack(f));
-    try std.testing.expect(value.boole.unpack(value.boole.pred(f)));
+// test "char" {
+//     const c1 = value.char.pack('\x00');
+//     try std.testing.expect(value.char.check(c1));
+//     try std.testing.expectEqual('\x00', value.char.unpack(c1));
 
-    const t = value.boole.pack(true);
-    try std.testing.expect(value.boole.check(t));
-    try std.testing.expectEqual(true, value.boole.unpack(t));
-    try std.testing.expect(value.boole.unpack(value.boole.pred(t)));
+//     const c2 = value.char.pack('😀');
+//     try std.testing.expect(value.char.check(c2));
+//     try std.testing.expectEqual('😀', value.char.unpack(c2));
+// }
 
-    const nil = value.nil.get();
-    try std.testing.expect(value.nil.check(nil));
-    try std.testing.expect(value.boole.unpack(value.nil.pred(nil)));
-
-    const eof = value.eof.get();
-    try std.testing.expect(value.eof.check(eof));
-    try std.testing.expect(value.boole.unpack(value.eof.pred(eof)));
-}
-
-test "pair" {
-    const v1 = value.fixnum.pack(1);
-    const v2 = value.fixnum.pack(2);
-
-    const v3 = value.fixnum.pack(3);
-    const v4 = value.fixnum.pack(4);
-
-    const p = value.pair.cons(v1, v2);
-    try std.testing.expect(value.pair.check(p));
-    try std.testing.expect(value.boole.unpack(value.pair.pred(p)));
-
-    const car = value.pair.car(p);
-    const cdr = value.pair.cdr(p);
-    try std.testing.expectEqual(1, value.fixnum.unpack(car));
-    try std.testing.expectEqual(2, value.fixnum.unpack(cdr));
-
-    value.pair.setcar(p, v3);
-    value.pair.setcdr(p, v4);
-
-    const car2 = value.pair.car(p);
-    const cdr2 = value.pair.cdr(p);
-    try std.testing.expectEqual(3, value.fixnum.unpack(car2));
-    try std.testing.expectEqual(4, value.fixnum.unpack(cdr2));
-}
-
-test "parse" {
-    const val = io.parser.parseCode("\"foo\"");
-    const r, const rl = value.rune.unpack(value.pair.car(val));
-    const s, const sl = value.sstr.unpack(value.pair.cdr(val));
-    try std.testing.expectEqualStrings("STRING", r[0..rl]);
-    try std.testing.expectEqualStrings("foo", s[0..sl]);
-}
-
-test "parse2" {
-    const val = io.parser.parseCode(
-        \\ ;; Testing some crazy datum comments
-        \\ ##;"bar"#;([x #"y"]{##`,'z})"foo"
-        \\ ;; end
-    );
-
-    const r, const rl = value.rune.unpack(value.pair.car(val));
-    try std.testing.expectEqualStrings("HASH", r[0..rl]);
-
-    const cdr = value.pair.cdr(val);
-
-    const s, const sl = value.rune.unpack(value.pair.car(cdr));
-    try std.testing.expectEqualStrings("STRING", s[0..sl]);
-
-    const f, const fl = value.sstr.unpack(value.pair.cdr(cdr));
-    try std.testing.expectEqualStrings("foo", f[0..fl]);
-}
-
-test "parse3" {
-    const val = io.parser.parseCode(
-        \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat)
-    );
-
-    const car = value.pair.car;
-    const cdr = value.pair.cdr;
-
-    // const e1 = car(val);
-    const e2 = car(cdr(val));
-    // const e3 = car(cdr(cdr(val)));
-    // const e4 = car(cdr(cdr(cdr(val))));
-
-    try std.testing.expect(value.rune.check(e2));
-}
-
-test "parse4" {
-    const val = io.parser.parseCode("(foo . #;x bar #;y)");
-
-    const s, const sl = value.sstr.unpack(value.pair.car(val));
-    try std.testing.expectEqualStrings("foo", s[0..sl]);
-
-    const f, const fl = value.sstr.unpack(value.pair.cdr(val));
-    try std.testing.expectEqualStrings("bar", f[0..fl]);
-}
-
-test "unparse" {
-    try std.testing.expectEqualStrings(
-        "#foo",
-        io.unparser.unparse(io.parser.parseCode("#foo")),
-    );
-}
+// test "misc" {
+//     const f = value.boole.pack(false);
+//     try std.testing.expect(value.boole.check(f));
+//     try std.testing.expectEqual(false, value.boole.unpack(f));
+//     try std.testing.expect(value.boole.unpack(value.boole.pred(f)));
+
+//     const t = value.boole.pack(true);
+//     try std.testing.expect(value.boole.check(t));
+//     try std.testing.expectEqual(true, value.boole.unpack(t));
+//     try std.testing.expect(value.boole.unpack(value.boole.pred(t)));
+
+//     const nil = value.nil.get();
+//     try std.testing.expect(value.nil.check(nil));
+//     try std.testing.expect(value.boole.unpack(value.nil.pred(nil)));
+
+//     const eof = value.eof.get();
+//     try std.testing.expect(value.eof.check(eof));
+//     try std.testing.expect(value.boole.unpack(value.eof.pred(eof)));
+// }
+
+// test "pair" {
+//     const v1 = value.fixnum.pack(1);
+//     const v2 = value.fixnum.pack(2);
+
+//     const v3 = value.fixnum.pack(3);
+//     const v4 = value.fixnum.pack(4);
+
+//     const p = value.pair.cons(v1, v2);
+//     try std.testing.expect(value.pair.check(p));
+//     try std.testing.expect(value.boole.unpack(value.pair.pred(p)));
+
+//     const car = value.pair.car(p);
+//     const cdr = value.pair.cdr(p);
+//     try std.testing.expectEqual(1, value.fixnum.unpack(car));
+//     try std.testing.expectEqual(2, value.fixnum.unpack(cdr));
+
+//     value.pair.setcar(p, v3);
+//     value.pair.setcdr(p, v4);
+
+//     const car2 = value.pair.car(p);
+//     const cdr2 = value.pair.cdr(p);
+//     try std.testing.expectEqual(3, value.fixnum.unpack(car2));
+//     try std.testing.expectEqual(4, value.fixnum.unpack(cdr2));
+// }
+
+// test "parse" {
+//     const val = io.parser.parseCode("\"foo\"");
+
+//     try std.testing.expect(value.sstr.check(val));
+
+//     const s = value.sstr.unpack(val);
+//     try std.testing.expectEqualStrings("foo", s.slice());
+// }
+
+// test "parse2" {
+//     const val = io.parser.parseCode(
+//         \\ ;; Testing some crazy datum comments
+//         \\ ##;"bar"#;([x #"y"]{##`,'z})"foo"
+//         \\ ;; end
+//     );
+
+//     const r = value.rune.unpack(value.pair.car(val));
+//     try std.testing.expectEqualStrings("HASH", r.slice());
+
+//     const s = value.pair.cdr(val);
+//     try std.testing.expect(value.sstr.check(s));
+
+//     const f = value.sstr.unpack(s);
+//     try std.testing.expectEqualStrings("foo", f.slice());
+// }
+
+// test "parse3" {
+//     const val = io.parser.parseCode(
+//         \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat)
+//     );
+
+//     const car = value.pair.car;
+//     const cdr = value.pair.cdr;
+
+//     // const e1 = car(val);
+//     const e2 = car(cdr(val));
+//     // const e3 = car(cdr(cdr(val)));
+//     // const e4 = car(cdr(cdr(cdr(val))));
+
+//     try std.testing.expect(value.rune.check(e2));
+// }
+
+// test "parse4" {
+//     const val = io.parser.parseCode("(foo . #;x bar #;y)");
+
+//     const s = value.sstr.unpack(value.pair.car(val));
+//     try std.testing.expectEqualStrings("foo", s.slice());
+
+//     const f = value.sstr.unpack(value.pair.cdr(val));
+//     try std.testing.expectEqualStrings("bar", f.slice());
+// }
+
+// test "unparse" {
+//     try std.testing.expectEqualStrings(
+//         "#foo",
+//         io.unparser.unparse(io.parser.parseCode("#foo")),
+//     );
+// }
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 1359dcc..45a752e 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -32,45 +32,66 @@
 // switching between modes.
 //
 // When the code parser encounters syntax sugar, it always transforms it into a
-// list starting with a rune, like in the following examples:
+// list starting with a rune.  The list of all such transformations follows.
 //
-//   #(...)   -> (#HASH ...)
+//   #datum  -> (#HASH . datum)        #name(...)  -> (#name ...)
 //
-//   [...]    -> (#SQUARE ...)
+//   [...]   -> (#SQUARE ...)          dat1dat2    -> (#JOIN dat1 . dat2)
 //
-//   'foo     -> (#QUOTE . foo)
+//   {...}   -> (#BRACE ...)           dat1.dat2   -> (#DOT dat1 . dat2)
 //
-// These can combine arbitrarily:
+//   'datum  -> (#QUOTE . datum)       #n#=datum   -> (#LABEL n . datum)
 //
-//   #{...}   -> (#HASH #BRACE ...)
+//   `datum  -> (#GRAVE . datum)       #n#         -> (#LABEL . n)
 //
-//   #'foo    -> (#HASH #QUOTE . foo)
+//   ,datum  -> (#COMMA . datum)
 //
-//   ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
+// (The "#datum" form refers to expressions that cannot be mistaken for a rune,
+// such as for example: #(...) or #"..." etc.)
 //
-// As a specialty, double-quoted strings are actually considered sugar by the
-// code parser, and are transformed as follows into data:
+// The terms "datum", "dat1", and "dat2" refer to an arbitrary datum; "name" is
+// a rune name; ellipses mean zero or more data; "n" is a non-negative integer.
 //
-//   "..."    -> (#STRING . "...")
+// Though not represented in the table above due to notational difficulty, the
+// format "#name(...)" doesn't require a list in the second position; any datum
+// works, so long as there's no ambiguity:
 //
-// (Otherwise, all string literals would be identifiers, or all identifiers
-// would be string literals, because Zisp doesn't differentiate strings and
-// symbols like traditional lisps.  Also, note that although we could reuse
-// #QUOTE here, instead of using #STRING, this would make it impossible to
-// differentiate between the code expressions #'foo and #"foo".)
+//   #name1#name2  -> (#name1 . #name2)
+//
+//   #name"text"   -> (#name . "text")
+//
+// As a counter-example, following a rune immediately with a bare string is not
+// possible, since it's ambiguous:
+//
+//   #abcdefgh  ;Could be (#abcdef . gh) or (#abcde . fgh) or ...
+//
+// The parser will see this as an attempt to use an 8-letter rune name, and
+// raise an error, since rune names are limited to 6 characters.
+//
+// Syntax sugar can combine arbitrarily:
+//
+//   #{...}            -> (#HASH #BRACE ...)
+//
+//   #'foo             -> (#HASH #QUOTE . foo)
+//
+//   ##'[...]          -> (#HASH #HASH #QUOTE #SQUARE ...)
+//
+//   {x y}[i j]        -> (#JOIN (#BRACE x y) #SQUARE i j)
+//
+//   foo.bar.baz{x y}  -> (#JOIN (#DOT (#DOT foo . bar) . baz) #BRACE x y)
 //
 // Runes are case-sensitive, and the code parser only emits runes using
 // upper-case letters, so lower-case runes are free for user extensions.
+// Exceptions are runes used directly in code, like #true and #false.
+//
+// Although strings and symbols aren't disjoint types in Zisp, the parser flags
+// double-quoted string literals to allow distinguishing them from bare strings.
+// Otherwise, it would not be possible for the compiler to tell the difference
+// between an identifier and a string literal.
 //
 // You may be wondering about numbers.  As far as the parser is concerned,
 // numbers are strings.  It's the decoder (see below) that will turn bare
-// strings (those not marked with #STRING) into numbers where appropriate.
-//
-// Datum labels are also handled by the decoder; they desugar like so:
-//
-//   #n#       -> (#LABEL . n)
-//
-//   #n#=DATUM -> (#LABEL n . DATUM)
+// strings into numbers where appropriate.
 //
 // Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp.
 // The operand of #QUOTE is the entire cdr.  The same principle is used when
@@ -84,6 +105,8 @@
 //
 //   #{x}     -> (#HASH (#BRACE (x)))       #{x}     -> (#HASH #BRACE x)
 //
+//   foo(x y) -> (#JOIN foo (x y))          foo(x y) -> (#JOIN foo x y)
+//
 //
 // === Decoder ===
 //
@@ -94,11 +117,12 @@
 // expect a vector literal like #(...) to work in Scheme.
 //
 // Runes may be decoded in isolation as well, rather than transforming a list
-// whose head they appear in.  This is how #true and #false are implemented.
+// whose head they appear in.  This can implement #true and #false.  (These
+// would be used verbatim in code, rather than emitted by the parser.)
 //
 // The decoder may also perform arbitrary transforms on any type; for example,
-// it may turn bare strings (those not marked with #STRING) into numbers when
-// it's decoding data representing code.  This is how number literals are
+// it may turn bare strings (those not flagged as double-quoted) into numbers
+// when it's decoding data representing code.  This is how number literals are
 // implemented in Zisp.
 //
 // The decoder recognizes (#QUOTE ...) to implement the traditional quoting
@@ -217,7 +241,7 @@ const Value = value.Value;
 pub const Mode = enum { code, data };
 
 const TopState = struct {
-    alloc: std.heap.MemoryPool(State),
+    alloc: std.mem.Allocator,
     input: []const u8,
     pos: usize = 0,
     mode: Mode = undefined,
@@ -295,10 +319,6 @@ const State = struct {
         };
     }
 
-    fn isFinalNull(s: *State) bool {
-        return s.peek() == 0 and s.top.pos == s.top.input.len - 1;
-    }
-
     fn recurParse(s: *State, start_from: Fn, return_to: Fn) *State {
         const newState = s.top.alloc.create(State) catch @panic("OOM");
         newState.* = .{
@@ -349,9 +369,10 @@ fn readShortString(
 const Fn = enum {
     start_parse,
     start_datum,
+    end_dotted_datum,
+    end_joined_datum,
     end_datum_label,
     end_hash_datum,
-    end_rune_datum,
     end_quote,
     continue_list,
     finish_improper_list,
@@ -367,17 +388,18 @@ pub fn parse(input: []const u8, mode: Mode) Value {
     var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
     defer if (gpa.deinit() == .leak) @panic("leak");
     const alloc = gpa.allocator();
-    var pool: std.heap.MemoryPool(State) = .init(alloc);
-    defer pool.deinit();
-    var top = TopState{ .alloc = pool, .input = input, .mode = mode };
+    // var pool: std.heap.MemoryPool(State) = .init(alloc);
+    // defer pool.deinit();
+    var top = TopState{ .alloc = alloc, .input = input, .mode = mode };
     var s0 = State{ .top = &top };
     var s = &s0;
     while (true) s = switch (s.next) {
         .start_parse => startParse(s),
         .start_datum => startDatum(s),
+        .end_dotted_datum => endDottedDatum(s),
+        .end_joined_datum => endJoinedDatum(s),
         .end_datum_label => endDatumLabel(s),
         .end_hash_datum => endHashDatum(s),
-        .end_rune_datum => endRuneDatum(s),
         .end_quote => endQuote(s),
         .continue_list => continueList(s),
         .finish_improper_list => finishImproperList(s),
@@ -424,17 +446,62 @@ fn startDatum(s: *State) *State {
 
         '(', '[', '{' => startList(s),
 
-        // Periods are only allowed in the middle of a string, or to express
-        // improper lists, because the following look too much like typos:
-        //
-        //   (foo. bar)  (foo .bar)  (123. 456)  (123 .456)
-        //
         '.' => err(s, "misplaced period"),
 
         else => startBareString(s),
     };
 }
 
+fn endDatum(s: *State, d: Value) *State {
+    //
+    // We're at the end of a datum; check for dot and join notations:
+    //
+    //   DATUM|.DATUM2
+    //
+    //   DATUM|DATUM2
+    //
+
+    if (isEndOfDatum(s)) {
+        // Nope, end it.
+        return s.returnDatum(d);
+    }
+
+    // These are only allowed in code mode.
+    if (s.mode() == .data) {
+        return err(s, "invalid use of hash in data mode");
+    }
+
+    s.context = d;
+
+    if (s.peek() == '.') {
+        s.skip();
+        return s.recurParse(.start_datum, .end_dotted_datum);
+    }
+
+    return s.recurParse(.start_datum, .end_joined_datum);
+}
+
+fn endDottedDatum(s: *State) *State {
+    const rune = value.rune.pack("DOT");
+    const first = s.context;
+    const second = s.retval;
+    return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second)));
+}
+
+fn endJoinedDatum(s: *State) *State {
+    const rune = value.rune.pack("JOIN");
+    const first = s.context;
+    const second = s.retval;
+    return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second)));
+}
+
+fn isEndOfDatum(s: *State) bool {
+    return s.eof() or switch (s.peek()) {
+        '\t', '\n', ' ', ';', ')', ']', '}' => true,
+        else => false,
+    };
+}
+
 fn handleHash(s: *State) *State {
     s.skip();
     //
@@ -496,47 +563,37 @@ fn handleRune(s: *State) *State {
     //   #foo|(...)
     //
 
-    if (isEndOfRune(s)) {
+    if (isEndOfDatum(s)) {
         // Nope, just a stand-alone rune.
         return s.returnDatum(rune);
     }
 
     // Otherwise, it's followed by a datum, like: #foo(...)
 
-    // Which is only allowed in code mode.
-    if (s.mode() == .data) {
-        return err(s, "invalid use of hash in data mode");
-    }
-
-    s.context = rune;
-    return s.recurParse(.start_datum, .end_rune_datum);
+    return endDatum(s, rune);
 }
 
 fn readRune(s: *State) ?Value {
     return readShortString(s, std.ascii.isAlphanumeric, value.rune.pack);
 }
 
-fn isEndOfRune(s: *State) bool {
-    return s.eof() or switch (s.peek()) {
-        '\t', '\n', ' ', ')', ']', '}' => true,
-        else => false,
-    };
-}
-
-fn endRuneDatum(s: *State) *State {
-    return s.returnDatum(value.pair.cons(s.context, s.retval));
-}
-
 fn handleDatumLabel(s: *State) *State {
     const n = readDatumLabel(s) orelse return err(s, "datum label too long");
     //
     // We're at the end of the numeric label now; possibilities are:
     //
-    //   #n#|
+    //   #n|#
     //
-    //   #n#|=DATUM
+    //   #n|#=DATUM
     //
 
+    if (s.eof()) {
+        return err(s, "unexpected EOF while reading datum label");
+    }
+    if (s.getc() != '#') {
+        return err(s, "invalid character while reading datum label");
+    }
+
     if (s.eof() or s.isWhitespace()) {
         const rune = value.rune.pack("LABEL");
         return s.returnDatum(value.pair.cons(rune, n));
@@ -570,14 +627,7 @@ fn startQuotedString(s: *State) *State {
     s.skip();
 
     const str = readQuotedString(s) catch return err(s, "unclosed string");
-    if (s.mode() == .code) {
-        // "foo bar" => (#STRING . "foo bar")
-        const rune = value.rune.pack("STRING");
-        const pair = value.pair.cons(rune, str);
-        return s.returnDatum(pair);
-    } else {
-        return s.returnDatum(str);
-    }
+    return s.returnDatum(str);
 }
 
 // RQS = Read Quoted String
@@ -588,16 +638,16 @@ fn readQuotedString(s: *State) !Value {
 }
 
 fn readQuotedSstr(s: *State) !?Value {
-    // We will reset to this position if we fail.
     const start_pos = s.pos();
 
+    // TODO: Handle escapes.
     var buf: [6]u8 = undefined;
     var i: u8 = 0;
     while (!s.eof()) {
         const c = s.getc();
         if (c == '"') {
             // ok, return what we accumulated
-            return value.sstr.pack(buf[0..i]);
+            return value.sstr.packLiteral(buf[0..i]);
         }
         if (i == 6) {
             // failed; reset and bail out
diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig
index 83186c2..d835924 100644
--- a/src/libzisp/io/unparser.zig
+++ b/src/libzisp/io/unparser.zig
@@ -2,15 +2,20 @@ const std = @import("std");
 
 const value = @import("../value.zig");
 
+const ShortString = value.ShortString;
 const Value = value.Value;
 
+// const State = struct {
+
+// }
+
 pub fn unparse(v: Value) []u8 {
     var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
     var out: std.ArrayList(u8) = .init(gpa.allocator());
     if (value.rune.check(v)) {
-        const name, const len = value.rune.unpack(v);
+        const name = value.rune.unpack(v);
         out.append('#') catch @panic("");
-        out.appendSlice(name[0..len]) catch @panic("");
+        out.appendSlice(name.slice()) catch @panic("");
     }
     return out.toOwnedSlice() catch @panic("");
 }
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
index c026cb2..273c659 100644
--- a/src/libzisp/value.zig
+++ b/src/libzisp/value.zig
@@ -87,41 +87,45 @@
 //
 // This 51-bit range is divided as follows, based on the high bits:
 //
-//   000   :: Runes
+//   000   :: Rune
 //
 //   001   :: Short string
 //
-//   010   :: Unicode code point
+//   010   :: Short string literal
 //
-//   011   :: Singleton values
+//   011   :: Unicode code point
 //
-//   1..   :: Undefined
+//   100   :: Singleton values
 //
-// Runes are symbols of 1 to 6 ASCII letters, used to implement reader syntax;
-// both built-in and extensions.
+//   101, 110, 111  :: Undefined
+//
+// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax.
 //
 // Zisp strings are immutable.  Any string fitting into 6 bytes or less will be
 // stored as an immediate value, not requiring any heap allocation or interning.
 // It's implicitly interned, so to speak.  This includes the empty string.
 //
-// The null byte serves as a terminator and cannot appear in these strings; a
-// string that short but actually containing a null byte will need to be heap
-// allocated like other strings.
+// The null byte serves as a terminator for strings shorter than 6 bytes, and
+// therefore cannot appear in these strings; a string that short but actually
+// containing a null byte will need to be heap allocated like other strings.
+//
+// There may also be strings that are this short, but ended up on the heap due
+// to being uninterned.  Interning them will return the equivalent short string
+// as an immediate.
 //
-// There may also be uninterned strings on the heap that are also as short but
-// ended up on the heap due to being uninterned.  Calling intern on them will
-// return the equivalent short string as an immediate.
+// The separate type for a short string *literal* is for an efficiency hack in
+// the parser; see commentary there.
 //
 // Unicode code points need a maximum of 21 bits, yet we have 48 available.
 // This may be exploited for a future extension.
 //
-// Similarly, it's extremely unlikely that we will ever need more than a few
-// dozen singleton values (false, true, null, and so on).  As such, this range
-// of bit patterns may be subdivided in the future.  Right now, only the lowest
-// 8 bits are allowed to be set, with the other 40 being reserved, so there's a
-// limit of 256 singleton values that can be defined.
+// Similarly, it's very unlikely that we will ever need more than a handful of
+// singleton values (false, true, nil, and so on).  As such, this range of bit
+// patterns may be subdivided in the future.  Right now, only the lowest 8 bits
+// are allowed to be set, with the other 40 being reserved, so there's a limit
+// of 256 singleton values that can be defined.
 //
-// And on top of all that we still have a 50-bit range left!
+// And on top of that, we have three more 48-bit value ranges that are unused!
 //
 // The forbidden value 4, Positive Infinity, would be the "empty string rune"
 // but that isn't allowed anyway, so all is fine.
@@ -161,6 +165,8 @@ const FILL = 0x7ff;
 // Used when dealing with runes and short strings.
 pub const ShortString = std.BoundedArray(u8, 6);
 
+pub const OtherTag = enum(u3) { rune, sstr, sstr_lit, char, misc };
+
 /// Represents a Zisp value/object.
 pub const Value = packed union {
     /// To get the value as a regular double.
@@ -242,7 +248,7 @@ pub const Value = packed union {
     sstr: packed struct {
         // actually [6]u8 but packed struct cannot contain arrays
         string: u48,
-        _tag: OtherTag = .sstr,
+        tag: OtherTag,
         _is_ptr: bool = false,
         _: u11 = FILL,
         _is_fixnum: bool = false,
@@ -250,7 +256,7 @@ pub const Value = packed union {
 
     /// For initializing and reading characters.
     char: packed struct {
-        char: u21,
+        value: u21,
         _reserved: u27 = 0,
         _tag: OtherTag = .char,
         _is_ptr: bool = false,
@@ -268,8 +274,6 @@ pub const Value = packed union {
         _is_fixnum: bool = false,
     },
 
-    const OtherTag = enum(u3) { rune, sstr, char, misc };
-
     const Self = @This();
 
     /// Hexdumps the value.
diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig
index 98bb26f..eb4bbc9 100644
--- a/src/libzisp/value/char.zig
+++ b/src/libzisp/value/char.zig
@@ -16,12 +16,12 @@ pub fn assert(v: Value) void {
 }
 
 pub fn pack(c: u21) Value {
-    return .{ .char = .{ .char = c } };
+    return .{ .char = .{ .value = c } };
 }
 
 pub fn unpack(v: Value) u21 {
     assert(v);
-    return @truncate(v.char.char);
+    return @truncate(v.char.value);
 }
 
 // Zisp API
diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig
index ab251b4..3a4dc61 100644
--- a/src/libzisp/value/rune.zig
+++ b/src/libzisp/value/rune.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 
 const value = @import("../value.zig");
 
+const ShortString = value.ShortString;
 const Value = value.Value;
 
 // Zig API
@@ -48,12 +49,16 @@ pub fn pack(s: []const u8) Value {
     return v;
 }
 
-pub fn unpack(v: Value) struct { [6]u8, u3 } {
-    const s: [6]u8 = @bitCast(v.rune.name);
+pub fn unpack(v: Value) ShortString {
+    var s = ShortString{ .buffer = @bitCast(v.sstr.string) };
     inline for (0..6) |i| {
-        if (s[i] == 0) return .{ s, i };
+        if (s.buffer[i] == 0) {
+            s.len = i;
+            return s;
+        }
     }
-    return .{ s, 6 };
+    s.len = 6;
+    return s;
 }
 
 // Zisp API
diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig
index 2be2647..a2f6bf8 100644
--- a/src/libzisp/value/sstr.zig
+++ b/src/libzisp/value/sstr.zig
@@ -1,11 +1,15 @@
 const std = @import("std");
 
-const Value = @import("../value.zig").Value;
+const value = @import("../value.zig");
+
+const ShortString = value.ShortString;
+const OtherTag = value.OtherTag;
+const Value = value.Value;
 
 // Zig API
 
 pub fn check(v: Value) bool {
-    return v.isOther(.sstr);
+    return v.isOther(.sstr) or v.isOther(.sstr_lit);
 }
 
 pub fn assert(v: Value) void {
@@ -43,8 +47,16 @@ fn assertValidSstr(s: []const u8) void {
 // Note: rune.zig uses equivalent code; probably good to keep in sync.
 
 pub fn pack(s: []const u8) Value {
+    return _pack(s, .sstr);
+}
+
+pub fn packLiteral(s: []const u8) Value {
+    return _pack(s, .sstr_lit);
+}
+
+fn _pack(s: []const u8, tag: OtherTag) Value {
     assertValidSstr(s);
-    var v = Value{ .sstr = .{ .string = 0 } };
+    var v = Value{ .sstr = .{ .string = 0, .tag = tag } };
     const dest: [*]u8 = @ptrCast(&v.sstr.string);
     @memcpy(dest, s);
     return v;
@@ -59,4 +71,13 @@ pub fn unpack(v: Value) struct { [6]u8, u3 } {
     return .{ s, 6 };
 }
 
+pub fn unpack1(v: Value) struct { [6]u8, u3 } {
+    assert(v);
+    const s: [6]u8 = @bitCast(v.sstr.string);
+    for (0..6) |i| {
+        if (s[i] == 0) return .{ s, @intCast(i) };
+    }
+    return .{ s, 6 };
+}
+
 // No Zisp API for sstr specifically, since it's a string.  See string.zig.
author	Taylan Kammer <taylan.kammer@gmail.com>	2025-02-25 20:49:49 +0100
committer	Taylan Kammer <taylan.kammer@gmail.com>	2025-02-25 20:49:49 +0100
commit	ca8de6eb6bd0fe1ee3ef22c659cf416d41bc7a2f (patch)
tree	4fc98874a5aafecca3aeb95efccb5d95eb386459
parent	0f432b2c76813f2c0f9e508f10227df491712837 (diff)