summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-02-25 20:49:49 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-02-25 20:49:49 +0100
commitca8de6eb6bd0fe1ee3ef22c659cf416d41bc7a2f (patch)
tree4fc98874a5aafecca3aeb95efccb5d95eb386459
parent0f432b2c76813f2c0f9e508f10227df491712837 (diff)
update
-rw-r--r--src/libzisp.zig420
-rw-r--r--src/libzisp/io/parser.zig194
-rw-r--r--src/libzisp/io/unparser.zig9
-rw-r--r--src/libzisp/value.zig48
-rw-r--r--src/libzisp/value/char.zig4
-rw-r--r--src/libzisp/value/rune.zig13
-rw-r--r--src/libzisp/value/sstr.zig27
7 files changed, 400 insertions, 315 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig
index 4bf8b08..17264a8 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -10,99 +10,100 @@ pub const io = @import("libzisp/io.zig");
pub const lib = @import("libzisp/lib.zig");
pub const value = @import("libzisp/value.zig");
+pub const ShortString = value.ShortString;
pub const Value = value.Value;
-test "double" {
- const d1: f64 = 0.123456789;
- const d2: f64 = -0.987654321;
- const v1 = value.double.pack(d1);
- const v2 = value.double.pack(d2);
- const v3 = value.double.add(v1, v2);
- const result = value.double.unpack(v3);
-
- try std.testing.expect(value.double.check(v1));
- try std.testing.expect(value.double.check(v2));
- try std.testing.expect(value.double.check(v3));
-
- try std.testing.expectEqual(d1 + d2, result);
-}
-
-test "fixnum" {
- const int1: i64 = 123456789;
- const int2: i64 = -987654321;
- const v1 = value.fixnum.pack(int1);
- const v2 = value.fixnum.pack(int2);
- const v3 = value.fixnum.add(v1, v2);
- const result = value.fixnum.unpack(v3);
-
- try std.testing.expect(value.fixnum.check(v1));
- try std.testing.expect(value.fixnum.check(v2));
- try std.testing.expect(value.fixnum.check(v3));
-
- try std.testing.expectEqual(int1 + int2, result);
-}
-
-test "ptr" {
- const ptr = value.ptr;
-
- const val: [*]gc.Bucket = @ptrFromInt(256);
- const tag = ptr.Tag.string;
-
- const p = ptr.pack(val, tag);
- try std.testing.expect(ptr.check(p));
- try std.testing.expect(ptr.checkZisp(p, tag));
- try std.testing.expect(ptr.checkStrong(p));
-
- const pv, const pt = ptr.unpack(p);
- try std.testing.expectEqual(val, pv);
- try std.testing.expectEqual(tag, pt);
-
- var w = ptr.makeWeak(p);
- try std.testing.expect(ptr.check(w));
- try std.testing.expect(ptr.checkZisp(w, tag));
- try std.testing.expect(ptr.checkWeak(w));
- try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
- try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w)));
-
- const wv, const wt = ptr.unpack(w);
- try std.testing.expectEqual(val, wv);
- try std.testing.expectEqual(tag, wt);
-
- const wv2, const wt2 = ptr.unpack(ptr.getWeak(w));
- try std.testing.expectEqual(val, wv2);
- try std.testing.expectEqual(tag, wt2);
-
- ptr.setWeakNull(&w);
- try std.testing.expect(ptr.check(w));
- try std.testing.expect(ptr.checkWeak(w));
- try std.testing.expect(ptr.isWeakNull(w));
- try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
- try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w)));
- try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
-}
-
-test "fptr" {
- const ptr = value.ptr;
-
- const int1: u50 = 0;
- const int2: u50 = std.math.maxInt(u50);
-
- const f1 = ptr.packForeign(int1);
- try std.testing.expect(ptr.checkForeign(f1));
- try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
-
- const f2 = ptr.packForeign(int2);
- try std.testing.expect(ptr.checkForeign(f2));
- try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
-}
-
-test "rune" {
- const r1 = value.rune.pack("test");
- try std.testing.expect(value.rune.check(r1));
-
- const s1, const l1 = value.rune.unpack(r1);
- try std.testing.expectEqualStrings("test", s1[0..l1]);
-}
+// test "double" {
+// const d1: f64 = 0.123456789;
+// const d2: f64 = -0.987654321;
+// const v1 = value.double.pack(d1);
+// const v2 = value.double.pack(d2);
+// const v3 = value.double.add(v1, v2);
+// const result = value.double.unpack(v3);
+
+// try std.testing.expect(value.double.check(v1));
+// try std.testing.expect(value.double.check(v2));
+// try std.testing.expect(value.double.check(v3));
+
+// try std.testing.expectEqual(d1 + d2, result);
+// }
+
+// test "fixnum" {
+// const int1: i64 = 123456789;
+// const int2: i64 = -987654321;
+// const v1 = value.fixnum.pack(int1);
+// const v2 = value.fixnum.pack(int2);
+// const v3 = value.fixnum.add(v1, v2);
+// const result = value.fixnum.unpack(v3);
+
+// try std.testing.expect(value.fixnum.check(v1));
+// try std.testing.expect(value.fixnum.check(v2));
+// try std.testing.expect(value.fixnum.check(v3));
+
+// try std.testing.expectEqual(int1 + int2, result);
+// }
+
+// test "ptr" {
+// const ptr = value.ptr;
+
+// const val: [*]gc.Bucket = @ptrFromInt(256);
+// const tag = ptr.Tag.string;
+
+// const p = ptr.pack(val, tag);
+// try std.testing.expect(ptr.check(p));
+// try std.testing.expect(ptr.checkZisp(p, tag));
+// try std.testing.expect(ptr.checkStrong(p));
+
+// const pv, const pt = ptr.unpack(p);
+// try std.testing.expectEqual(val, pv);
+// try std.testing.expectEqual(tag, pt);
+
+// var w = ptr.makeWeak(p);
+// try std.testing.expect(ptr.check(w));
+// try std.testing.expect(ptr.checkZisp(w, tag));
+// try std.testing.expect(ptr.checkWeak(w));
+// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
+// try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w)));
+
+// const wv, const wt = ptr.unpack(w);
+// try std.testing.expectEqual(val, wv);
+// try std.testing.expectEqual(tag, wt);
+
+// const wv2, const wt2 = ptr.unpack(ptr.getWeak(w));
+// try std.testing.expectEqual(val, wv2);
+// try std.testing.expectEqual(tag, wt2);
+
+// ptr.setWeakNull(&w);
+// try std.testing.expect(ptr.check(w));
+// try std.testing.expect(ptr.checkWeak(w));
+// try std.testing.expect(ptr.isWeakNull(w));
+// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w)));
+// try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w)));
+// try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
+// }
+
+// test "fptr" {
+// const ptr = value.ptr;
+
+// const int1: u50 = 0;
+// const int2: u50 = std.math.maxInt(u50);
+
+// const f1 = ptr.packForeign(int1);
+// try std.testing.expect(ptr.checkForeign(f1));
+// try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
+
+// const f2 = ptr.packForeign(int2);
+// try std.testing.expect(ptr.checkForeign(f2));
+// try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
+// }
+
+// test "rune" {
+// const r = value.rune.pack("test");
+// try std.testing.expect(value.rune.check(r));
+
+// const s = value.rune.unpack(r);
+// try std.testing.expectEqualStrings("test", s.slice());
+// }
const SstrImpl = struct { SstrPack, SstrUnpack };
const SstrPack = *const fn ([]const u8) Value;
@@ -111,8 +112,8 @@ const SstrUnpack = *const fn (Value) struct { [6]u8, u3 };
test "sstr" {
const impls = [_]SstrImpl{
.{ value.sstr.pack, value.sstr.unpack },
- // .{ value.sstr.pack1, value.sstr.unpack1 },
- // .{ value.sstr.pack2, value.sstr.unpack2 },
+ .{ value.sstr.pack, value.sstr.unpack1 },
+ // .{ value.sstr.pack, value.sstr.unpack2 },
// .{ value.sstr.pack3, value.sstr.unpack3 },
// .{ value.sstr.pack4, value.sstr.unpack4 },
};
@@ -125,7 +126,7 @@ test "sstr" {
const iters = switch (@import("builtin").mode) {
.Debug, .ReleaseSmall => 10_000_000,
.ReleaseSafe => 100_000_000,
- .ReleaseFast => 1_000_000_000,
+ .ReleaseFast => 100_000_000,
};
std.debug.print("Benchmarking with {} iters.\n", .{iters});
inline for (impls, 0..) |impl, i| {
@@ -141,14 +142,14 @@ fn testSstr(impl: SstrImpl) !void {
const ss2 = pack("123");
const ss3 = pack("123456");
- const s1, const l1 = unpack(ss1);
- const s2, const l2 = unpack(ss2);
- const s3, const l3 = unpack(ss3);
-
try std.testing.expect(value.sstr.check(ss1));
try std.testing.expect(value.sstr.check(ss2));
try std.testing.expect(value.sstr.check(ss3));
+ const s1, const l1 = unpack(ss1);
+ const s2, const l2 = unpack(ss2);
+ const s3, const l3 = unpack(ss3);
+
try std.testing.expectEqual(1, l1);
try std.testing.expectEqualStrings("1", s1[0..l1]);
@@ -195,117 +196,116 @@ fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void {
std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs });
}
-test "char" {
- const c1 = value.char.pack('\x00');
- try std.testing.expect(value.char.check(c1));
- try std.testing.expectEqual('\x00', value.char.unpack(c1));
-
- const c2 = value.char.pack('😀');
- try std.testing.expect(value.char.check(c2));
- try std.testing.expectEqual('😀', value.char.unpack(c2));
-}
-
-test "misc" {
- const f = value.boole.pack(false);
- try std.testing.expect(value.boole.check(f));
- try std.testing.expectEqual(false, value.boole.unpack(f));
- try std.testing.expect(value.boole.unpack(value.boole.pred(f)));
+// test "char" {
+// const c1 = value.char.pack('\x00');
+// try std.testing.expect(value.char.check(c1));
+// try std.testing.expectEqual('\x00', value.char.unpack(c1));
- const t = value.boole.pack(true);
- try std.testing.expect(value.boole.check(t));
- try std.testing.expectEqual(true, value.boole.unpack(t));
- try std.testing.expect(value.boole.unpack(value.boole.pred(t)));
+// const c2 = value.char.pack('😀');
+// try std.testing.expect(value.char.check(c2));
+// try std.testing.expectEqual('😀', value.char.unpack(c2));
+// }
- const nil = value.nil.get();
- try std.testing.expect(value.nil.check(nil));
- try std.testing.expect(value.boole.unpack(value.nil.pred(nil)));
-
- const eof = value.eof.get();
- try std.testing.expect(value.eof.check(eof));
- try std.testing.expect(value.boole.unpack(value.eof.pred(eof)));
-}
-
-test "pair" {
- const v1 = value.fixnum.pack(1);
- const v2 = value.fixnum.pack(2);
-
- const v3 = value.fixnum.pack(3);
- const v4 = value.fixnum.pack(4);
-
- const p = value.pair.cons(v1, v2);
- try std.testing.expect(value.pair.check(p));
- try std.testing.expect(value.boole.unpack(value.pair.pred(p)));
-
- const car = value.pair.car(p);
- const cdr = value.pair.cdr(p);
- try std.testing.expectEqual(1, value.fixnum.unpack(car));
- try std.testing.expectEqual(2, value.fixnum.unpack(cdr));
-
- value.pair.setcar(p, v3);
- value.pair.setcdr(p, v4);
-
- const car2 = value.pair.car(p);
- const cdr2 = value.pair.cdr(p);
- try std.testing.expectEqual(3, value.fixnum.unpack(car2));
- try std.testing.expectEqual(4, value.fixnum.unpack(cdr2));
-}
-
-test "parse" {
- const val = io.parser.parseCode("\"foo\"");
- const r, const rl = value.rune.unpack(value.pair.car(val));
- const s, const sl = value.sstr.unpack(value.pair.cdr(val));
- try std.testing.expectEqualStrings("STRING", r[0..rl]);
- try std.testing.expectEqualStrings("foo", s[0..sl]);
-}
-
-test "parse2" {
- const val = io.parser.parseCode(
- \\ ;; Testing some crazy datum comments
- \\ ##;"bar"#;([x #"y"]{##`,'z})"foo"
- \\ ;; end
- );
-
- const r, const rl = value.rune.unpack(value.pair.car(val));
- try std.testing.expectEqualStrings("HASH", r[0..rl]);
-
- const cdr = value.pair.cdr(val);
-
- const s, const sl = value.rune.unpack(value.pair.car(cdr));
- try std.testing.expectEqualStrings("STRING", s[0..sl]);
-
- const f, const fl = value.sstr.unpack(value.pair.cdr(cdr));
- try std.testing.expectEqualStrings("foo", f[0..fl]);
-}
-
-test "parse3" {
- const val = io.parser.parseCode(
- \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat)
- );
-
- const car = value.pair.car;
- const cdr = value.pair.cdr;
-
- // const e1 = car(val);
- const e2 = car(cdr(val));
- // const e3 = car(cdr(cdr(val)));
- // const e4 = car(cdr(cdr(cdr(val))));
-
- try std.testing.expect(value.rune.check(e2));
-}
-
-test "parse4" {
- const val = io.parser.parseCode("(foo . #;x bar #;y)");
-
- const s, const sl = value.sstr.unpack(value.pair.car(val));
- try std.testing.expectEqualStrings("foo", s[0..sl]);
-
- const f, const fl = value.sstr.unpack(value.pair.cdr(val));
- try std.testing.expectEqualStrings("bar", f[0..fl]);
-}
-
-test "unparse" {
- try std.testing.expectEqualStrings(
- "#foo",
- io.unparser.unparse(io.parser.parseCode("#foo")),
- );
-}
+// test "misc" {
+// const f = value.boole.pack(false);
+// try std.testing.expect(value.boole.check(f));
+// try std.testing.expectEqual(false, value.boole.unpack(f));
+// try std.testing.expect(value.boole.unpack(value.boole.pred(f)));
+
+// const t = value.boole.pack(true);
+// try std.testing.expect(value.boole.check(t));
+// try std.testing.expectEqual(true, value.boole.unpack(t));
+// try std.testing.expect(value.boole.unpack(value.boole.pred(t)));
+
+// const nil = value.nil.get();
+// try std.testing.expect(value.nil.check(nil));
+// try std.testing.expect(value.boole.unpack(value.nil.pred(nil)));
+
+// const eof = value.eof.get();
+// try std.testing.expect(value.eof.check(eof));
+// try std.testing.expect(value.boole.unpack(value.eof.pred(eof)));
+// }
+
+// test "pair" {
+// const v1 = value.fixnum.pack(1);
+// const v2 = value.fixnum.pack(2);
+
+// const v3 = value.fixnum.pack(3);
+// const v4 = value.fixnum.pack(4);
+
+// const p = value.pair.cons(v1, v2);
+// try std.testing.expect(value.pair.check(p));
+// try std.testing.expect(value.boole.unpack(value.pair.pred(p)));
+
+// const car = value.pair.car(p);
+// const cdr = value.pair.cdr(p);
+// try std.testing.expectEqual(1, value.fixnum.unpack(car));
+// try std.testing.expectEqual(2, value.fixnum.unpack(cdr));
+
+// value.pair.setcar(p, v3);
+// value.pair.setcdr(p, v4);
+
+// const car2 = value.pair.car(p);
+// const cdr2 = value.pair.cdr(p);
+// try std.testing.expectEqual(3, value.fixnum.unpack(car2));
+// try std.testing.expectEqual(4, value.fixnum.unpack(cdr2));
+// }
+
+// test "parse" {
+// const val = io.parser.parseCode("\"foo\"");
+
+// try std.testing.expect(value.sstr.check(val));
+
+// const s = value.sstr.unpack(val);
+// try std.testing.expectEqualStrings("foo", s.slice());
+// }
+
+// test "parse2" {
+// const val = io.parser.parseCode(
+// \\ ;; Testing some crazy datum comments
+// \\ ##;"bar"#;([x #"y"]{##`,'z})"foo"
+// \\ ;; end
+// );
+
+// const r = value.rune.unpack(value.pair.car(val));
+// try std.testing.expectEqualStrings("HASH", r.slice());
+
+// const s = value.pair.cdr(val);
+// try std.testing.expect(value.sstr.check(s));
+
+// const f = value.sstr.unpack(s);
+// try std.testing.expectEqualStrings("foo", f.slice());
+// }
+
+// test "parse3" {
+// const val = io.parser.parseCode(
+// \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat)
+// );
+
+// const car = value.pair.car;
+// const cdr = value.pair.cdr;
+
+// // const e1 = car(val);
+// const e2 = car(cdr(val));
+// // const e3 = car(cdr(cdr(val)));
+// // const e4 = car(cdr(cdr(cdr(val))));
+
+// try std.testing.expect(value.rune.check(e2));
+// }
+
+// test "parse4" {
+// const val = io.parser.parseCode("(foo . #;x bar #;y)");
+
+// const s = value.sstr.unpack(value.pair.car(val));
+// try std.testing.expectEqualStrings("foo", s.slice());
+
+// const f = value.sstr.unpack(value.pair.cdr(val));
+// try std.testing.expectEqualStrings("bar", f.slice());
+// }
+
+// test "unparse" {
+// try std.testing.expectEqualStrings(
+// "#foo",
+// io.unparser.unparse(io.parser.parseCode("#foo")),
+// );
+// }
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 1359dcc..45a752e 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -32,45 +32,66 @@
// switching between modes.
//
// When the code parser encounters syntax sugar, it always transforms it into a
-// list starting with a rune, like in the following examples:
+// list starting with a rune. The list of all such transformations follows.
//
-// #(...) -> (#HASH ...)
+// #datum -> (#HASH . datum) #name(...) -> (#name ...)
//
-// [...] -> (#SQUARE ...)
+// [...] -> (#SQUARE ...) dat1dat2 -> (#JOIN dat1 . dat2)
//
-// 'foo -> (#QUOTE . foo)
+// {...} -> (#BRACE ...) dat1.dat2 -> (#DOT dat1 . dat2)
//
-// These can combine arbitrarily:
+// 'datum -> (#QUOTE . datum) #n#=datum -> (#LABEL n . datum)
//
-// #{...} -> (#HASH #BRACE ...)
+// `datum -> (#GRAVE . datum) #n# -> (#LABEL . n)
//
-// #'foo -> (#HASH #QUOTE . foo)
+// ,datum -> (#COMMA . datum)
//
-// ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
+// (The "#datum" form refers to expressions that cannot be mistaken for a rune,
+// for example: #(...) or #"..." etc.)
//
-// As a specialty, double-quoted strings are actually considered sugar by the
-// code parser, and are transformed as follows into data:
+// The terms "datum", "dat1", and "dat2" refer to an arbitrary datum; "name" is
+// a rune name; an ellipsis means zero or more data; "n" is a non-negative integer.
//
-// "..." -> (#STRING . "...")
+// Though not represented in the table above due to notational difficulty, the
+// format "#name(...)" doesn't require a list in the second position; any datum
+// works, so long as there's no ambiguity:
//
-// (Otherwise, all string literals would be identifiers, or all identifiers
-// would be string literals, because Zisp doesn't differentiate strings and
-// symbols like traditional lisps. Also, note that although we could reuse
-// #QUOTE here, instead of using #STRING, this would make it impossible to
-// differentiate between the code expressions #'foo and #"foo".)
+// #name1#name2 -> (#name1 . #name2)
+//
+// #name"text" -> (#name . "text")
+//
+// As a counter-example, following a rune immediately with a bare string is not
+// possible, since it's ambiguous:
+//
+// #abcdefgh ;Could be (#abcdef . gh) or (#abcde . fgh) or ...
+//
+// The parser will see this as an attempt to use an 8-letter rune name, and
+// raise an error, since rune names are limited to 6 characters.
+//
+// Syntax sugar can combine arbitrarily:
+//
+// #{...} -> (#HASH #BRACE ...)
+//
+// #'foo -> (#HASH #QUOTE . foo)
+//
+// ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
+//
+// {x y}[i j] -> (#JOIN (#BRACE x y) #SQUARE i j)
+//
+// foo.bar.baz{x y} -> (#JOIN (#DOT (#DOT foo . bar) . baz) #BRACE x y)
//
// Runes are case-sensitive, and the code parser only emits runes using
// upper-case letters, so lower-case runes are free for user extensions.
+// Exceptions are runes used directly in code, like #true and #false.
+//
+// Although strings and symbols aren't disjoint types in Zisp, the parser flags
+// double-quoted string literals to allow distinguishing them from bare strings.
+// Otherwise, it would not be possible for the compiler to tell the difference
+// between an identifier and a string literal.
//
// You may be wondering about numbers. As far as the parser is concerned,
// numbers are strings. It's the decoder (see below) that will turn bare
-// strings (those not marked with #STRING) into numbers where appropriate.
-//
-// Datum labels are also handled by the decoder; they desugar like so:
-//
-// #n# -> (#LABEL . n)
-//
-// #n#=DATUM -> (#LABEL n . DATUM)
+// strings into numbers where appropriate.
//
// Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp.
// The operand of #QUOTE is the entire cdr. The same principle is used when
@@ -84,6 +105,8 @@
//
// #{x} -> (#HASH (#BRACE (x))) #{x} -> (#HASH #BRACE x)
//
+// foo(x y) -> (#JOIN foo (x y)) foo(x y) -> (#JOIN foo x y)
+//
//
// === Decoder ===
//
@@ -94,11 +117,12 @@
// expect a vector literal like #(...) to work in Scheme.
//
// Runes may be decoded in isolation as well, rather than transforming a list
-// whose head they appear in. This is how #true and #false are implemented.
+// whose head they appear in. This can implement #true and #false. (These
+// would be used verbatim in code, rather than emitted by the parser.)
//
// The decoder may also perform arbitrary transforms on any type; for example,
-// it may turn bare strings (those not marked with #STRING) into numbers when
-// it's decoding data representing code. This is how number literals are
+// it may turn bare strings (those not flagged as double-quoted) into numbers
+// when it's decoding data representing code. This is how number literals are
// implemented in Zisp.
//
// The decoder recognizes (#QUOTE ...) to implement the traditional quoting
@@ -217,7 +241,7 @@ const Value = value.Value;
pub const Mode = enum { code, data };
const TopState = struct {
- alloc: std.heap.MemoryPool(State),
+ alloc: std.mem.Allocator,
input: []const u8,
pos: usize = 0,
mode: Mode = undefined,
@@ -295,10 +319,6 @@ const State = struct {
};
}
- fn isFinalNull(s: *State) bool {
- return s.peek() == 0 and s.top.pos == s.top.input.len - 1;
- }
-
fn recurParse(s: *State, start_from: Fn, return_to: Fn) *State {
const newState = s.top.alloc.create(State) catch @panic("OOM");
newState.* = .{
@@ -349,9 +369,10 @@ fn readShortString(
const Fn = enum {
start_parse,
start_datum,
+ end_dotted_datum,
+ end_joined_datum,
end_datum_label,
end_hash_datum,
- end_rune_datum,
end_quote,
continue_list,
finish_improper_list,
@@ -367,17 +388,18 @@ pub fn parse(input: []const u8, mode: Mode) Value {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
defer if (gpa.deinit() == .leak) @panic("leak");
const alloc = gpa.allocator();
- var pool: std.heap.MemoryPool(State) = .init(alloc);
- defer pool.deinit();
- var top = TopState{ .alloc = pool, .input = input, .mode = mode };
+ // var pool: std.heap.MemoryPool(State) = .init(alloc);
+ // defer pool.deinit();
+ var top = TopState{ .alloc = alloc, .input = input, .mode = mode };
var s0 = State{ .top = &top };
var s = &s0;
while (true) s = switch (s.next) {
.start_parse => startParse(s),
.start_datum => startDatum(s),
+ .end_dotted_datum => endDottedDatum(s),
+ .end_joined_datum => endJoinedDatum(s),
.end_datum_label => endDatumLabel(s),
.end_hash_datum => endHashDatum(s),
- .end_rune_datum => endRuneDatum(s),
.end_quote => endQuote(s),
.continue_list => continueList(s),
.finish_improper_list => finishImproperList(s),
@@ -424,17 +446,62 @@ fn startDatum(s: *State) *State {
'(', '[', '{' => startList(s),
- // Periods are only allowed in the middle of a string, or to express
- // improper lists, because the following look too much like typos:
- //
- // (foo. bar) (foo .bar) (123. 456) (123 .456)
- //
'.' => err(s, "misplaced period"),
else => startBareString(s),
};
}
+fn endDatum(s: *State, d: Value) *State {
+ //
+ // We're at the end of a datum; check for dot and join notations:
+ //
+ // DATUM|.DATUM2
+ //
+ // DATUM|DATUM2
+ //
+
+ if (isEndOfDatum(s)) {
+ // Nope, end it.
+ return s.returnDatum(d);
+ }
+
+ // These are only allowed in code mode.
+ if (s.mode() == .data) {
+ return err(s, "invalid use of hash in data mode");
+ }
+
+ s.context = d;
+
+ if (s.peek() == '.') {
+ s.skip();
+ return s.recurParse(.start_datum, .end_dotted_datum);
+ }
+
+ return s.recurParse(.start_datum, .end_joined_datum);
+}
+
+fn endDottedDatum(s: *State) *State {
+ const rune = value.rune.pack("DOT");
+ const first = s.context;
+ const second = s.retval;
+ return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second)));
+}
+
+fn endJoinedDatum(s: *State) *State {
+ const rune = value.rune.pack("JOIN");
+ const first = s.context;
+ const second = s.retval;
+ return endDatum(s, value.pair.cons(rune, value.pair.cons(first, second)));
+}
+
+fn isEndOfDatum(s: *State) bool {
+ return s.eof() or switch (s.peek()) {
+ '\t', '\n', ' ', ';', ')', ']', '}' => true,
+ else => false,
+ };
+}
+
fn handleHash(s: *State) *State {
s.skip();
//
@@ -496,47 +563,37 @@ fn handleRune(s: *State) *State {
// #foo|(...)
//
- if (isEndOfRune(s)) {
+ if (isEndOfDatum(s)) {
// Nope, just a stand-alone rune.
return s.returnDatum(rune);
}
// Otherwise, it's followed by a datum, like: #foo(...)
- // Which is only allowed in code mode.
- if (s.mode() == .data) {
- return err(s, "invalid use of hash in data mode");
- }
-
- s.context = rune;
- return s.recurParse(.start_datum, .end_rune_datum);
+ return endDatum(s, rune);
}
fn readRune(s: *State) ?Value {
return readShortString(s, std.ascii.isAlphanumeric, value.rune.pack);
}
-fn isEndOfRune(s: *State) bool {
- return s.eof() or switch (s.peek()) {
- '\t', '\n', ' ', ')', ']', '}' => true,
- else => false,
- };
-}
-
-fn endRuneDatum(s: *State) *State {
- return s.returnDatum(value.pair.cons(s.context, s.retval));
-}
-
fn handleDatumLabel(s: *State) *State {
const n = readDatumLabel(s) orelse return err(s, "datum label too long");
//
// We're at the end of the numeric label now; possibilities are:
//
- // #n#|
+ // #n|#
//
- // #n#|=DATUM
+ // #n|#=DATUM
//
+ if (s.eof()) {
+ return err(s, "unexpected EOF while reading datum label");
+ }
+ if (s.getc() != '#') {
+ return err(s, "invalid character while reading datum label");
+ }
+
if (s.eof() or s.isWhitespace()) {
const rune = value.rune.pack("LABEL");
return s.returnDatum(value.pair.cons(rune, n));
@@ -570,14 +627,7 @@ fn startQuotedString(s: *State) *State {
s.skip();
const str = readQuotedString(s) catch return err(s, "unclosed string");
- if (s.mode() == .code) {
- // "foo bar" => (#STRING . "foo bar")
- const rune = value.rune.pack("STRING");
- const pair = value.pair.cons(rune, str);
- return s.returnDatum(pair);
- } else {
- return s.returnDatum(str);
- }
+ return s.returnDatum(str);
}
// RQS = Read Quoted String
@@ -588,16 +638,16 @@ fn readQuotedString(s: *State) !Value {
}
fn readQuotedSstr(s: *State) !?Value {
- // We will reset to this position if we fail.
const start_pos = s.pos();
+ // TODO: Handle escapes.
var buf: [6]u8 = undefined;
var i: u8 = 0;
while (!s.eof()) {
const c = s.getc();
if (c == '"') {
// ok, return what we accumulated
- return value.sstr.pack(buf[0..i]);
+ return value.sstr.packLiteral(buf[0..i]);
}
if (i == 6) {
// failed; reset and bail out
diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig
index 83186c2..d835924 100644
--- a/src/libzisp/io/unparser.zig
+++ b/src/libzisp/io/unparser.zig
@@ -2,15 +2,20 @@ const std = @import("std");
const value = @import("../value.zig");
+const ShortString = value.ShortString;
const Value = value.Value;
+// const State = struct {
+
+// }
+
pub fn unparse(v: Value) []u8 {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
var out: std.ArrayList(u8) = .init(gpa.allocator());
if (value.rune.check(v)) {
- const name, const len = value.rune.unpack(v);
+ const name = value.rune.unpack(v);
out.append('#') catch @panic("");
- out.appendSlice(name[0..len]) catch @panic("");
+ out.appendSlice(name.slice()) catch @panic("");
}
return out.toOwnedSlice() catch @panic("");
}
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
index c026cb2..273c659 100644
--- a/src/libzisp/value.zig
+++ b/src/libzisp/value.zig
@@ -87,41 +87,45 @@
//
// This 51-bit range is divided as follows, based on the high bits:
//
-// 000 :: Runes
+// 000 :: Rune
//
// 001 :: Short string
//
-// 010 :: Unicode code point
+// 010 :: Short string literal
//
-// 011 :: Singleton values
+// 011 :: Unicode code point
//
-// 1.. :: Undefined
+// 100 :: Singleton values
//
-// Runes are symbols of 1 to 6 ASCII letters, used to implement reader syntax;
-// both built-in and extensions.
+// 101, 110, 111 :: Undefined
+//
+// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax.
//
// Zisp strings are immutable. Any string fitting into 6 bytes or less will be
// stored as an immediate value, not requiring any heap allocation or interning.
// It's implicitly interned, so to speak. This includes the empty string.
//
-// The null byte serves as a terminator and cannot appear in these strings; a
-// string that short but actually containing a null byte will need to be heap
-// allocated like other strings.
+// The null byte serves as a terminator for strings shorter than 6 bytes, and
+// therefore cannot appear in these strings; a string that short but actually
+// containing a null byte will need to be heap allocated like other strings.
+//
+// There may also be strings that are this short, but ended up on the heap due
+// to being uninterned. Interning them will return the equivalent short string
+// as an immediate.
//
-// There may also be uninterned strings on the heap that are also as short but
-// ended up on the heap due to being uninterned. Calling intern on them will
-// return the equivalent short string as an immediate.
+// The separate type for a short string *literal* is for an efficiency hack in
+// the parser; see commentary there.
//
// Unicode code points need a maximum of 21 bits, yet we have 48 available.
// This may be exploited for a future extension.
//
-// Similarly, it's extremely unlikely that we will ever need more than a few
-// dozen singleton values (false, true, null, and so on). As such, this range
-// of bit patterns may be subdivided in the future. Right now, only the lowest
-// 8 bits are allowed to be set, with the other 40 being reserved, so there's a
-// limit of 256 singleton values that can be defined.
+// Similarly, it's very unlikely that we will ever need more than a handful of
+// singleton values (false, true, nil, and so on). As such, this range of bit
+// patterns may be subdivided in the future. Right now, only the lowest 8 bits
+// are allowed to be set, with the other 40 being reserved, so there's a limit
+// of 256 singleton values that can be defined.
//
-// And on top of all that we still have a 50-bit range left!
+// And on top of that, we have three more 48-bit value ranges that are unused!
//
// The forbidden value 4, Positive Infinity, would be the "empty string rune"
// but that isn't allowed anyway, so all is fine.
@@ -161,6 +165,8 @@ const FILL = 0x7ff;
// Used when dealing with runes and short strings.
pub const ShortString = std.BoundedArray(u8, 6);
+pub const OtherTag = enum(u3) { rune, sstr, sstr_lit, char, misc };
+
/// Represents a Zisp value/object.
pub const Value = packed union {
/// To get the value as a regular double.
@@ -242,7 +248,7 @@ pub const Value = packed union {
sstr: packed struct {
// actually [6]u8 but packed struct cannot contain arrays
string: u48,
- _tag: OtherTag = .sstr,
+ tag: OtherTag,
_is_ptr: bool = false,
_: u11 = FILL,
_is_fixnum: bool = false,
@@ -250,7 +256,7 @@ pub const Value = packed union {
/// For initializing and reading characters.
char: packed struct {
- char: u21,
+ value: u21,
_reserved: u27 = 0,
_tag: OtherTag = .char,
_is_ptr: bool = false,
@@ -268,8 +274,6 @@ pub const Value = packed union {
_is_fixnum: bool = false,
},
- const OtherTag = enum(u3) { rune, sstr, char, misc };
-
const Self = @This();
/// Hexdumps the value.
diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig
index 98bb26f..eb4bbc9 100644
--- a/src/libzisp/value/char.zig
+++ b/src/libzisp/value/char.zig
@@ -16,12 +16,12 @@ pub fn assert(v: Value) void {
}
pub fn pack(c: u21) Value {
- return .{ .char = .{ .char = c } };
+ return .{ .char = .{ .value = c } };
}
pub fn unpack(v: Value) u21 {
assert(v);
- return @truncate(v.char.char);
+ return @truncate(v.char.value);
}
// Zisp API
diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig
index ab251b4..3a4dc61 100644
--- a/src/libzisp/value/rune.zig
+++ b/src/libzisp/value/rune.zig
@@ -2,6 +2,7 @@ const std = @import("std");
const value = @import("../value.zig");
+const ShortString = value.ShortString;
const Value = value.Value;
// Zig API
@@ -48,12 +49,16 @@ pub fn pack(s: []const u8) Value {
return v;
}
-pub fn unpack(v: Value) struct { [6]u8, u3 } {
- const s: [6]u8 = @bitCast(v.rune.name);
+pub fn unpack(v: Value) ShortString {
+ var s = ShortString{ .buffer = @bitCast(v.sstr.string) };
inline for (0..6) |i| {
- if (s[i] == 0) return .{ s, i };
+ if (s.buffer[i] == 0) {
+ s.len = i;
+ return s;
+ }
}
- return .{ s, 6 };
+ s.len = 6;
+ return s;
}
// Zisp API
diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig
index 2be2647..a2f6bf8 100644
--- a/src/libzisp/value/sstr.zig
+++ b/src/libzisp/value/sstr.zig
@@ -1,11 +1,15 @@
const std = @import("std");
-const Value = @import("../value.zig").Value;
+const value = @import("../value.zig");
+
+const ShortString = value.ShortString;
+const OtherTag = value.OtherTag;
+const Value = value.Value;
// Zig API
pub fn check(v: Value) bool {
- return v.isOther(.sstr);
+ return v.isOther(.sstr) or v.isOther(.sstr_lit);
}
pub fn assert(v: Value) void {
@@ -43,8 +47,16 @@ fn assertValidSstr(s: []const u8) void {
// Note: rune.zig uses equivalent code; probably good to keep in sync.
pub fn pack(s: []const u8) Value {
+ return _pack(s, .sstr);
+}
+
+pub fn packLiteral(s: []const u8) Value {
+ return _pack(s, .sstr_lit);
+}
+
+fn _pack(s: []const u8, tag: OtherTag) Value {
assertValidSstr(s);
- var v = Value{ .sstr = .{ .string = 0 } };
+ var v = Value{ .sstr = .{ .string = 0, .tag = tag } };
const dest: [*]u8 = @ptrCast(&v.sstr.string);
@memcpy(dest, s);
return v;
@@ -59,4 +71,13 @@ pub fn unpack(v: Value) struct { [6]u8, u3 } {
return .{ s, 6 };
}
+pub fn unpack1(v: Value) struct { [6]u8, u3 } {
+ assert(v);
+ const s: [6]u8 = @bitCast(v.sstr.string);
+ for (0..6) |i| {
+ if (s[i] == 0) return .{ s, @intCast(i) };
+ }
+ return .{ s, 6 };
+}
+
// No Zisp API for sstr specifically, since it's a string. See string.zig.