From 3d05c94b9d8aa964e4ff848c95d5999cec170e04 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Sun, 30 Mar 2025 18:34:00 +0200 Subject: Big cleanup. --- build.zig | 6 +- src/libzisp.zig | 443 ---------------------- src/libzisp/gc.zig | 54 --- src/libzisp/io.zig | 12 - src/libzisp/io/Parser.zig | 870 ------------------------------------------- src/libzisp/io/decoder.zig | 1 - src/libzisp/io/encoder.zig | 1 - src/libzisp/io/parser.zig | 80 ---- src/libzisp/io/reader.zig | 10 - src/libzisp/io/unparser.zig | 122 ------ src/libzisp/io/writer.zig | 1 - src/libzisp/lib.zig | 1 - src/libzisp/lib/list.zig | 20 - src/libzisp/value.zig | 325 ---------------- src/libzisp/value/boole.zig | 33 -- src/libzisp/value/char.zig | 31 -- src/libzisp/value/double.zig | 38 -- src/libzisp/value/eof.zig | 28 -- src/libzisp/value/fixnum.zig | 84 ----- src/libzisp/value/istr.zig | 65 ---- src/libzisp/value/nil.zig | 28 -- src/libzisp/value/pair.zig | 55 --- src/libzisp/value/ptr.zig | 174 --------- src/libzisp/value/rune.zig | 82 ---- src/libzisp/value/seq.zig | 57 --- src/libzisp/value/sstr.zig | 78 ---- src/main.zig | 2 +- src/zisp.zig | 440 ++++++++++++++++++++++ src/zisp/gc.zig | 47 +++ src/zisp/io.zig | 12 + src/zisp/io/Parser.zig | 847 +++++++++++++++++++++++++++++++++++++++++ src/zisp/io/decoder.zig | 1 + src/zisp/io/encoder.zig | 1 + src/zisp/io/parser.zig | 75 ++++ src/zisp/io/reader.zig | 10 + src/zisp/io/unparser.zig | 122 ++++++ src/zisp/io/writer.zig | 1 + src/zisp/lib.zig | 1 + src/zisp/lib/list.zig | 20 + src/zisp/value.zig | 326 ++++++++++++++++ src/zisp/value/boole.zig | 33 ++ src/zisp/value/char.zig | 31 ++ src/zisp/value/double.zig | 38 ++ src/zisp/value/eof.zig | 28 ++ src/zisp/value/fixnum.zig | 84 +++++ src/zisp/value/istr.zig | 65 ++++ src/zisp/value/nil.zig | 28 ++ src/zisp/value/pair.zig | 55 +++ src/zisp/value/ptr.zig | 174 +++++++++ src/zisp/value/rune.zig | 82 ++++ src/zisp/value/seq.zig | 57 +++ src/zisp/value/sstr.zig | 78 ++++ 52 files changed, 2660 insertions(+), 2697 deletions(-) delete mode 100644 src/libzisp.zig delete mode 100644 src/libzisp/gc.zig delete mode 100644 src/libzisp/io.zig delete mode 100644 src/libzisp/io/Parser.zig delete mode 100644 src/libzisp/io/decoder.zig delete mode 100644 src/libzisp/io/encoder.zig delete mode 100644 src/libzisp/io/parser.zig delete mode 100644 src/libzisp/io/reader.zig delete mode 100644 src/libzisp/io/unparser.zig delete mode 100644 src/libzisp/io/writer.zig delete mode 100644 src/libzisp/lib.zig delete mode 100644 src/libzisp/lib/list.zig delete mode 100644 src/libzisp/value.zig delete mode 100644 src/libzisp/value/boole.zig delete mode 100644 src/libzisp/value/char.zig delete mode 100644 src/libzisp/value/double.zig delete mode 100644 src/libzisp/value/eof.zig delete mode 100644 src/libzisp/value/fixnum.zig delete mode 100644 src/libzisp/value/istr.zig delete mode 100644 src/libzisp/value/nil.zig delete mode 100644 src/libzisp/value/pair.zig delete mode 100644 src/libzisp/value/ptr.zig delete mode 100644 src/libzisp/value/rune.zig delete mode 100644 src/libzisp/value/seq.zig delete mode 100644 src/libzisp/value/sstr.zig create mode 100644 src/zisp.zig create mode 100644 src/zisp/gc.zig create mode 100644 src/zisp/io.zig create mode 100644 src/zisp/io/Parser.zig create mode 100644 src/zisp/io/decoder.zig create mode 100644 src/zisp/io/encoder.zig create mode 100644 src/zisp/io/parser.zig create mode 100644 src/zisp/io/reader.zig create mode 100644 src/zisp/io/unparser.zig create mode 100644 src/zisp/io/writer.zig create mode 100644 src/zisp/lib.zig create mode 100644 src/zisp/lib/list.zig create mode 100644 src/zisp/value.zig create mode 100644 src/zisp/value/boole.zig create mode 100644 src/zisp/value/char.zig create mode 100644 src/zisp/value/double.zig create mode 100644 src/zisp/value/eof.zig create mode 100644 src/zisp/value/fixnum.zig create mode 100644 src/zisp/value/istr.zig create mode 100644 src/zisp/value/nil.zig create mode 100644 src/zisp/value/pair.zig create mode 100644 src/zisp/value/ptr.zig create mode 100644 src/zisp/value/rune.zig create mode 100644 src/zisp/value/seq.zig create mode 100644 src/zisp/value/sstr.zig diff --git a/build.zig b/build.zig index fea9d7f..14f4ec4 100644 --- a/build.zig +++ b/build.zig @@ -23,7 +23,7 @@ pub fn build(b: *std.Build) void { // only contains e.g. external object files, you can make this `null`. // In this case the main source file is merely a path, however, in more // complicated build scripts, this could be a generated file. - .root_source_file = b.path("src/libzisp.zig"), + .root_source_file = b.path("src/zisp.zig"), .target = target, .optimize = optimize, }); @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void { // Modules can depend on one another using the `std.Build.Module.addImport` function. // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a // file path. In this case, we set up `exe_mod` to import `lib_mod`. - exe_mod.addImport("libzisp", lib_mod); + exe_mod.addImport("zisp", lib_mod); // Now, we will create a static library based on the module we created above. // This creates a `std.Build.Step.Compile`, which is the build step responsible // for actually invoking the compiler. const lib = b.addLibrary(.{ .linkage = .static, - .name = "libzisp", + .name = "zisp", .root_module = lib_mod, }); diff --git a/src/libzisp.zig b/src/libzisp.zig deleted file mode 100644 index ced15f0..0000000 --- a/src/libzisp.zig +++ /dev/null @@ -1,443 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const testing = std.testing; - -pub const gc = @import("libzisp/gc.zig"); -pub const io = @import("libzisp/io.zig"); -pub const lib = @import("libzisp/lib.zig"); -pub const value = @import("libzisp/value.zig"); - -pub const Hval = gc.Hval; - -pub const ShortString = value.ShortString; -pub const Value = value.Value; - -fn benchmark(name: []const u8, iters: usize, func: fn () anyerror!void) !void { - var timer = try std.time.Timer.start(); - for (0..iters) |i| { - _ = i; - try func(); - } - const ns: f64 = @floatFromInt(timer.lap()); - const secs = ns / 1_000_000_000; - std.debug.print( - "bench {s} x {}: {d:.3}s\n", - .{ name, iters, secs }, - ); -} - -test "double" { - const d1: f64 = 0.123456789; - const d2: f64 = -0.987654321; - const v1 = value.double.pack(d1); - const v2 = value.double.pack(d2); - const v3 = value.double.add(v1, v2); - const result = value.double.unpack(v3); - - try std.testing.expect(value.double.check(v1)); - try std.testing.expect(value.double.check(v2)); - try std.testing.expect(value.double.check(v3)); - - try std.testing.expectEqual(d1 + d2, result); -} - -test "fixnum" { - const int1: i64 = 123456789; - const int2: i64 = -987654321; - const v1 = value.fixnum.pack(int1); - const v2 = value.fixnum.pack(int2); - const v3 = value.fixnum.add(v1, v2); - const result = value.fixnum.unpack(v3); - - try std.testing.expect(value.fixnum.check(v1)); - try std.testing.expect(value.fixnum.check(v2)); - try std.testing.expect(value.fixnum.check(v3)); - - try std.testing.expectEqual(int1 + int2, result); -} - -test "ptr" { - const ptr = value.ptr; - - const val: *Hval = @ptrFromInt(256); - const tag = ptr.Tag.pair; - - const p = ptr.pack(val, tag); - try std.testing.expect(ptr.check(p)); - try std.testing.expect(ptr.checkZispTag(p, tag)); - try std.testing.expect(ptr.checkStrong(p)); - - const pv, const pt = ptr.unpack(p); - try std.testing.expectEqual(val, pv); - try std.testing.expectEqual(tag, pt); - - var w = ptr.makeWeak(p); - try std.testing.expect(ptr.check(w)); - try std.testing.expect(ptr.checkZispTag(w, tag)); - try std.testing.expect(ptr.checkWeak(w)); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); - try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w))); - - const wv, const wt = ptr.unpack(w); - try std.testing.expectEqual(val, wv); - try std.testing.expectEqual(tag, wt); - - const wv2, const wt2 = ptr.unpack(ptr.getWeak(w)); - try std.testing.expectEqual(val, wv2); - try std.testing.expectEqual(tag, wt2); - - ptr.setWeakNull(&w); - try std.testing.expect(ptr.check(w)); - try std.testing.expect(ptr.checkWeak(w)); - try std.testing.expect(ptr.isWeakNull(w)); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); - try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w))); - try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w))); -} - -test "fptr" { - const ptr = value.ptr; - - const int1: u50 = 0; - const int2: u50 = std.math.maxInt(u50); - - const f1 = ptr.packForeign(int1); - try std.testing.expect(ptr.checkForeign(f1)); - try std.testing.expectEqual(int1, ptr.unpackForeign(f1)); - - const f2 = ptr.packForeign(int2); - try std.testing.expect(ptr.checkForeign(f2)); - try std.testing.expectEqual(int2, ptr.unpackForeign(f2)); -} - -test "rune" { - const r = value.rune.pack("test"); - try std.testing.expect(value.rune.check(r)); - - const s = value.rune.unpack(r); - try std.testing.expectEqualStrings("test", s.slice()); -} - -const SstrImpl = struct { SstrPack, SstrUnpack }; -const SstrPack = *const fn ([]const u8) Value; -const SstrUnpack = *const fn (Value) ShortString; - -test "sstr" { - const impls = [_]SstrImpl{ - .{ value.sstr.pack, value.sstr.unpack }, - // .{ value.sstr.pack1, value.sstr.unpack1 }, - // .{ value.sstr.pack2, value.sstr.unpack2 }, - // .{ value.sstr.pack3, value.sstr.unpack3 }, - // .{ value.sstr.pack4, value.sstr.unpack4 }, - }; - - for (impls) |impl| { - try testSstr(impl); - } - - if (impls.len > 1) { - const iters = switch (@import("builtin").mode) { - .Debug, .ReleaseSmall => 10_000_000, - .ReleaseSafe => 100_000_000, - .ReleaseFast => 1_000_000_000, - }; - std.debug.print("Benchmarking sstr with {} iters.\n", .{iters}); - inline for (impls, 0..) |impl, i| { - try benchmarkSstr(impl, i, iters); - } - } -} - -fn testSstr(impl: SstrImpl) !void { - const pack, const unpack = impl; - - const ss1 = pack("1"); - const ss2 = pack("123"); - const ss3 = pack("123456"); - - const s1 = unpack(ss1); - const s2 = unpack(ss2); - const s3 = unpack(ss3); - - try std.testing.expect(value.sstr.check(ss1)); - try std.testing.expect(value.sstr.check(ss2)); - try std.testing.expect(value.sstr.check(ss3)); - - try std.testing.expectEqualStrings("1", s1.slice()); - try std.testing.expectEqualStrings("123", s2.slice()); - try std.testing.expectEqualStrings("123456", s3.slice()); -} - -fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void { - const pack, const unpack = impl; - - var timer = try std.time.Timer.start(); - var ns: f64 = undefined; - var secs: f64 = undefined; - - var ss1: Value = undefined; - var ss2: Value = undefined; - var ss3: Value = undefined; - - for (0..iters) |_i| { - _ = _i; - ss1 = pack("1"); - ss2 = pack("123"); - ss3 = pack("123456"); - } - - ns = @floatFromInt(timer.lap()); - secs = ns / 1_000_000_000; - - std.debug.print("pack{}: {d:.3}s\t", .{ id, secs }); - - for (0..iters) |_i| { - _ = _i; - std.mem.doNotOptimizeAway(unpack(ss1)); - std.mem.doNotOptimizeAway(unpack(ss2)); - std.mem.doNotOptimizeAway(unpack(ss3)); - } - - ns = @floatFromInt(timer.lap()); - secs = ns / 1_000_000_000; - - std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs }); -} - -test "char" { - const c1 = value.char.pack('\x00'); - try std.testing.expect(value.char.check(c1)); - try std.testing.expectEqual('\x00', value.char.unpack(c1)); - - const c2 = value.char.pack('😀'); - try std.testing.expect(value.char.check(c2)); - try std.testing.expectEqual('😀', value.char.unpack(c2)); -} - -test "misc" { - const f = value.boole.pack(false); - try std.testing.expect(value.boole.check(f)); - try std.testing.expectEqual(false, value.boole.unpack(f)); - try std.testing.expect(value.boole.unpack(value.boole.pred(f))); - - const t = value.boole.pack(true); - try std.testing.expect(value.boole.check(t)); - try std.testing.expectEqual(true, value.boole.unpack(t)); - try std.testing.expect(value.boole.unpack(value.boole.pred(t))); - - const nil = value.nil.get(); - try std.testing.expect(value.nil.check(nil)); - try std.testing.expect(value.boole.unpack(value.nil.pred(nil))); - - const eof = value.eof.get(); - try std.testing.expect(value.eof.check(eof)); - try std.testing.expect(value.boole.unpack(value.eof.pred(eof))); -} - -test "pair" { - const v1 = value.fixnum.pack(1); - const v2 = value.fixnum.pack(2); - - const v3 = value.fixnum.pack(3); - const v4 = value.fixnum.pack(4); - - const p = value.pair.cons(v1, v2); - try std.testing.expect(value.pair.check(p)); - try std.testing.expect(value.boole.unpack(value.pair.pred(p))); - - const car = value.pair.car(p); - const cdr = value.pair.cdr(p); - try std.testing.expectEqual(1, value.fixnum.unpack(car)); - try std.testing.expectEqual(2, value.fixnum.unpack(cdr)); - - value.pair.setcar(p, v3); - value.pair.setcdr(p, v4); - - const car2 = value.pair.car(p); - const cdr2 = value.pair.cdr(p); - try std.testing.expectEqual(3, value.fixnum.unpack(car2)); - try std.testing.expectEqual(4, value.fixnum.unpack(cdr2)); -} - -test "istr" { - const istr = value.istr; - const fx = value.fixnum; - - const s1 = "foo bar baz"; - const v1 = istr.intern(s1, false); - const v1_len: usize = @intCast(fx.unpack(istr.len(v1))); - try std.testing.expectEqualStrings(s1, istr.getHeader(v1).bytes()); - try std.testing.expectEqual(s1.len, v1_len); - - const file = try std.fs.cwd().openFile("test-data/string.txt", .{}); - defer file.close(); - var s2_buf: [4096]u8 = undefined; - const s2_len = try file.readAll(&s2_buf); - var s2: []u8 = s2_buf[0..s2_len]; - const v2 = istr.intern(s2, false); - const v2_len: usize = @intCast(fx.unpack(istr.len(v2))); - var s2_orig_buf: [4096]u8 = undefined; - @memcpy(&s2_orig_buf, &s2_buf); - const s2_orig = s2_orig_buf[0..s2_len]; - s2[0] = s2[0] +% 1; - try std.testing.expectEqualStrings(s2_orig, istr.getHeader(v2).bytes()); - try std.testing.expectEqual(s2_len, v2_len); -} - -fn parseString(str: []const u8) Value { - var fbs = std.io.fixedBufferStream(str); - return io.parser.parse(fbs.reader().any()); -} - -test "parse" { - const val = parseString("\"foo\""); - - try std.testing.expect(value.sstr.check(val)); - - const s = value.sstr.unpack(val); - try std.testing.expectEqualStrings("foo", s.slice()); -} - -test "parse2" { - const val = parseString( - \\ ;; Testing some crazy datum comments - \\ ;~"bar"([x #"y"]{##`,'z}) #"foo" - \\ ;; end - ); - - const r = value.rune.unpack(value.pair.car(val)); - try std.testing.expectEqualStrings("HASH", r.slice()); - - const s = value.pair.cdr(val); - try std.testing.expect(value.sstr.check(s)); - - const f = value.sstr.unpack(s); - try std.testing.expectEqualStrings("foo", f.slice()); -} - -test "parse3" { - const val = parseString( - \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) - ); - - const car = value.pair.car; - const cdr = value.pair.cdr; - - const e1 = car(val); - const e2 = car(cdr(val)); - const e3 = car(cdr(cdr(val))); - const e4 = car(cdr(cdr(cdr(val)))); - - try std.testing.expect(value.sstr.check(e1)); - try std.testing.expect(value.rune.check(e2)); - try std.testing.expect(value.pair.check(e3)); - try std.testing.expect(value.pair.check(e4)); -} - -test "parse4" { - const val = parseString("(foo . ;~x bar ;~y)"); - - const s = value.sstr.unpack(value.pair.car(val)); - try std.testing.expectEqualStrings("foo", s.slice()); - - const f = value.sstr.unpack(value.pair.cdr(val)); - try std.testing.expectEqualStrings("bar", f.slice()); -} - -test "unparse" { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - var out: std.ArrayList(u8) = .init(gpa.allocator()); - - const w = out.writer(); - const v = parseString("#foo"); - try io.unparser.unparse(w, v); - try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); -} - -test "unparse2" { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - var out: std.ArrayList(u8) = .init(gpa.allocator()); - - const w = out.writer(); - const v = parseString("#{foo bar['x]}"); - try io.unparser.unparse(w, v); - try std.testing.expectEqualStrings( - "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", - try out.toOwnedSlice(), - ); -} - -fn writeParseResult(str: []const u8) !void { - const w = std.io.getStdErr().writer(); - const v = parseString(str); - try io.unparser.unparse(w, v); - try w.writeByte('\n'); -} - -test "unparse3" { - try writeParseResult("#{foo bar['x](y)(z)}"); -} - -test "unparse4" { - try writeParseResult("(foo ;~bar)"); -} - -test "unparse5" { - try writeParseResult("(;~foo foo ;~bar . ;~bar bar ;~bar)"); -} - -test "unparse6" { - try writeParseResult("(foo bar ... baz bat.(qux))"); -} - -test "unparse7" { - try writeParseResult("#`(#,(->keyword (syntax->datum #'sym)) . in)"); -} - -fn parseBench(path: []const u8, iters: usize) !void { - const file = try std.fs.cwd().openFile(path, .{}); - defer file.close(); - - var fb_alloc: std.mem.Allocator = undefined; - var stack_sfa: io.parser.DefaultStackSfa = undefined; - var chars_sfa: io.parser.DefaultCharsSfa = undefined; - var parser = try io.parser.default(&fb_alloc, &stack_sfa, &chars_sfa); - defer parser.deinit(); - defer io.parser.deinit(&fb_alloc); - - var timer = try std.time.Timer.start(); - for (0..iters) |i| { - _ = i; - var br = std.io.bufferedReader(file.reader()); - const reader = br.reader().any(); - // const reader = file.reader().any(); - var v: Value = undefined; - while (true) { - // std.debug.print("hihi {s}\n", .{path}); - v = parser.run(reader) catch |e| { - std.debug.print("\nfile pos: {}\n", .{ - try file.getPos(), - }); - return e; - }; - // try io.unparser.unparse(std.io.getStdOut().writer().any(), v); - if (value.eof.check(v)) { - break; - } - } - try file.seekTo(0); - } - const ns: f64 = @floatFromInt(timer.lap()); - const secs = ns / 1_000_000_000; - std.debug.print( - "parse {s} x {}: {d:.3}s\n", - .{ path, iters, secs }, - ); -} - -test "parse bench" { - try parseBench("test-data/parser-test-1.scm", 200); - try parseBench("test-data/parser-test-2.scm", 800); - try parseBench("test-data/parser-torture.scm", 1); -} diff --git a/src/libzisp/gc.zig b/src/libzisp/gc.zig deleted file mode 100644 index 815e203..0000000 --- a/src/libzisp/gc.zig +++ /dev/null @@ -1,54 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("value.zig"); - -const seq = value.seq; - -const Value = value.Value; - -/// A "heap value" that could be a Value or object header. -pub const Hval = union { - value: Value, - seq_header: seq.Header, -}; - -var _debugAlloc = switch (builtin.mode) { - .Debug => std.heap.DebugAllocator(.{}).init, - else => undefined, -}; -const alloc = switch (builtin.mode) { - .Debug => _debugAlloc.allocator(), - else => std.heap.smp_allocator, -}; - -// Cons cells - -var cons_pool = std.heap.MemoryPool([2]Value).init(alloc); - -pub fn cons(v1: Value, v2: Value) *[2]Value { - const mem = cons_pool.create() catch @panic("OOM"); - mem[0] = v1; - mem[1] = v2; - return mem; -} - -// Interned strings - -var istr_pool = std.hash_map.StringHashMap(void).init(alloc); - -pub fn intern(header: seq.Header, str: []const u8) *seq.Header { - const hs = @sizeOf(seq.Header); - const size = str.len + hs; - const copy = alloc.alloc(u8, size) catch @panic("OOM"); - const header_bytes: [hs]u8 = @bitCast(header); - @memcpy(copy[0..hs], &header_bytes); - @memcpy(copy[hs..size], str); - const entry = istr_pool.getOrPutValue(copy, {}) catch @panic("OOM"); - return @ptrCast(entry.key_ptr); -} - -pub fn istrHeader(ptr: *Hval) *seq.Header { - const entry_key: *[]u8 = @ptrCast(ptr); - return @alignCast(@ptrCast(entry_key.ptr)); -} diff --git a/src/libzisp/io.zig b/src/libzisp/io.zig deleted file mode 100644 index 8adf495..0000000 --- a/src/libzisp/io.zig +++ /dev/null @@ -1,12 +0,0 @@ -pub const parser = @import("io/parser.zig"); -pub const unparser = @import("io/unparser.zig"); - -pub const decoder = @import("io/decoder.zig"); -pub const encoder = @import("io/encoder.zig"); - -pub const reader = @import("io/reader.zig"); -pub const writer = @import("io/writer.zig"); - -pub const parse = parser.parse; - -// pub const defaultUnparser = ... diff --git a/src/libzisp/io/Parser.zig b/src/libzisp/io/Parser.zig deleted file mode 100644 index 6684104..0000000 --- a/src/libzisp/io/Parser.zig +++ /dev/null @@ -1,870 +0,0 @@ -// === Syntax === -// -// See docs/parser.md and spec/syntax.bnf to understand the syntax. -// -// -// === Trampolining strategy === -// -// Instead of using recursion directly, the parser is written in a "trampoline" -// style, which ensures that parse depth is not limited by stack size. -// -// All state and context is passed around via a struct pointer. The parser has -// a main loop, which calls a function as dictated by parser.context.next, and -// the function may update the state to have another function called next. -// -// If a function wants to call a recursive subroutine, it pushes some of the -// current context onto a stack, including what function the subroutine should -// return to, and then updates the state to instruct the main loop to call one -// of the entry point subroutines. -// -// If a function wants to make the parser return, either from a subroutine or -// from the main loop, it sets the .result field, and tries to pop the saved -// context. If the context stack was empty, the main loop returns. -// -// -// === Buffering === -// -// For efficiency, call the parser on an input stream with implicit buffering. -// -// The parser does not use its own buffer, beyond one character that may be -// written back into the unread_char field, which is checked at the end to -// ensure it's nothing other than a trailing blank or comment. -// -// This lack of buffering is to ensure that the parser never reads more bytes -// from the input than what it needs to parse a datum. Consider the input: -// -// (a b c) (x y z) -// -// If we used proper buffering, like reading up to 4K bytes per read, then the -// whole stream would be consumed at once before it's parsed. Then, the parser -// would return the first datum, and the rest of the stream would be lost. The -// parser would need some way to reset the input stream's read head to the end -// of the first datum, but not all stream types may support this. -// -// Although in a scenario like this it would be wise to create a single Parser -// instance to call multiple times (in which case it could continue using its -// internal buffer from where it left), this may not always be practical. -// -// One could even have an input stream with Zisp s-expressions and other data -// intertwined, in which case this lack of buffering is also crucial, since one -// needs to alternate between calling the Zisp parser and some other parser on -// the same input stream. -// - -const builtin = @import("builtin"); -const std = @import("std"); - -const lib = @import("../lib.zig"); -const value = @import("../value.zig"); - -const List = std.ArrayListUnmanaged; - -const Value = value.Value; - -const Parser = @This(); - -const is_test = builtin.is_test; -const is_debug = builtin.mode == .Debug; -const detailed_debug = false; - -// zig fmt: off -const DOT = value.rune.pack("DOT"); -const COLON = value.rune.pack("COLON"); -const JOIN = value.rune.pack("JOIN"); -const LABEL = value.rune.pack("LABEL"); -const HASH = value.rune.pack("HASH"); -const QUOTE = value.rune.pack("QUOTE"); -const GRAVE = value.rune.pack("GRAVE"); -const COMMA = value.rune.pack("COMMA"); -const SQUARE = value.rune.pack("SQUARE"); -const BRACE = value.rune.pack("BRACE"); -const VOID = value.rune.packForced(""); -const LSTAIL = value.sstr.pack("."); -// zig fmt: on - -// We could implement an optimization where we swap in a dummy cons when the -// parser is handling a commented-out datum, but this would require changes to -// the algorithm and doesn't seem very important, so it's not implemented. - -const Cons = *const fn (v1: Value, v2: Value) Value; - -fn dummyCons(v1: Value, v2: Value) Value { - _ = v1; - _ = v2; - return value.undef; -} - -const real_cons = &value.pair.cons; -const fake_cons = &dummyCons; - -pub const ParseError = enum { - InvalidCharacter, - UnclosedString, - UnexpectedEof, - UnicodeError, - RuneTooLong, - OutOfMemory, - OutOfRange, - ReadError, -}; - -pub const Context = struct { - // What to do next. - next: ?Fn = undefined, - // For storing a context value, like accumulated list elements. - val: Value = undefined, - // For storing a context char, like list opening bracket. - char: u8 = undefined, -}; - -// parameters -stack_alloc: std.mem.Allocator, -chars_alloc: std.mem.Allocator, - -// state -input: std.io.AnyReader = undefined, -context: Context = .{}, -stack: List(Context) = undefined, -chars: List(u8) = undefined, -cons: Cons = real_cons, -result: Value = undefined, -unread_char: ?u8 = null, -err_msg: []const u8 = undefined, - -pub fn init( - stack_alloc: std.mem.Allocator, - chars_alloc: std.mem.Allocator, - init_stack_mem: usize, - init_chars_mem: usize, -) !Parser { - var p: Parser = .{ - .stack_alloc = stack_alloc, - .chars_alloc = chars_alloc, - }; - const csize = @sizeOf(Context); - p.stack = try .initCapacity(stack_alloc, init_stack_mem / csize); - p.chars = try .initCapacity(chars_alloc, init_chars_mem); - return p; -} - -pub fn deinit(p: *Parser) void { - p.stack.deinit(p.stack_alloc); - p.chars.deinit(p.chars_alloc); -} - -// -// Input reading & unreading -// - -fn read(p: *Parser) !?u8 { - if (is_debug and p.unread_char != null) { - @panic("Called read() while there was an unused character!"); - } - const c = p.input.readByte() catch |e| switch (e) { - error.EndOfStream => return null, - else => return p.err(.ReadError, "???"), - }; - if (detailed_debug) { - std.debug.print("{c}", .{c}); - } - return c; -} - -fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { - return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg); -} - -fn unread(p: *Parser, c: u8) void { - p.unread_char = c; -} - -fn getUnread(p: *Parser) ?u8 { - const c = p.unread_char orelse return null; - p.unread_char = null; - return c; -} - -// -// String accumulation & creation -// - -fn addChar(p: *Parser, c: u8) !void { - try p.chars.append(p.chars_alloc, c); -} - -fn getBareString(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return if (p.chars.items.len <= 6) - value.sstr.pack(p.chars.items) - else - value.istr.intern(p.chars.items, false); -} - -fn getQuotedString(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return if (p.chars.items.len <= 6) - value.sstr.packQuoted(p.chars.items) - else - value.istr.intern(p.chars.items, true); -} - -fn getRune(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return value.rune.pack(p.chars.items); -} - -// -// Stack management & control flow -// - -const Fn = enum { - parseUnit, - parseDatum, - endUnit, - returnContext, - endFirstDatum, - endJoinDatum, - parseJoin, - parseHashDatum, - endHashDatum, - parseRuneDatum, - endRuneDatum, - endLabelDatum, - continueList, - endImproperList, - closeImproperList, - endQuoteExpr, -}; - -fn call(p: *Parser, f: Fn) !void { - try switch (f) { - .parseUnit => parseUnit(p), - .parseDatum => parseDatum(p), - .endUnit => endUnit(p), - .returnContext => returnContext(p), - .endFirstDatum => endFirstDatum(p), - .endJoinDatum => endJoinDatum(p), - .parseJoin => parseJoin(p), - .parseHashDatum => parseHashDatum(p), - .endHashDatum => endHashDatum(p), - .parseRuneDatum => parseRuneDatum(p), - .endRuneDatum => endRuneDatum(p), - .endLabelDatum => endLabelDatum(p), - .continueList => continueList(p), - .endImproperList => endImproperList(p), - .closeImproperList => closeImproperList(p), - .endQuoteExpr => endQuoteExpr(p), - }; -} - -pub fn run(p: *Parser, input: std.io.AnyReader) !Value { - p.input = input; - p.context.next = .parseUnit; - while (p.context.next) |next| { - if (detailed_debug) printStack(p); - try p.call(next); - } - if (p.unread_char) |_| { - return p.err(.InvalidCharacter, "top-level"); - } - return p.result; -} - -fn printStack(p: *Parser) void { - const stack = p.stack.items; - std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{ - stack.len, - p.context.next, - p.context.char, - p.unread_char orelse '_', - }); - if (stack.len > 0) { - var i = stack.len; - while (i > 0) : (i -= 1) { - const prev = stack[i - 1]; - std.debug.print("{}:{any} ctx:'{c}'\n", .{ - i - 1, - prev.next, - prev.char, - }); - } - } -} - -fn err( - p: *Parser, - comptime e: ParseError, - comptime msg: []const u8, -) error{ParseError} { - p.err_msg = @tagName(e) ++ " at: " ++ msg; - return error.ParseError; -} - -fn push(p: *Parser, next: Fn) !void { - try p.stack.append(p.stack_alloc, .{ .next = next }); -} - -fn pushContext(p: *Parser, next: Fn) !void { - try p.stack.append(p.stack_alloc, .{ - .next = next, - .val = p.context.val, - .char = p.context.char, - }); -} - -fn ret(p: *Parser) void { - if (p.stack.pop()) |c| { - p.context = c; - } else { - p.context.next = null; - } -} - -fn subr(p: *Parser, start: Fn, next: Fn) !void { - try p.pushContext(next); - p.context.next = start; -} - -fn jump(p: *Parser, next: Fn, val: ?Value) void { - if (val) |v| p.result = v; - p.context.next = next; -} - -fn abort(p: *Parser, next: Fn, unread_c: u8) void { - p.result = VOID; - p.unread_char = unread_c; - p.context.next = next; -} - -fn retval(p: *Parser, val: Value) void { - p.result = val; - p.ret(); -} - -// -// Start parser functions -// - -fn parseUnit(p: *Parser) !void { - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => try p.push(.parseUnit), - .no => { - p.unread(c); - return p.subr(.parseDatum, .endUnit); - }, - } - } - return p.retval(value.eof.eof); -} - -fn endUnit(p: *Parser) !void { - const c = p.getUnread() orelse return p.ret(); - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => return skipUnitAndReturn(p), - .no => p.unread(c), - } - return p.ret(); -} - -fn skipUnitAndReturn(p: *Parser) !void { - p.context.val = p.result; - return p.subr(.parseUnit, .returnContext); -} - -fn returnContext(p: *Parser) !void { - return p.retval(p.context.val); -} - -fn parseDatum(p: *Parser) !void { - return parseOneDatum(p, p.getUnread().?, .endFirstDatum); -} - -fn endFirstDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.ret(); - } - return parseJoin(p); -} - -fn parseJoin(p: *Parser) !void { - const c = p.getUnread() orelse try p.read() orelse return p.ret(); - switch (c) { - '.', ':' => { - p.context.char = c; - p.unread(try p.readNoEof("join datum")); - }, - else => { - p.context.char = 0; - p.unread(c); - }, - } - p.context.val = p.result; - return p.subr(.parseDatum, .endJoinDatum); -} - -fn endJoinDatum(p: *Parser) !void { - const prev = p.context.val; - const join = p.context.char; - if (p.result.eq(VOID)) { - if (join == 0) { - return p.retval(prev); - } else { - return p.err(.InvalidCharacter, "join datum"); - } - } - const rune = switch (join) { - 0 => JOIN, - '.' => DOT, - ':' => COLON, - else => unreachable, - }; - const joined = p.cons(rune, p.cons(prev, p.result)); - return p.jump(.parseJoin, joined); -} - -fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void { - if (isBareChar(c) or c == '.') { - return p.jump(next, try parseBareString(p, c)); - } - return parseCladDatum(p, c, next); -} - -fn parseBareString(p: *Parser, c: u8) !Value { - const allow_dots = std.ascii.isDigit(c) or switch (c) { - '.', '+', '-' => true, - else => false, - }; - try p.addChar(c); - return parseBareStringRest(p, allow_dots); -} - -fn parseBareEscString(p: *Parser) !Value { - try p.addChar(try parseBareEsc(p)); - return parseBareStringRest(p, false); -} - -fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value { - while (try p.read()) |c| { - if (isBareChar(c) or (allow_dots and c == '.')) { - try p.addChar(c); - } else if (c == '\\') { - try p.addChar(try parseBareEsc(p)); - } else { - p.unread(c); - break; - } - } - return p.getBareString(); -} - -fn parseBareEsc(p: *Parser) !u8 { - const c = try p.readNoEof("bare escape"); - if (isBareEsc(c)) { - return c; - } else { - return p.err(.InvalidCharacter, "bare escape"); - } -} - -fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { - if (c == '\\') { - return p.jump(next, try parseBareEscString(p)); - } - if (c == '"') { - return p.jump(next, try parseQuotedString(p, '"')); - } - if (c == '|') { - return p.jump(next, try parseQuotedString(p, '|')); - } - return switch (c) { - '#' => parseHashExpression(p, next), - '(', '[', '{' => parseList(p, c, next), - '\'', '`', ',' => parseQuoteExpr(p, c, next), - else => p.abort(next, c), - }; -} - -fn parseQuotedString(p: *Parser, close: u8) !Value { - while (try p.read()) |c| { - if (c == close) { - return p.getQuotedString(); - } - if (c != '\\') { - try p.addChar(c); - } else { - try parseQuotedEsc(p, close); - } - } - return error.UnclosedString; -} - -fn parseQuotedEsc(p: *Parser, close: u8) !void { - const c = try p.readNoEof("quoted escape"); - if (c == close) { - return p.addChar(close); - } - if (c == 'u') { - return parseUniHexHandleErrors(p); - } - try p.addChar(switch (c) { - '\\' => c, - '0' => 0, - 'a' => 7, - 'b' => 8, - 't' => 9, - 'n' => 10, - 'v' => 11, - 'f' => 12, - 'r' => 13, - 'e' => 27, - 'x' => try parseHexByte(p, "hex escape"), - else => return p.err(.InvalidCharacter, "quoted escape"), - }); -} - -fn parseUniHexHandleErrors(p: *Parser) !void { - return parseUniHex(p) catch |e| switch (e) { - error.Utf8CannotEncodeSurrogateHalf => p.err( - .UnicodeError, - "unicode escape", - ), - else => e, - }; -} - -fn parseUniHex(p: *Parser) !void { - const msg = "unicode escape"; - - if (try p.readNoEof(msg) != '{') { - return p.err(.InvalidCharacter, msg); - } - - const uc = try parseHex(p, u21, msg); - const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); - if (c != '}') { - return p.err(.InvalidCharacter, msg); - } - - const n = try std.unicode.utf8CodepointSequenceLength(uc); - const buf = try p.chars.addManyAsSlice(p.chars_alloc, n); - _ = try std.unicode.utf8Encode(uc, buf); -} - -fn parseHashExpression(p: *Parser, next: Fn) !void { - const c = try p.readNoEof("hash expression"); - if (try checkBlanks(p, c) != .no) { - return p.err(.InvalidCharacter, "hash expression"); - } - if (std.ascii.isAlphabetic(c)) { - const r = try parseRune(p, c); - return parseRuneEnd(p, r, next); - } - if (c == '%') { - const l = try parseLabel(p); - return parseLabelEnd(p, l, next); - } - if (isBareChar(c)) { - // Reserved for future extensions to syntax sugar. - return p.err(.InvalidCharacter, "hash expression"); - } - // fast-path to avoid subr - if (c == '\\') { - return p.jump(next, p.cons(HASH, try parseBareEscString(p))); - } - if (c == '"') { - return p.jump(next, p.cons(HASH, try parseQuotedString(p, '"'))); - } - if (c == '|') { - return p.jump(next, p.cons(HASH, try parseQuotedString(p, '|'))); - } - p.unread(c); - return p.subr(.parseHashDatum, next); -} - -fn parseHashDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endHashDatum); -} - -fn endHashDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "hash datum"); - } - return p.retval(p.cons(HASH, p.result)); -} - -fn parseRune(p: *Parser, c1: u8) !Value { - try p.addChar(c1); - var len: usize = 1; - while (try p.read()) |c| : (len += 1) { - if (len == 6 or !std.ascii.isAlphanumeric(c)) { - p.unread(c); - return p.getRune(); - } - try p.addChar(c); - } - return p.getRune(); -} - -fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { - const c = p.getUnread() orelse return p.jump(next, r); - if (c == '\\') { - return p.jump(next, p.cons(r, try parseBareString(p, c))); - } - if (c == '"') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '"'))); - } - if (c == '|') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '|'))); - } - p.unread(c); - switch (c) { - '#', '(', '[', '{', '\'', '`', ',' => { - try p.push(next); - p.context.val = r; - // Use jump to prevent recursion. - return p.jump(.parseRuneDatum, null); - }, - else => return p.jump(next, r), - } -} - -fn parseRuneDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endRuneDatum); -} - -fn endRuneDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - p.retval(p.context.val); - } - return p.retval(p.cons(p.context.val, p.result)); -} - -fn parseLabel(p: *Parser) !Value { - const label = try parseHex(p, u48, "datum label"); - return value.fixnum.pack(label); -} - -fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void { - const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label"); - if (c == '%') { - return p.jump(next, p.cons(LABEL, l)); - } - if (c == '=') { - try p.push(next); - p.context.val = l; - return p.subr(.parseDatum, .endLabelDatum); - } - return p.err(.InvalidCharacter, "datum label"); -} - -fn endLabelDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "label datum"); - } - return p.retval(p.cons(LABEL, p.cons(p.context.val, p.result))); -} - -fn parseList(p: *Parser, open: u8, next: Fn) !void { - const head = switch (open) { - '(' => value.nil.nil, - '[' => p.cons(SQUARE, value.nil.nil), - '{' => p.cons(BRACE, value.nil.nil), - else => unreachable, - }; - const close: u8 = switch (open) { - '(' => ')', - '[' => ']', - '{' => '}', - else => unreachable, - }; - while (try p.read()) |c| { - if (c == close) { - return p.jump(next, head); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => { - try listParserSetup(p, head, close, next); - return p.subr(.parseUnit, .parseUnit); - }, - .no => { - try listParserSetup(p, head, close, next); - p.unread(c); - return p.jump(.parseDatum, null); - }, - } - } - return p.err(.UnexpectedEof, "list"); -} - -fn listParserSetup(p: *Parser, head: Value, close: u8, next: Fn) !void { - try p.push(next); - p.context.val = head; - p.context.char = close; - try p.pushContext(.continueList); -} - -fn continueList(p: *Parser) !void { - const close = p.context.char; - - if (p.result.eq(VOID)) { - const c = p.getUnread().?; - if (c == close) { - return endList(p); - } - return p.err(.InvalidCharacter, "list"); - } - - if (p.result.eq(LSTAIL)) { - return p.subr(.parseUnit, .endImproperList); - } - - p.context.val = p.cons(p.result, p.context.val); - - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - if (c == close) { - return endList(p); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => { - try p.pushContext(.continueList); - return p.subr(.parseUnit, .parseUnit); - }, - .no => { - p.unread(c); - return p.subr(.parseDatum, .continueList); - }, - } - } - return p.err(.UnexpectedEof, "list"); -} - -fn endList(p: *Parser) !void { - return p.retval(lib.list.reverse(p.context.val)); -} - -fn endImproperList(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "list tail"); - } - p.context.val = lib.list.reverseWithTail(p.context.val, p.result); - return closeImproperList(p); -} - -fn closeImproperList(p: *Parser) !void { - const result = p.context.val; - const close = p.context.char; - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - if (c == close) { - return p.retval(result); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => return p.subr(.parseUnit, .closeImproperList), - .no => return p.err(.InvalidCharacter, "after list tail"), - } - } - return p.err(.UnexpectedEof, "after list tail"); -} - -fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void { - const q = switch (c1) { - '\'' => QUOTE, - '`' => GRAVE, - ',' => COMMA, - else => unreachable, - }; - - // fast-path to avoid subr - const c = try p.readNoEof("quote expression"); - if (isBareChar(c) or c == '\\') { - return p.jump(next, p.cons(q, try parseBareString(p, c))); - } - - try p.push(next); - p.context.val = q; - p.unread(c); - return p.subr(.parseDatum, .endQuoteExpr); -} - -fn endQuoteExpr(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "quote expression datum"); - } - return p.retval(p.cons(p.context.val, p.result)); -} - -// Helpers - -fn checkBlanks(p: *Parser, c: u8) !enum { yes, skip_unit, no } { - switch (c) { - '\t'...'\r', ' ' => return .yes, - ';' => { - const c2 = try p.read() orelse return .yes; - if (c2 == '\n') return .yes; - if (c2 == '~') return .skip_unit; - while (try p.read()) |c3| if (c3 == '\n') break; - return .yes; - }, - else => return .no, - } -} - -fn isBareChar(c: u8) bool { - return switch (c) { - // zig fmt: off - 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' , - '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true, - // zig fmt: on - else => false, - }; -} - -fn isBareEsc(c: u8) bool { - return switch (c) { - 33...126 => true, - else => false, - }; -} - -fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { - var uc: T = undefined; - - const c1 = try p.readNoEof(emsg); - uc = try parseHexDigit(p, c1, emsg); - - while (try p.read()) |c| { - if (!std.ascii.isHex(c)) { - p.unread(c); - return uc; - } - const shl = std.math.shlExact; - uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg); - uc |= try parseHexDigit(p, c, emsg); - } - return uc; -} - -fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 { - const h1 = try p.readNoEof(emsg); - const h2 = try p.readNoEof(emsg); - const hi = try parseHexDigit(p, h1, emsg); - const lo = try parseHexDigit(p, h2, emsg); - return hi << 4 | lo; -} - -fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 { - return switch (c) { - '0'...'9' => c - '0', - 'A'...'F' => c - 'A' + 10, - 'a'...'f' => c - 'a' + 10, - else => p.err(.InvalidCharacter, emsg), - }; -} diff --git a/src/libzisp/io/decoder.zig b/src/libzisp/io/decoder.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/decoder.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/io/encoder.zig b/src/libzisp/io/encoder.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/encoder.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig deleted file mode 100644 index d004c91..0000000 --- a/src/libzisp/io/parser.zig +++ /dev/null @@ -1,80 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("../value.zig"); - -const Parser = @import("Parser.zig"); - -const Value = value.Value; - -const is_test = builtin.is_test; -const is_debug = builtin.mode == .Debug; - -// In debug, we want to see if we leak, so very small numbers. -const def_init_stack_mem = (if (is_debug) 2 else 32) * @sizeOf(Parser.Context); -const def_init_chars_mem = (if (is_debug) 2 else 2048); - -pub const DefaultStackSfa = std.heap.StackFallbackAllocator(def_init_stack_mem); -pub const DefaultCharsSfa = std.heap.StackFallbackAllocator(def_init_chars_mem); - -pub fn default( - fb_alloc: *std.mem.Allocator, - stack_sfa: *DefaultStackSfa, - chars_sfa: *DefaultCharsSfa, -) !Parser { - if (!is_test and is_debug) { - fb_alloc.* = std.heap.DebugAllocator(.{}).init; - } - - const alloc = if (is_test) - std.testing.allocator - else if (is_debug) - fb_alloc.allocator() - else - std.heap.smp_allocator; - - stack_sfa.* = std.heap.stackFallback(def_init_stack_mem, alloc); - chars_sfa.* = std.heap.stackFallback(def_init_chars_mem, alloc); - - return Parser.init( - stack_sfa.get(), - chars_sfa.get(), - def_init_stack_mem, - def_init_chars_mem, - ); -} - -pub fn deinit(fb_alloc: *std.mem.Allocator) void { - if (!is_test and is_debug) { - if (fb_alloc.?.deinit() == .leak) { - @panic("parser leaked"); - } - } -} - -pub fn parse(input: std.io.AnyReader) Value { - var fb_alloc: std.mem.Allocator = undefined; - var stack_sfa: DefaultStackSfa = undefined; - var chars_sfa: DefaultCharsSfa = undefined; - var p = default(&fb_alloc, &stack_sfa, &chars_sfa) catch @panic("OOM"); - defer p.deinit(); - return p.run(input) catch { - if (p.unread_char) |c| { - std.debug.panic( - "Parse error: {s}, unread_char: 0x{x}\n", - .{ p.err_msg, c }, - ); - } else { - std.debug.panic("Parse error: {s}\n", .{p.err_msg}); - } - }; -} - -pub fn _parse(input: std.io.AnyReader) !Value { - var fb_alloc: std.mem.Allocator = undefined; - var stack_sfa: DefaultStackSfa = undefined; - var chars_sfa: DefaultCharsSfa = undefined; - var p = try default(&fb_alloc, &stack_sfa, &chars_sfa); - defer p.deinit(); - return p.run(input); -} diff --git a/src/libzisp/io/reader.zig b/src/libzisp/io/reader.zig deleted file mode 100644 index 3465cb3..0000000 --- a/src/libzisp/io/reader.zig +++ /dev/null @@ -1,10 +0,0 @@ -// See parse.zig for details. - -const parser = @import("parser.zig"); -const decoder = @import("decoder.zig"); - -const Value = @import("../value.zig").Value; - -pub fn readCode(input: []const u8) Value { - return decoder.decode(parser.parse(input, .code)); -} diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig deleted file mode 100644 index d703182..0000000 --- a/src/libzisp/io/unparser.zig +++ /dev/null @@ -1,122 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const istr = value.istr; -const seq = value.seq; - -const ShortString = value.ShortString; -const OtherTag = value.OtherTag; -const Value = value.Value; - -pub fn unparse(w: anytype, v: Value) anyerror!void { - try if (value.double.check(v)) - unparseDouble(w, v) - else if (value.fixnum.check(v)) - unparseFixnum(w, v) - else if (value.ptr.checkZisp(v)) - unparseHeap(w, v) - else - unparseOther(w, v); -} - -fn unparseDouble(w: anytype, v: Value) !void { - _ = w; - _ = v; - @panic("not implemented"); -} - -fn unparseFixnum(w: anytype, v: Value) !void { - _ = w; - _ = v; - @panic("not implemented"); -} - -fn unparseHeap(w: anytype, v: Value) !void { - const p, const t = value.ptr.unpack(v); - try switch (t) { - .pair => unparsePair(w, @ptrCast(p)), - .seq => unparseSeq(w, @ptrCast(p)), - else => @panic("not implemented"), - }; -} - -fn unparseOther(w: anytype, v: Value) !void { - try switch (v.other.tag) { - .rune => unparseRune(w, v), - .sstr => unparseSstr(w, v), - .qstr => unparseQstr(w, v), - .char => unparseChar(w, v), - .misc => unparseMisc(w, v), - }; -} - -fn unparseRune(w: anytype, v: Value) !void { - const name = value.rune.unpack(v); - try w.writeByte('#'); - try w.writeAll(name.constSlice()); -} - -fn unparseSstr(w: anytype, v: Value) !void { - const str = value.sstr.unpack(v); - try w.writeAll(str.constSlice()); -} - -fn unparseQstr(w: anytype, v: Value) !void { - const str = value.sstr.unpack(v); - try w.writeByte('"'); - try w.writeAll(str.constSlice()); - try w.writeByte('"'); -} - -fn unparseChar(w: anytype, v: Value) !void { - var buf: [4]u8 = undefined; - const len = try std.unicode.utf8Encode(v.char.value, &buf); - try w.writeAll(buf[0..len]); -} - -fn unparseMisc(w: anytype, v: Value) !void { - try switch (v.misc.value) { - .f => w.writeAll("#f"), - .t => w.writeAll("#t"), - .nil => w.writeAll("()"), - .eof => w.writeAll("#eof"), - .undef => w.writeAll("#undef"), - }; -} - -fn unparsePair(w: anytype, p: *[2]Value) !void { - try w.writeByte('('); - try unparse(w, p[0]); - var cdr = p[1]; - while (value.pair.check(cdr)) : (cdr = value.pair.cdr(cdr)) { - try w.writeByte(' '); - try unparse(w, value.pair.car(cdr)); - } - if (!value.nil.check(cdr)) { - try w.writeByte(' '); - try w.writeByte('.'); - try w.writeByte(' '); - try unparse(w, cdr); - } - try w.writeByte(')'); -} - -fn unparseSeq(w: anytype, p: *seq.Header) !void { - const h = istr.getHeaderFromPtr(@ptrCast(p)); - switch (h.type) { - .string => try unparseString(w, h), - else => @panic("not implemented"), - } -} - -fn unparseString(w: anytype, h: *seq.Header) !void { - const info = h.info.string; - if (info.quoted) { - try w.writeByte('"'); - } - try w.writeAll(h.bytes()); - if (info.quoted) { - try w.writeByte('"'); - } -} diff --git a/src/libzisp/io/writer.zig b/src/libzisp/io/writer.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/writer.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/lib.zig b/src/libzisp/lib.zig deleted file mode 100644 index 7752110..0000000 --- a/src/libzisp/lib.zig +++ /dev/null @@ -1 +0,0 @@ -pub const list = @import("lib/list.zig"); diff --git a/src/libzisp/lib/list.zig b/src/libzisp/lib/list.zig deleted file mode 100644 index 9d6a6bc..0000000 --- a/src/libzisp/lib/list.zig +++ /dev/null @@ -1,20 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub fn reverse(list: Value) Value { - return reverseWithTail(list, value.nil.nil); -} - -pub fn reverseWithTail(list: Value, tail: Value) Value { - var head = list; - var result = tail; - while (!value.nil.check(head)) { - value.pair.assert(head); - const car = value.pair.car(head); - const cdr = value.pair.cdr(head); - result = value.pair.cons(car, result); - head = cdr; - } - return result; -} diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig deleted file mode 100644 index fbe907a..0000000 --- a/src/libzisp/value.zig +++ /dev/null @@ -1,325 +0,0 @@ -// -// === NaN Packing Strategy === -// -// Format of a double, in Zig least-to-most significant field order: -// -// { fraction: u52, exponent: u11, sign: u1 } -// -// When the exponent bits are all set, it's either a NaN or an Infinity. -// -// For value packing, almost all remaining 53 bits are available, giving us -// about 2^53 values, except for the four following bit patterns: -// -// *** FORBIDDEN VALUES *** -// -// 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 } -// -// 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 } -// -// 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 } -// -// 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 } -// -// The abbreviation "cqNaN" stands for canonical quiet NaN. -// -// Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest -// being zero. The fraction MSb is also called the is_quiet flag, because it -// demarcates quiet NaNs. The rest being zero makes it the canonical qNaN. -// -// The positive and negative cqNaN are the *only* NaN values that can actually -// be returned by any FP operations, which is why we don't use them to pack -// values; we want to be able to represent NaN in Zisp as a double. -// -// Beyond those four bit patterns, all values with a maximum exponent (all bits -// set) are fair game for representing other values, so 2^53 - 4 possibilities. -// -// We split those 2^53 - 4 available values into four groups, each allowing for -// 2^51 - 1 different values to be encoded in them: -// -// sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1 -// -// sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2 -// -// sign = 0, quiet = 1 :: Pointers -// -// sign = 0, quiet = 0 :: Others -// -// -// === Fixnums === -// -// Negative fixnums actually represent themselves without needing to go through -// any transformation. Only the smallest 52-bit signed negative, -2^51, cannot -// be represented, as it would step on forbidden value 1, Negative cqNaN. -// -// Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to -// make it one operation together with the NaN masking) to avoid the all-zero -// payload value, which would step on forbidden value 2, Negative Infinity. -// -// -// === Pointers === -// -// Pointers are further subdivided as follows based on the remaining 51 bits, -// with the first three bits used as a sort of tag: -// -// 000 :: Pointer to Zisp heap object (string, vector, etc.) -// -// 001 :: Weak pointer to Zisp heap object -// -// 01. :: Undefined (may be used by GC to flag pointers for some reason?) -// -// 1.. :: Foreign pointer (basically, a 50-bit fixnum of another type) -// -// This means pointers to the Zisp heap are 48 bits. Of those, we only really -// need 45, since 64-bit platforms are in practice limited to 48-bit addresses, -// and allocations happen at 8-byte boundaries, meaning the least significant 3 -// bits are always unset. Thus, we are able to store yet another 3-bit tag in -// those 48-bit pointers alongside the actual, multiple-of-8, 48-bit address. -// -// The forbidden value 3, Positive cqNaN, is avoided thanks to the fact that a -// regular Zisp heap pointer can never be null. Weak pointers, which can be -// null, avoid stepping on that forbidden value thanks to bit 49 being set. -// -// Foreign pointers allow storing arbitrary pointers, or integers basically, of -// up to 50 bits, without any further definition in Zisp of what they mean. -// -// -// === Other values === -// -// This 51-bit range is divided as follows, based on the high bits: -// -// 000 :: Rune -// -// 001 :: Short string -// -// 010 :: Short string literal -// -// 011 :: Unicode code point -// -// 100 :: Singleton values -// -// 101, 110, 111 :: Undefined -// -// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax. -// -// Zisp strings are immutable. Any string fitting into 6 bytes or less will be -// stored as an immediate value, not requiring any heap allocation or interning. -// It's implicitly interned, so to speak. This includes the empty string. -// -// The null byte serves as a terminator for strings shorter than 6 bytes, and -// therefore cannot appear in these strings; a string that short but actually -// containing a null byte will need to be heap allocated like other strings. -// -// There may also be strings that are this short, but ended up on the heap due -// to being uninterned. Interning them will return the equivalent short string -// as an immediate. -// -// The separate type for a short string *literal* is for an efficiency hack in -// the parser; see commentary there. -// -// Unicode code points need a maximum of 21 bits, yet we have 48 available. -// This may be exploited for a future extension. -// -// Similarly, it's very unlikely that we will ever need more than a handful of -// singleton values (false, true, nil, and so on). As such, this range of bit -// patterns may be subdivided in the future. Right now, only the lowest 8 bits -// are allowed to be set, with the other 40 being reserved, so there's a limit -// of 256 singleton values that can be defined. -// -// And top of that, we have three more 48-bit value ranges that are unused! -// -// The forbidden value 4, Positive Infinity, would be the "empty string rune" -// but that isn't allowed anyway, so all is fine. -// - -// Here's the original article explaining the strategy: -// -// https://tkammer.de/zisp/notes/nan.html -// -// More about runes: -// -// https://tkammer.de/zisp/notes/symbols.html -// -// Note: Packed structs are least-to-most significant, so the order of fields -// must be reversed relative to a typical big-endian illustration of the bit -// patterns of IEEE 754 double-precision floating point numbers. - -const std = @import("std"); - -pub const double = @import("value/double.zig"); -pub const fixnum = @import("value/fixnum.zig"); - -pub const ptr = @import("value/ptr.zig"); -pub const seq = @import("value/seq.zig"); - -pub const rune = @import("value/rune.zig"); -pub const sstr = @import("value/sstr.zig"); -pub const char = @import("value/char.zig"); -pub const boole = @import("value/boole.zig"); -pub const nil = @import("value/nil.zig"); -pub const eof = @import("value/eof.zig"); - -pub const pair = @import("value/pair.zig"); -pub const istr = @import("value/istr.zig"); - -// To fill up the u11 exponent part of a NaN. -const FILL = 0x7ff; - -// Used when dealing with runes and short strings. -pub const ShortString = std.BoundedArray(u8, 6); - -pub const OtherTag = enum(u3) { rune, sstr, qstr, char, misc }; - -pub const MiscValue = enum(u8) { f, t, nil, eof, undef = 255 }; - -pub const undef = Value{ .misc = .{ .value = .undef } }; - -/// Represents a Zisp value/object. -pub const Value = packed union { - /// To get the value as a regular double. - double: f64, - - /// To get an agnostic value for direct comparison with == i.e. eq?. - bits: u64, - - // Some of the structs below are just for inspection, whereas others are to - // initialize a new value of that category as well as read it that way. - - /// Inspection through the lens of the general IEEE 754 double layout. - ieee: packed struct { - rest: u51, - quiet: bool, - exp: u11, - sign: bool, - }, - - /// For initializing and reading fixnums. - fixnum: packed struct { - code: u51, - negative: bool, - _: u11 = FILL, - _is_fixnum: bool = true, - }, - - /// Inspection through the lens of the ptr category. - ptr: packed struct { - _value: u48, - is_weak: bool, - _unused: bool, - is_foreign: bool, - _is_ptr: bool, - _: u11, - _is_fixnum: bool, - }, - - /// For initializing and reading foreign pointers. - fptr: packed struct { - value: u50, - _is_foreign: bool = true, - _is_ptr: bool = true, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading Zisp heap pointers. - zptr: packed struct { - tagged_value: u48, - is_weak: bool = false, - _unused: bool = false, - _is_foreign: bool = false, - _is_ptr: bool = true, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// Inspection as an other (non-fixnum, non-pointer) packed value. - other: packed struct { - _value: u48, - tag: OtherTag, - _is_ptr: bool, - _: u11, - _is_ifxnum: bool, - }, - - /// For initializing and reading runes. - rune: packed struct { - // actually [6]u8 but packed struct cannot contain arrays - name: u48, - _tag: OtherTag = .rune, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading short strings. - sstr: packed struct { - // actually [6]u8 but packed struct cannot contain arrays - string: u48, - tag: OtherTag, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading characters. - char: packed struct { - value: u21, - _reserved: u27 = 0, - _tag: OtherTag = .char, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading misc values aka singletons. - misc: packed struct { - value: MiscValue, - _reserved: u40 = 0, - _tag: OtherTag = .misc, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// Hexdumps the value. - pub inline fn dump(v: Value) void { - std.debug.dumpHex(std.mem.asBytes(&v)); - } - - pub fn eq(v1: Value, v2: Value) bool { - return v1.bits == v2.bits; - } - - // The following aren't type predicates per se, but rather determine which - // general category the value is in. The exceptions are fixnum and double, - // since those aren't sub-categorized into further types. - - /// Checks for a Zisp double, including: +nan.0, -nan.0, +inf.0, -inf.0 - pub inline fn isDouble(v: Value) bool { - return v.ieee.exp != FILL or v.ieee.rest == 0; - } - - /// Checks for a non-double Zisp value packed into a NaN. - pub inline fn isPacked(v: Value) bool { - return !v.isDouble(); - } - - /// Checks for a fixnum. - pub inline fn isFixnum(v: Value) bool { - return v.isPacked() and v.ieee.sign; - } - - /// Checks for any kind of pointer. - pub inline fn isPtr(v: Value) bool { - return v.isPacked() and !v.ieee.sign and v.ieee.quiet; - } - - /// Checks for a non-double, non-fixnum, non-pointer Zisp value. - pub inline fn isOther(v: Value) bool { - return v.isPacked() and !v.ieee.sign and !v.ieee.quiet; - } - - /// Checks for an other type of value based on tag. - pub inline fn isOtherTag(v: Value, tag: OtherTag) bool { - return v.isOther() and v.other.tag == tag; - } -}; diff --git a/src/libzisp/value/boole.zig b/src/libzisp/value/boole.zig deleted file mode 100644 index 2e94e4d..0000000 --- a/src/libzisp/value/boole.zig +++ /dev/null @@ -1,33 +0,0 @@ -const Value = @import("../value.zig").Value; - -pub const f = Value{ .misc = .{ .value = .f } }; -pub const t = Value{ .misc = .{ .value = .t } }; - -// Zig API - -/// Checks if the value is a boole. -pub fn check(v: Value) bool { - return v.bits == f.bits or v.bits == t.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -pub fn pack(b: bool) Value { - return if (b) t else f; -} - -pub fn unpack(v: Value) bool { - assert(v); - return v.bits == t.bits; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return pack(check(v)); -} diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig deleted file mode 100644 index 09a3034..0000000 --- a/src/libzisp/value/char.zig +++ /dev/null @@ -1,31 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.char); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not char"); - } -} - -pub fn pack(c: u21) Value { - return .{ .char = .{ .value = c } }; -} - -pub fn unpack(v: Value) u21 { - assert(v); - return @truncate(v.char.value); -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/double.zig b/src/libzisp/value/double.zig deleted file mode 100644 index 5cfe6ee..0000000 --- a/src/libzisp/value/double.zig +++ /dev/null @@ -1,38 +0,0 @@ -const value = @import("../value.zig"); -const Value = value.Value; - -// Zig API - -/// Checks for a Zisp double (double, +inf, -inf, or canonical NaN). -pub fn check(v: Value) bool { - return v.isDouble(); -} - -/// Asserts check(). -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not double"); - } -} - -pub fn pack(d: f64) Value { - return .{ .double = d }; -} - -pub fn unpack(v: Value) f64 { - assert(v); - return v.double; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn add(v1: Value, v2: Value) Value { - const d1 = unpack(v1); - const d2 = unpack(v2); - return pack(d1 + d2); -} diff --git a/src/libzisp/value/eof.zig b/src/libzisp/value/eof.zig deleted file mode 100644 index 4b16669..0000000 --- a/src/libzisp/value/eof.zig +++ /dev/null @@ -1,28 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub const eof = Value{ .misc = .{ .value = .eof } }; - -// Zig API - -pub fn check(v: Value) bool { - return v.bits == eof.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -// Zisp API - -pub fn get() Value { - return eof; -} - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig deleted file mode 100644 index 80fb4ae..0000000 --- a/src/libzisp/value/fixnum.zig +++ /dev/null @@ -1,84 +0,0 @@ -const std = @import("std"); -const value = @import("../value.zig"); - -const Value = value.Value; - -// Zig API - -/// Checks for a Zisp fixnum. -pub fn check(v: Value) bool { - return v.isFixnum(); -} - -/// Asserts check(). -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not fixnum"); - } -} - -// See detailed NaN packing docs for why the +/- 1. -pub const min = std.math.minInt(i52) + 1; -pub const max = std.math.maxInt(i52) - 1; - -fn assertValidRange(int: i64) void { - if (int < min) { - std.debug.print("int too small for fixnum: {}\n", .{int}); - @panic("int too small for fixnum"); - } - if (int > max) { - std.debug.print("int too large for fixnum: {}\n", .{int}); - @panic("int too large for fixnum"); - } -} - -fn packNegative(int: i64) Value { - return @bitCast(int); -} - -fn unpackNegative(v: Value) i64 { - return @bitCast(v); -} - -const positive_mask: u64 = 0xfff7ffffffffffff; - -fn packPositive(int: i64) Value { - const uint: u64 = @bitCast(int); - return @bitCast(uint ^ positive_mask); -} - -fn unpackPositive(v: Value) i64 { - const uint: u64 = @bitCast(v); - return @bitCast(uint ^ positive_mask); -} - -pub fn pack(int: i64) Value { - assertValidRange(int); - if (int < 0) { - return packNegative(int); - } else { - return packPositive(int); - } -} - -pub fn unpack(v: Value) i64 { - assert(v); - if (v.fixnum.negative) { - return unpackNegative(v); - } else { - return unpackPositive(v); - } -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn add(v1: Value, v2: Value) Value { - const int1 = unpack(v1); - const int2 = unpack(v2); - return pack(int1 + int2); -} diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig deleted file mode 100644 index abd0447..0000000 --- a/src/libzisp/value/istr.zig +++ /dev/null @@ -1,65 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const ptr = @import("ptr.zig"); -const seq = @import("seq.zig"); - -const Hval = gc.Hval; - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return ptr.checkZispTag(v, .seq); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not istr"); - } -} - -pub fn intern(str: []const u8, quoted: bool) Value { - if (str.len > value.fixnum.max) { - @panic("String length out of fixnum range."); - } - const header: seq.Header = .{ - .type = .string, - .info = .{ .string = .{ - .enc = .utf8, - .quoted = quoted, - .interned = true, - } }, - .size = @intCast(str.len), - }; - const header_ptr = gc.intern(header, str); - return ptr.pack(@ptrCast(header_ptr), .seq); -} - -pub fn getHeader(v: Value) *seq.Header { - assert(v); - const header_ptr, _ = ptr.unpack(v); - return gc.istrHeader(header_ptr); -} - -pub fn getHeaderFromPtr(p: *Hval) *seq.Header { - return gc.istrHeader(p); -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn len(v: Value) Value { - const l = getHeader(v).size; - if (l > value.fixnum.max) { - @panic("string length out of range"); - } - return value.fixnum.pack(@intCast(l)); -} diff --git a/src/libzisp/value/nil.zig b/src/libzisp/value/nil.zig deleted file mode 100644 index f95ecad..0000000 --- a/src/libzisp/value/nil.zig +++ /dev/null @@ -1,28 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub const nil = Value{ .misc = .{ .value = .nil } }; - -// Zig API - -pub fn check(v: Value) bool { - return v.bits == nil.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -// Zisp API - -pub fn get() Value { - return nil; -} - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/pair.zig b/src/libzisp/value/pair.zig deleted file mode 100644 index 6ea1edf..0000000 --- a/src/libzisp/value/pair.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const ptr = @import("ptr.zig"); - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return ptr.checkZispTag(v, .pair); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not pair"); - } -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn cons(v1: Value, v2: Value) Value { - return ptr.pack(@ptrCast(gc.cons(v1, v2)), .pair); -} - -fn getMem(v: Value) *[2]Value { - return @ptrCast(ptr.unpack(v).@"0"); -} - -pub fn car(v: Value) Value { - assert(v); - return getMem(v)[0]; -} - -pub fn cdr(v: Value) Value { - assert(v); - return getMem(v)[1]; -} - -pub fn setcar(v: Value, new: Value) void { - assert(v); - getMem(v)[0] = new; -} - -pub fn setcdr(v: Value, new: Value) void { - assert(v); - getMem(v)[1] = new; -} diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig deleted file mode 100644 index 2ed3765..0000000 --- a/src/libzisp/value/ptr.zig +++ /dev/null @@ -1,174 +0,0 @@ -const std = @import("std"); - -const gc = @import("../gc.zig"); -const value = @import("../value.zig"); - -const Hval = gc.Hval; - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isPtr(); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not a pointer"); - } -} - -// Foreign Pointers - -pub fn checkForeign(v: Value) bool { - return check(v) and v.ptr.is_foreign; -} - -pub fn assertForeign(v: Value) void { - if (!checkForeign(v)) { - v.dump(); - @panic("not foreign pointer"); - } -} - -pub fn packForeign(int: u50) Value { - return .{ .fptr = .{ .value = int } }; -} - -pub fn unpackForeign(v: Value) u50 { - assertForeign(v); - return v.fptr.value; -} - -// Zisp Pointers - -pub fn checkZisp(v: Value) bool { - return check(v) and !v.ptr.is_foreign; -} - -pub fn assertZisp(v: Value) void { - if (!checkZisp(v)) { - v.dump(); - @panic("not zisp pointer"); - } -} - -pub fn checkWeak(v: Value) bool { - return checkZisp(v) and v.zptr.is_weak; -} - -pub fn assertWeak(v: Value) void { - if (!checkWeak(v)) { - v.dump(); - @panic("not zisp weak pointer"); - } -} - -pub fn checkZispTag(v: Value, tag: Tag) bool { - return checkZisp(v) and unpack(v).@"1" == tag; -} - -pub fn assertZispTag(v: Value, tag: Tag) void { - if (!checkZispTag(v, tag)) { - v.dump(); - @panic("not zisp pointer or wrong tag"); - } -} - -pub fn checkStrong(v: Value) bool { - return checkZisp(v) and !v.zptr.is_weak; -} - -pub fn assertStrong(v: Value) void { - if (!checkStrong(v)) { - v.dump(); - @panic("not zisp strong pointer"); - } -} - -pub fn packZisp(ptr: *Hval, tag: Tag, is_weak: bool) Value { - return .{ .zptr = .{ - .tagged_value = tagPtr(ptr, tag), - .is_weak = is_weak, - } }; -} - -pub fn pack(ptr: *Hval, tag: Tag) Value { - return packZisp(ptr, tag, false); -} - -pub fn packWeak(ptr: *Hval, tag: Tag) Value { - return packZisp(ptr, tag, true); -} - -// Unpacks weak as well; no need for a separate fn. -pub fn unpack(v: Value) struct { *Hval, Tag } { - assertZisp(v); - return untagPtr(v.zptr.tagged_value); -} - -pub fn setWeakNull(v: *Value) void { - assertWeak(v.*); - v.zptr.tagged_value = 0; -} - -pub fn isWeakNull(v: Value) bool { - assertWeak(v); - return v.zptr.tagged_value == 0; -} - -fn tagPtr(ptr: *Hval, tag: Tag) u48 { - const int: usize = @intFromPtr(ptr); - const untagged: u48 = @intCast(int); - return untagged | @intFromEnum(tag); -} - -fn untagPtr(tagged: u48) struct { *Hval, Tag } { - const untagged: u48 = tagged & 0xfffffffffff8; - const ptr: *Hval = @ptrFromInt(untagged); - const int: u3 = @truncate(tagged); - const tag: Tag = @enumFromInt(int); - return .{ ptr, tag }; -} - -pub const Tag = enum(u3) { - /// Pair aka cons cell aka *[2]Value - pair, - /// Sequence of various kinds (16-bit meta, 48-bit length, then data) - seq, - /// Procedure - proc, -}; - -// Zisp API - -pub fn predForeign(v: Value) Value { - return value.boole.pack(checkForeign(v)); -} - -pub fn makeWeak(v: Value) Value { - assertStrong(v); - var copy = v; - copy.zptr.is_weak = true; - return copy; -} - -pub fn predWeak(v: Value) Value { - return value.boole.pack(checkWeak(v)); -} - -pub fn predWeakNull(v: Value) Value { - return value.boole.pack(isWeakNull(v)); -} - -pub fn getWeak(v: Value) Value { - if (isWeakNull(v)) { - return value.boole.f; - } else { - var copy = v; - copy.zptr.is_weak = false; - return copy; - } -} diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig deleted file mode 100644 index 195210e..0000000 --- a/src/libzisp/value/rune.zig +++ /dev/null @@ -1,82 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const ShortString = value.ShortString; -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.rune); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not rune"); - } -} - -pub fn isValidRune(s: []const u8) bool { - if (s.len == 0 or s.len > 6) { - return false; - } - if (!std.ascii.isAlphabetic(s[0])) { - return false; - } - for (s[1..]) |c| { - if (!std.ascii.isAlphanumeric(c)) { - return false; - } - } - return true; -} - -fn assertValidRune(s: []const u8) void { - if (!isValidRune(s)) { - std.debug.print("invalid rune: '{s}'\n", .{s}); - @panic("invalid rune"); - } -} - -// See sstr.zig which uses equivalent code; probably good to keep in sync. - -pub fn pack(s: []const u8) Value { - assertValidRune(s); - return packForced(s); -} - -pub fn packForced(s: []const u8) Value { - var v = Value{ .rune = .{ .name = 0 } }; - const dest: [*]u8 = @ptrCast(&v.rune.name); - @memcpy(dest, s); - return v; -} - -pub fn unpack(v: Value) ShortString { - assert(v); - const s: [6]u8 = @bitCast(v.rune.name); - inline for (0..6) |i| { - if (s[i] == 0) return .{ .buffer = s, .len = i }; - } - return .{ .buffer = s, .len = 6 }; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn make(v: Value) Value { - const s, const l = value.sstr.unpack(v); - return pack(s[0..l]); -} - -pub fn getName(v: Value) Value { - const s, const l = unpack(v); - return value.sstr.pack(s[0..l]); -} - -// TODO: Registering decoders diff --git a/src/libzisp/value/seq.zig b/src/libzisp/value/seq.zig deleted file mode 100644 index 3418a5a..0000000 --- a/src/libzisp/value/seq.zig +++ /dev/null @@ -1,57 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const Value = value.Value; - -const Endian = enum(u1) { - little, - big, - - const native: Endian = switch (builtin.target.cpu.arch.endian()) { - .little => .little, - .big => .big, - }; -}; - -pub const Header = packed struct(u64) { - type: enum(u2) { - values, - string, - ints, - floats, - }, - info: packed union { - values: packed struct(u14) { - weak: bool = false, - _: u13 = 0, - }, - string: packed struct(u14) { - enc: enum(u4) { utf8, utf16, utf24, utf32 }, - endian: Endian = .native, - quoted: bool, - interned: bool, - _: u7 = 0, - }, - ints: packed struct(u14) { - signed: bool, - endian: Endian = .native, - size: u12, - }, - floats: packed struct(u14) { - double: bool, - endian: Endian = .native, - _: u12 = 0, - }, - }, - size: u48, - - pub fn bytes(self: *Header) []u8 { - const hs = @sizeOf(Header); - const ptr: [*]u8 = @ptrCast(self); - const end = hs + self.size; - return ptr[hs..end]; - } -}; diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig deleted file mode 100644 index b02fd3d..0000000 --- a/src/libzisp/value/sstr.zig +++ /dev/null @@ -1,78 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const ShortString = value.ShortString; -const OtherTag = value.OtherTag; -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.sstr) or v.isOtherTag(.qstr); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not sstr"); - } -} - -pub fn checkQuoted(v: Value) bool { - return v.isOtherTag(.qstr); -} - -// For now, ignore encoding, just treat it as []u8. - -pub fn isValidSstr(s: []const u8) bool { - if (s.len > 6) { - return false; - } - for (s) |c| { - if (c == 0) { - return false; - } - } - return true; -} - -fn assertValidSstr(s: []const u8) void { - if (!isValidSstr(s)) { - std.debug.print("invalid sstr: {s}\n", .{s}); - @panic("invalid sstr"); - } -} - -// Different ways of doing the following have been tested, including manual -// shifting and bit masking, but memcpy always wins easily according to our -// micro-benchmarks, under both ReleaseSafe and ReleaseFast. - -// Note: rune.zig uses equivalent code; probably good to keep in sync. - -pub fn pack(s: []const u8) Value { - return _pack(s, .sstr); -} - -pub fn packQuoted(s: []const u8) Value { - return _pack(s, .qstr); -} - -fn _pack(s: []const u8, tag: OtherTag) Value { - assertValidSstr(s); - var v = Value{ .sstr = .{ .string = 0, .tag = tag } }; - const dest: [*]u8 = @ptrCast(&v.sstr.string); - @memcpy(dest, s); - return v; -} - -pub fn unpack(v: Value) ShortString { - assert(v); - const s: [6]u8 = @bitCast(v.sstr.string); - inline for (0..6) |i| { - if (s[i] == 0) return .{ .buffer = s, .len = i }; - } - return .{ .buffer = s, .len = 6 }; -} - -// No Zisp API for sstr specifically, since it's a string. See string.zig. diff --git a/src/main.zig b/src/main.zig index c52d7e5..769a906 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,6 +1,6 @@ const std = @import("std"); -const zisp = @import("libzisp"); +const zisp = @import("zisp"); pub fn main() !void { const reader = std.io.getStdIn().reader().any(); diff --git a/src/zisp.zig b/src/zisp.zig new file mode 100644 index 0000000..6472daf --- /dev/null +++ b/src/zisp.zig @@ -0,0 +1,440 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +const testing = std.testing; + +pub const gc = @import("zisp/gc.zig"); +pub const io = @import("zisp/io.zig"); +pub const lib = @import("zisp/lib.zig"); +pub const value = @import("zisp/value.zig"); + +pub const Hval = gc.Hval; + +pub const ShortString = value.ShortString; +pub const Value = value.Value; + +fn benchmark(name: []const u8, iters: usize, func: fn () anyerror!void) !void { + var timer = try std.time.Timer.start(); + for (0..iters) |i| { + _ = i; + try func(); + } + const ns: f64 = @floatFromInt(timer.lap()); + const secs = ns / 1_000_000_000; + std.debug.print( + "bench {s} x {}: {d:.3}s\n", + .{ name, iters, secs }, + ); +} + +test "double" { + const d1: f64 = 0.123456789; + const d2: f64 = -0.987654321; + const v1 = value.double.pack(d1); + const v2 = value.double.pack(d2); + const v3 = value.double.add(v1, v2); + const result = value.double.unpack(v3); + + try std.testing.expect(value.double.check(v1)); + try std.testing.expect(value.double.check(v2)); + try std.testing.expect(value.double.check(v3)); + + try std.testing.expectEqual(d1 + d2, result); +} + +test "fixnum" { + const int1: i64 = 123456789; + const int2: i64 = -987654321; + const v1 = value.fixnum.pack(int1); + const v2 = value.fixnum.pack(int2); + const v3 = value.fixnum.add(v1, v2); + const result = value.fixnum.unpack(v3); + + try std.testing.expect(value.fixnum.check(v1)); + try std.testing.expect(value.fixnum.check(v2)); + try std.testing.expect(value.fixnum.check(v3)); + + try std.testing.expectEqual(int1 + int2, result); +} + +test "ptr" { + const ptr = value.ptr; + + const val: *Hval = @ptrFromInt(256); + const tag = ptr.Tag.pair; + + const p = ptr.pack(val, tag); + try std.testing.expect(ptr.check(p)); + try std.testing.expect(ptr.checkZispTag(p, tag)); + try std.testing.expect(ptr.checkStrong(p)); + + const pv, const pt = ptr.unpack(p); + try std.testing.expectEqual(val, pv); + try std.testing.expectEqual(tag, pt); + + var w = ptr.makeWeak(p); + try std.testing.expect(ptr.check(w)); + try std.testing.expect(ptr.checkZispTag(w, tag)); + try std.testing.expect(ptr.checkWeak(w)); + try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); + try std.testing.expectEqual(false, value.boole.unpack(ptr.predWeakNull(w))); + + const wv, const wt = ptr.unpack(w); + try std.testing.expectEqual(val, wv); + try std.testing.expectEqual(tag, wt); + + const wv2, const wt2 = ptr.unpack(ptr.getWeak(w)); + try std.testing.expectEqual(val, wv2); + try std.testing.expectEqual(tag, wt2); + + ptr.setWeakNull(&w); + try std.testing.expect(ptr.check(w)); + try std.testing.expect(ptr.checkWeak(w)); + try std.testing.expect(ptr.isWeakNull(w)); + try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeak(w))); + try std.testing.expectEqual(true, value.boole.unpack(ptr.predWeakNull(w))); + try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w))); +} + +test "fptr" { + const ptr = value.ptr; + + const int1: u50 = 0; + const int2: u50 = std.math.maxInt(u50); + + const f1 = ptr.packForeign(int1); + try std.testing.expect(ptr.checkForeign(f1)); + try std.testing.expectEqual(int1, ptr.unpackForeign(f1)); + + const f2 = ptr.packForeign(int2); + try std.testing.expect(ptr.checkForeign(f2)); + try std.testing.expectEqual(int2, ptr.unpackForeign(f2)); +} + +test "rune" { + const r = value.rune.pack("test"); + try std.testing.expect(value.rune.check(r)); + + const s = value.rune.unpack(r); + try std.testing.expectEqualStrings("test", s.slice()); +} + +const SstrImpl = struct { SstrPack, SstrUnpack }; +const SstrPack = *const fn ([]const u8) Value; +const SstrUnpack = *const fn (Value) ShortString; + +test "sstr" { + const impls = [_]SstrImpl{ + .{ value.sstr.pack, value.sstr.unpack }, + // .{ value.sstr.pack1, value.sstr.unpack1 }, + // .{ value.sstr.pack2, value.sstr.unpack2 }, + // .{ value.sstr.pack3, value.sstr.unpack3 }, + // .{ value.sstr.pack4, value.sstr.unpack4 }, + }; + + for (impls) |impl| { + try testSstr(impl); + } + + if (impls.len > 1) { + const iters = switch (@import("builtin").mode) { + .Debug, .ReleaseSmall => 10_000_000, + .ReleaseSafe => 100_000_000, + .ReleaseFast => 1_000_000_000, + }; + std.debug.print("Benchmarking sstr with {} iters.\n", .{iters}); + inline for (impls, 0..) |impl, i| { + try benchmarkSstr(impl, i, iters); + } + } +} + +fn testSstr(impl: SstrImpl) !void { + const pack, const unpack = impl; + + const ss1 = pack("1"); + const ss2 = pack("123"); + const ss3 = pack("123456"); + + const s1 = unpack(ss1); + const s2 = unpack(ss2); + const s3 = unpack(ss3); + + try std.testing.expect(value.sstr.check(ss1)); + try std.testing.expect(value.sstr.check(ss2)); + try std.testing.expect(value.sstr.check(ss3)); + + try std.testing.expectEqualStrings("1", s1.slice()); + try std.testing.expectEqualStrings("123", s2.slice()); + try std.testing.expectEqualStrings("123456", s3.slice()); +} + +fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void { + const pack, const unpack = impl; + + var timer = try std.time.Timer.start(); + var ns: f64 = undefined; + var secs: f64 = undefined; + + var ss1: Value = undefined; + var ss2: Value = undefined; + var ss3: Value = undefined; + + for (0..iters) |_i| { + _ = _i; + ss1 = pack("1"); + ss2 = pack("123"); + ss3 = pack("123456"); + } + + ns = @floatFromInt(timer.lap()); + secs = ns / 1_000_000_000; + + std.debug.print("pack{}: {d:.3}s\t", .{ id, secs }); + + for (0..iters) |_i| { + _ = _i; + std.mem.doNotOptimizeAway(unpack(ss1)); + std.mem.doNotOptimizeAway(unpack(ss2)); + std.mem.doNotOptimizeAway(unpack(ss3)); + } + + ns = @floatFromInt(timer.lap()); + secs = ns / 1_000_000_000; + + std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs }); +} + +test "char" { + const c1 = value.char.pack('\x00'); + try std.testing.expect(value.char.check(c1)); + try std.testing.expectEqual('\x00', value.char.unpack(c1)); + + const c2 = value.char.pack('😀'); + try std.testing.expect(value.char.check(c2)); + try std.testing.expectEqual('😀', value.char.unpack(c2)); +} + +test "misc" { + const f = value.boole.pack(false); + try std.testing.expect(value.boole.check(f)); + try std.testing.expectEqual(false, value.boole.unpack(f)); + try std.testing.expect(value.boole.unpack(value.boole.pred(f))); + + const t = value.boole.pack(true); + try std.testing.expect(value.boole.check(t)); + try std.testing.expectEqual(true, value.boole.unpack(t)); + try std.testing.expect(value.boole.unpack(value.boole.pred(t))); + + const nil = value.nil.get(); + try std.testing.expect(value.nil.check(nil)); + try std.testing.expect(value.boole.unpack(value.nil.pred(nil))); + + const eof = value.eof.get(); + try std.testing.expect(value.eof.check(eof)); + try std.testing.expect(value.boole.unpack(value.eof.pred(eof))); +} + +test "pair" { + const v1 = value.fixnum.pack(1); + const v2 = value.fixnum.pack(2); + + const v3 = value.fixnum.pack(3); + const v4 = value.fixnum.pack(4); + + const p = value.pair.cons(v1, v2); + try std.testing.expect(value.pair.check(p)); + try std.testing.expect(value.boole.unpack(value.pair.pred(p))); + + const car = value.pair.car(p); + const cdr = value.pair.cdr(p); + try std.testing.expectEqual(1, value.fixnum.unpack(car)); + try std.testing.expectEqual(2, value.fixnum.unpack(cdr)); + + value.pair.setcar(p, v3); + value.pair.setcdr(p, v4); + + const car2 = value.pair.car(p); + const cdr2 = value.pair.cdr(p); + try std.testing.expectEqual(3, value.fixnum.unpack(car2)); + try std.testing.expectEqual(4, value.fixnum.unpack(cdr2)); +} + +test "istr" { + const istr = value.istr; + const fx = value.fixnum; + + const s1 = "foo bar baz"; + const v1 = istr.intern(s1, false); + const v1_len: usize = @intCast(fx.unpack(istr.len(v1))); + try std.testing.expectEqualStrings(s1, istr.getHeader(v1).bytes()); + try std.testing.expectEqual(s1.len, v1_len); + + const file = try std.fs.cwd().openFile("test-data/string.txt", .{}); + defer file.close(); + var s2_buf: [4096]u8 = undefined; + const s2_len = try file.readAll(&s2_buf); + var s2: []u8 = s2_buf[0..s2_len]; + const v2 = istr.intern(s2, false); + const v2_len: usize = @intCast(fx.unpack(istr.len(v2))); + var s2_orig_buf: [4096]u8 = undefined; + @memcpy(&s2_orig_buf, &s2_buf); + const s2_orig = s2_orig_buf[0..s2_len]; + s2[0] = s2[0] +% 1; + try std.testing.expectEqualStrings(s2_orig, istr.getHeader(v2).bytes()); + try std.testing.expectEqual(s2_len, v2_len); +} + +fn parseString(str: []const u8) Value { + var fbs = std.io.fixedBufferStream(str); + return io.parser.parse(fbs.reader().any()); +} + +test "parse" { + const val = parseString("\"foo\""); + + try std.testing.expect(value.sstr.check(val)); + + const s = value.sstr.unpack(val); + try std.testing.expectEqualStrings("foo", s.slice()); +} + +test "parse2" { + const val = parseString( + \\ ;; Testing some crazy datum comments + \\ ;~"bar"([x #"y"]{##`,'z}) #"foo" + \\ ;; end + ); + + const r = value.rune.unpack(value.pair.car(val)); + try std.testing.expectEqualStrings("HASH", r.slice()); + + const s = value.pair.cdr(val); + try std.testing.expect(value.sstr.check(s)); + + const f = value.sstr.unpack(s); + try std.testing.expectEqualStrings("foo", f.slice()); +} + +test "parse3" { + const val = parseString( + \\(foo ;~x ;~(x y) ;~x #bar [#x #"baz"] 'bat) + ); + + const car = value.pair.car; + const cdr = value.pair.cdr; + + const e1 = car(val); + const e2 = car(cdr(val)); + const e3 = car(cdr(cdr(val))); + const e4 = car(cdr(cdr(cdr(val)))); + + try std.testing.expect(value.sstr.check(e1)); + try std.testing.expect(value.rune.check(e2)); + try std.testing.expect(value.pair.check(e3)); + try std.testing.expect(value.pair.check(e4)); +} + +test "parse4" { + const val = parseString("(foo . ;~x bar ;~y)"); + + const s = value.sstr.unpack(value.pair.car(val)); + try std.testing.expectEqualStrings("foo", s.slice()); + + const f = value.sstr.unpack(value.pair.cdr(val)); + try std.testing.expectEqualStrings("bar", f.slice()); +} + +test "unparse" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#foo"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); +} + +test "unparse2" { + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#{foo bar['x]}"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings( + "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", + try out.toOwnedSlice(), + ); +} + +fn writeParseResult(str: []const u8) !void { + const w = std.io.getStdErr().writer(); + const v = parseString(str); + try io.unparser.unparse(w, v); + try w.writeByte('\n'); +} + +test "unparse3" { + try writeParseResult("#{foo bar['x](y)(z)}"); +} + +test "unparse4" { + try writeParseResult("(foo ;~bar)"); +} + +test "unparse5" { + try writeParseResult("(;~foo foo ;~bar . ;~bar bar ;~bar)"); +} + +test "unparse6" { + try writeParseResult("(foo bar ... baz bat.(qux))"); +} + +test "unparse7" { + try writeParseResult("#`(#,(->keyword (syntax->datum #'sym)) . in)"); +} + +fn parseBench(path: []const u8, iters: usize) !void { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + + var sfa = io.parser.DefaultSfa.init(std.heap.smp_allocator); + var parser = try io.parser.initSfa(&sfa); + defer parser.deinit(); + + var timer = try std.time.Timer.start(); + for (0..iters) |i| { + _ = i; + var br = std.io.bufferedReader(file.reader()); + const reader = br.reader().any(); + // const reader = file.reader().any(); + var v: Value = undefined; + while (true) { + // std.debug.print("hihi {s}\n", .{path}); + v = parser.run(reader) catch |e| { + std.debug.print("\nfile pos: {}\n", .{ + try file.getPos(), + }); + return e; + }; + // try io.unparser.unparse(std.io.getStdOut().writer().any(), v); + if (value.eof.check(v)) { + break; + } + } + try file.seekTo(0); + } + const ns: f64 = @floatFromInt(timer.lap()); + const secs = ns / 1_000_000_000; + std.debug.print( + "parse {s} x {}: {d:.3}s\n", + .{ path, iters, secs }, + ); +} + +test "parse bench" { + try parseBench("test-data/parser-test-1.scm", 200); + try parseBench("test-data/parser-test-2.scm", 800); + try parseBench("test-data/parser-torture.scm", 1); +} diff --git a/src/zisp/gc.zig b/src/zisp/gc.zig new file mode 100644 index 0000000..d778b77 --- /dev/null +++ b/src/zisp/gc.zig @@ -0,0 +1,47 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +const value = @import("value.zig"); + +const seq = value.seq; + +const Value = value.Value; + +/// A "heap value" that could be a Value or object header. +pub const Hval = union { + value: Value, + seq_header: seq.Header, +}; + +pub const alloc = std.heap.smp_allocator; + +// Cons cells + +var cons_pool = std.heap.MemoryPool([2]Value).init(alloc); + +pub fn cons(v1: Value, v2: Value) *[2]Value { + const mem = cons_pool.create() catch @panic("OOM"); + mem[0] = v1; + mem[1] = v2; + return mem; +} + +// Interned strings + +var istr_pool = std.hash_map.StringHashMap(void).init(alloc); + +pub fn intern(header: seq.Header, str: []const u8) *seq.Header { + const hs = @sizeOf(seq.Header); + const size = str.len + hs; + const copy = alloc.alloc(u8, size) catch @panic("OOM"); + const header_bytes: [hs]u8 = @bitCast(header); + @memcpy(copy[0..hs], &header_bytes); + @memcpy(copy[hs..size], str); + const entry = istr_pool.getOrPutValue(copy, {}) catch @panic("OOM"); + return @ptrCast(entry.key_ptr); +} + +pub fn istrHeader(ptr: *Hval) *seq.Header { + const entry_key: *[]u8 = @ptrCast(ptr); + return @alignCast(@ptrCast(entry_key.ptr)); +} diff --git a/src/zisp/io.zig b/src/zisp/io.zig new file mode 100644 index 0000000..8adf495 --- /dev/null +++ b/src/zisp/io.zig @@ -0,0 +1,12 @@ +pub const parser = @import("io/parser.zig"); +pub const unparser = @import("io/unparser.zig"); + +pub const decoder = @import("io/decoder.zig"); +pub const encoder = @import("io/encoder.zig"); + +pub const reader = @import("io/reader.zig"); +pub const writer = @import("io/writer.zig"); + +pub const parse = parser.parse; + +// pub const defaultUnparser = ... diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig new file mode 100644 index 0000000..08bf8ba --- /dev/null +++ b/src/zisp/io/Parser.zig @@ -0,0 +1,847 @@ +// === Syntax === +// +// See docs/parser.md and spec/syntax.bnf to understand the syntax. +// +// +// === Trampolining strategy === +// +// Instead of using recursion directly, the parser is written in a "trampoline" +// style, which ensures that parse depth is not limited by stack size. +// +// All state and context is passed around via a struct pointer. The parser has +// a main loop, which calls a function as dictated by parser.context.next, and +// the function may update the state to have another function called next. +// +// If a function wants to call a recursive subroutine, it pushes some of the +// current context onto a stack, including what function the subroutine should +// return to, and then updates the state to instruct the main loop to call one +// of the entry point subroutines. +// +// If a function wants to make the parser return, either from a subroutine or +// from the main loop, it sets the .result field, and tries to pop the saved +// context. If the context stack was empty, the main loop returns. +// +// +// === Buffering === +// +// For efficiency, call the parser on an input stream with implicit buffering. +// +// The parser does not use its own buffer, beyond one character that may be +// written back into the unread_char field, which is checked at the end to +// ensure it's nothing other than a trailing blank or comment. +// +// This lack of buffering is to ensure that the parser never reads more bytes +// from the input than what it needs to parse a datum. Consider the input: +// +// (a b c) (x y z) +// +// If we used proper buffering, like reading up to 4K bytes per read, then the +// whole stream would be consumed at once before it's parsed. Then, the parser +// would return the first datum, and the rest of the stream would be lost. The +// parser would need some way to reset the input stream's read head to the end +// of the first datum, but not all stream types may support this. +// +// Although in a scenario like this it would be wise to create a single Parser +// instance to call multiple times (in which case it could continue using its +// internal buffer from where it left), this may not always be practical. +// +// One could even have an input stream with Zisp s-expressions and other data +// intertwined, in which case this lack of buffering is also crucial, since one +// needs to alternate between calling the Zisp parser and some other parser on +// the same input stream. +// + +const builtin = @import("builtin"); +const std = @import("std"); + +const lib = @import("../lib.zig"); +const value = @import("../value.zig"); + +const List = std.ArrayListUnmanaged; + +const Value = value.Value; + +const Parser = @This(); + +const is_test = builtin.is_test; +const is_debug = builtin.mode == .Debug; +const detailed_debug = false; + +// zig fmt: off +const DOT = value.rune.pack("DOT"); +const COLON = value.rune.pack("COLON"); +const JOIN = value.rune.pack("JOIN"); +const LABEL = value.rune.pack("LABEL"); +const HASH = value.rune.pack("HASH"); +const QUOTE = value.rune.pack("QUOTE"); +const GRAVE = value.rune.pack("GRAVE"); +const COMMA = value.rune.pack("COMMA"); +const SQUARE = value.rune.pack("SQUARE"); +const BRACE = value.rune.pack("BRACE"); +const VOID = value.rune.packForced(""); +const LSTAIL = value.sstr.pack("."); +// zig fmt: on + +// We could implement an optimization where we swap in a dummy cons when the +// parser is handling a commented-out datum, but this would require changes to +// the algorithm and doesn't seem very important, so it's not implemented. + +const Cons = *const fn (v1: Value, v2: Value) Value; + +fn dummyCons(v1: Value, v2: Value) Value { + _ = v1; + _ = v2; + return value.undef; +} + +const real_cons = &value.pair.cons; +const fake_cons = &dummyCons; + +pub const Error = enum { + InvalidCharacter, + UnclosedString, + UnexpectedEof, + UnicodeError, + OutOfRange, + ReadError, +}; + +pub const Context = struct { + // What to do next. + next: ?Fn = undefined, + // For storing a context value, like accumulated list elements. + val: Value = undefined, + // For storing a context char, like list opening bracket. + char: u8 = undefined, +}; + +pub const Alloc = struct { + stack: std.mem.Allocator, + chars: std.mem.Allocator, +}; + +alloc: Alloc, +input: std.io.AnyReader = undefined, +context: Context = .{}, +stack: List(Context) = undefined, +chars: List(u8) = undefined, +cons: Cons = real_cons, +result: Value = undefined, +unread_char: ?u8 = null, +err_msg: []const u8 = undefined, + +pub fn init( + alloc: Alloc, + init_stack_capacity: usize, + init_chars_capacity: usize, +) !Parser { + var p: Parser = .{ .alloc = alloc }; + p.stack = try .initCapacity(alloc.stack, init_stack_capacity); + p.chars = try .initCapacity(alloc.chars, init_chars_capacity); + return p; +} + +pub fn deinit(p: *Parser) void { + p.stack.deinit(p.alloc.stack); + p.chars.deinit(p.alloc.chars); +} + +// +// Input reading & unreading +// + +fn read(p: *Parser) !?u8 { + if (is_debug and p.unread_char != null) { + std.debug.panic( + "Called read() while there was an unused character! '{c}'", + .{p.unread_char.?}, + ); + } + const c = p.input.readByte() catch |e| switch (e) { + error.EndOfStream => return null, + else => return p.err(.ReadError, "???"), + }; + if (detailed_debug) { + std.debug.print("{c}", .{c}); + } + return c; +} + +fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { + return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg); +} + +fn unread(p: *Parser, c: u8) void { + p.unread_char = c; +} + +fn getUnread(p: *Parser) ?u8 { + const c = p.unread_char orelse return null; + p.unread_char = null; + return c; +} + +// +// String accumulation & creation +// + +fn addChar(p: *Parser, c: u8) !void { + try p.chars.append(p.alloc.chars, c); +} + +fn getBareString(p: *Parser) Value { + defer p.chars.clearRetainingCapacity(); + return if (p.chars.items.len <= 6) + value.sstr.pack(p.chars.items) + else + value.istr.intern(p.chars.items, false); +} + +fn getQuotedString(p: *Parser) Value { + defer p.chars.clearRetainingCapacity(); + return if (p.chars.items.len <= 6) + value.sstr.packQuoted(p.chars.items) + else + value.istr.intern(p.chars.items, true); +} + +fn getRune(p: *Parser) Value { + defer p.chars.clearRetainingCapacity(); + return value.rune.pack(p.chars.items); +} + +// +// Stack management & control flow +// + +const Fn = enum { + parseUnit, + parseDatum, + endUnit, + returnContext, + endFirstDatum, + endJoinDatum, + parseJoin, + parseHashDatum, + endHashDatum, + parseRuneDatum, + endRuneDatum, + endLabelDatum, + continueList, + endImproperList, + closeImproperList, + endQuoteExpr, +}; + +fn call(p: *Parser, f: Fn) !void { + try switch (f) { + .parseUnit => parseUnit(p), + .parseDatum => parseDatum(p), + .endUnit => endUnit(p), + .returnContext => returnContext(p), + .endFirstDatum => endFirstDatum(p), + .endJoinDatum => endJoinDatum(p), + .parseJoin => parseJoin(p), + .parseHashDatum => parseHashDatum(p), + .endHashDatum => endHashDatum(p), + .parseRuneDatum => parseRuneDatum(p), + .endRuneDatum => endRuneDatum(p), + .endLabelDatum => endLabelDatum(p), + .continueList => continueList(p), + .endImproperList => endImproperList(p), + .closeImproperList => closeImproperList(p), + .endQuoteExpr => endQuoteExpr(p), + }; +} + +pub fn run(p: *Parser, input: std.io.AnyReader) !Value { + p.input = input; + p.context.next = .parseUnit; + while (p.context.next) |next| { + if (detailed_debug) printStack(p); + try p.call(next); + } + if (p.unread_char) |_| { + return p.err(.InvalidCharacter, "top-level"); + } + return p.result; +} + +fn printStack(p: *Parser) void { + const stack = p.stack.items; + std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{ + stack.len, + p.context.next, + p.context.char, + p.unread_char orelse '_', + }); + if (stack.len > 0) { + var i = stack.len; + while (i > 0) : (i -= 1) { + const prev = stack[i - 1]; + std.debug.print("{}:{any} ctx:'{c}'\n", .{ + i - 1, + prev.next, + prev.char, + }); + } + } +} + +fn err( + p: *Parser, + comptime e: Error, + comptime msg: []const u8, +) error{ParseError} { + p.err_msg = @tagName(e) ++ " at: " ++ msg; + return error.ParseError; +} + +fn push(p: *Parser, next: Fn) !void { + try p.stack.append(p.alloc.stack, .{ .next = next }); +} + +fn pushContext(p: *Parser, next: Fn) !void { + try p.stack.append(p.alloc.stack, .{ + .next = next, + .val = p.context.val, + .char = p.context.char, + }); +} + +fn ret(p: *Parser) void { + if (p.stack.pop()) |c| { + p.context = c; + } else { + p.context.next = null; + } +} + +fn subr(p: *Parser, start: Fn, next: Fn) !void { + try p.pushContext(next); + p.context.next = start; +} + +fn jump(p: *Parser, next: Fn, val: ?Value) void { + if (val) |v| p.result = v; + p.context.next = next; +} + +fn abort(p: *Parser, next: Fn, unread_c: u8) void { + p.result = VOID; + p.unread_char = unread_c; + p.context.next = next; +} + +fn retval(p: *Parser, val: Value) void { + p.result = val; + p.ret(); +} + +// +// Start parser functions +// + +fn parseUnit(p: *Parser) !void { + var c1 = p.getUnread() orelse try p.read(); + while (c1) |c| : (c1 = try p.read()) { + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => try p.push(.parseUnit), + .no => { + p.unread(c); + return p.subr(.parseDatum, .endUnit); + }, + } + } + return p.retval(value.eof.eof); +} + +fn endUnit(p: *Parser) !void { + const c = p.getUnread() orelse return p.ret(); + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => return skipUnitAndReturn(p), + .no => p.unread(c), + } + return p.ret(); +} + +fn skipUnitAndReturn(p: *Parser) !void { + p.context.val = p.result; + return p.subr(.parseUnit, .returnContext); +} + +fn returnContext(p: *Parser) !void { + return p.retval(p.context.val); +} + +fn parseDatum(p: *Parser) !void { + return parseOneDatum(p, p.getUnread().?, .endFirstDatum); +} + +fn endFirstDatum(p: *Parser) !void { + if (p.result.eq(VOID)) { + return p.ret(); + } + return parseJoin(p); +} + +fn parseJoin(p: *Parser) !void { + const c = p.getUnread() orelse try p.read() orelse return p.ret(); + switch (c) { + '.', ':' => { + p.context.char = c; + p.unread(try p.readNoEof("join datum")); + }, + else => { + p.context.char = 0; + p.unread(c); + }, + } + p.context.val = p.result; + return p.subr(.parseDatum, .endJoinDatum); +} + +fn endJoinDatum(p: *Parser) !void { + const prev = p.context.val; + const join = p.context.char; + if (p.result.eq(VOID)) { + if (join == 0) { + return p.retval(prev); + } else { + return p.err(.InvalidCharacter, "join datum"); + } + } + const rune = switch (join) { + 0 => JOIN, + '.' => DOT, + ':' => COLON, + else => unreachable, + }; + const joined = p.cons(rune, p.cons(prev, p.result)); + return p.jump(.parseJoin, joined); +} + +fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void { + if (isBareChar(c) or c == '.') { + return p.jump(next, try parseBareString(p, c)); + } + return parseCladDatum(p, c, next); +} + +fn parseBareString(p: *Parser, c: u8) !Value { + const allow_dots = std.ascii.isDigit(c) or switch (c) { + '.', '+', '-' => true, + else => false, + }; + try p.addChar(c); + return parseBareStringRest(p, allow_dots); +} + +fn parseBareEscString(p: *Parser) !Value { + try p.addChar(try parseBareEsc(p)); + return parseBareStringRest(p, false); +} + +fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value { + while (try p.read()) |c| { + if (isBareChar(c) or (allow_dots and c == '.')) { + try p.addChar(c); + } else if (c == '\\') { + try p.addChar(try parseBareEsc(p)); + } else { + p.unread(c); + break; + } + } + return p.getBareString(); +} + +fn parseBareEsc(p: *Parser) !u8 { + const c = try p.readNoEof("bare escape"); + if (isBareEsc(c)) { + return c; + } else { + return p.err(.InvalidCharacter, "bare escape"); + } +} + +fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { + if (c == '\\') { + return p.jump(next, try parseBareEscString(p)); + } + if (c == '"') { + return p.jump(next, try parseQuotedString(p, '"')); + } + if (c == '|') { + return p.jump(next, try parseQuotedString(p, '|')); + } + return switch (c) { + '#' => parseHashExpression(p, next), + '(', '[', '{' => parseList(p, c, next), + '\'', '`', ',' => parseQuoteExpr(p, c, next), + else => p.abort(next, c), + }; +} + +fn parseQuotedString(p: *Parser, close: u8) !Value { + while (try p.read()) |c| { + if (c == close) { + return p.getQuotedString(); + } + if (c != '\\') { + try p.addChar(c); + } else { + try parseQuotedEsc(p, close); + } + } + return error.UnclosedString; +} + +fn parseQuotedEsc(p: *Parser, close: u8) !void { + const c = try p.readNoEof("quoted escape"); + if (c == close) { + return p.addChar(close); + } + if (c == 'u') { + return parseUniHexHandleErrors(p); + } + try p.addChar(switch (c) { + '\\' => c, + '0' => 0, + 'a' => 7, + 'b' => 8, + 't' => 9, + 'n' => 10, + 'v' => 11, + 'f' => 12, + 'r' => 13, + 'e' => 27, + 'x' => try parseHexByte(p, "hex escape"), + else => return p.err(.InvalidCharacter, "quoted escape"), + }); +} + +fn parseUniHexHandleErrors(p: *Parser) !void { + return parseUniHex(p) catch |e| switch (e) { + error.Utf8CannotEncodeSurrogateHalf => p.err( + .UnicodeError, + "unicode escape", + ), + else => e, + }; +} + +fn parseUniHex(p: *Parser) !void { + const msg = "unicode escape"; + + if (try p.readNoEof(msg) != '{') { + return p.err(.InvalidCharacter, msg); + } + + const uc = try parseHex(p, u21, msg); + const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); + if (c != '}') { + return p.err(.InvalidCharacter, msg); + } + + const n = try std.unicode.utf8CodepointSequenceLength(uc); + const buf = try p.chars.addManyAsSlice(p.alloc.chars, n); + _ = try std.unicode.utf8Encode(uc, buf); +} + +fn parseHashExpression(p: *Parser, next: Fn) !void { + const c = try p.readNoEof("hash expression"); + if (try checkBlanks(p, c) != .no) { + return p.err(.InvalidCharacter, "hash expression"); + } + if (std.ascii.isAlphabetic(c)) { + const r = try parseRune(p, c); + return parseRuneEnd(p, r, next); + } + if (c == '%') { + const l = try parseLabel(p); + return parseLabelEnd(p, l, next); + } + p.unread(c); + return p.subr(.parseHashDatum, next); +} + +fn parseHashDatum(p: *Parser) !void { + return parseCladDatum(p, p.getUnread().?, .endHashDatum); +} + +fn endHashDatum(p: *Parser) !void { + if (p.result.eq(VOID)) { + return p.err(.InvalidCharacter, "hash datum"); + } + return p.retval(p.cons(HASH, p.result)); +} + +fn parseRune(p: *Parser, c1: u8) !Value { + try p.addChar(c1); + var len: usize = 1; + while (try p.read()) |c| : (len += 1) { + if (len == 6 or !std.ascii.isAlphanumeric(c)) { + p.unread(c); + return p.getRune(); + } + try p.addChar(c); + } + return p.getRune(); +} + +fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { + const c = p.getUnread() orelse return p.jump(next, r); + if (c == '\\') { + return p.jump(next, p.cons(r, try parseBareString(p, c))); + } + if (c == '"') { + return p.jump(next, p.cons(r, try parseQuotedString(p, '"'))); + } + if (c == '|') { + return p.jump(next, p.cons(r, try parseQuotedString(p, '|'))); + } + p.unread(c); + switch (c) { + '#', '(', '[', '{', '\'', '`', ',' => { + try p.push(next); + p.context.val = r; + // Use jump to prevent recursion. + return p.jump(.parseRuneDatum, null); + }, + else => return p.jump(next, r), + } +} + +fn parseRuneDatum(p: *Parser) !void { + return parseCladDatum(p, p.getUnread().?, .endRuneDatum); +} + +fn endRuneDatum(p: *Parser) !void { + if (p.result.eq(VOID)) { + p.retval(p.context.val); + } + return p.retval(p.cons(p.context.val, p.result)); +} + +fn parseLabel(p: *Parser) !Value { + const label = try parseHex(p, u48, "datum label"); + return value.fixnum.pack(label); +} + +fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void { + const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label"); + if (c == '%') { + return p.jump(next, p.cons(LABEL, l)); + } + if (c == '=') { + try p.push(next); + p.context.val = l; + return p.subr(.parseDatum, .endLabelDatum); + } + return p.err(.InvalidCharacter, "datum label"); +} + +fn endLabelDatum(p: *Parser) !void { + if (p.result.eq(VOID)) { + return p.err(.InvalidCharacter, "label datum"); + } + return p.retval(p.cons(LABEL, p.cons(p.context.val, p.result))); +} + +fn parseList(p: *Parser, open: u8, next: Fn) !void { + const head = switch (open) { + '(' => value.nil.nil, + '[' => p.cons(SQUARE, value.nil.nil), + '{' => p.cons(BRACE, value.nil.nil), + else => unreachable, + }; + const close: u8 = switch (open) { + '(' => ')', + '[' => ']', + '{' => '}', + else => unreachable, + }; + while (try p.read()) |c| { + if (c == close) { + return p.jump(next, head); + } + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => { + try listParserSetup(p, head, close, next); + return p.subr(.parseUnit, .parseUnit); + }, + .no => { + try listParserSetup(p, head, close, next); + p.unread(c); + return p.jump(.parseDatum, null); + }, + } + } + return p.err(.UnexpectedEof, "list"); +} + +fn listParserSetup(p: *Parser, head: Value, close: u8, next: Fn) !void { + try p.push(next); + p.context.val = head; + p.context.char = close; + try p.pushContext(.continueList); +} + +fn continueList(p: *Parser) !void { + const close = p.context.char; + + if (p.result.eq(VOID)) { + const c = p.getUnread().?; + if (c == close) { + return endList(p); + } + return p.err(.InvalidCharacter, "list"); + } + + if (p.result.eq(LSTAIL)) { + return p.subr(.parseUnit, .endImproperList); + } + + p.context.val = p.cons(p.result, p.context.val); + + var c1 = p.getUnread() orelse try p.read(); + while (c1) |c| : (c1 = try p.read()) { + if (c == close) { + return endList(p); + } + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => { + try p.pushContext(.continueList); + return p.subr(.parseUnit, .parseUnit); + }, + .no => { + p.unread(c); + return p.subr(.parseDatum, .continueList); + }, + } + } + return p.err(.UnexpectedEof, "list"); +} + +fn endList(p: *Parser) !void { + return p.retval(lib.list.reverse(p.context.val)); +} + +fn endImproperList(p: *Parser) !void { + if (p.result.eq(VOID)) { + return p.err(.InvalidCharacter, "list tail"); + } + p.context.val = lib.list.reverseWithTail(p.context.val, p.result); + return closeImproperList(p); +} + +fn closeImproperList(p: *Parser) !void { + const result = p.context.val; + const close = p.context.char; + var c1 = p.getUnread() orelse try p.read(); + while (c1) |c| : (c1 = try p.read()) { + if (c == close) { + return p.retval(result); + } + switch (try checkBlanks(p, c)) { + .yes => {}, + .skip_unit => return p.subr(.parseUnit, .closeImproperList), + .no => return p.err(.InvalidCharacter, "after list tail"), + } + } + return p.err(.UnexpectedEof, "after list tail"); +} + +fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void { + const q = switch (c1) { + '\'' => QUOTE, + '`' => GRAVE, + ',' => COMMA, + else => unreachable, + }; + + try p.push(next); + p.context.val = q; + p.unread(try p.readNoEof("quote expression")); + return p.subr(.parseDatum, .endQuoteExpr); +} + +fn endQuoteExpr(p: *Parser) !void { + if (p.result.eq(VOID)) { + return p.err(.InvalidCharacter, "quote expression datum"); + } + return p.retval(p.cons(p.context.val, p.result)); +} + +// Helpers + +fn checkBlanks(p: *Parser, c: u8) !enum { yes, skip_unit, no } { + switch (c) { + '\t'...'\r', ' ' => return .yes, + ';' => { + const c2 = try p.read() orelse return .yes; + if (c2 == '\n') return .yes; + if (c2 == '~') return .skip_unit; + while (try p.read()) |c3| if (c3 == '\n') break; + return .yes; + }, + else => return .no, + } +} + +fn isBareChar(c: u8) bool { + return switch (c) { + // zig fmt: off + 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' , + '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true, + // zig fmt: on + else => false, + }; +} + +fn isBareEsc(c: u8) bool { + return switch (c) { + 33...126 => true, + else => false, + }; +} + +fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { + var uc: T = undefined; + + const c1 = try p.readNoEof(emsg); + uc = try parseHexDigit(p, c1, emsg); + + while (try p.read()) |c| { + if (!std.ascii.isHex(c)) { + p.unread(c); + return uc; + } + const shl = std.math.shlExact; + uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg); + uc |= try parseHexDigit(p, c, emsg); + } + return uc; +} + +fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 { + const h1 = try p.readNoEof(emsg); + const h2 = try p.readNoEof(emsg); + const hi = try parseHexDigit(p, h1, emsg); + const lo = try parseHexDigit(p, h2, emsg); + return hi << 4 | lo; +} + +fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 { + return switch (c) { + '0'...'9' => c - '0', + 'A'...'F' => c - 'A' + 10, + 'a'...'f' => c - 'a' + 10, + else => p.err(.InvalidCharacter, emsg), + }; +} diff --git a/src/zisp/io/decoder.zig b/src/zisp/io/decoder.zig new file mode 100644 index 0000000..eb27e20 --- /dev/null +++ b/src/zisp/io/decoder.zig @@ -0,0 +1 @@ +// wip diff --git a/src/zisp/io/encoder.zig b/src/zisp/io/encoder.zig new file mode 100644 index 0000000..eb27e20 --- /dev/null +++ b/src/zisp/io/encoder.zig @@ -0,0 +1 @@ +// wip diff --git a/src/zisp/io/parser.zig b/src/zisp/io/parser.zig new file mode 100644 index 0000000..cb96e44 --- /dev/null +++ b/src/zisp/io/parser.zig @@ -0,0 +1,75 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +const value = @import("../value.zig"); + +const Parser = @import("Parser.zig"); + +const Value = value.Value; + +const is_test = builtin.is_test; +const is_debug = builtin.mode == .Debug; + +const ctx_size = @sizeOf(Parser.Context); + +pub fn Sfa(init_stack: usize, init_chars: usize) type { + return struct { + const Self = @This(); + + pub const init_stack_capacity: usize = init_stack; + pub const init_chars_capacity: usize = init_chars; + + pub const init_stack_mem: usize = init_stack * ctx_size; + pub const init_chars_mem: usize = init_chars; + + stack: std.heap.StackFallbackAllocator(init_stack_mem), + chars: std.heap.StackFallbackAllocator(init_chars_mem), + + pub fn init(fallback: std.mem.Allocator) Self { + return .{ + .stack = std.heap.stackFallback(init_stack_mem, fallback), + .chars = std.heap.stackFallback(init_chars_mem, fallback), + }; + } + + pub fn allocator(s: *Self) Parser.Alloc { + return .{ .stack = s.stack.get(), .chars = s.chars.get() }; + } + }; +} + +pub const DefaultSfa = Sfa(32, 2048); + +pub fn initSfa(alloc: anytype) !Parser { + const SfaType = @typeInfo(@TypeOf(alloc)).pointer.child; + return Parser.init( + alloc.allocator(), + SfaType.init_stack_capacity, + SfaType.init_chars_capacity, + ); +} + +pub fn parse(input: std.io.AnyReader) Value { + return _parse(input, true); +} + +pub fn _parse( + input: std.io.AnyReader, + comptime panic: bool, +) if (panic) Value else error{ParseError}!Value { + const alloc = std.heap.smp_allocator; + var sfa = DefaultSfa.init(alloc); + var p = initSfa(&sfa) catch |e| if (panic) @panic("OOM") else return e; + defer p.deinit(); + return p.run(input) catch |e| if (panic) { + const format = "Parse error: {s}, unread_char: {s}\n"; + const unread: [4]u8 = + if (p.unread_char) |c| + "0x".* ++ std.fmt.hex(c) + else + "none".*; + std.debug.panic(format, .{ p.err_msg, unread }); + } else { + return e; + }; +} diff --git a/src/zisp/io/reader.zig b/src/zisp/io/reader.zig new file mode 100644 index 0000000..3465cb3 --- /dev/null +++ b/src/zisp/io/reader.zig @@ -0,0 +1,10 @@ +// See parse.zig for details. + +const parser = @import("parser.zig"); +const decoder = @import("decoder.zig"); + +const Value = @import("../value.zig").Value; + +pub fn readCode(input: []const u8) Value { + return decoder.decode(parser.parse(input, .code)); +} diff --git a/src/zisp/io/unparser.zig b/src/zisp/io/unparser.zig new file mode 100644 index 0000000..d703182 --- /dev/null +++ b/src/zisp/io/unparser.zig @@ -0,0 +1,122 @@ +const std = @import("std"); + +const value = @import("../value.zig"); + +const istr = value.istr; +const seq = value.seq; + +const ShortString = value.ShortString; +const OtherTag = value.OtherTag; +const Value = value.Value; + +pub fn unparse(w: anytype, v: Value) anyerror!void { + try if (value.double.check(v)) + unparseDouble(w, v) + else if (value.fixnum.check(v)) + unparseFixnum(w, v) + else if (value.ptr.checkZisp(v)) + unparseHeap(w, v) + else + unparseOther(w, v); +} + +fn unparseDouble(w: anytype, v: Value) !void { + _ = w; + _ = v; + @panic("not implemented"); +} + +fn unparseFixnum(w: anytype, v: Value) !void { + _ = w; + _ = v; + @panic("not implemented"); +} + +fn unparseHeap(w: anytype, v: Value) !void { + const p, const t = value.ptr.unpack(v); + try switch (t) { + .pair => unparsePair(w, @ptrCast(p)), + .seq => unparseSeq(w, @ptrCast(p)), + else => @panic("not implemented"), + }; +} + +fn unparseOther(w: anytype, v: Value) !void { + try switch (v.other.tag) { + .rune => unparseRune(w, v), + .sstr => unparseSstr(w, v), + .qstr => unparseQstr(w, v), + .char => unparseChar(w, v), + .misc => unparseMisc(w, v), + }; +} + +fn unparseRune(w: anytype, v: Value) !void { + const name = value.rune.unpack(v); + try w.writeByte('#'); + try w.writeAll(name.constSlice()); +} + +fn unparseSstr(w: anytype, v: Value) !void { + const str = value.sstr.unpack(v); + try w.writeAll(str.constSlice()); +} + +fn unparseQstr(w: anytype, v: Value) !void { + const str = value.sstr.unpack(v); + try w.writeByte('"'); + try w.writeAll(str.constSlice()); + try w.writeByte('"'); +} + +fn unparseChar(w: anytype, v: Value) !void { + var buf: [4]u8 = undefined; + const len = try std.unicode.utf8Encode(v.char.value, &buf); + try w.writeAll(buf[0..len]); +} + +fn unparseMisc(w: anytype, v: Value) !void { + try switch (v.misc.value) { + .f => w.writeAll("#f"), + .t => w.writeAll("#t"), + .nil => w.writeAll("()"), + .eof => w.writeAll("#eof"), + .undef => w.writeAll("#undef"), + }; +} + +fn unparsePair(w: anytype, p: *[2]Value) !void { + try w.writeByte('('); + try unparse(w, p[0]); + var cdr = p[1]; + while (value.pair.check(cdr)) : (cdr = value.pair.cdr(cdr)) { + try w.writeByte(' '); + try unparse(w, value.pair.car(cdr)); + } + if (!value.nil.check(cdr)) { + try w.writeByte(' '); + try w.writeByte('.'); + try w.writeByte(' '); + try unparse(w, cdr); + } + try w.writeByte(')'); +} + +fn unparseSeq(w: anytype, p: *seq.Header) !void { + const h = istr.getHeaderFromPtr(@ptrCast(p)); + switch (h.type) { + .string => try unparseString(w, h), + else => @panic("not implemented"), + } +} + +fn unparseString(w: anytype, h: *seq.Header) !void { + const info = h.info.string; + if (info.quoted) { + try w.writeByte('"'); + } + try w.writeAll(h.bytes()); + if (info.quoted) { + try w.writeByte('"'); + } +} diff --git a/src/zisp/io/writer.zig b/src/zisp/io/writer.zig new file mode 100644 index 0000000..eb27e20 --- /dev/null +++ b/src/zisp/io/writer.zig @@ -0,0 +1 @@ +// wip diff --git a/src/zisp/lib.zig b/src/zisp/lib.zig new file mode 100644 index 0000000..7752110 --- /dev/null +++ b/src/zisp/lib.zig @@ -0,0 +1 @@ +pub const list = @import("lib/list.zig"); diff --git a/src/zisp/lib/list.zig b/src/zisp/lib/list.zig new file mode 100644 index 0000000..9d6a6bc --- /dev/null +++ b/src/zisp/lib/list.zig @@ -0,0 +1,20 @@ +const value = @import("../value.zig"); + +const Value = value.Value; + +pub fn reverse(list: Value) Value { + return reverseWithTail(list, value.nil.nil); +} + +pub fn reverseWithTail(list: Value, tail: Value) Value { + var head = list; + var result = tail; + while (!value.nil.check(head)) { + value.pair.assert(head); + const car = value.pair.car(head); + const cdr = value.pair.cdr(head); + result = value.pair.cons(car, result); + head = cdr; + } + return result; +} diff --git a/src/zisp/value.zig b/src/zisp/value.zig new file mode 100644 index 0000000..e5b78ec --- /dev/null +++ b/src/zisp/value.zig @@ -0,0 +1,326 @@ +// +// === NaN Packing Strategy === +// +// Format of a double, in Zig least-to-most significant field order: +// +// { fraction: u52, exponent: u11, sign: u1 } +// +// When the exponent bits are all set, it's either a NaN or an Infinity. +// +// For value packing, almost all remaining 53 bits are available, giving us +// about 2^53 values, except for the four following bit patterns: +// +// *** FORBIDDEN VALUES *** +// +// 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 } +// +// 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 } +// +// 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 } +// +// 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 } +// +// The abbreviation "cqNaN" stands for canonical quiet NaN. +// +// Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest +// being zero. The fraction MSb is also called the is_quiet flag, because it +// demarcates quiet NaNs. The rest being zero makes it the canonical qNaN. +// +// The positive and negative cqNaN are the *only* NaN values that can actually +// be returned by any FP operations, which is why we don't use them to pack +// values; we want to be able to represent NaN in Zisp as a double. +// +// Beyond those four bit patterns, all values with a maximum exponent (all bits +// set) are fair game for representing other values, so 2^53 - 4 possibilities. +// +// We split those 2^53 - 4 available values into four groups, each allowing for +// 2^51 - 1 different values to be encoded in them: +// +// sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1 +// +// sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2 +// +// sign = 0, quiet = 1 :: Pointers +// +// sign = 0, quiet = 0 :: Others +// +// +// === Fixnums === +// +// Negative fixnums actually represent themselves without needing to go through +// any transformation. Only the smallest 52-bit signed negative, -2^51, cannot +// be represented, as it would step on forbidden value 1, Negative cqNaN. +// +// Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to +// make it one operation together with the NaN masking) to avoid the all-zero +// payload value, which would step on forbidden value 2, Negative Infinity. +// +// +// === Pointers === +// +// Pointers are further subdivided as follows based on the remaining 51 bits, +// with the first three bits used as a sort of tag: +// +// 000 :: Pointer to Zisp heap object (string, vector, etc.) +// +// 001 :: Weak pointer to Zisp heap object +// +// 01. :: Undefined (may be used by GC to flag pointers for some reason?) +// +// 1.. :: Foreign pointer (basically, a 50-bit fixnum of another type) +// +// This means pointers to the Zisp heap are 48 bits. Of those, we only really +// need 45, since 64-bit platforms are in practice limited to 48-bit addresses, +// and allocations happen at 8-byte boundaries, meaning the least significant 3 +// bits are always unset. Thus, we are able to store yet another 3-bit tag in +// those 48-bit pointers alongside the actual, multiple-of-8, 48-bit address. +// +// The forbidden value 3, Positive cqNaN, is avoided thanks to the fact that a +// regular Zisp heap pointer can never be null. Weak pointers, which can be +// null, avoid stepping on that forbidden value thanks to bit 49 being set. +// +// Foreign pointers allow storing arbitrary pointers, or integers basically, of +// up to 50 bits, without any further definition in Zisp of what they mean. +// +// +// === Other values === +// +// This 51-bit range is divided as follows, based on the high bits: +// +// 000 :: Rune +// +// 001 :: Short string +// +// 010 :: Short string literal +// +// 011 :: Unicode code point +// +// 100 :: Singleton values +// +// 101, 110, 111 :: Undefined +// +// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax. +// +// Zisp strings are immutable. Any string fitting into 6 bytes or less will be +// stored as an immediate value, not requiring any heap allocation or interning. +// It's implicitly interned, so to speak. This includes the empty string. +// +// The null byte serves as a terminator for strings shorter than 6 bytes, and +// therefore cannot appear in these strings; a string that short but actually +// containing a null byte will need to be heap allocated like other strings. +// +// There may also be strings that are this short, but ended up on the heap due +// to being uninterned. Interning them will return the equivalent short string +// as an immediate. +// +// The separate type for a short string *literal* is for an efficiency hack in +// the parser; see commentary there. +// +// Unicode code points need a maximum of 21 bits, yet we have 48 available. +// This may be exploited for a future extension. +// +// Similarly, it's very unlikely that we will ever need more than a handful of +// singleton values (false, true, nil, and so on). As such, this range of bit +// patterns may be subdivided in the future. Right now, only the lowest 8 bits +// are allowed to be set, with the other 40 being reserved, so there's a limit +// of 256 singleton values that can be defined. +// +// And top of that, we have three more 48-bit value ranges that are unused! +// +// The forbidden value 4, Positive Infinity, would be the "empty string rune" +// but that isn't allowed anyway, so all is fine. +// + +// Here's the original article explaining the strategy: +// +// https://tkammer.de/zisp/notes/nan.html +// +// More about runes: +// +// https://tkammer.de/zisp/notes/symbols.html +// +// Note: Packed structs are least-to-most significant, so the order of fields +// must be reversed relative to a typical big-endian illustration of the bit +// patterns of IEEE 754 double-precision floating point numbers. + +const std = @import("std"); + +pub const double = @import("value/double.zig"); +pub const fixnum = @import("value/fixnum.zig"); + +pub const ptr = @import("value/ptr.zig"); +pub const seq = @import("value/seq.zig"); + +pub const rune = @import("value/rune.zig"); +pub const sstr = @import("value/sstr.zig"); +pub const char = @import("value/char.zig"); +pub const boole = @import("value/boole.zig"); +pub const nil = @import("value/nil.zig"); +pub const eof = @import("value/eof.zig"); + +pub const pair = @import("value/pair.zig"); +pub const istr = @import("value/istr.zig"); + +// To fill up the u11 exponent part of a NaN. +const FILL = 0x7ff; + +// Used when dealing with runes and short strings. +pub const ShortString = std.BoundedArray(u8, 6); + +pub const OtherTag = enum(u3) { rune, sstr, qstr, char, misc }; + +pub const MiscValue = enum(u8) { f, t, nil, eof, undef = 255 }; + +pub const undef = Value{ .misc = .{ .value = .undef } }; + +/// Represents a Zisp value/object. +pub const Value = packed union { + /// To get the value as a regular double. + double: f64, + + /// To get an agnostic value for direct comparison with == i.e. eq?. + bits: u64, + + // Some of the structs below are just for inspection, whereas others are to + // initialize a new value of that category as well as read it that way. + + /// Inspection through the lens of the general IEEE 754 double layout. + ieee: packed struct { + rest: u51, + quiet: bool, + exp: u11, + sign: bool, + }, + + /// For initializing and reading fixnums. + fixnum: packed struct { + code: u51, + negative: bool, + _: u11 = FILL, + _is_fixnum: bool = true, + }, + + /// Inspection through the lens of the ptr category. + ptr: packed struct { + _value: u48, + is_weak: bool, + _unused: bool, + is_foreign: bool, + _is_ptr: bool, + _: u11, + _is_fixnum: bool, + }, + + /// For initializing and reading foreign pointers. + fptr: packed struct { + value: u50, + _is_foreign: bool = true, + _is_ptr: bool = true, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// For initializing and reading Zisp heap pointers. + zptr: packed struct { + tagged_value: u48, + is_weak: bool = false, + _unused: bool = false, + _is_foreign: bool = false, + _is_ptr: bool = true, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// Inspection as an other (non-fixnum, non-pointer) packed value. + other: packed struct { + _value: u48, + tag: OtherTag, + _is_ptr: bool, + _: u11, + _is_ifxnum: bool, + }, + + /// For initializing and reading runes. + rune: packed struct { + // actually [6]u8 but packed struct cannot contain arrays + name: u48, + _tag: OtherTag = .rune, + _is_ptr: bool = false, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// For initializing and reading short strings. + sstr: packed struct { + // actually [6]u8 but packed struct cannot contain arrays + string: u48, + tag: OtherTag, + _is_ptr: bool = false, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// For initializing and reading characters. + char: packed struct { + value: u21, + _reserved: u27 = 0, + _tag: OtherTag = .char, + _is_ptr: bool = false, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// For initializing and reading misc values aka singletons. + misc: packed struct { + value: MiscValue, + _reserved: u40 = 0, + _tag: OtherTag = .misc, + _is_ptr: bool = false, + _: u11 = FILL, + _is_fixnum: bool = false, + }, + + /// Hexdumps the value. + pub fn dump(v: Value) void { + std.debug.dumpHex(std.mem.asBytes(&v)); + } + + /// Checks for bit-equality i.e. == comprison. + pub fn eq(v1: Value, v2: Value) bool { + return v1.bits == v2.bits; + } + + // The following aren't type predicates per se, but rather determine which + // general category the value is in. The exceptions are fixnum and double, + // since those aren't sub-categorized into further types. + + /// Checks for a Zisp double, including: +nan.0, -nan.0, +inf.0, -inf.0 + pub fn isDouble(v: Value) bool { + return v.ieee.exp != FILL or v.ieee.rest == 0; + } + + /// Checks for a non-double Zisp value packed into a NaN. + pub fn isPacked(v: Value) bool { + return !v.isDouble(); + } + + /// Checks for a fixnum. + pub fn isFixnum(v: Value) bool { + return v.isPacked() and v.ieee.sign; + } + + /// Checks for any kind of pointer. + pub fn isPtr(v: Value) bool { + return v.isPacked() and !v.ieee.sign and v.ieee.quiet; + } + + /// Checks for a non-double, non-fixnum, non-pointer Zisp value. + pub fn isOther(v: Value) bool { + return v.isPacked() and !v.ieee.sign and !v.ieee.quiet; + } + + /// Checks for an other type of value based on tag. + pub fn isOtherTag(v: Value, tag: OtherTag) bool { + return v.isOther() and v.other.tag == tag; + } +}; diff --git a/src/zisp/value/boole.zig b/src/zisp/value/boole.zig new file mode 100644 index 0000000..2e94e4d --- /dev/null +++ b/src/zisp/value/boole.zig @@ -0,0 +1,33 @@ +const Value = @import("../value.zig").Value; + +pub const f = Value{ .misc = .{ .value = .f } }; +pub const t = Value{ .misc = .{ .value = .t } }; + +// Zig API + +/// Checks if the value is a boole. +pub fn check(v: Value) bool { + return v.bits == f.bits or v.bits == t.bits; +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not bool"); + } +} + +pub fn pack(b: bool) Value { + return if (b) t else f; +} + +pub fn unpack(v: Value) bool { + assert(v); + return v.bits == t.bits; +} + +// Zisp API + +pub fn pred(v: Value) Value { + return pack(check(v)); +} diff --git a/src/zisp/value/char.zig b/src/zisp/value/char.zig new file mode 100644 index 0000000..09a3034 --- /dev/null +++ b/src/zisp/value/char.zig @@ -0,0 +1,31 @@ +const value = @import("../value.zig"); + +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return v.isOtherTag(.char); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not char"); + } +} + +pub fn pack(c: u21) Value { + return .{ .char = .{ .value = c } }; +} + +pub fn unpack(v: Value) u21 { + assert(v); + return @truncate(v.char.value); +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} diff --git a/src/zisp/value/double.zig b/src/zisp/value/double.zig new file mode 100644 index 0000000..5cfe6ee --- /dev/null +++ b/src/zisp/value/double.zig @@ -0,0 +1,38 @@ +const value = @import("../value.zig"); +const Value = value.Value; + +// Zig API + +/// Checks for a Zisp double (double, +inf, -inf, or canonical NaN). +pub fn check(v: Value) bool { + return v.isDouble(); +} + +/// Asserts check(). +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not double"); + } +} + +pub fn pack(d: f64) Value { + return .{ .double = d }; +} + +pub fn unpack(v: Value) f64 { + assert(v); + return v.double; +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn add(v1: Value, v2: Value) Value { + const d1 = unpack(v1); + const d2 = unpack(v2); + return pack(d1 + d2); +} diff --git a/src/zisp/value/eof.zig b/src/zisp/value/eof.zig new file mode 100644 index 0000000..4b16669 --- /dev/null +++ b/src/zisp/value/eof.zig @@ -0,0 +1,28 @@ +const value = @import("../value.zig"); + +const Value = value.Value; + +pub const eof = Value{ .misc = .{ .value = .eof } }; + +// Zig API + +pub fn check(v: Value) bool { + return v.bits == eof.bits; +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not bool"); + } +} + +// Zisp API + +pub fn get() Value { + return eof; +} + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} diff --git a/src/zisp/value/fixnum.zig b/src/zisp/value/fixnum.zig new file mode 100644 index 0000000..80fb4ae --- /dev/null +++ b/src/zisp/value/fixnum.zig @@ -0,0 +1,84 @@ +const std = @import("std"); +const value = @import("../value.zig"); + +const Value = value.Value; + +// Zig API + +/// Checks for a Zisp fixnum. +pub fn check(v: Value) bool { + return v.isFixnum(); +} + +/// Asserts check(). +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not fixnum"); + } +} + +// See detailed NaN packing docs for why the +/- 1. +pub const min = std.math.minInt(i52) + 1; +pub const max = std.math.maxInt(i52) - 1; + +fn assertValidRange(int: i64) void { + if (int < min) { + std.debug.print("int too small for fixnum: {}\n", .{int}); + @panic("int too small for fixnum"); + } + if (int > max) { + std.debug.print("int too large for fixnum: {}\n", .{int}); + @panic("int too large for fixnum"); + } +} + +fn packNegative(int: i64) Value { + return @bitCast(int); +} + +fn unpackNegative(v: Value) i64 { + return @bitCast(v); +} + +const positive_mask: u64 = 0xfff7ffffffffffff; + +fn packPositive(int: i64) Value { + const uint: u64 = @bitCast(int); + return @bitCast(uint ^ positive_mask); +} + +fn unpackPositive(v: Value) i64 { + const uint: u64 = @bitCast(v); + return @bitCast(uint ^ positive_mask); +} + +pub fn pack(int: i64) Value { + assertValidRange(int); + if (int < 0) { + return packNegative(int); + } else { + return packPositive(int); + } +} + +pub fn unpack(v: Value) i64 { + assert(v); + if (v.fixnum.negative) { + return unpackNegative(v); + } else { + return unpackPositive(v); + } +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn add(v1: Value, v2: Value) Value { + const int1 = unpack(v1); + const int2 = unpack(v2); + return pack(int1 + int2); +} diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig new file mode 100644 index 0000000..abd0447 --- /dev/null +++ b/src/zisp/value/istr.zig @@ -0,0 +1,65 @@ +const std = @import("std"); + +const value = @import("../value.zig"); +const gc = @import("../gc.zig"); + +const ptr = @import("ptr.zig"); +const seq = @import("seq.zig"); + +const Hval = gc.Hval; + +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return ptr.checkZispTag(v, .seq); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not istr"); + } +} + +pub fn intern(str: []const u8, quoted: bool) Value { + if (str.len > value.fixnum.max) { + @panic("String length out of fixnum range."); + } + const header: seq.Header = .{ + .type = .string, + .info = .{ .string = .{ + .enc = .utf8, + .quoted = quoted, + .interned = true, + } }, + .size = @intCast(str.len), + }; + const header_ptr = gc.intern(header, str); + return ptr.pack(@ptrCast(header_ptr), .seq); +} + +pub fn getHeader(v: Value) *seq.Header { + assert(v); + const header_ptr, _ = ptr.unpack(v); + return gc.istrHeader(header_ptr); +} + +pub fn getHeaderFromPtr(p: *Hval) *seq.Header { + return gc.istrHeader(p); +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn len(v: Value) Value { + const l = getHeader(v).size; + if (l > value.fixnum.max) { + @panic("string length out of range"); + } + return value.fixnum.pack(@intCast(l)); +} diff --git a/src/zisp/value/nil.zig b/src/zisp/value/nil.zig new file mode 100644 index 0000000..f95ecad --- /dev/null +++ b/src/zisp/value/nil.zig @@ -0,0 +1,28 @@ +const value = @import("../value.zig"); + +const Value = value.Value; + +pub const nil = Value{ .misc = .{ .value = .nil } }; + +// Zig API + +pub fn check(v: Value) bool { + return v.bits == nil.bits; +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not bool"); + } +} + +// Zisp API + +pub fn get() Value { + return nil; +} + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} diff --git a/src/zisp/value/pair.zig b/src/zisp/value/pair.zig new file mode 100644 index 0000000..6ea1edf --- /dev/null +++ b/src/zisp/value/pair.zig @@ -0,0 +1,55 @@ +const std = @import("std"); + +const value = @import("../value.zig"); +const gc = @import("../gc.zig"); + +const ptr = @import("ptr.zig"); + +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return ptr.checkZispTag(v, .pair); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not pair"); + } +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn cons(v1: Value, v2: Value) Value { + return ptr.pack(@ptrCast(gc.cons(v1, v2)), .pair); +} + +fn getMem(v: Value) *[2]Value { + return @ptrCast(ptr.unpack(v).@"0"); +} + +pub fn car(v: Value) Value { + assert(v); + return getMem(v)[0]; +} + +pub fn cdr(v: Value) Value { + assert(v); + return getMem(v)[1]; +} + +pub fn setcar(v: Value, new: Value) void { + assert(v); + getMem(v)[0] = new; +} + +pub fn setcdr(v: Value, new: Value) void { + assert(v); + getMem(v)[1] = new; +} diff --git a/src/zisp/value/ptr.zig b/src/zisp/value/ptr.zig new file mode 100644 index 0000000..2ed3765 --- /dev/null +++ b/src/zisp/value/ptr.zig @@ -0,0 +1,174 @@ +const std = @import("std"); + +const gc = @import("../gc.zig"); +const value = @import("../value.zig"); + +const Hval = gc.Hval; + +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return v.isPtr(); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not a pointer"); + } +} + +// Foreign Pointers + +pub fn checkForeign(v: Value) bool { + return check(v) and v.ptr.is_foreign; +} + +pub fn assertForeign(v: Value) void { + if (!checkForeign(v)) { + v.dump(); + @panic("not foreign pointer"); + } +} + +pub fn packForeign(int: u50) Value { + return .{ .fptr = .{ .value = int } }; +} + +pub fn unpackForeign(v: Value) u50 { + assertForeign(v); + return v.fptr.value; +} + +// Zisp Pointers + +pub fn checkZisp(v: Value) bool { + return check(v) and !v.ptr.is_foreign; +} + +pub fn assertZisp(v: Value) void { + if (!checkZisp(v)) { + v.dump(); + @panic("not zisp pointer"); + } +} + +pub fn checkWeak(v: Value) bool { + return checkZisp(v) and v.zptr.is_weak; +} + +pub fn assertWeak(v: Value) void { + if (!checkWeak(v)) { + v.dump(); + @panic("not zisp weak pointer"); + } +} + +pub fn checkZispTag(v: Value, tag: Tag) bool { + return checkZisp(v) and unpack(v).@"1" == tag; +} + +pub fn assertZispTag(v: Value, tag: Tag) void { + if (!checkZispTag(v, tag)) { + v.dump(); + @panic("not zisp pointer or wrong tag"); + } +} + +pub fn checkStrong(v: Value) bool { + return checkZisp(v) and !v.zptr.is_weak; +} + +pub fn assertStrong(v: Value) void { + if (!checkStrong(v)) { + v.dump(); + @panic("not zisp strong pointer"); + } +} + +pub fn packZisp(ptr: *Hval, tag: Tag, is_weak: bool) Value { + return .{ .zptr = .{ + .tagged_value = tagPtr(ptr, tag), + .is_weak = is_weak, + } }; +} + +pub fn pack(ptr: *Hval, tag: Tag) Value { + return packZisp(ptr, tag, false); +} + +pub fn packWeak(ptr: *Hval, tag: Tag) Value { + return packZisp(ptr, tag, true); +} + +// Unpacks weak as well; no need for a separate fn. +pub fn unpack(v: Value) struct { *Hval, Tag } { + assertZisp(v); + return untagPtr(v.zptr.tagged_value); +} + +pub fn setWeakNull(v: *Value) void { + assertWeak(v.*); + v.zptr.tagged_value = 0; +} + +pub fn isWeakNull(v: Value) bool { + assertWeak(v); + return v.zptr.tagged_value == 0; +} + +fn tagPtr(ptr: *Hval, tag: Tag) u48 { + const int: usize = @intFromPtr(ptr); + const untagged: u48 = @intCast(int); + return untagged | @intFromEnum(tag); +} + +fn untagPtr(tagged: u48) struct { *Hval, Tag } { + const untagged: u48 = tagged & 0xfffffffffff8; + const ptr: *Hval = @ptrFromInt(untagged); + const int: u3 = @truncate(tagged); + const tag: Tag = @enumFromInt(int); + return .{ ptr, tag }; +} + +pub const Tag = enum(u3) { + /// Pair aka cons cell aka *[2]Value + pair, + /// Sequence of various kinds (16-bit meta, 48-bit length, then data) + seq, + /// Procedure + proc, +}; + +// Zisp API + +pub fn predForeign(v: Value) Value { + return value.boole.pack(checkForeign(v)); +} + +pub fn makeWeak(v: Value) Value { + assertStrong(v); + var copy = v; + copy.zptr.is_weak = true; + return copy; +} + +pub fn predWeak(v: Value) Value { + return value.boole.pack(checkWeak(v)); +} + +pub fn predWeakNull(v: Value) Value { + return value.boole.pack(isWeakNull(v)); +} + +pub fn getWeak(v: Value) Value { + if (isWeakNull(v)) { + return value.boole.f; + } else { + var copy = v; + copy.zptr.is_weak = false; + return copy; + } +} diff --git a/src/zisp/value/rune.zig b/src/zisp/value/rune.zig new file mode 100644 index 0000000..195210e --- /dev/null +++ b/src/zisp/value/rune.zig @@ -0,0 +1,82 @@ +const std = @import("std"); + +const value = @import("../value.zig"); + +const ShortString = value.ShortString; +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return v.isOtherTag(.rune); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not rune"); + } +} + +pub fn isValidRune(s: []const u8) bool { + if (s.len == 0 or s.len > 6) { + return false; + } + if (!std.ascii.isAlphabetic(s[0])) { + return false; + } + for (s[1..]) |c| { + if (!std.ascii.isAlphanumeric(c)) { + return false; + } + } + return true; +} + +fn assertValidRune(s: []const u8) void { + if (!isValidRune(s)) { + std.debug.print("invalid rune: '{s}'\n", .{s}); + @panic("invalid rune"); + } +} + +// See sstr.zig which uses equivalent code; probably good to keep in sync. + +pub fn pack(s: []const u8) Value { + assertValidRune(s); + return packForced(s); +} + +pub fn packForced(s: []const u8) Value { + var v = Value{ .rune = .{ .name = 0 } }; + const dest: [*]u8 = @ptrCast(&v.rune.name); + @memcpy(dest, s); + return v; +} + +pub fn unpack(v: Value) ShortString { + assert(v); + const s: [6]u8 = @bitCast(v.rune.name); + inline for (0..6) |i| { + if (s[i] == 0) return .{ .buffer = s, .len = i }; + } + return .{ .buffer = s, .len = 6 }; +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn make(v: Value) Value { + const s, const l = value.sstr.unpack(v); + return pack(s[0..l]); +} + +pub fn getName(v: Value) Value { + const s, const l = unpack(v); + return value.sstr.pack(s[0..l]); +} + +// TODO: Registering decoders diff --git a/src/zisp/value/seq.zig b/src/zisp/value/seq.zig new file mode 100644 index 0000000..3418a5a --- /dev/null +++ b/src/zisp/value/seq.zig @@ -0,0 +1,57 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +const value = @import("../value.zig"); +const gc = @import("../gc.zig"); + +const Value = value.Value; + +const Endian = enum(u1) { + little, + big, + + const native: Endian = switch (builtin.target.cpu.arch.endian()) { + .little => .little, + .big => .big, + }; +}; + +pub const Header = packed struct(u64) { + type: enum(u2) { + values, + string, + ints, + floats, + }, + info: packed union { + values: packed struct(u14) { + weak: bool = false, + _: u13 = 0, + }, + string: packed struct(u14) { + enc: enum(u4) { utf8, utf16, utf24, utf32 }, + endian: Endian = .native, + quoted: bool, + interned: bool, + _: u7 = 0, + }, + ints: packed struct(u14) { + signed: bool, + endian: Endian = .native, + size: u12, + }, + floats: packed struct(u14) { + double: bool, + endian: Endian = .native, + _: u12 = 0, + }, + }, + size: u48, + + pub fn bytes(self: *Header) []u8 { + const hs = @sizeOf(Header); + const ptr: [*]u8 = @ptrCast(self); + const end = hs + self.size; + return ptr[hs..end]; + } +}; diff --git a/src/zisp/value/sstr.zig b/src/zisp/value/sstr.zig new file mode 100644 index 0000000..b02fd3d --- /dev/null +++ b/src/zisp/value/sstr.zig @@ -0,0 +1,78 @@ +const std = @import("std"); + +const value = @import("../value.zig"); + +const ShortString = value.ShortString; +const OtherTag = value.OtherTag; +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return v.isOtherTag(.sstr) or v.isOtherTag(.qstr); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not sstr"); + } +} + +pub fn checkQuoted(v: Value) bool { + return v.isOtherTag(.qstr); +} + +// For now, ignore encoding, just treat it as []u8. + +pub fn isValidSstr(s: []const u8) bool { + if (s.len > 6) { + return false; + } + for (s) |c| { + if (c == 0) { + return false; + } + } + return true; +} + +fn assertValidSstr(s: []const u8) void { + if (!isValidSstr(s)) { + std.debug.print("invalid sstr: {s}\n", .{s}); + @panic("invalid sstr"); + } +} + +// Different ways of doing the following have been tested, including manual +// shifting and bit masking, but memcpy always wins easily according to our +// micro-benchmarks, under both ReleaseSafe and ReleaseFast. + +// Note: rune.zig uses equivalent code; probably good to keep in sync. + +pub fn pack(s: []const u8) Value { + return _pack(s, .sstr); +} + +pub fn packQuoted(s: []const u8) Value { + return _pack(s, .qstr); +} + +fn _pack(s: []const u8, tag: OtherTag) Value { + assertValidSstr(s); + var v = Value{ .sstr = .{ .string = 0, .tag = tag } }; + const dest: [*]u8 = @ptrCast(&v.sstr.string); + @memcpy(dest, s); + return v; +} + +pub fn unpack(v: Value) ShortString { + assert(v); + const s: [6]u8 = @bitCast(v.sstr.string); + inline for (0..6) |i| { + if (s[i] == 0) return .{ .buffer = s, .len = i }; + } + return .{ .buffer = s, .len = 6 }; +} + +// No Zisp API for sstr specifically, since it's a string. See string.zig. -- cgit v1.2.3