From 5e461592bfda4b42e6c11beaae1fa4b04f0aec95 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Sun, 31 May 2026 14:35:49 +0200 Subject: [WORKING STATE] Fixes and cleanup. --- src/zisp/gc.zig | 2 -- src/zisp/gc/IstrSet.zig | 39 ++++++++++++++++++------------- src/zisp/io/print.zig | 61 ++++++++++++++++++++++++++++-------------------- src/zisp/value.zig | 3 ++- src/zisp/value/array.zig | 26 +++++++++++---------- src/zisp/value/istr.zig | 19 ++++++++------- 6 files changed, 86 insertions(+), 64 deletions(-) (limited to 'src') diff --git a/src/zisp/gc.zig b/src/zisp/gc.zig index 1be4304..fdb0e02 100644 --- a/src/zisp/gc.zig +++ b/src/zisp/gc.zig @@ -11,8 +11,6 @@ const ptr = value.ptr; const Value = value.Value; const Zptr = value.Zptr; -const Istr = istr.Istr; - pub const alloc = std.heap.smp_allocator; // Cons cells diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig index 465b0e3..0176291 100644 --- a/src/zisp/gc/IstrSet.zig +++ b/src/zisp/gc/IstrSet.zig @@ -13,7 +13,7 @@ //! pointer to the sector becomes the Istr pointer itself. //! //! The 8-byte header of an Istr is a u64 hash, but its lowest 8 bits are also -//! the length of the string: u64hash = (real_u64hash << 1) | u8length +//! the length of the string: u64hash = (real_u64hash << 8) | u8length //! //! Note that the empty string is never interned since strings up to 6 bytes in //! length are packed directly into a NaN value. Thus, checking if a sector is @@ -28,11 +28,11 @@ const std = @import("std"); -const value = @import("../value.zig"); - const Alloc = std.mem.Allocator; -const Istr = value.istr.Istr; +const value = @import("../value.zig"); + +const IstrPtr = value.istr.IstrPtr; const Set = @This(); @@ -51,7 +51,7 @@ const Sector = *align(8) packed union { return @ptrCast(self); } - fn getIstr(self: *@This()) Istr { + fn getIstrPtr(self: *@This()) IstrPtr { if (self.meta.len <= 24) { return @ptrCast(self); } else { @@ -59,11 +59,11 @@ const Sector = *align(8) packed union { } } - pub fn match(self: *@This(), hash: u64, s: []const u8) ?Istr { + pub fn match(self: *@This(), hash: u64, s: []const u8) ?IstrPtr { if (self.hash != hash) { return null; } - const istr = self.getIstr(); + const istr = self.getIstrPtr(); if (std.hash_map.eqlString(s, istr.str())) { return istr; } else { @@ -76,10 +76,10 @@ const Sector = *align(8) packed union { alloc: Alloc, hash: u64, s: []const u8, - ) !Istr { + ) !IstrPtr { self.hash = hash; if (s.len <= 24) { - const istr: Istr = @ptrCast(self); + const istr: IstrPtr = @ptrCast(self); istr.putStr(s); return istr; } else { @@ -90,7 +90,7 @@ const Sector = *align(8) packed union { } }; -buckets: []Bucket, +buckets: []Bucket = &.{}, const default_bcount = 512; @@ -99,26 +99,33 @@ pub fn init(alloc: Alloc) !Set { } pub fn initCustom(alloc: Alloc, bcount: usize) !Set { - return Set{ - .buckets = try alloc.alloc(Bucket, bcount), - }; + var set = Set{}; + try set.realloc(alloc, bcount); + return set; +} + +fn realloc(self: *Set, alloc: Alloc, bcount: usize) !void { + alloc.free(self.buckets); + self.buckets = try alloc.alloc(Bucket, bcount); + @memset(self.buckets, std.mem.zeroes(Bucket)); } pub fn deinit(self: *Set, alloc: Alloc) void { alloc.free(self.buckets); } -pub fn sector(set: *Set, b_idx: usize, s_idx: usize) Sector { +fn sector(set: *Set, b_idx: usize, s_idx: usize) Sector { return @ptrCast(&set.buckets[b_idx][s_idx]); } -pub fn add(self: *Set, alloc: Alloc, s: []const u8) !Istr { +/// Add, or get existing, string. +pub fn add(self: *Set, alloc: Alloc, s: []const u8) !IstrPtr { std.debug.assert(s.len < 256); const idx_mask = self.buckets.len - 1; const h0 = std.hash_map.hashString(s); - const hash = (h0 << 1) | s.len; + const hash = (h0 << 8) | s.len; // We resize if we had to walk through 50% of the buckets. const probe_limit = self.buckets.len >> 1; diff --git a/src/zisp/io/print.zig b/src/zisp/io/print.zig index 5e3bd15..ddf9725 100644 --- a/src/zisp/io/print.zig +++ b/src/zisp/io/print.zig @@ -3,55 +3,62 @@ const std = @import("std"); const gc = @import("../gc.zig"); const value = @import("../value.zig"); +const Writer = *std.Io.Writer; + const Parser = @import("Parser.zig"); + const Value = value.Value; -const Array = value.array.Array; -pub fn toWriter(w: *std.Io.Writer, v: Value) anyerror!void { +const IstrPtr = value.istr.IstrPtr; +const ArrayPtr = value.array.ArrayPtr; + +pub fn toWriter(w: Writer, v: Value) anyerror!void { // zig fmt: off try if (v.isDouble()) double(w, v) else if (v.isFixnum()) fixnum(w, v) else if (v.getPtr(.pair)) |p| pair(w, @ptrCast(p)) + else if (v.getPtr(.istr)) |p| istr(w, @ptrCast(p)) else if (v.getPtr(.array)) |p| array(w, @ptrCast(p)) else if (v.isRune()) rune(w, v) else if (v.isChar()) char(w, v) else if (v.isMisc()) misc(w, v) else if (v.isSrat()) srat(w, v) else if (v.isSstr()) sstr(w, v) + else @panic("unhandled type") ; // zig fmt: on } -pub fn double(w: *std.Io.Writer, v: Value) !void { +pub fn double(w: Writer, v: Value) !void { _ = w; _ = v; @panic("not implemented"); } -pub fn fixnum(w: *std.Io.Writer, v: Value) !void { +pub fn fixnum(w: Writer, v: Value) !void { try w.print("{d}", .{value.fixnum.unpack(v)}); } -pub fn rune(w: *std.Io.Writer, v: Value) !void { +pub fn rune(w: Writer, v: Value) !void { const name = value.rune.unpack(v); try w.writeByte('#'); try w.writeAll(name.slice()); } -pub fn sstr(w: *std.Io.Writer, v: Value) !void { +pub fn sstr(w: Writer, v: Value) !void { // TODO: Check if pipes/escapes necessary. const str = value.sstr.unpack(v); try w.writeAll(str.slice()); } -pub fn char(w: *std.Io.Writer, v: Value) !void { +pub fn char(w: Writer, v: Value) !void { const uc: u21 = value.char.unpack(v); var buf: [4]u8 = undefined; const len = try std.unicode.utf8Encode(uc, &buf); try w.writeAll(buf[0..len]); } -pub fn misc(w: *std.Io.Writer, v: Value) !void { +pub fn misc(w: Writer, v: Value) !void { try switch (v.bits) { value.f.bits => w.writeAll("#f"), value.t.bits => w.writeAll("#t"), @@ -63,13 +70,13 @@ pub fn misc(w: *std.Io.Writer, v: Value) !void { }; } -pub fn srat(w: *std.Io.Writer, v: Value) !void { +pub fn srat(w: Writer, v: Value) !void { _ = w; _ = v; @panic("not implemented"); } -pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void { +pub fn pair(w: Writer, p: *[2]Value) !void { const car = p[0]; //const cdr = p[1]; if (car.eq(Parser.PQSTR)) { @@ -89,7 +96,25 @@ pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void { } } -pub fn list(w: *std.Io.Writer, p: *[2]Value) !void { +pub fn istr(w: Writer, p: IstrPtr) !void { + try w.writeAll(p.str()); +} + +pub fn array(w: Writer, s: ArrayPtr) !void { + switch (s.type) { + .string => try string(w, s), + else => @panic("not implemented"), + } +} + +pub fn string(w: Writer, s: ArrayPtr) !void { + // TODO: Check if pipes/escapes necessary. + try w.writeByte('|'); + try w.writeAll(s.str()); + try w.writeByte('|'); +} + +pub fn list(w: Writer, p: *[2]Value) !void { try w.writeByte('('); try toWriter(w, p[0]); var cdr = p[1]; @@ -105,17 +130,3 @@ pub fn list(w: *std.Io.Writer, p: *[2]Value) !void { } try w.writeByte(')'); } - -pub fn array(w: *std.Io.Writer, s: Array) !void { - switch (s.type) { - .string => try string(w, s), - else => @panic("not implemented"), - } -} - -pub fn string(w: *std.Io.Writer, s: Array) !void { - // TODO: Check if pipes/escapes necessary. - try w.writeByte('|'); - try w.writeAll(s.str()); - try w.writeByte('|'); -} diff --git a/src/zisp/value.zig b/src/zisp/value.zig index 96351f2..b40f022 100644 --- a/src/zisp/value.zig +++ b/src/zisp/value.zig @@ -207,7 +207,8 @@ pub const none = Value{ .misc = .{ .value = .none } }; pub const undef = Value{ .misc = .{ .value = .undef } }; // zig fmt: on -/// A pointer into the Zisp heap. +/// A plain (unpacked, untagged) pointer into the Zisp heap. May point to any +/// kind of object. pub const Zptr = *align(8) anyopaque; /// Values for the lowest 3 bits of a heap pointer, indicating the heap type. diff --git a/src/zisp/value/array.zig b/src/zisp/value/array.zig index 6e37013..ac56391 100644 --- a/src/zisp/value/array.zig +++ b/src/zisp/value/array.zig @@ -6,16 +6,6 @@ const gc = @import("../gc.zig"); const Value = value.Value; -const Endian = enum(u1) { - little, - big, - - const native: Endian = switch (builtin.target.cpu.arch.endian()) { - .little => .little, - .big => .big, - }; -}; - /// Pointer to header for an array of various element types and sizes. /// /// The low 48 bits are either the length (element count, not buffer size) of @@ -56,7 +46,9 @@ const Endian = enum(u1) { /// another array with the same buffer pointer and different type info. /// /// Other remaining bits provide information about element type and size. -pub const Array = *align(8) packed struct(u64) { +pub const ArrayPtr = *align(8) ArrayHeader; + +const ArrayHeader = packed struct(u64) { len_or_ptr: u48, is_slice: bool, is_ptr: bool, @@ -113,7 +105,7 @@ pub const Array = *align(8) packed struct(u64) { return self.len_or_ptr * self.eltSize(); } - fn arrPointer(self: *@This()) ?Array { + fn arrPointer(self: *@This()) ?ArrayPtr { std.debug.assert(self.is_ptr); const p = self.len_or_ptr; return if (p != 0) @ptrFromInt(p) else null; @@ -157,3 +149,13 @@ pub const Array = *align(8) packed struct(u64) { return arr.bufContent()[0..arr.size()]; } }; + +const Endian = enum(u1) { + little, + big, + + const native: Endian = switch (builtin.target.cpu.arch.endian()) { + .little => .little, + .big => .big, + }; +}; diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig index 8f70725..a525c7e 100644 --- a/src/zisp/value/istr.zig +++ b/src/zisp/value/istr.zig @@ -12,8 +12,10 @@ const Value = value.Value; /// Pointer to an interned string. /// /// First 64 bits are a cached hash, of which the lowest 8 bits are actually the -/// length of the string: u64hash = (real_u64hash << 1) | u8length -pub const Istr = *align(8) packed union { +/// length of the string: u64hash = (real_u64hash << 8) | u8length +pub const IstrPtr = *align(8) IstrHeader; + +const IstrHeader = packed union { hash: u64, meta: packed struct(u64) { len: u8, @@ -26,7 +28,8 @@ pub const Istr = *align(8) packed union { pub fn putStr(self: *@This(), s: []const u8) void { std.debug.assert(s.len <= 255); - @memcpy(self.bufU8()[8 .. 8 + s.len], s); + const buf = self.bufU8(); + @memcpy(buf[8 .. 8 + s.len], s); } pub fn str(self: *@This()) []const u8 { @@ -35,23 +38,23 @@ pub const Istr = *align(8) packed union { } }; -pub fn new(alloc: Alloc, hash: u64, s: []const u8) !Istr { - const aln = std.mem.Alignment.of(Istr); +pub fn new(alloc: Alloc, hash: u64, s: []const u8) !IstrPtr { + const aln = std.mem.Alignment.of(IstrPtr); const ptr = try alloc.alignedAlloc(u8, aln, 8 + s.len); - const istr: Istr = @ptrCast(ptr); + const istr: IstrPtr = @ptrCast(ptr); istr.hash = hash; istr.putStr(s); return istr; } -pub fn check(v: Value) ?Istr { +pub fn check(v: Value) ?IstrPtr { if (v.getPtr(.istr)) |p| { return @ptrCast(p); } return null; } -pub fn assert(v: Value) Istr { +pub fn assert(v: Value) IstrPtr { return check(v) orelse { v.dump(); @panic("not istr"); -- cgit v1.2.3