From 79f4d5ae23c44924f413f9d7a1113d2280611ec5 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Sun, 31 May 2026 01:02:58 +0200 Subject: Make the code compile. --- src/main.zig | 2 + src/zisp/gc.zig | 55 ++++++------------------ src/zisp/gc/IstrSet.zig | 109 +++++++++++++++++++++++++---------------------- src/zisp/io/Parser.zig | 35 +++++++++------ src/zisp/io/print.zig | 30 +++++++++++-- src/zisp/value/array.zig | 14 +++--- src/zisp/value/istr.zig | 23 +++++----- 7 files changed, 143 insertions(+), 125 deletions(-) (limited to 'src') diff --git a/src/main.zig b/src/main.zig index f7e752e..dc1187d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -14,6 +14,8 @@ pub fn main() !u8 { var stdout_writer = std.Io.File.stdout().writer(io, &stdout_buffer); const writer = &stdout_writer.interface; + zisp.gc.init(); + var p = try zisp.io.Parser.init(alloc, io); defer p.deinit(); while (true) { diff --git a/src/zisp/gc.zig b/src/zisp/gc.zig index 4fc2c90..1be4304 100644 --- a/src/zisp/gc.zig +++ b/src/zisp/gc.zig @@ -3,25 +3,24 @@ const std = @import("std"); const value = @import("value.zig"); +const IstrSet = @import("gc/IstrSet.zig"); + +const istr = value.istr; const ptr = value.ptr; -const seq = value.seq; const Value = value.Value; const Zptr = value.Zptr; +const Istr = istr.Istr; + pub const alloc = std.heap.smp_allocator; // Cons cells const ConsPool = std.heap.MemoryPool([2]Value); var cons_pool: ConsPool = undefined; -var need_init = true; pub fn cons(v1: Value, v2: Value) *[2]Value { - if (need_init) { - cons_pool = ConsPool.initCapacity(alloc, 64) catch @panic("OOM"); - need_init = false; - } const mem = cons_pool.create(alloc) catch @panic("OOM"); mem[0] = v1; mem[1] = v2; @@ -30,42 +29,16 @@ pub fn cons(v1: Value, v2: Value) *[2]Value { // Interned strings -const istr_ctx = std.hash_map.StringContext{}; -var istr_pool = std.hash_map.StringHashMap(*seq.Header).init(alloc); - -pub fn internString(str: []const u8) Value { - const gop = istr_pool.getOrPutAdapted( - str, - istr_ctx, - ) catch @panic("OOM"); - if (gop.found_existing) { - const p: *seq.Header = gop.value_ptr.*; - return ptr.pack(p, .seq); - } - - std.debug.assert(str.len <= std.math.maxInt(u48)); +var istr_set: IstrSet = undefined; - const header: seq.Header = .{ - .type = .string, - .info = .{ .string = .{ - .enc = .utf8, - .interned = true, - } }, - .size = @intCast(str.len), - }; - - const h_align: std.mem.Alignment = @enumFromInt(@alignOf(seq.Header)); - const h_size = @sizeOf(seq.Header); - const size = str.len + h_size; - - const mem = alloc.alignedAlloc(u8, h_align, size) catch @panic("OOM"); - - const h_bytes: [h_size]u8 = @bitCast(header); - @memcpy(mem[0..h_size], &h_bytes); - @memcpy(mem[h_size..size], str); +pub fn internString(s: []const u8) !Value { + const p = istr_set.add(alloc, s) catch @panic("OOM"); + return ptr.pack(p, .istr); +} - gop.key_ptr.* = alloc.dupe(u8, str) catch @panic("OOM"); - gop.value_ptr.* = @ptrCast(mem.ptr); +// Init - return ptr.pack(mem.ptr, .seq); +pub fn init() void { + cons_pool = ConsPool.initCapacity(alloc, 64) catch @panic("OOM"); + istr_set = IstrSet.init(alloc) catch @panic("OOM"); } diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig index 31bc850..465b0e3 100644 --- a/src/zisp/gc/IstrSet.zig +++ b/src/zisp/gc/IstrSet.zig @@ -1,32 +1,39 @@ -/// -/// Efficient hash set for short(ish) strings. -/// -/// Layout: []Bucket -/// -/// * Bucket: [2]Sector -/// -/// * Sector: [32]u8 or [4]u64 -/// -/// The parser interns strings up to 255 bytes in length, using the `Istr` heap -/// representation, which has an 8-byte metadata prefix. Therefore, strings of -/// up to 24 bytes can fit into a 32-byte sector as an immediate Istr, so the -/// pointer to the sector becomes the Istr pointer itself. -/// -/// The 8-byte header of an Istr is a u64 hash, but its lowest 8 bits are also -/// the length of the string: u64hash = (real_u64hash << 1) | u8length -/// -/// Note that the empty string is never interned since strings up to 6 bytes in -/// length are packed directly into a NaN value. Thus, checking if a sector is -/// empty is a matter of checking its first 8 bytes as a u64 value, which would -/// hold the Istr header, which cannot be zero, because the lowest 8 bits are -/// the length of the string. -/// -/// If the length value is greater than 24, meaning the Istr can't actually fit -/// here, that means the next 8 bytes of the sector are actually a pointer to -/// the Istr elsewhere on the heap, and another 16 bytes are unused. -/// +//! +//! Efficient hash set for short(ish) strings. +//! +//! Layout: []Bucket +//! +//! * Bucket: [2]Sector +//! +//! * Sector: [32]u8 or [4]u64 +//! +//! The parser interns strings up to 255 bytes in length, using the `Istr` heap +//! representation, which has an 8-byte metadata prefix. Therefore, strings of +//! up to 24 bytes can fit into a 32-byte sector as an immediate Istr, so the +//! pointer to the sector becomes the Istr pointer itself. +//! +//! The 8-byte header of an Istr is a u64 hash, but its lowest 8 bits are also +//! the length of the string: u64hash = (real_u64hash << 1) | u8length +//! +//! Note that the empty string is never interned since strings up to 6 bytes in +//! length are packed directly into a NaN value. Thus, checking if a sector is +//! empty is a matter of checking its first 8 bytes as a u64 value, which would +//! hold the Istr header, which cannot be zero, because the lowest 8 bits are +//! the length of the string. +//! +//! If the length value is greater than 24, meaning the Istr can't actually fit +//! here, that means the next 8 bytes of the sector are actually a pointer to +//! the Istr elsewhere on the heap, and another 16 bytes are unused. +//! + const std = @import("std"); +const value = @import("../value.zig"); + +const Alloc = std.mem.Allocator; + +const Istr = value.istr.Istr; + const Set = @This(); const Bucket = [2][4]u64; @@ -48,13 +55,13 @@ const Sector = *align(8) packed union { if (self.meta.len <= 24) { return @ptrCast(self); } else { - return @bitCast(self.bufU64()[1]); + return @ptrFromInt(self.bufU64()[1]); } } pub fn match(self: *@This(), hash: u64, s: []const u8) ?Istr { if (self.hash != hash) { - return false; + return null; } const istr = self.getIstr(); if (std.hash_map.eqlString(s, istr.str())) { @@ -66,18 +73,18 @@ const Sector = *align(8) packed union { pub fn insert( self: *@This(), - alloc: *Allocator, + alloc: Alloc, hash: u64, s: []const u8, - ) Istr { + ) !Istr { self.hash = hash; if (s.len <= 24) { - const istr = @ptrCast(self); + const istr: Istr = @ptrCast(self); istr.putStr(s); return istr; } else { - const istr = Istr.new(alloc, hash, s); - self.bufU64()[1] = @bitCast(istr); + const istr = try value.istr.new(alloc, hash, s); + self.bufU64()[1] = @intFromPtr(istr); return istr; } } @@ -87,53 +94,55 @@ buckets: []Bucket, const default_bcount = 512; -pub fn init(alloc: *Allocator) !Set { +pub fn init(alloc: Alloc) !Set { return initCustom(alloc, default_bcount); } -pub fn initCustom(alloc: *Allocator, bcount: usize) !Set { +pub fn initCustom(alloc: Alloc, bcount: usize) !Set { return Set{ .buckets = try alloc.alloc(Bucket, bcount), }; } -pub fn deinit(self: *Set, alloc: *Allocator) void { +pub fn deinit(self: *Set, alloc: Alloc) void { alloc.free(self.buckets); } -pub fn add(self: *Set, s: []const u8) Istr { +pub fn sector(set: *Set, b_idx: usize, s_idx: usize) Sector { + return @ptrCast(&set.buckets[b_idx][s_idx]); +} + +pub fn add(self: *Set, alloc: Alloc, s: []const u8) !Istr { std.debug.assert(s.len < 256); const idx_mask = self.buckets.len - 1; - const len = s.len; - const h0 = std.hash_map.hashString(s); - const hash = (h0 << 1) | len; + const hash = (h0 << 1) | s.len; // We resize if we had to walk through 50% of the buckets. - const probe_limit = self.buckets.len / 2; + const probe_limit = self.buckets.len >> 1; var probes: usize = 0; var idx = @as(usize, @truncate(hash & idx_mask)); - var s_idx: usize = undefined; - while (true) : (idx = (idx + 1) & idx_mask) { + const s_idx = b: while (true) : (idx = (idx + 1) & idx_mask) { inline for (0..1) |i| { - const sec: Sector = @ptrCast(&self.buckets[idx][i]); - if (sec.hash == 0) { s_idx = i; break; } - if (sec.match(s)) |istr| return istr; + const sec = sector(self, idx, i); + if (sec.hash == 0) break :b i; + if (sec.match(hash, s)) |istr| return istr; } probes += 1; if (probes > probe_limit) { self.resize(); - return self.add(s); + return self.add(alloc, s); } }; - const sec = &self.buckets[idx][s_idx]; + const sec = sector(self, idx, s_idx); return sec.insert(alloc, hash, s); } -fn resize() void { +fn resize(self: *Set) void { + _ = self; @panic("IstrSet resize not implemented."); } diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index ce61817..f92a308 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -152,7 +152,12 @@ fn read(p: *Parser) !?u8 { } fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { - return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg); + return try p.read() orelse p.err(.UnexpectedEof, emsg); +} + +// Fake optional, for use in: while (readToEof()) |c| { } +fn readNoEof2(p: *Parser, comptime emsg: []const u8) !?u8 { + return try p.read() orelse p.err(.UnexpectedEof, emsg); } fn unread(p: *Parser, c: u8) void { @@ -187,12 +192,13 @@ fn addUnicode(p: *Parser, uc: u21) !void { fn getCharsAsString(p: *Parser) !Value { defer p.chars.clearRetainingCapacity(); const s = p.chars.items; - if (value.sstr.isValidSstr(s)) + if (value.sstr.isValidSstr(s)) { return value.sstr.pack(s); - else if (value.istr.isValidIstr(s)) + } else if (value.istr.isValidIstr(s)) { return value.istr.intern(s); - else - return @panic("not implemented"); // TODO + } else { + @panic("not implemented"); // TODO + } } fn getCharsAsRune(p: *Parser) Value { @@ -445,7 +451,7 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { fn getString(p: *Parser, comptime close: u8) !Value { const msg = "string(" ++ .{close} ++ ")"; - while (try p.readNoEof(msg)) |c| sw: switch (c) { + while (try p.readNoEof2(msg)) |c| sw: switch (c) { close => break, '\\' => switch (try p.readNoEof("string backslash escape")) { '\\', '|', '"' => |c2| try p.addChar(c2), @@ -475,33 +481,36 @@ fn getString(p: *Parser, comptime close: u8) !Value { fn getAtString(p: *Parser) !Value { const sentinel = try p.readNoEof("at-string"); - while (try p.readNoEof("at-string")) |c| { + while (try p.readNoEof2("at-string")) |c| { if (c == sentinel) break; try p.addChar(c); } - const s = p.getCharsAsString(); + const s = try p.getCharsAsString(); return p.cons(ATSTR, s); } fn skipStringLfEscape(p: *Parser) !u8 { const msg = "string linefeed escape"; - while (try p.readNoEof(msg)) |c| switch (c) { - '\t', ' ' => {}, + while (try p.readNoEof2(msg)) |c| switch (c) { + '\t', ' ' => continue, '\n' => return p.skipStringIndent(), else => return p.err(.InvalidCharacter, msg), }; + unreachable; } fn skipStringIndent(p: *Parser) !u8 { - while (try p.readNoEof("string newline escape")) |c| switch (c) { - '\t', ' ' => {}, + const msg = "string newline escape"; + while (try p.readNoEof2(msg)) |c| switch (c) { + '\t', ' ' => continue, else => return c, }; + unreachable; } fn parseStringRawHexEsc(p: *Parser) !void { const msg = "string raw hex escape"; - while (try p.readNoEof(msg)) |c1| { + while (try p.readNoEof2(msg)) |c1| { if (c1 == ';') break; const c2 = try p.readNoEof(msg); const hi = try p.parseHexDigit(c1, msg); diff --git a/src/zisp/io/print.zig b/src/zisp/io/print.zig index 65082c4..5e3bd15 100644 --- a/src/zisp/io/print.zig +++ b/src/zisp/io/print.zig @@ -3,14 +3,16 @@ const std = @import("std"); const gc = @import("../gc.zig"); const value = @import("../value.zig"); +const Parser = @import("Parser.zig"); const Value = value.Value; +const Array = value.array.Array; pub fn toWriter(w: *std.Io.Writer, v: Value) anyerror!void { // zig fmt: off try if (v.isDouble()) double(w, v) else if (v.isFixnum()) fixnum(w, v) else if (v.getPtr(.pair)) |p| pair(w, @ptrCast(p)) - else if (v.getPtr(.seq)) |p| seq(w, @ptrCast(p)) + else if (v.getPtr(.array)) |p| array(w, @ptrCast(p)) else if (v.isRune()) rune(w, v) else if (v.isChar()) char(w, v) else if (v.isMisc()) misc(w, v) @@ -68,6 +70,26 @@ pub fn srat(w: *std.Io.Writer, v: Value) !void { } pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void { + const car = p[0]; + //const cdr = p[1]; + if (car.eq(Parser.PQSTR)) { + //try pipeString(w, cdr); + @panic(""); + } else if (car.eq(Parser.DQSTR)) { + //try quotString(w, cdr); + @panic(""); + } else if (car.eq(Parser.ATSTR)) { + //try atString(w, cdr); + @panic(""); + } else if (car.eq(Parser.LABEL)) { + //try label(w, cdr); + @panic(""); + } else { + try list(w, p); + } +} + +pub fn list(w: *std.Io.Writer, p: *[2]Value) !void { try w.writeByte('('); try toWriter(w, p[0]); var cdr = p[1]; @@ -84,16 +106,16 @@ pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void { try w.writeByte(')'); } -pub fn seq(w: *std.Io.Writer, s: *value.seq.Header) !void { +pub fn array(w: *std.Io.Writer, s: Array) !void { switch (s.type) { .string => try string(w, s), else => @panic("not implemented"), } } -pub fn string(w: *std.Io.Writer, s: *value.seq.Header) !void { +pub fn string(w: *std.Io.Writer, s: Array) !void { // TODO: Check if pipes/escapes necessary. try w.writeByte('|'); - try w.writeAll(s.bytes()); + try w.writeAll(s.str()); try w.writeByte('|'); } diff --git a/src/zisp/value/array.zig b/src/zisp/value/array.zig index b2faf94..6e37013 100644 --- a/src/zisp/value/array.zig +++ b/src/zisp/value/array.zig @@ -94,13 +94,13 @@ pub const Array = *align(8) packed struct(u64) { fn bufContent(self: *@This()) [*]u8 { std.debug.assert(!self.is_ptr); - return @ptrCast(self.bufU64 + 1); + return @ptrCast(self.bufU64() + 1); } fn bufPointer(self: *@This()) [*]u8 { std.debug.assert(self.is_ptr); std.debug.assert(self.len_or_ptr == 0); - return @ptrCast(self.bufU64[1]); + return @ptrFromInt(self.bufU64()[1]); } fn eltSize(self: *@This()) u16 { @@ -116,7 +116,7 @@ pub const Array = *align(8) packed struct(u64) { fn arrPointer(self: *@This()) ?Array { std.debug.assert(self.is_ptr); const p = self.len_or_ptr; - return if (p != 0) @ptrCast(p) else null; + return if (p != 0) @ptrFromInt(p) else null; } fn sliceInfo(self: *@This()) [2]u64 { @@ -124,13 +124,13 @@ pub const Array = *align(8) packed struct(u64) { const ptr = self.len_or_ptr; const buf = self.bufU64(); if (ptr != 0) { - return buf[1..2]; + return .{ buf[1], buf[2] }; } else { - return buf[2..3]; + return .{ buf[2], buf[3] }; } } - pub fn buf(self: *@This()) [*]u8 { + pub fn bufU8(self: *@This()) [*]u8 { if (self.is_ptr) { if (self.arrPointer()) |a| { return a.bufContent(); @@ -144,7 +144,7 @@ pub const Array = *align(8) packed struct(u64) { pub fn str(self: *@This()) []u8 { if (self.is_slice) { - const buf = self.buf(); + const buf = self.bufU8(); const start, const end = self.sliceInfo(); return buf[start..end]; } diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig index dec3b09..8f70725 100644 --- a/src/zisp/value/istr.zig +++ b/src/zisp/value/istr.zig @@ -3,6 +3,8 @@ const std = @import("std"); const gc = @import("../gc.zig"); const value = @import("../value.zig"); +const Alloc = std.mem.Allocator; + const Value = value.Value; // Zig API @@ -22,25 +24,26 @@ pub const Istr = *align(8) packed union { return @ptrCast(self); } - pub fn new(alloc: *Allocator, hash: u64, s: []const u8) Istr { - const istr: Istr = @ptrCast(alloc.alloc(u8, 8 + s.len)); - istr.hash = hash; - istr.putStr(s); - return istr; - } - pub fn putStr(self: *@This(), s: []const u8) void { std.debug.assert(s.len <= 255); - @memcpy(istr.bufU8()[8 .. 8 + s.len], s); + @memcpy(self.bufU8()[8 .. 8 + s.len], s); } pub fn str(self: *@This()) []const u8 { const buf = self.bufU8(); - const len = self.meta.len; - return buf[8 .. 8 + len]; + return buf[8 .. 8 + self.meta.len]; } }; +pub fn new(alloc: Alloc, hash: u64, s: []const u8) !Istr { + const aln = std.mem.Alignment.of(Istr); + const ptr = try alloc.alignedAlloc(u8, aln, 8 + s.len); + const istr: Istr = @ptrCast(ptr); + istr.hash = hash; + istr.putStr(s); + return istr; +} + pub fn check(v: Value) ?Istr { if (v.getPtr(.istr)) |p| { return @ptrCast(p); -- cgit v1.2.3