diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2026-06-06 17:02:31 +0200 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2026-06-06 17:02:31 +0200 |
| commit | d79c15870b4193f11fbbd8e8105ce0f6e56044d3 (patch) | |
| tree | 063f87ade9b09e42908e8660df669192e1cdb2ee /src | |
| parent | 9b2082aac14aea504495a0faed361716c9760501 (diff) | |
Fix IstrSet but for real this time?
Diffstat (limited to 'src')
| -rw-r--r-- | src/test/strings.zig | 6 | ||||
| -rw-r--r-- | src/zisp/gc/IstrSet.zig | 135 | ||||
| -rw-r--r-- | src/zisp/io/Parser.zig | 2 | ||||
| -rw-r--r-- | src/zisp/value/istr.zig | 35 |
4 files changed, 93 insertions, 85 deletions
diff --git a/src/test/strings.zig b/src/test/strings.zig index 09c8e2a..09aa988 100644 --- a/src/test/strings.zig +++ b/src/test/strings.zig @@ -11,13 +11,13 @@ const fx = value.fixnum; test "istr" { const s1 = "foo bar baz"; - const v1 = try istr.intern(s1); + const v1 = try istr.getOrNew(s1); const v1_len: usize = @intCast(fx.unpack(istr.getLen(v1))); try testing.expectEqualStrings(s1, istr.assert(v1).str()); try testing.expectEqual(s1.len, v1_len); const s2 = @embedFile("data/string.txt"); - const v2 = try istr.intern(s2); + const v2 = try istr.getOrNew(s2); const v2_len: usize = @intCast(fx.unpack(istr.getLen(v2))); try testing.expectEqualStrings(s2, istr.assert(v2).str()); try testing.expectEqual(s2.len, v2_len); @@ -25,7 +25,7 @@ test "istr" { // Check that modifying a slice doesn't affect the string. var s3 = "test".*; - const v3 = try istr.intern(&s3); + const v3 = try istr.getOrNew(&s3); s3[0] = 'x'; try testing.expectEqualStrings("test", istr.assert(v3).str()); } diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig index b22d57b..4146713 100644 --- a/src/zisp/gc/IstrSet.zig +++ b/src/zisp/gc/IstrSet.zig @@ -12,8 +12,20 @@ const Set = @This(); const max_fill_percent = 80; const Bucket = packed struct(u64) { - ptr: u48, - fp: u16, + ptr: u48 = 0, + fp: u16 = 0, + + pub fn empty(self: Bucket) bool { + return self.ptr == 0; + } + + pub fn istrPtr(self: *const Bucket) IstrPtr { + return @ptrFromInt(self.ptr); + } + + pub fn putIstrPtr(self: *Bucket, istr: IstrPtr) void { + self.ptr = @intCast(@intFromPtr(istr)); + } }; alloc: Alloc, @@ -37,7 +49,7 @@ pub fn initCustom(alloc: Alloc, bcount: usize) !Set { fn allocBuckets(self: *Set, bcount: usize) !void { self.buckets = try self.alloc.alloc(Bucket, bcount); - @memset(self.buckets, std.mem.zeroes(Bucket)); + @memset(self.buckets, Bucket{}); self.used_threshold = bcount * max_fill_percent / 100; } @@ -45,90 +57,77 @@ pub fn deinit(self: *Set) void { self.alloc.free(self.buckets); } -/// Add, or get existing, string. -pub fn add(self: *Set, s: []const u8) !IstrPtr { +/// Get the istr with the given string contents, or alloc and store a new one. +pub fn getOrNew(self: *Set, s: []const u8) !IstrPtr { std.debug.assert(s.len < 256); + return self.getOrNewOrPut(s, null); +} + +/// Get the canonical istr with the same contents, or store this one as it. +pub fn getOrPut(self: *Set, istr: IstrPtr) !IstrPtr { + return self.getOrNewOrPut(istr.str(), istr); +} +fn getOrNewOrPut(self: *Set, s: []const u8, existing: ?IstrPtr) !IstrPtr { if (self.used_buckets > self.used_threshold) { try self.resize(); } - const idx_mask = self.buckets.len - 1; - - const hash = std.hash_map.hashString(s); - const fp: u16 = @truncate(hash); + const hash = strHash(s); + const fp = hashFp(hash); - const idx_init = @as(usize, @truncate(hash & idx_mask)); + const idx_mask = self.buckets.len - 1; + const idx_start = hash & idx_mask; - var idx = idx_init; - while (true) : (idx = (idx +% 1) & idx_mask) { - const bucket = self.buckets[idx]; - if (bucket.ptr == 0) break; + var idx = idx_start; + while (true) : (idx = (idx + 1) & idx_mask) { + const bucket = &self.buckets[idx]; + if (bucket.empty()) { + self.used_buckets += 1; + const istr = existing orelse try self.newIstr(s); + bucket.putIstrPtr(istr); + bucket.fp = fp; + return istr; + } if (bucket.fp == fp) { - const istr: IstrPtr = @ptrFromInt(bucket.ptr); - if (std.hash_map.eqlString(s, istr.str())) return istr; + const istr = bucket.istrPtr(); + if (strEq(s, istr.str())) return istr; } } +} + +fn resize(self: *Set) !void { + const old_buckets = self.buckets; + try self.allocBuckets(old_buckets.len << 1); + defer self.alloc.free(old_buckets); + const new_buckets = self.buckets; - self.used_buckets += 1; + const idx_mask = new_buckets.len - 1; + for (old_buckets) |old| { + if (old.empty()) continue; + const str = old.istrPtr().str(); + var idx = strHash(str) & idx_mask; + while (!new_buckets[idx].empty()) idx = (idx + 1) & idx_mask; + new_buckets[idx] = old; + } +} +fn newIstr(self: *Set, s: []const u8) !IstrPtr { const algn = std.mem.Alignment.of(IstrPtr); const size = @sizeOf(IstrHead) + s.len; - const ptr = try self.alloc.alignedAlloc(u8, algn, size); - const istr: IstrPtr = @ptrCast(ptr); - istr.* = .{ .len = @intCast(s.len) }; + const istr: IstrPtr = @ptrCast(try self.alloc.alignedAlloc(u8, algn, size)); istr.putStr(s); - self.buckets[idx] = .{ - .fp = fp, - .ptr = @intCast(@intFromPtr(istr)), - }; return istr; } -fn resize(self: *Set) !void { - const old_bcount = self.buckets.len; - const new_bcount = self.buckets.len << 1; - - const old_mask = old_bcount - 1; - const new_mask = new_bcount - 1; - - const old_buckets = self.buckets; - try self.allocBuckets(new_bcount); - defer self.alloc.free(old_buckets); - const new_buckets = self.buckets; - - // Make sure we're not in the middle of a cluster; find an empty bucket. - const idx_start = b: for (old_buckets, 0..) |_, idx| { - if (old_buckets[idx].ptr == 0) break :b idx; - } else unreachable; // Resize strategy guarantees empty slots +fn strHash(s: []const u8) u64 { + return std.hash_map.hashString(s); +} - var idx = idx_start; - while (true) { - // +1 at start of first iteration is OK since idx_start is empty. - idx = (idx +% 1) & old_mask; - - if (idx == idx_start) break; - if (old_buckets[idx].ptr == 0) continue; - - var dest_lo = idx; - var dest_hi = idx + old_bcount; - while (true) : (idx = (idx +% 1) & old_mask) { - const bucket = old_buckets[idx]; - if (bucket.ptr == 0) break; - - const istr: IstrPtr = @ptrFromInt(bucket.ptr); - const hash = std.hash_map.hashString(istr.str()); - const new_idx = @as(usize, @truncate(hash & new_mask)); - - if (new_idx < old_bcount) { - new_buckets[dest_lo] = bucket; - dest_lo += 1; - } else { - new_buckets[dest_hi] = bucket; - dest_hi = (dest_hi +% 1) & new_mask; - } - } +fn strEq(s: []const u8, s2: []const u8) bool { + return std.hash_map.eqlString(s, s2); +} - if (idx == idx_start) break; - } +fn hashFp(hash: u64) u16 { + return @intCast(hash >> 48); } diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index 490b231..4fdde8b 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -195,7 +195,7 @@ fn getCharsAsString(p: *Parser) !Value { return if (value.sstr.isValidSstr(s)) value.sstr.pack(s) else if (value.istr.isValidIstr(s) and p.istr_set != null) - value.istr.internInSet(p.istr_set.?, s) + value.istr.getOrNewInSet(p.istr_set.?, s) else value.array.newString(p.alloc, s); } diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig index 1159cc7..9156606 100644 --- a/src/zisp/value/istr.zig +++ b/src/zisp/value/istr.zig @@ -20,18 +20,16 @@ pub const IstrHead = packed struct(u8) { return @ptrCast(self); } - pub fn putStr(self: *@This(), s: []const u8) void { - std.debug.assert(s.len <= 255); - const buf = self.bufU8(); + pub fn str(self: *@This()) []const u8 { const start = @sizeOf(IstrHead); - @memcpy(buf[start .. start + s.len], s); + const len: usize = self.len; + return self.bufU8()[start .. start + len]; } - pub fn str(self: *@This()) []const u8 { - const buf = self.bufU8(); + pub fn putStr(self: *@This(), s: []const u8) void { const start = @sizeOf(IstrHead); - const len: usize = self.len; - return buf[start .. start + len]; + self.len = @intCast(s.len); + @memcpy(self.bufU8()[start .. start + s.len], s); } }; @@ -60,14 +58,17 @@ fn assertValidIstr(s: []const u8) void { } } -pub fn intern(s: []const u8) !Value { - return internInSet(gc.mainIstrSet(), s); +fn pack(istr: IstrPtr) Value { + return value.ptr.pack(@ptrCast(istr), .istr); +} + +pub fn getOrNew(s: []const u8) !Value { + return getOrNewInSet(gc.mainIstrSet(), s); } -pub fn internInSet(set: *IstrSet, s: []const u8) !Value { +pub fn getOrNewInSet(set: *IstrSet, s: []const u8) !Value { assertValidIstr(s); - const istr = try set.add(s); - return value.ptr.pack(@ptrCast(istr), .istr); + return pack(try set.getOrNew(s)); } pub fn getStr(v: Value) []const u8 { @@ -84,3 +85,11 @@ pub fn getLen(v: Value) Value { const istr = assert(v); return value.fixnum.pack(@intCast(istr.len)); } + +// TODO: Zisp representation of IstrSet & ability to intern in given IstrSet + +pub fn intern(v: Value) Value { + const istr = assert(v); + const set = gc.mainIstrSet(); + return pack(set.getOrPut(istr)); +} |
