From 6fad780b6c0a8ac0bce4cd33fea54cb960b5e36d Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Mon, 1 Jun 2026 18:46:39 +0200 Subject: Code cleanup. --- src/zisp/gc/IstrSet.zig | 88 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig index f3212ab..15af760 100644 --- a/src/zisp/gc/IstrSet.zig +++ b/src/zisp/gc/IstrSet.zig @@ -41,6 +41,15 @@ const sector_u64_cnt = 4; const sector_size = sector_u64_cnt * 8; +// Empirically, 32 is the sweet spot; perhaps because it corresponds to 4 KiB; +// this was however tested with strings from /dev/random, which also tended to +// be on the shorter end. TODO: Re-test with various data. +const max_probe_length = 32; + +// Really just a fall-back to ensure we don't fill to 100% because our resize +// logic needs to find at least one empty sector. +const max_fill_percent = 95; + const Bucket = [bucket_sectors][sector_u64_cnt]u64; const SectorPtr = *align(8) SectorHead; @@ -100,9 +109,10 @@ const SectorHead = packed union { } }; -buckets: []Bucket, +buckets: []Bucket = undefined, used_sectors: usize = 0, +used_threshold: usize = undefined, const default_bcount = 512; @@ -112,13 +122,15 @@ pub fn init(alloc: Alloc) !Set { pub fn initCustom(alloc: Alloc, bcount: usize) !Set { std.debug.assert(@popCount(bcount) == 1); // Must be power of 2. - return Set{ .buckets = try allocBuckets(alloc, bcount) }; + var self = Set{}; + try self.allocBuckets(alloc, bcount); + return self; } -fn allocBuckets(alloc: Alloc, bcount: usize) ![]Bucket { - const bs = try alloc.alloc(Bucket, bcount); - @memset(bs, std.mem.zeroes(Bucket)); - return bs; +fn allocBuckets(self: *Set, alloc: Alloc, bcount: usize) !void { + self.buckets = try alloc.alloc(Bucket, bcount); + @memset(self.buckets, std.mem.zeroes(Bucket)); + self.used_threshold = bcount * bucket_sectors * max_fill_percent / 100; } pub fn deinit(self: *Set, alloc: Alloc) void { @@ -149,23 +161,19 @@ pub fn add(self: *Set, alloc: Alloc, s: []const u8) !IstrPtr { } }; - // Check if we walked 64 buckets (4096 bytes) - if ((idx -% idx_init) >= 64) { + if (idx -% idx_init >= max_probe_length) { try self.resize(alloc); return self.add(alloc, s); } - const sec = sector(self.buckets, idx, s_idx); - const result = sec.insert(alloc, hash, s); - self.used_sectors += 1; - - // Resize at 80% fill rate - if (self.used_sectors >= (self.buckets.len * bucket_sectors * 8) / 10) { + if (self.used_sectors >= self.used_threshold) { try self.resize(alloc); + return self.add(alloc, s); } - return result; + const sec = sector(self.buckets, idx, s_idx); + return sec.insert(alloc, hash, s); } fn resize(self: *Set, alloc: Alloc) !void { @@ -173,55 +181,51 @@ fn resize(self: *Set, alloc: Alloc) !void { const old_bs = self.buckets; const new_bc = old_bc << 1; - const new_bs = try allocBuckets(alloc, new_bc); - @memset(new_bs, std.mem.zeroes(Bucket)); + + try self.allocBuckets(alloc, new_bc); + defer alloc.free(old_bs); + + const new_bs = self.buckets; const old_mask = old_bc - 1; const new_mask = new_bc - 1; - // Find a cluster root by searching backwards. - const idx_init = for (old_bs, 0..) |_, i| { - // old_mask = highest valid index - const idx = old_mask - i; + // Make sure we're not in the middle of a cluster + var idx = for (old_bs, 0..) |_, idx| { const s0 = sector(old_bs, idx, 0); const s1 = sector(old_bs, idx, 1); - // NOTE: Might also end up pointing to an empty bucket if (s0.hash == 0 or s1.hash == 0) break (idx + 1) & old_mask; - } else unreachable; // Resize at 80% guarantees empty slots. + } else unreachable; // Resize strategy guarantees empty slots - const idx_last = (idx_init -% 1) & old_mask; + const idx_last = (idx -% 1) & old_mask; - var idx = idx_init; while (idx != idx_last) : (idx = (idx + 1) & old_mask) { - // Make sure we're at the start of a cluster - if (sector(old_bs, idx, 0).hash == 0) continue; + // Need to walk destinations sector by sector, since strings grouped in + // one sector in the old buckets may be split across buckets in the new + // array. - // Have to use sector offsets here - var dest_lo = bucket_sectors * idx; - var dest_hi = bucket_sectors * (idx + old_bc); + const bucket_size = sector_size * bucket_sectors; + const new_bs_base = @intFromPtr(new_bs.ptr); + + var dest_lo = new_bs_base + (bucket_size * idx); + var dest_hi = new_bs_base + (bucket_size * (idx + old_bc)); // Start copying cluster - const new_bs_base = @intFromPtr(new_bs.ptr); b: while (idx != idx_last) : (idx = (idx + 1) & old_mask) { inline for (0..bucket_sectors) |s_idx| { const sec = sector(old_bs, idx, s_idx); if (sec.hash == 0) break :b; const new_idx = sec.hash & new_mask; + var dest: SectorPtr = undefined; if (new_idx < old_bc) { - const dest_addr = new_bs_base + dest_lo * sector_size; - const dest: SectorPtr = @ptrFromInt(dest_addr); - sec.copy(dest); - dest_lo += 1; + dest = @ptrFromInt(dest_lo); + dest_lo += sector_size; } else { - const dest_addr = new_bs_base + dest_hi * sector_size; - const dest: SectorPtr = @ptrFromInt(dest_addr); - sec.copy(dest); - dest_hi += 1; + dest = @ptrFromInt(dest_hi); + dest_hi += sector_size; } + sec.copy(dest); } } } - - alloc.free(old_bs); - self.buckets = new_bs; } -- cgit v1.2.3