summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/zisp/gc/IstrSet.zig88
1 files changed, 46 insertions, 42 deletions
diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig
index f3212ab..15af760 100644
--- a/src/zisp/gc/IstrSet.zig
+++ b/src/zisp/gc/IstrSet.zig
@@ -41,6 +41,15 @@ const sector_u64_cnt = 4;
const sector_size = sector_u64_cnt * 8;
+// Empirically, 32 is the sweet spot; perhaps because it corresponds to 4 KiB;
+// this was however tested with strings from /dev/random, which also tended to
+// be on the shorter end. TODO: Re-test with various data.
+const max_probe_length = 32;
+
+// Really just a fall-back to ensure we don't fill to 100% because our resize
+// logic needs to find at least one empty sector.
+const max_fill_percent = 95;
+
const Bucket = [bucket_sectors][sector_u64_cnt]u64;
const SectorPtr = *align(8) SectorHead;
@@ -100,9 +109,10 @@ const SectorHead = packed union {
}
};
-buckets: []Bucket,
+buckets: []Bucket = undefined,
used_sectors: usize = 0,
+used_threshold: usize = undefined,
const default_bcount = 512;
@@ -112,13 +122,15 @@ pub fn init(alloc: Alloc) !Set {
pub fn initCustom(alloc: Alloc, bcount: usize) !Set {
std.debug.assert(@popCount(bcount) == 1); // Must be power of 2.
- return Set{ .buckets = try allocBuckets(alloc, bcount) };
+ var self = Set{};
+ try self.allocBuckets(alloc, bcount);
+ return self;
}
-fn allocBuckets(alloc: Alloc, bcount: usize) ![]Bucket {
- const bs = try alloc.alloc(Bucket, bcount);
- @memset(bs, std.mem.zeroes(Bucket));
- return bs;
+fn allocBuckets(self: *Set, alloc: Alloc, bcount: usize) !void {
+ self.buckets = try alloc.alloc(Bucket, bcount);
+ @memset(self.buckets, std.mem.zeroes(Bucket));
+ self.used_threshold = bcount * bucket_sectors * max_fill_percent / 100;
}
pub fn deinit(self: *Set, alloc: Alloc) void {
@@ -149,23 +161,19 @@ pub fn add(self: *Set, alloc: Alloc, s: []const u8) !IstrPtr {
}
};
- // Check if we walked 64 buckets (4096 bytes)
- if ((idx -% idx_init) >= 64) {
+ if (idx -% idx_init >= max_probe_length) {
try self.resize(alloc);
return self.add(alloc, s);
}
- const sec = sector(self.buckets, idx, s_idx);
- const result = sec.insert(alloc, hash, s);
-
self.used_sectors += 1;
-
- // Resize at 80% fill rate
- if (self.used_sectors >= (self.buckets.len * bucket_sectors * 8) / 10) {
+ if (self.used_sectors >= self.used_threshold) {
try self.resize(alloc);
+ return self.add(alloc, s);
}
- return result;
+ const sec = sector(self.buckets, idx, s_idx);
+ return sec.insert(alloc, hash, s);
}
fn resize(self: *Set, alloc: Alloc) !void {
@@ -173,55 +181,51 @@ fn resize(self: *Set, alloc: Alloc) !void {
const old_bs = self.buckets;
const new_bc = old_bc << 1;
- const new_bs = try allocBuckets(alloc, new_bc);
- @memset(new_bs, std.mem.zeroes(Bucket));
+
+ try self.allocBuckets(alloc, new_bc);
+ defer alloc.free(old_bs);
+
+ const new_bs = self.buckets;
const old_mask = old_bc - 1;
const new_mask = new_bc - 1;
- // Find a cluster root by searching backwards.
- const idx_init = for (old_bs, 0..) |_, i| {
- // old_mask = highest valid index
- const idx = old_mask - i;
+ // Make sure we're not in the middle of a cluster
+ var idx = for (old_bs, 0..) |_, idx| {
const s0 = sector(old_bs, idx, 0);
const s1 = sector(old_bs, idx, 1);
- // NOTE: Might also end up pointing to an empty bucket
if (s0.hash == 0 or s1.hash == 0) break (idx + 1) & old_mask;
- } else unreachable; // Resize at 80% guarantees empty slots.
+ } else unreachable; // Resize strategy guarantees empty slots
- const idx_last = (idx_init -% 1) & old_mask;
+ const idx_last = (idx -% 1) & old_mask;
- var idx = idx_init;
while (idx != idx_last) : (idx = (idx + 1) & old_mask) {
- // Make sure we're at the start of a cluster
- if (sector(old_bs, idx, 0).hash == 0) continue;
+ // Need to walk destinations sector by sector, since strings grouped in
+ // one sector in the old buckets may be split across buckets in the new
+ // array.
- // Have to use sector offsets here
- var dest_lo = bucket_sectors * idx;
- var dest_hi = bucket_sectors * (idx + old_bc);
+ const bucket_size = sector_size * bucket_sectors;
+ const new_bs_base = @intFromPtr(new_bs.ptr);
+
+ var dest_lo = new_bs_base + (bucket_size * idx);
+ var dest_hi = new_bs_base + (bucket_size * (idx + old_bc));
// Start copying cluster
- const new_bs_base = @intFromPtr(new_bs.ptr);
b: while (idx != idx_last) : (idx = (idx + 1) & old_mask) {
inline for (0..bucket_sectors) |s_idx| {
const sec = sector(old_bs, idx, s_idx);
if (sec.hash == 0) break :b;
const new_idx = sec.hash & new_mask;
+ var dest: SectorPtr = undefined;
if (new_idx < old_bc) {
- const dest_addr = new_bs_base + dest_lo * sector_size;
- const dest: SectorPtr = @ptrFromInt(dest_addr);
- sec.copy(dest);
- dest_lo += 1;
+ dest = @ptrFromInt(dest_lo);
+ dest_lo += sector_size;
} else {
- const dest_addr = new_bs_base + dest_hi * sector_size;
- const dest: SectorPtr = @ptrFromInt(dest_addr);
- sec.copy(dest);
- dest_hi += 1;
+ dest = @ptrFromInt(dest_hi);
+ dest_hi += sector_size;
}
+ sec.copy(dest);
}
}
}
-
- alloc.free(old_bs);
- self.buckets = new_bs;
}