summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/test/strings.zig6
-rw-r--r--src/zisp/gc/IstrSet.zig135
-rw-r--r--src/zisp/io/Parser.zig2
-rw-r--r--src/zisp/value/istr.zig35
4 files changed, 93 insertions, 85 deletions
diff --git a/src/test/strings.zig b/src/test/strings.zig
index 09c8e2a..09aa988 100644
--- a/src/test/strings.zig
+++ b/src/test/strings.zig
@@ -11,13 +11,13 @@ const fx = value.fixnum;
test "istr" {
const s1 = "foo bar baz";
- const v1 = try istr.intern(s1);
+ const v1 = try istr.getOrNew(s1);
const v1_len: usize = @intCast(fx.unpack(istr.getLen(v1)));
try testing.expectEqualStrings(s1, istr.assert(v1).str());
try testing.expectEqual(s1.len, v1_len);
const s2 = @embedFile("data/string.txt");
- const v2 = try istr.intern(s2);
+ const v2 = try istr.getOrNew(s2);
const v2_len: usize = @intCast(fx.unpack(istr.getLen(v2)));
try testing.expectEqualStrings(s2, istr.assert(v2).str());
try testing.expectEqual(s2.len, v2_len);
@@ -25,7 +25,7 @@ test "istr" {
// Check that modifying a slice doesn't affect the string.
var s3 = "test".*;
- const v3 = try istr.intern(&s3);
+ const v3 = try istr.getOrNew(&s3);
s3[0] = 'x';
try testing.expectEqualStrings("test", istr.assert(v3).str());
}
diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig
index b22d57b..4146713 100644
--- a/src/zisp/gc/IstrSet.zig
+++ b/src/zisp/gc/IstrSet.zig
@@ -12,8 +12,20 @@ const Set = @This();
const max_fill_percent = 80;
const Bucket = packed struct(u64) {
- ptr: u48,
- fp: u16,
+ ptr: u48 = 0,
+ fp: u16 = 0,
+
+ pub fn empty(self: Bucket) bool {
+ return self.ptr == 0;
+ }
+
+ pub fn istrPtr(self: *const Bucket) IstrPtr {
+ return @ptrFromInt(self.ptr);
+ }
+
+ pub fn putIstrPtr(self: *Bucket, istr: IstrPtr) void {
+ self.ptr = @intCast(@intFromPtr(istr));
+ }
};
alloc: Alloc,
@@ -37,7 +49,7 @@ pub fn initCustom(alloc: Alloc, bcount: usize) !Set {
fn allocBuckets(self: *Set, bcount: usize) !void {
self.buckets = try self.alloc.alloc(Bucket, bcount);
- @memset(self.buckets, std.mem.zeroes(Bucket));
+ @memset(self.buckets, Bucket{});
self.used_threshold = bcount * max_fill_percent / 100;
}
@@ -45,90 +57,77 @@ pub fn deinit(self: *Set) void {
self.alloc.free(self.buckets);
}
-/// Add, or get existing, string.
-pub fn add(self: *Set, s: []const u8) !IstrPtr {
+/// Get the istr with the given string contents, or alloc and store a new one.
+pub fn getOrNew(self: *Set, s: []const u8) !IstrPtr {
std.debug.assert(s.len < 256);
+ return self.getOrNewOrPut(s, null);
+}
+
+/// Get the canonical istr with the same contents, or store this one as it.
+pub fn getOrPut(self: *Set, istr: IstrPtr) !IstrPtr {
+ return self.getOrNewOrPut(istr.str(), istr);
+}
+fn getOrNewOrPut(self: *Set, s: []const u8, existing: ?IstrPtr) !IstrPtr {
if (self.used_buckets > self.used_threshold) {
try self.resize();
}
- const idx_mask = self.buckets.len - 1;
-
- const hash = std.hash_map.hashString(s);
- const fp: u16 = @truncate(hash);
+ const hash = strHash(s);
+ const fp = hashFp(hash);
- const idx_init = @as(usize, @truncate(hash & idx_mask));
+ const idx_mask = self.buckets.len - 1;
+ const idx_start = hash & idx_mask;
- var idx = idx_init;
- while (true) : (idx = (idx +% 1) & idx_mask) {
- const bucket = self.buckets[idx];
- if (bucket.ptr == 0) break;
+ var idx = idx_start;
+ while (true) : (idx = (idx + 1) & idx_mask) {
+ const bucket = &self.buckets[idx];
+ if (bucket.empty()) {
+ self.used_buckets += 1;
+ const istr = existing orelse try self.newIstr(s);
+ bucket.putIstrPtr(istr);
+ bucket.fp = fp;
+ return istr;
+ }
if (bucket.fp == fp) {
- const istr: IstrPtr = @ptrFromInt(bucket.ptr);
- if (std.hash_map.eqlString(s, istr.str())) return istr;
+ const istr = bucket.istrPtr();
+ if (strEq(s, istr.str())) return istr;
}
}
+}
+
+fn resize(self: *Set) !void {
+ const old_buckets = self.buckets;
+ try self.allocBuckets(old_buckets.len << 1);
+ defer self.alloc.free(old_buckets);
+ const new_buckets = self.buckets;
- self.used_buckets += 1;
+ const idx_mask = new_buckets.len - 1;
+ for (old_buckets) |old| {
+ if (old.empty()) continue;
+ const str = old.istrPtr().str();
+ var idx = strHash(str) & idx_mask;
+ while (!new_buckets[idx].empty()) idx = (idx + 1) & idx_mask;
+ new_buckets[idx] = old;
+ }
+}
+fn newIstr(self: *Set, s: []const u8) !IstrPtr {
const algn = std.mem.Alignment.of(IstrPtr);
const size = @sizeOf(IstrHead) + s.len;
- const ptr = try self.alloc.alignedAlloc(u8, algn, size);
- const istr: IstrPtr = @ptrCast(ptr);
- istr.* = .{ .len = @intCast(s.len) };
+ const istr: IstrPtr = @ptrCast(try self.alloc.alignedAlloc(u8, algn, size));
istr.putStr(s);
- self.buckets[idx] = .{
- .fp = fp,
- .ptr = @intCast(@intFromPtr(istr)),
- };
return istr;
}
-fn resize(self: *Set) !void {
- const old_bcount = self.buckets.len;
- const new_bcount = self.buckets.len << 1;
-
- const old_mask = old_bcount - 1;
- const new_mask = new_bcount - 1;
-
- const old_buckets = self.buckets;
- try self.allocBuckets(new_bcount);
- defer self.alloc.free(old_buckets);
- const new_buckets = self.buckets;
-
- // Make sure we're not in the middle of a cluster; find an empty bucket.
- const idx_start = b: for (old_buckets, 0..) |_, idx| {
- if (old_buckets[idx].ptr == 0) break :b idx;
- } else unreachable; // Resize strategy guarantees empty slots
+fn strHash(s: []const u8) u64 {
+ return std.hash_map.hashString(s);
+}
- var idx = idx_start;
- while (true) {
- // +1 at start of first iteration is OK since idx_start is empty.
- idx = (idx +% 1) & old_mask;
-
- if (idx == idx_start) break;
- if (old_buckets[idx].ptr == 0) continue;
-
- var dest_lo = idx;
- var dest_hi = idx + old_bcount;
- while (true) : (idx = (idx +% 1) & old_mask) {
- const bucket = old_buckets[idx];
- if (bucket.ptr == 0) break;
-
- const istr: IstrPtr = @ptrFromInt(bucket.ptr);
- const hash = std.hash_map.hashString(istr.str());
- const new_idx = @as(usize, @truncate(hash & new_mask));
-
- if (new_idx < old_bcount) {
- new_buckets[dest_lo] = bucket;
- dest_lo += 1;
- } else {
- new_buckets[dest_hi] = bucket;
- dest_hi = (dest_hi +% 1) & new_mask;
- }
- }
+fn strEq(s: []const u8, s2: []const u8) bool {
+ return std.hash_map.eqlString(s, s2);
+}
- if (idx == idx_start) break;
- }
+fn hashFp(hash: u64) u16 {
+ return @intCast(hash >> 48);
}
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index 490b231..4fdde8b 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -195,7 +195,7 @@ fn getCharsAsString(p: *Parser) !Value {
return if (value.sstr.isValidSstr(s))
value.sstr.pack(s)
else if (value.istr.isValidIstr(s) and p.istr_set != null)
- value.istr.internInSet(p.istr_set.?, s)
+ value.istr.getOrNewInSet(p.istr_set.?, s)
else
value.array.newString(p.alloc, s);
}
diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig
index 1159cc7..9156606 100644
--- a/src/zisp/value/istr.zig
+++ b/src/zisp/value/istr.zig
@@ -20,18 +20,16 @@ pub const IstrHead = packed struct(u8) {
return @ptrCast(self);
}
- pub fn putStr(self: *@This(), s: []const u8) void {
- std.debug.assert(s.len <= 255);
- const buf = self.bufU8();
+ pub fn str(self: *@This()) []const u8 {
const start = @sizeOf(IstrHead);
- @memcpy(buf[start .. start + s.len], s);
+ const len: usize = self.len;
+ return self.bufU8()[start .. start + len];
}
- pub fn str(self: *@This()) []const u8 {
- const buf = self.bufU8();
+ pub fn putStr(self: *@This(), s: []const u8) void {
const start = @sizeOf(IstrHead);
- const len: usize = self.len;
- return buf[start .. start + len];
+ self.len = @intCast(s.len);
+ @memcpy(self.bufU8()[start .. start + s.len], s);
}
};
@@ -60,14 +58,17 @@ fn assertValidIstr(s: []const u8) void {
}
}
-pub fn intern(s: []const u8) !Value {
- return internInSet(gc.mainIstrSet(), s);
+fn pack(istr: IstrPtr) Value {
+ return value.ptr.pack(@ptrCast(istr), .istr);
+}
+
+pub fn getOrNew(s: []const u8) !Value {
+ return getOrNewInSet(gc.mainIstrSet(), s);
}
-pub fn internInSet(set: *IstrSet, s: []const u8) !Value {
+pub fn getOrNewInSet(set: *IstrSet, s: []const u8) !Value {
assertValidIstr(s);
- const istr = try set.add(s);
- return value.ptr.pack(@ptrCast(istr), .istr);
+ return pack(try set.getOrNew(s));
}
pub fn getStr(v: Value) []const u8 {
@@ -84,3 +85,11 @@ pub fn getLen(v: Value) Value {
const istr = assert(v);
return value.fixnum.pack(@intCast(istr.len));
}
+
+// TODO: Zisp representation of IstrSet & ability to intern in given IstrSet
+
+pub fn intern(v: Value) Value {
+ const istr = assert(v);
+ const set = gc.mainIstrSet();
+ return pack(set.getOrPut(istr));
+}