3 files changed, 69 insertions, 8 deletions
diff --git a/docs/c1/1-parse.md b/docs/c1/1-parse.md
index 31e340a..0fc0da5 100644
--- a/docs/c1/1-parse.md
+++ b/docs/c1/1-parse.md
@@ -228,12 +228,12 @@ including zero (aka ASCII NULL).  The parser reads bytes, not characters, and
 has no concept of a character encoding, which means that a string can contain
 UTF-8 byte sequences, but these are not tested for validity.
 
-A string that is up to 64 bytes long is automatically *interned*, meaning any
+A string that is up to 255 bytes long is automatically *interned*, meaning any
 occurrence of the same string -- equal in length and containing the same byte
 values -- ends up being represented by the same bit-pattern; either a memory
 address, or an immediate representation within a CPU word for short strings.
 
-Strings with a length greater than 64 bytes end up being represented by a
+Strings with a length greater than 255 bytes end up being represented by a
 distinct memory address, even if they are equal in length and content.
 
 
diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig
new file mode 100644
index 0000000..c90f5dd
--- /dev/null
+++ b/src/zisp/gc/IstrSet.zig
@@ -0,0 +1,53 @@
+//!
+//! Efficient hash set for short(ish) strings.
+//!
+//! Layout: []Bucket
+//!
+//! * Bucket: [8]Sector
+//!
+//! * Sector: [8]u8 or u64
+//!
+//! Strategy: Hybrid between open addressing with linear probing, and chaining,
+//! with special markers for indirection; see details below.
+//!
+//! The Zisp parser interns strings up to 255 bytes in length, using the `Istr`
+//! heap representation with a len prefix of one byte and hash cache suffix of
+//! eight bytes.  This means that strings of up to 55 bytes of content can fit
+//! within 64 bytes.  We store these inline in our buckets.  A pointer to that
+//! bucket is then itself the Istr.
+//!
+//! A bucket can contain multiple strings if they fit.  The strings are aligned
+//! at 8 byte "sectors" so a pointer to a sector can also be a valid Istr.
+//!
+//! A sector may appear empty (all 0 bytes) but actually be part of a string;
+//! this is not a problem since strings are length-prefixed.  Linear probing
+//! starts at the beginning of a bucket, and jumps over occupied sectors as
+//! indicated by a string's length prefix.  This can continue across buckets.
+//! When an empty sector is found after jumping over a string, we check if
+//! there's enough space within the bucket to hold the string, otherwise we
+//! advance into the next bucket and keep probing.
+//!
+//! Splitting across buckets is not allowed, since this introduces ambiguity:
+//! Strings can contain ASCII NUL bytes, so if a string ending in a series of
+//! NUL bytes were allowed to spill into the next bucket, it would appear to
+//! begin with an empty sector.  A later probe sequence beginning with that
+//! bucket would then see it as empty and overwrite part of another string.
+//!
+//! Since strings up to 6 bytes are packed into NaN, they can never appear here,
+//! which means the empty string cannot appear, so a length prefix cannot be 0.
+//! Thus there's no ambiguity between empty sector and empty string.
+//!
+//! A sector can begin with a 0 byte, but have a non-0 value as a u64; this
+//! indicates a packed pointer to an Istr elsewhere on the heap.  The low 48
+//! bits are the pointer, while the high bits encode the length of the string
+//! being pointed to, so we don't need to follow the pointer for the equality
+//! check in case the lengths are different.  Also, in this case, the cached
+//! hash follows directly in the next sector, so we also needn't follow the
+//! pointer when resizing the hash set.
+//!
+
+/// Bucket count
+bcount: usize = 512, // 512
+
+used_bytes: usize,
+total_bytes: usize,
diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig
index f34dbd0..2ca2dad 100644
--- a/src/zisp/value/istr.zig
+++ b/src/zisp/value/istr.zig
@@ -1,25 +1,33 @@
 const std = @import("std");
 
-const value = @import("../value.zig");
 const gc = @import("../gc.zig");
+const value = @import("../value.zig");
 
 const Value = value.Value;
 
 // Zig API
 
-/// Pointer to beginning of an interned string with a 1-byte length prefix.
-/// Aligned to 8 bytes like all Zisp heap pointers.
+/// Pointer to beginning of an interned string with a 1-byte length prefix and
+/// 8-byte hash cache suffix.  Aligned to 8 bytes, like all Zisp heap pointers.
 pub const Istr = *align(8) struct {
-    pub fn buf(self: *@This()) [*]u8 {
+    fn buf(self: *@This()) [*]u8 {
         return @ptrCast(self);
     }
 
     pub fn len(self: *@This()) u8 {
-        return buf()[0];
+        return self.buf()[0];
     }
 
     pub fn str(self: *@This()) []u8 {
-        return buf()[1 .. 1 + len()];
+        const end = 1 + self.len();
+        return self.buf()[1..end];
+    }
+
+    pub fn getHash(self: *@This()) u64 {
+        // Hash stored in the next naturally aligned u64
+        const hash_idx = ((self.len() + 8) & -8) / 8;
+        const buf: [*]u64 = @ptrCast(self);
+        return buf[hash_idx];
     }
 };