update

author: Taylan Kammer <taylan.kammer@gmail.com> 2025-02-19 23:29:26 +0100
committer: Taylan Kammer <taylan.kammer@gmail.com> 2025-02-19 23:29:26 +0100
commit: 4e88891235664917a2db44b84c0bbeeb13dd71ad (patch)
tree: 7ed8ac2272ce92054fdf2f4e5e09b156dfc5a4d1
parent: 4d0db1a1065f18d879b3ff90da6ecb14e9e1ae31 (diff)
13 files changed, 1079 insertions, 145 deletions
diff --git a/html/notes/compilation.md b/html/notes/compile.md
index 4d5fc6d..4d5fc6d 100644
--- a/html/notes/compilation.md
+++ b/html/notes/compile.md
diff --git a/html/notes/format.md b/html/notes/format.md
new file mode 100644
index 0000000..39da84a
--- /dev/null
+++ b/html/notes/format.md
@@ -0,0 +1,12 @@
+WIP WIP WIP
+
+(format "template" arg ...) ;sprintf
+(format obj) ;like write but returns string 
+(format! "template" arg ...) ;printf 
+(format! arg) ;write
+
+The ones with a string template are special forms and process the
+template string at compile time and ensure correct number of args.
+
+Need a way to let a special form's name also appear as an identifier
+like Guile does it with record accessors and shit.
diff --git a/html/notes/reader.md b/html/notes/reader.md
new file mode 100644
index 0000000..ebbe1ea
--- /dev/null
+++ b/html/notes/reader.md
@@ -0,0 +1,470 @@
+# Reader? Decoder? I barely know 'er!
+
+*This started from an expansion to the following, then became its own
+article:*
+
+[Symbols are strings are symbols](symbols.html)
+
+OK but hear me out... What if there were different reader modes, for
+code and (pure) data?
+
+I want Zisp to have various neat [syntactic extensions](sugar.html)
+for programming purposes anyway, like the lambda shorthand, and these
+shouldn't apply to configuration files, either.  (Although they seem
+unlikely to occur by accident.)
+
+So what if the transform from string literal to quoted string literal
+only occurred in code reading mode?
+
+At least one problem remains, which is that `'(foo "bar")` would turn
+into `(quote (foo (quote bar)))` because the reader would be in code
+mode while reading it...
+
+This reminds me of the long-standing annoyance in Scheme that "quote
+is unhygienic" and maybe we can tackle this problem as well now.
+
+Also, expressions like `'(foo '(bar))` always seemed weird to me, and
+probably have no place in Scheme, because we don't generate code via
+quote; we generate it with macros that operate on explicit syntax
+objects rather than pure data.
+
+I want to experiment with an idea like this:
+
+    ;; "code reader mode" transformations
+
+    '(foo bar)     -> (#quote foo bar)
+    
+    '(foo 'bar)    -> ERROR
+
+    "foo"          -> (#quote . foo)
+
+    "foo bar"      -> (#quote . "foo bar")
+
+    '(foo "bar")   -> (#quote foo bar)
+
+    '(foo "x y")   -> (#quote foo "x y")
+
+The right-hand side shows what you would get if you read the form on
+the left in code reader mode, then wrote it back out in data mode.
+
+The writer could also have a code writer mode, which applies the
+reverse transformations.  There should be a one to one mapping,
+unambiguous, so this always works.  A "hygienic" way to quote is
+imperative here, since the writer could otherwise not know whether
+some `quote` keyword in a list is *the* quote special form, or just
+part of some data.
+
+We've made quote into a special token, `#quote`, to solve that.
+Instead of adding a separate symbol data type that's a subtype of
+strings, I think I'll add something called a "rune" or such that's
+represented like `#foo` and allows for custom reader extensions, or
+rather, writer extensions.
+
+Essentially, these runes would be bound to a pair of the following:
+
+1. A procedure that accepts a datum and returns some type of value.
+
+2. A procedure that takes values of that type, and turns them back
+   into written format.
+
+For `#quote`, the reader procedure would be the identity function.
+The writer procedure would need to be a little more sophisticated.
+
+Note that the first procedure would not actually be called during
+reading of data.  Somewhat confusingly, it would only be called in
+evaluation of code.
+
+Let's recap.  Starting with pure data reading and writing:
+
+1. There is no special reader syntax.  This s-expression format is a
+bare minimum of what's needed to represent sequential data i.e. lists
+and lists are the only compound data type recognized by the reader.
+Anything that isn't a list is either an atomic value, or a string
+which may or may not be considered atomic depending on how pedantic
+you want to be.  Oh and runes are allowed.
+
+   A. This is basically "classic" s-expressions with runes added.
+   Only lists, numbers, and strings/symbols are recognized.
+
+   B. Heck, numbers may not be recognized.  Or maybe they will be
+   limited to integers and floats, but no rationals or such, and
+   reading a float will guarantee no loss of precision?
+
+2. Writing data returned by the data reader back out, in data form,
+will produce exactly what was read, with the sole exception being
+whitespace differences.  The data is not allowed to contain any
+non-atomic values other than proper lists.
+
+   A. It's important not to allow floats that IEEE 754 doubles can't
+   represent, since then differences between input and output would
+   occur.  But what about numbers like "10.00"?  That would also
+   become something else when written back out.
+
+   B. OK, maybe numbers represented in a non-canonical way are a
+   second source of difference between reading and writing back out,
+   but let's at least guarantee there's no loss of precision.
+
+(I've not considered comments.  Maybe they will be preserved?  Maybe
+they should be implemented as code reader syntax sugar as well??)
+
+And now code reading and writing:
+
+1. Various syntax sugar is internally transformed into runes, with
+non-list compound data literals (vectors, hash tables, etc.) needing
+this type of representation to appear in code.
+
+   A. Writing that data back out in data mode will reveal the inner
+   workings of the language, producing output containing runes.
+
+   B. Direct use of runes may be forbidden; not sure about this.
+
+   C. Evaluating this data containing runes will produce, in-memory,
+   the actual values being represented.  The "reader procedure" tied
+   to the rune is responsible for this, though the fact that it's
+   evaluation and not reading that calls that procedure makes it
+   confusing so a better name is needed.  Maybe just "decoder."
+
+2. For every data type that falls outside the pure data syntax, there
+is a procedure that turns it into a canonical data representation
+based on lists and atomics, always using the format `(#rune ...)`.
+
+   A. Another procedure is capable of turning that back into reader
+   sugar, but this is not terribly important.  Although it would be
+   neat to be able to write out code that looks like hand-written
+   program code, this really is just a bonus feature.
+
+   B. For some types, turning them back into code without any runes
+   may be highly complicated; procedures, in particular, would need
+   decompilation to make this work.
+
+
+## Recap (or not?)
+
+Wow, that was a long "recap."  I actually came up with new ideas in
+writing that.  Let's recap the recap.  I'll represent the mechanisms
+as different pipelines that can happen using the various features.
+
+Typical pipeline when reading and evaluating code:
+
+    code-file --[code-reader]--> code-data --[eval]--> values
+                 ^^^^^^^^^^^                  ^^^^
+              turns sugar into        calls rune decoders
+                 rune calls            to produce values
+            i.e. desugars code          & compiles code
+
+Reading in a [serialized program](compile.html):
+
+    data-file --[data-reader]--> data --[eval]--> values
+                                         ^^^^
+                                    fairly trivial
+                                (no lambdas, only runes)
+
+Reading pure and simple data like a config file:
+
+    data-file --[data-reader]--> data (no runes to eval)
+
+Note that "data" is a subset of "values" basically.  And the term
+"code-data" which was used above just means data that is meant to be
+evaluated as code, but is totally valid as pure data.  This is not to
+be confused with the "data" that existed in the intermediate step
+while we were reading a serialized program; that was absent of any
+forms like lambdas that need compilation.
+
+OK, that last bit was a bit confusing, and I realize it stems from
+conflating rune decoding with code compilation, so let's split that
+further up.  Above, "eval" is "decode + compile" basically, but it's
+possible to separate them, for example if we want to read a file of
+serialized values that should not contain any code:
+
+    values-file --[data-reader]--> values-data --[decode]--> values
+
+This is a secure way to read complex data even if it comes from an
+untrusted source.  It may contain runes that represent code, such as
+in the form of `(#program "binary")` (compiled procedure) or even
+`(#lambda (x) (do-things))` but so long as you don't actually call
+those things after having decoded them, they can't do anything.
+Decoding runes can't define macros or register new rune decoders,
+meaning there's no way to achieve arbitrary code execution.
+
+Heck, although `#lambda` exists to represent the desugaring of the
+`{...}` convenience syntax, it wouldn't actually work here because
+decoding runes would happen in a null-environment without any bound
+identifiers, meaning that e.g. `(#lambda (x) (+ x x))` would just
+raise an error during decoding, because the compiler would consider
+`+` unbound.
+
+Alternatively, instead of calling the compiler, the `#lambda` decoder
+could just be a no-op that returns the same form back, but without the
+rune, like `(lambda (x) (+ x x))`, because the compiler will take care
+of that later.  Yeah, I think this makes more sense.  Why doesn't the
+code reader directly give `(lambda ...)` for the `{...}` sugar?  Well,
+actually, the `#lambda` decoder may yield a syntax object where the
+first element specifically refers to the binding of `lambda` in the
+default environment, so you could use `{...}` in an environment where
+`lambda` is bound to something else, and you would still hygienically
+get the default lambda behavior from `{...}`.  Yay!
+
+(Wow, it's rabbit hole after rabbit hole today.  This is good though.
+I'm coming up with some crazy stuff.)
+
+It would be possible to decode "code-data" and get an internal memory
+representation of an uncompiled program which however already has
+various data structure literals turned into values.  This is super
+obscure but for sake of completeness:
+
+    code-file --[code-reader]--> code-data --[decode]--> code-values
+
+(These so-called "code-values" would only ever be useful for piping
+them into the compiler.  By the way, I initially used "eval" in the
+example of reading a serialized program, but "decode" would have been
+sufficient there.)
+
+
+## Here's a well-deserved break
+
+(There wasn't a new header in a while.  This seemed a good spot.)
+
+Now writing pipelines.  Let's reverse the above pipelines, from the
+bottom back towards eventually the first...
+
+The reverse of the super obscure thing above:
+
+    code-values --[encode]--> code-data --[code-writer]--> code-file
+
+That would only ever be useful for debugging things.  Now writing a
+data structure into a serialized file, without unnecessarily invoking
+the decompiler:
+
+    values --[encode]--> values-data --[data-writer]--> data-file
+
+That gives you a file containing only data, but the data is the
+encoded format of various data structures Zisp recognizes...
+Actually, that may include compiled procedures as well.
+
+Now the simple config file case, being serialized:
+
+    data -[data writer]-> data-file
+
+Now serializing a compiled program to a file, without decompilation:
+
+    values --[encode]--> values-data --[data-writer]--> data-file
+              ^^^^^^                    ^^^^^^^^^^^
+         data structures             no decompilation
+        become rune calls            or "re-sugaring"
+
+Oh, look at that.  It's the same as writing out data structures, as
+we've already seen previously...  This recap of a recap will need
+another recap for sure.
+
+And now, the full decompiler:
+
+    values --[uneval]--> code-data --[code-writer]--> code-file
+              ^^^^^^
+          decompilation
+
+Actually, just like "eval" is "decode + compile", the "uneval" here
+really is "decompile + encode".
+
+
+## The Revised Recap of the Recap
+
+The following exist:
+
+1. Readers:
+
+   1. Data reader: Reads lists, strings/symbols, runes, integers, and
+   IEEE 754 double-precision floats without loss of precision.
+
+   2. Code reader: Reads code that can contain various syntax sugar,
+   all of which has an equivalent representation with runes.
+
+2. In-memory transformers:
+
+   1. Decoder: Calls decoders for runes in data, to yield values.
+
+   2. Evaluator: [Executes aka compiles](compile.html) decoded values
+   into other values.[*]
+
+3. Reverse in-memory transformers:
+
+   1. Encoder: Reverse of the decoder. (Lossless?)
+
+   2. Unevaluator: Reverse of the evaluator. (Lossy.)
+
+4. Writers:
+
+   1. Data writer: Reverse of data reader. (Lossless.)
+
+   2. Code writer: Reverse of code reader. (Lossy?)
+
+(*) This needs decoding to run first, because otherwise it wouldn't
+    realize that you're e.g. calling `+` on a pair of rational number
+    constants represented through runes, so constant folding wouldn't
+    work.  Same with `vector-ref` on a vector literal represented as a
+    rune, and so on.
+
+
+## How in the seven hells did I arrive at this point?
+
+Jesus Christ!
+
+This was about symbols and strings being the same thing.
+
+But I love these rabbit holes.  They're mind expanding and you find
+out so many new things you never thought about.
+
+Did you notice, by the way, that the code reader/writer above is
+essentially a parser (and unparser) you would have in a regular
+programming language, where syntax becomes an AST?  The pure data
+format is basically our AST!
+
+But this doesn't mean we lost homoiconicity.  No, we merely expanded
+upon it by providing a more detailed explanation of the relationship
+between textual representation of code and in-memory data that exists
+at various stages before ultimate compilation.
+
+Oh, and did we achieve our strategy of strings = symbols now, or does
+it need to be dropped?  I think we achieved it.  The code reader, as
+described all the way up where the section "Reconsidering AGAIN"
+begins --in the original article; see top-- will desugar string
+literals into:
+
+    "foo" -> (#quote foo)
+
+(As already described originally...)
+
+And the `#quote` rune?  Well, it will not actually just return its
+operand verbatim, no!  It will return a syntax object that's a list
+with the first element specifically refers to the binding of `quote`
+from the standard library.  In other words, it's the evaluator that
+actually implements quote, not the decoder.
+
+Oh yes, this is very satisfying.  Everything is coming together.
+
+Syntax objects, by the way, will also have a rune-based external
+representation, so you can inspect the result of macro expansion.
+
+And yes, I think using runes directly in code mode should be illegal,
+because it allows referring to bindings in the standard library, or
+even bindings in arbitrary libraries by crafting syntax objects
+represented via runes, to bypass environment limits.
+
+That bug actually existed in Guile at some point, where one could
+craft syntax objects, represented as vector literals, to refer to
+bindings in other modules, making it impossible to run code in a
+sandboxed environment.  (It was fixed long ago, I believe.)
+
+Oh, but what about `#true` and `#false`?  OK, maybe there will be a
+whitelist of runes that are allowed in code.  That makes sense.
+
+We will see.  Still more details to be fleshed out.
+
+In any case, some runes must be able to declare that they don't take
+arguments, in which case `(#rune ...)` isn't decoded by passing the
+entire form to the decoder of `#rune`, but rather treated as a normal
+list whose first element is decoded as a nullary rune.  That's how
+boolean literals in code will be implemented.
+
+
+## Looking at more of the initial problems
+
+What happened to `'(quote "foo")` in code mode being weird?  Well,
+encountering an apostrophe tells the code reader that the next
+expression is a datum, so it switches to data mode for that.
+
+Wow, that was easy.
+
+This also means you can't use syntax sugar inside it, which is good
+because as we said previously, we don't want to use quoting to create
+code; we want to use syntax objects for that.
+
+This is really orthogonal to the whole runes issue, and could have
+been solved without that mechanism, but I'm happy I came up with it
+because it resolves hygiene issues.
+
+The syntax `#'(quote "foo")` would be sugar for a different rune, and
+the reader would remain in code mode, further desugaring any sugar
+found within, so this works: `#'{x (+ x x)}`
+
+Oh and I mentioned reader extensions (for code mode) but then didn't
+expand on that.  Well, whenever the code reader encounters this:
+
+    #foo(blah blah blah)
+
+It will turn that into:
+
+    (#foo blah blah blah)
+
+After which the decoder for `#foo` will be invoked, which could have
+been registered by the programmer.
+
+Can that registration be done in the same file though?  Normally, the
+execution step comes after decoding, and we decided that we don't want
+to allow arbitrary code execution to happen just when reading a data
+file and decoding it.  So something exceptional would need to happen
+for this to work.  Or maybe not.
+
+Remember that [compilation is execution](compile.html) in Zisp,
+meaning that compiling a file looks like this in pseudo-Scheme:
+
+    (define env (null-environment))    ;start out empty
+    
+    (while (not (eof? input))
+      (let* ((datum (read-code input)) ;desugar
+             (value (decode datum)))   ;decode
+        (eval! value env)))            ;eval in mutable env
+
+    (write (env-lookup env 'main))     ;serialize
+
+I've called eval `eval!` to indicate that it can mutate the env it
+receives, which is what import statements and defines would do.
+
+Let's modify that a little further to indicate the fact that reader
+macros, or in our terms, custom rune decoders, can be defined in the
+middle of the code file by affecting the environment:
+
+    (define env (null-environment))      ;start out empty
+    
+    (while (not (eof? input))
+      (let* ((datum (read-code input))   ;desugar
+             (value (decode datum env))) ;decode in env
+        (eval! value env)))              ;eval in mutable env
+
+    (write (env-lookup env 'main))       ;serialize
+
+Since the `decode` procedure is given an environment, it will look up
+decoders from therein.  So, after the evaluation of each top-level
+expression, the expressions coming after it could be using a custom
+decoder.
+
+What our reader macros cannot do is completely affect the lexical
+syntax of the language, as in, add more sugar.  You must rely on the
+global desugaring feature of `#x(...) -> (#x ...)` which, now that I
+think about it, is completely useless because a regular macro could
+have achieved exactly the same thing.
+
+OK, let's try that again.  The global desugaring wouldn't work on
+lists only, it would work on a number of things:
+
+    #x"foo"    -> (#x #string . foo)
+
+    #x[foo]    -> (#x #square . foo)
+
+    #x{foo}    -> (#x #braces . foo)
+
+You get the idea!
+
+(I've changed my mind that `"foo"` should desugar into a call to the
+regular `#quote` rune; it should be `#string` instead to disambiguate
+from the apostrophe if needed.)
+
+Also, all those would work without a rune as well, to allow a file to
+change the meaning of some of the default syntax sugar if desired:
+
+    "foo"    -> (#string . foo)
+
+    [foo bar]    -> (#square foo bar)
+
+    {foo bar}    -> (#braces foo bar)
+
+Or something like that.  I'm making this all up as I go.
diff --git a/html/notes/serialize.md b/html/notes/serialize.md
index e35177e..fb9963a 100644
--- a/html/notes/serialize.md
+++ b/html/notes/serialize.md
@@ -1,7 +1,6 @@
 # Everything can be serialized
 
-Let's look at the code mentioned in [compilation](compilation.html)
-again:
+Let's look at the code mentioned in [compilation](compile.html) again:
 
 ```scheme
 
diff --git a/html/notes/symbols.md b/html/notes/symbols.md
index aa3c448..f45f9cf 100644
--- a/html/notes/symbols.md
+++ b/html/notes/symbols.md
@@ -18,6 +18,7 @@ Instead of `string->symbol` we will have `string-intern` which
 basically does the same thing.  Dynamically generated strings that
 aren't passed to this function will be uninterned.
 
+
 ## But but but
 
 (Late addition because I didn't even notice this problem at first.
@@ -66,3 +67,46 @@ That prints: "base to us"
 
 I'm not married to the syntax `#"string"` and may end up using the
 simpler `|foo|` in the end.  It doesn't really matter.
+
+
+## More problems
+
+Dangit, couldn't have been so easy could it?
+
+What if you have a configuration file with these contents:
+
+    ((job-name "Clear /tmp directory")
+     (interval reboot)
+     (command "find /tmp -mindepth 1 -delete"))
+
+Now all those "string constants" will turn into lists with the string
+"quote" at its head.  Terrible.  One could write it with the explicit
+string literal syntax `#"foo"` for strings, but that's also terrible.
+
+
+## Salvageable
+
+I'm not yet done with this idea.  What if strings simply have a flag
+that says whether they are intended as a symbol or not?
+
+While reading, it would be set automatically.  Instead of `intern`,
+one would call a function like `symbol`, which would return a string
+with the flag set, after interning it if necessary; it would simply
+return the original string if it already had the flag set.
+
+Another way to look at this is that strings and symbols are sort of
+"polymorphic" and can be used interchangeably.  I don't want to get
+into JavaScript style automatic type conversions (yuck) but this may
+simply be a flag that's set on a string, which makes it a subtype of
+regular strings.
+
+Yes yes, I think that's good.  I even still have enough space left in
+the NaN-packing strategy to put a tag on "short strings" which are our
+6-byte immediate strings.
+
+
+## Reconsidering AGAIN
+
+*This got too long and off-topic so it continues here:*
+
+[Reader? Decoder? I barely know 'er!](reader.html)
diff --git a/html/style.css b/html/style.css
index f1b474b..4725089 100644
--- a/html/style.css
+++ b/html/style.css
@@ -1,6 +1,6 @@
 body {
     margin: 20px auto;
-    padding: 0 20px;
+    padding: 10px 20px;
     max-width: 80ch;
 
     background: #eee;
diff --git a/src/libzisp.zig b/src/libzisp.zig
index f5ad6af..174cd40 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -48,13 +48,6 @@ test "ptr" {
     const val: [*]Bucket = @ptrFromInt(256);
     const tag = ptr.Tag.string;
 
-    const f = ptr.packForeign(val);
-    try std.testing.expect(ptr.checkForeign(f));
-    try std.testing.expectEqual(
-        @intFromPtr(val),
-        @intFromPtr(ptr.unpackForeign(f)),
-    );
-
     const p = ptr.pack(val, tag);
     try std.testing.expect(ptr.check(p));
     try std.testing.expect(ptr.checkZisp(p, tag));
@@ -88,8 +81,35 @@ test "ptr" {
     try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
 }
 
+test "fptr" {
+    const ptr = value.ptr;
+
+    const int1: u50 = 0;
+    const int2: u50 = std.math.maxInt(u50);
+
+    const f1 = ptr.packForeign(int1);
+    try std.testing.expect(ptr.checkForeign(f1));
+    try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
+
+    const f2 = ptr.packForeign(int2);
+    try std.testing.expect(ptr.checkForeign(f2));
+    try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
+}
+
+test "rune" {
+    const r1 = value.rune.pack("test");
+    try std.testing.expect(value.rune.check(r1));
+
+    const s1, const l1 = value.rune.unpack(r1);
+    try std.testing.expectEqualStrings("test", s1[0..l1]);
+}
+
+const SstrImpl = struct { SstrPack, SstrUnpack };
+const SstrPack = *const fn ([]const u8) Value;
+const SstrUnpack = *const fn (Value) struct { [6]u8, u3 };
+
 test "sstr" {
-    const impls = .{
+    const impls = [_]SstrImpl{
         .{ value.sstr.pack, value.sstr.unpack },
         // .{ value.sstr.pack1, value.sstr.unpack1 },
         // .{ value.sstr.pack2, value.sstr.unpack2 },
@@ -97,63 +117,82 @@ test "sstr" {
         // .{ value.sstr.pack4, value.sstr.unpack4 },
     };
 
-    inline for (impls, 0..) |impl, i| {
-        const pack, const unpack = impl;
+    for (impls) |impl| {
+        try testSstr(impl);
+    }
+
+    if (impls.len > 1) {
+        const iters = switch (@import("builtin").mode) {
+            .Debug, .ReleaseSmall => 10_000_000,
+            .ReleaseSafe => 100_000_000,
+            .ReleaseFast => 1_000_000_000,
+        };
+        std.debug.print("Benchmarking with {} iters.\n", .{iters});
+        inline for (impls, 0..) |impl, i| {
+            try benchmarkSstr(impl, i, iters);
+        }
+    }
+}
+
+fn testSstr(impl: SstrImpl) !void {
+    const pack, const unpack = impl;
 
-        const ss1 = pack("1");
-        const ss2 = pack("123");
-        const ss3 = pack("123456");
+    const ss1 = pack("1");
+    const ss2 = pack("123");
+    const ss3 = pack("123456");
 
-        const s1, const l1 = unpack(ss1);
-        const s2, const l2 = unpack(ss2);
-        const s3, const l3 = unpack(ss3);
+    const s1, const l1 = unpack(ss1);
+    const s2, const l2 = unpack(ss2);
+    const s3, const l3 = unpack(ss3);
 
-        try std.testing.expect(value.sstr.check(ss1));
-        try std.testing.expect(value.sstr.check(ss2));
-        try std.testing.expect(value.sstr.check(ss3));
+    try std.testing.expect(value.sstr.check(ss1));
+    try std.testing.expect(value.sstr.check(ss2));
+    try std.testing.expect(value.sstr.check(ss3));
 
-        try std.testing.expectEqual(1, l1);
-        try std.testing.expectEqualStrings("1", s1[0..l1]);
+    try std.testing.expectEqual(1, l1);
+    try std.testing.expectEqualStrings("1", s1[0..l1]);
 
-        try std.testing.expectEqual(3, l2);
-        try std.testing.expectEqualStrings("123", s2[0..l2]);
+    try std.testing.expectEqual(3, l2);
+    try std.testing.expectEqualStrings("123", s2[0..l2]);
 
-        try std.testing.expectEqual(6, l3);
-        try std.testing.expectEqualStrings("123456", s3[0..l3]);
+    try std.testing.expectEqual(6, l3);
+    try std.testing.expectEqualStrings("123456", s3[0..l3]);
+}
 
-        var timer = try std.time.Timer.start();
-        var ns: f64 = undefined;
-        var secs: f64 = undefined;
+fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void {
+    const pack, const unpack = impl;
 
-        const iters = 1;
-        // const iters = 10_000_000; // standard
-        // const iters = 1_000_000_000; // ReleaseFast
-        if (iters > 1) {
-            for (0..iters) |_i| {
-                _ = _i;
-                std.mem.doNotOptimizeAway(pack("1"));
-                std.mem.doNotOptimizeAway(pack("123"));
-                std.mem.doNotOptimizeAway(pack("123456"));
-            }
+    var timer = try std.time.Timer.start();
+    var ns: f64 = undefined;
+    var secs: f64 = undefined;
 
-            ns = @floatFromInt(timer.lap());
-            secs = ns / 1_000_000_000;
+    var ss1: Value = undefined;
+    var ss2: Value = undefined;
+    var ss3: Value = undefined;
 
-            std.debug.print("pack{}: {d:.3}s\t", .{ i, secs });
+    for (0..iters) |_i| {
+        _ = _i;
+        ss1 = pack("1");
+        ss2 = pack("123");
+        ss3 = pack("123456");
+    }
 
-            for (0..iters) |_i| {
-                _ = _i;
-                std.mem.doNotOptimizeAway(unpack(ss1));
-                std.mem.doNotOptimizeAway(unpack(ss2));
-                std.mem.doNotOptimizeAway(unpack(ss3));
-            }
+    ns = @floatFromInt(timer.lap());
+    secs = ns / 1_000_000_000;
 
-            ns = @floatFromInt(timer.lap());
-            secs = ns / 1_000_000_000;
+    std.debug.print("pack{}: {d:.3}s\t", .{ id, secs });
 
-            std.debug.print("unpack{}: {d:.3}s\n", .{ i, secs });
-        }
+    for (0..iters) |_i| {
+        _ = _i;
+        std.mem.doNotOptimizeAway(unpack(ss1));
+        std.mem.doNotOptimizeAway(unpack(ss2));
+        std.mem.doNotOptimizeAway(unpack(ss3));
     }
+
+    ns = @floatFromInt(timer.lap());
+    secs = ns / 1_000_000_000;
+
+    std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs });
 }
 
 test "char" {
@@ -213,7 +252,23 @@ test "pair" {
 
 test "read" {
     const val = read.read("\"foo\"");
-    const s, const l = value.sstr.unpack(value.pair.car(value.pair.cdr(val)));
-    try std.testing.expectEqualStrings("foo", s[0..l]);
-    try std.testing.expectEqual(3, l);
+    const r, const rl = value.rune.unpack(value.pair.car(val));
+    const s, const sl = value.sstr.unpack(value.pair.cdr(val));
+    try std.testing.expectEqualStrings("string", r[0..rl]);
+    try std.testing.expectEqualStrings("foo", s[0..sl]);
+}
+
+test "read2" {
+    const val = read.read("#\"foo\"");
+
+    const r, const rl = value.rune.unpack(value.pair.car(val));
+    try std.testing.expectEqualStrings("hash", r[0..rl]);
+
+    const cdr = value.pair.cdr(val);
+
+    const s, const sl = value.rune.unpack(value.pair.car(cdr));
+    try std.testing.expectEqualStrings("string", s[0..sl]);
+
+    const f, const fl = value.sstr.unpack(value.pair.cdr(cdr));
+    try std.testing.expectEqualStrings("foo", f[0..fl]);
 }
diff --git a/src/libzisp/read.zig b/src/libzisp/read.zig
index 9ef9891..812b7c7 100644
--- a/src/libzisp/read.zig
+++ b/src/libzisp/read.zig
@@ -5,101 +5,369 @@ const value = @import("value.zig");
 
 const Value = value.Value;
 
+const Next = enum {
+    start,
+    datum,
+    hash_end,
+    rune_datum_end,
+    quote_end,
+    list,
+    list_end,
+    done,
+};
+
 const State = struct {
     alloc: std.mem.Allocator,
+
     input: []const u8,
     pos: usize = 0,
 
-    next: enum {
-        start,
+    mode: enum { code, data } = .code,
 
-        list,
-        list_end,
+    next: Next = .start,
 
-        err,
+    parent: ?*State = null,
 
-        done,
-    } = .start,
+    last_rune: ?Value = null,
+    list_tail: ?Value = null,
 
     retval: Value = value.eof.eof,
 
-    parent: ?*State = null,
+    fn eof(self: *State) bool {
+        return self.pos >= self.input.len;
+    }
+
+    fn peek(self: *State) u8 {
+        return self.input[self.pos];
+    }
+
+    fn getc(self: *State) u8 {
+        const c = self.peek();
+        self.pos += 1;
+        return c;
+    }
+
+    fn isFinalNull(self: *State) bool {
+        return self.peek() == 0 and self.pos == self.input.len - 1;
+    }
+
+    fn newChild(self: *State, next: Next) *State {
+        const s = self.alloc.create(State) catch @panic("OOM");
+        s.* = State{ .alloc = self.alloc, .input = self.input };
+        s.pos = self.pos;
+        s.mode = self.mode;
+        s.next = next;
+        s.parent = self;
+        return s;
+    }
+
+    fn setReturn(self: *State, val: Value) *State {
+        self.retval = val;
+        self.next = .done;
+        return self;
+    }
+
+    fn finish(self: *State) ?*State {
+        if (self.parent) |p| {
+            p.retval = self.retval;
+            p.pos = self.pos;
+            p.alloc.destroy(self);
+            return p;
+        } else {
+            return null;
+        }
+    }
 };
 
 pub fn read(input: []const u8) Value {
     var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
     var top = State{ .alloc = gpa.allocator(), .input = input };
     var s = &top;
-    while (s.pos <= s.input.len) : (s.pos += 1) {
-        s = switch (s.next) {
-            .start => start(s),
-
-            .list => list(s),
-            .list_end => list(s),
-
-            .err => err(s),
-
-            .done => ret: {
-                if (s.parent) |parent| {
-                    s.alloc.destroy(s);
-                    break :ret parent;
-                } else {
-                    return s.retval;
-                }
-            },
-        };
+    while (true) s = switch (s.next) {
+        .start => start(s),
+        .datum => datum(s),
+        .hash_end => hashEnd(s),
+        .rune_datum_end => runeDatumEnd(s),
+        .quote_end => quoteEnd(s),
+        .list => list(s),
+        .list_end => list(s),
+        .done => s.finish() orelse break,
+    };
+    if (s.eof() or s.isFinalNull()) {
+        return s.retval;
+    } else {
+        @panic("unconsumed input");
     }
-    unreachable;
 }
 
 fn start(s: *State) *State {
-    switch (s.input[s.pos]) {
-        0...8 => s.next = .err,
-
-        '\t', '\n' => {},
+    while (true) {
+        if (s.eof()) {
+            s.next = .done;
+            return s;
+        }
+        const c = s.getc();
+        if (isWhitespace(c)) {
+            continue;
+        }
+        return switch (c) {
+            // whitespace checked above
+            0...31, 127...255 => err(s, "invalid character"),
+            ')', ']', '}' => err(s, "unexpected closing bracket"),
+            ';' => semi(s),
+            else => ret: {
+                // backtrack; let other function handle it
+                s.pos -= 1;
+                break :ret datum(s);
+            },
+        };
+    }
+}
 
-        11...31 => s.next = .err,
+fn semi(s: *State) *State {
+    while (true) {
+        if (s.eof()) {
+            s.next = .done;
+            return s;
+        }
+        const c = s.getc();
+        if (c == '\n') {
+            break;
+        }
+    }
+    return s;
+}
 
-        ' ' => {},
+fn datum(s: *State) *State {
+    const c = s.getc();
+    if (isWhitespace(c)) {
+        return err(s, "expected datum, got whitespace");
+    }
+    return switch (c) {
+        // whitespace checked above
+        0...31, 127...255 => err(s, "invalid character"),
+        ')', ']', '}' => err(s, "unexpected closing bracket"),
+        ';' => err(s, "expected datum, got semicolon"),
+        '"' => string(s),
+        '#' => hash(s),
+        '\'' => quote(s),
+        '(' => list(s),
+        '+' => plus(s),
+        ',' => comma(s),
+        '.' => dot(s),
+        '0'...'9' => number(s),
+        '[' => square(s),
+        '`' => backtick(s),
+        '{' => brace(s),
+        else => symbol(s),
+    };
+}
 
-        '!' => s.next = .err,
+fn isWhitespace(c: u8) bool {
+    return switch (c) {
+        '\t', '\n', ' ' => true,
+        else => false,
+    };
+}
 
-        '"' => quotedString(s),
+// Whitespace, semicolon, and closing brackets of any kind
+fn isEndDelimiter(c: u8) bool {
+    return switch (c) {
+        '\t', '\n', ' ', ';' => true,
+        ')', ']', '}' => true,
+        else => false,
+    };
+}
 
-        else => s.next = .err,
+fn string(s: *State) *State {
+    const str = readString(s) catch return err(s, "unclosed string");
+    if (s.mode == .code) {
+        // "foo bar" => (#string . "foo bar")
+        const rune = value.rune.pack("string");
+        const pair = value.pair.cons(rune, str);
+        return s.setReturn(pair);
+    } else {
+        return s.setReturn(str);
     }
-    return s;
 }
 
-fn quotedString(s: *State) void {
-    var buf: [6]u8 = .{0} ** 6;
-    const len = readString(&buf, s);
-    s.retval = value.pair.cons(
-        value.sstr.pack("quote"),
-        value.pair.cons(
-            value.sstr.pack(buf[0..len]),
-            value.nil.nil,
-        ),
-    );
-    s.next = .done;
+const StringReadError = enum { UnclosedString };
+
+fn readString(s: *State) error{UnclosedString}!Value {
+    return try tryReadSstr(s) orelse readLongString(s);
 }
 
-fn readString(buf: []u8, s: *State) usize {
-    s.pos += 1; // skip opening quote
-    for (s.input[s.pos..], 0..) |c, i| {
+fn tryReadSstr(s: *State) error{UnclosedString}!?Value {
+    // We will reset to this position if we fail.
+    const start_pos = s.pos;
+
+    var buf: [6]u8 = undefined;
+    var i: usize = 0;
+    while (!s.eof()) {
+        const c = s.getc();
         if (c == '"') {
-            s.pos += i;
-            return i;
+            // ok, return what we accumulated
+            return value.sstr.pack(buf[0..i]);
         }
+        if (i == 6) {
+            // failed; reset and bail out
+            s.pos = start_pos;
+            return null;
+        }
+        // ok, save this byte and go on
         buf[i] = c;
+        i += 1;
+    }
+    return error.UnclosedString;
+}
+
+fn readLongString(s: *State) Value {
+    _ = s;
+    @panic("not implemented");
+}
+
+fn hash(s: *State) *State {
+    if (isWhitespace(s.peek())) {
+        return err(s, "whitespace after hash sign");
     }
-    unreachable;
+
+    // is it a datum comment?
+    if (s.peek() == ';') {
+        // consume semicolon
+        _ = s.getc();
+        // Just ignore value and return to starting state after reading it.
+        s.next = .start;
+    } else {
+        s.next = .hash_end;
+    }
+
+    // No whitespace or anything; hash must be immediately followed by datum,
+    // including if it's a datum comment.  Note that if it's actually a rune
+    // we're reading, like #foo, we abuse our ability to reading an sstr here
+    // and later turn it into a rune instead, since they're the same length.
+    return s.newChild(.datum);
+}
+
+fn hashEnd(s: *State) *State {
+    // It's not actually a sstr but a rune, like: #foo or #foo(...)
+    if (value.sstr.check(s.retval)) {
+        return hashRuneEnd(s);
+    }
+
+    // Hash followed by an actual datum; becomes a (#hash ...) invocation:
+    //
+    //   #(...) -> (#hash . (...))
+    //
+    //   #"..." -> (#hash . "...")
+    //
+
+    // But data mode doesn't allow that.
+    if (s.mode == .data) {
+        return err(s, "invalid use of hash in data mode");
+    }
+
+    // Also, bare long strings are not OK here; too similar to a rune.
+    if (value.ptr.checkZisp(s.retval, .string)) {
+        return err(s, "long string after hash sign");
+    }
+
+    return s.setReturn(value.pair.cons(
+        value.rune.pack("hash"),
+        s.retval,
+    ));
+}
+
+// Note: Can only come here from hashEnd().
+fn hashRuneEnd(s: *State) *State {
+    // Convert the fake sstr that was meant to be a rune.
+    const rune = value.rune.make(s.retval);
+
+    // Maybe it's a stand-alone rune, like: #foo
+    if (isEndDelimiter(s.peek())) {
+        // Which is only allowed in data mode.
+        if (s.mode == .code) {
+            return err(s, "bare runes not allowed in code");
+        } else {
+            return s.setReturn(rune);
+        }
+    }
+
+    // Otherwise, it's followed by a datum, like: #foo(...)
+
+    // Which is only allowed in code mode.
+    if (s.mode == .data) {
+        return err(s, "invalid use of hash in data mode");
+    } else {
+        s.last_rune = rune;
+        s.next = .rune_datum_end;
+        return s.newChild(.datum);
+    }
+}
+
+fn runeDatumEnd(s: *State) *State {
+    if (s.last_rune) |rune| {
+        return s.setReturn(value.pair.cons(rune, s.retval));
+    } else {
+        unreachable;
+    }
+}
+
+fn quote(s: *State) *State {
+    // Allowed in Scheme, but why? Not in Zisp.
+    if (isWhitespace(s.peek())) {
+        return err(s, "whitespace after apostrophe");
+    }
+    s.next = .quote_end;
+    const c = s.newChild(.datum);
+    c.mode = .data;
+    return c;
+}
+
+fn quoteEnd(s: *State) *State {
+    return s.setReturn(value.pair.cons(
+        value.rune.pack("quote"),
+        s.retval,
+    ));
 }
 
 fn list(s: *State) *State {
     return s;
 }
 
-fn err(s: *State) *State {
+fn plus(s: *State) *State {
+    return s;
+}
+
+fn comma(s: *State) *State {
+    return s;
+}
+
+fn dot(s: *State) *State {
+    return s;
+}
+
+fn number(s: *State) *State {
+    return s;
+}
+
+fn square(s: *State) *State {
+    return s;
+}
+
+fn backtick(s: *State) *State {
+    return s;
+}
+
+fn brace(s: *State) *State {
+    return s;
+}
+
+fn symbol(s: *State) *State {
     return s;
 }
+
+fn err(s: *State, msg: []const u8) *State {
+    _ = s;
+    std.debug.print("{s}\n", .{msg});
+    @panic("reader error");
+}
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
index dd9df3c..46ba9fa 100644
--- a/src/libzisp/value.zig
+++ b/src/libzisp/value.zig
@@ -65,9 +65,9 @@
 //
 //   001   :: Weak pointer to Zisp heap object
 //
-//   01.   :: Undefined
+//   01.   :: Undefined (may be used by GC to flag pointers for some reason?)
 //
-//   1..   :: Undefined
+//   1..   :: Foreign pointer (basically, a 50-bit fixnum of another type)
 //
 // This means pointers to the Zisp heap are 48 bits.  Of those, we only really
 // need 45, since 64-bit platforms are in practice limited to 48-bit addresses,
@@ -79,12 +79,15 @@
 // regular Zisp heap pointer can never be null.  Weak pointers, which can be
 // null, avoid stepping on that forbidden value thanks to bit 49 being set.
 //
+// Foreign pointers allow storing arbitrary pointers, or integers basically, of
+// up to 50 bits, without any further definition in Zisp of what they mean.
+//
 //
 // === Other values ===
 //
-// This 51-bit range is divided as follows, based on the initial bits:
+// This 51-bit range is divided as follows, based on the high bits:
 //
-//   000   :: Undefined
+//   000   :: Runes
 //
 //   001   :: Short string
 //
@@ -94,9 +97,12 @@
 //
 //   1..   :: Undefined
 //
-// Zisp strings are immutable and always encoded in UTF-8.  Any string fitting
-// into 6 bytes or less will be stored as an immediate value, not requiring any
-// heap allocation or interning.  (It's implicitly interned.)
+// Runes are symbols of 1 to 6 ASCII letters, used to implement reader syntax;
+// both built-in and extensions.
+//
+// Zisp strings are immutable.  Any string fitting into 6 bytes or less will be
+// stored as an immediate value, not requiring any heap allocation or interning.
+// It's implicitly interned, so to speak.  This includes the empty string.
 //
 // The null byte serves as a terminator and cannot appear in these strings; a
 // string that short but actually containing a null byte will need to be heap
@@ -115,15 +121,19 @@
 // 8 bits are allowed to be set, with the other 40 being reserved, so there's a
 // limit of 256 singleton values that can be defined.
 //
-// And on top of all that we still have a 48-bit and a 50-bit range left!
+// And on top of all that we still have a 50-bit range left!
 //
-// The forbidden value 4, Positive Infinity, is in the 48-bit undefined value
-// range starting with the 000 tag.
+// The forbidden value 4, Positive Infinity, would be the "empty string rune"
+// but that isn't allowed anyway, so all is fine.
 //
 
 // Here's the original article explaining the strategy:
 //
-// https://tkammer.de/zisp/notes/nan.html
+//   https://tkammer.de/zisp/notes/nan.html
+//
+// More about runes:
+//
+//   https://tkammer.de/zisp/notes/symbols.html
 //
 // Note: Packed structs are least-to-most significant, so the order of fields
 // must be reversed relative to a typical big-endian illustration of the bit
@@ -136,6 +146,7 @@ pub const fixnum = @import("value/fixnum.zig");
 
 pub const ptr = @import("value/ptr.zig");
 
+pub const rune = @import("value/rune.zig");
 pub const sstr = @import("value/sstr.zig");
 pub const char = @import("value/char.zig");
 pub const boole = @import("value/boole.zig");
@@ -144,7 +155,7 @@ pub const eof = @import("value/eof.zig");
 
 pub const pair = @import("value/pair.zig");
 
-/// To fill up the u11 exponent part of a NaN.
+// To fill up the u11 exponent part of a NaN.
 const FILL = 0x7ff;
 
 /// Represents a Zisp value/object.
@@ -214,6 +225,16 @@ pub const Value = packed union {
         _is_ifxnum: bool,
     },
 
+    /// For initializing and reading runes.
+    rune: packed struct {
+        // actually [6]u8 but packed struct cannot contain arrays
+        name: u48,
+        _tag: OtherTag = .rune,
+        _is_ptr: bool = false,
+        _: u11 = FILL,
+        _is_fixnum: bool = false,
+    },
+
     /// For initializing and reading short strings.
     sstr: packed struct {
         // actually [6]u8 but packed struct cannot contain arrays
@@ -244,7 +265,7 @@ pub const Value = packed union {
         _is_fixnum: bool = false,
     },
 
-    const OtherTag = enum(u3) { sstr = 1, char = 2, misc = 3 };
+    const OtherTag = enum(u3) { rune, sstr, char, misc };
 
     const Self = @This();
 
@@ -282,6 +303,7 @@ pub const Value = packed union {
         return self.isPacked() and !self.ieee.sign and !self.ieee.quiet;
     }
 
+    /// Checks for any "other" type of value.
     pub fn isOther(self: Self, tag: OtherTag) bool {
         return self._isOther() and self.other.tag == tag;
     }
diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig
index 888dd3a..c705880 100644
--- a/src/libzisp/value/fixnum.zig
+++ b/src/libzisp/value/fixnum.zig
@@ -28,11 +28,11 @@ pub fn isValidRange(int: i64) bool {
 
 fn assertValidRange(int: i64) void {
     if (int < fixnum_min) {
-        std.debug.print("int too small for fixnum: {}", .{int});
+        std.debug.print("int too small for fixnum: {}\n", .{int});
         @panic("int too small for fixnum");
     }
     if (int > fixnum_max) {
-        std.debug.print("int too large for fixnum: {}", .{int});
+        std.debug.print("int too large for fixnum: {}\n", .{int});
         @panic("int too large for fixnum");
     }
 }
diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig
index fe13af5..e1fadf2 100644
--- a/src/libzisp/value/ptr.zig
+++ b/src/libzisp/value/ptr.zig
@@ -31,27 +31,13 @@ pub fn assertForeign(v: Value) void {
     }
 }
 
-pub fn checkForeignRange(ptr: *anyopaque) bool {
-    const int = @intFromPtr(ptr);
-    return int <= std.math.maxInt(u50);
-}
-
-fn assertForeignRange(ptr: *anyopaque) void {
-    if (!checkForeignRange(ptr)) {
-        std.debug.print("foreign pointer out of range: {}\n", .{ptr});
-        @panic("foreign pointer out of range");
-    }
-}
-
-pub fn packForeign(ptr: *anyopaque) Value {
-    assertForeignRange(ptr);
-    const int: u50 = @intCast(@intFromPtr(ptr));
+pub fn packForeign(int: u50) Value {
     return .{ .fptr = .{ .value = int } };
 }
 
-pub fn unpackForeign(v: Value) *anyopaque {
+pub fn unpackForeign(v: Value) u50 {
     assertForeign(v);
-    return @ptrFromInt(v.fptr.value);
+    return v.fptr.value;
 }
 
 // Zisp Pointers
diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig
new file mode 100644
index 0000000..ab251b4
--- /dev/null
+++ b/src/libzisp/value/rune.zig
@@ -0,0 +1,75 @@
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const Value = value.Value;
+
+// Zig API
+
+pub fn check(v: Value) bool {
+    return v.isOther(.rune);
+}
+
+pub fn assert(v: Value) void {
+    if (!check(v)) {
+        v.dump();
+        @panic("not rune");
+    }
+}
+
+pub fn isValidRune(s: []const u8) bool {
+    if (s.len == 0 or s.len > 6) {
+        return false;
+    }
+    for (s) |c| {
+        switch (c) {
+            'A'...'Z' => {},
+            'a'...'z' => {},
+            else => return false,
+        }
+    }
+    return true;
+}
+
+fn assertValidRune(s: []const u8) void {
+    if (!isValidRune(s)) {
+        std.debug.print("invalid rune: '{s}'\n", .{s});
+        @panic("invalid rune");
+    }
+}
+
+// See sstr.zig which uses equivalent code; probably good to keep in sync.
+
+pub fn pack(s: []const u8) Value {
+    assertValidRune(s);
+    var v = Value{ .rune = .{ .name = 0 } };
+    const dest: [*]u8 = @ptrCast(&v.rune.name);
+    @memcpy(dest, s);
+    return v;
+}
+
+pub fn unpack(v: Value) struct { [6]u8, u3 } {
+    const s: [6]u8 = @bitCast(v.rune.name);
+    inline for (0..6) |i| {
+        if (s[i] == 0) return .{ s, i };
+    }
+    return .{ s, 6 };
+}
+
+// Zisp API
+
+pub fn pred(v: Value) Value {
+    return value.boole.pack(check(v));
+}
+
+pub fn make(v: Value) Value {
+    const s, const l = value.sstr.unpack(v);
+    return pack(s[0..l]);
+}
+
+pub fn getName(v: Value) Value {
+    const s, const l = unpack(v);
+    return value.sstr.pack(s[0..l]);
+}
+
+// TODO: Registering decoders
diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig
index 896b8d7..2be2647 100644
--- a/src/libzisp/value/sstr.zig
+++ b/src/libzisp/value/sstr.zig
@@ -31,7 +31,7 @@ pub fn isValidSstr(s: []const u8) bool {
 
 fn assertValidSstr(s: []const u8) void {
     if (!isValidSstr(s)) {
-        std.debug.print("invalid sstr: {s}", .{s});
+        std.debug.print("invalid sstr: {s}\n", .{s});
         @panic("invalid sstr");
     }
 }
@@ -40,6 +40,8 @@ fn assertValidSstr(s: []const u8) void {
 // shifting and bit masking, but memcpy always wins easily according to our
 // micro-benchmarks, under both ReleaseSafe and ReleaseFast.
 
+// Note: rune.zig uses equivalent code; probably good to keep in sync.
+
 pub fn pack(s: []const u8) Value {
     assertValidSstr(s);
     var v = Value{ .sstr = .{ .string = 0 } };
@@ -49,6 +51,7 @@ pub fn pack(s: []const u8) Value {
 }
 
 pub fn unpack(v: Value) struct { [6]u8, u3 } {
+    assert(v);
     const s: [6]u8 = @bitCast(v.sstr.string);
     inline for (0..6) |i| {
         if (s[i] == 0) return .{ s, i };
author	Taylan Kammer <taylan.kammer@gmail.com>	2025-02-19 23:29:26 +0100
committer	Taylan Kammer <taylan.kammer@gmail.com>	2025-02-19 23:29:26 +0100
commit	4e88891235664917a2db44b84c0bbeeb13dd71ad (patch)
tree	7ed8ac2272ce92054fdf2f4e5e09b156dfc5a4d1
parent	4d0db1a1065f18d879b3ff90da6ecb14e9e1ae31 (diff)