summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-02-19 23:29:26 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-02-19 23:29:26 +0100
commit4e88891235664917a2db44b84c0bbeeb13dd71ad (patch)
tree7ed8ac2272ce92054fdf2f4e5e09b156dfc5a4d1
parent4d0db1a1065f18d879b3ff90da6ecb14e9e1ae31 (diff)
update
-rw-r--r--html/notes/compile.md (renamed from html/notes/compilation.md)0
-rw-r--r--html/notes/format.md12
-rw-r--r--html/notes/reader.md470
-rw-r--r--html/notes/serialize.md3
-rw-r--r--html/notes/symbols.md44
-rw-r--r--html/style.css2
-rw-r--r--src/libzisp.zig163
-rw-r--r--src/libzisp/read.zig378
-rw-r--r--src/libzisp/value.zig48
-rw-r--r--src/libzisp/value/fixnum.zig4
-rw-r--r--src/libzisp/value/ptr.zig20
-rw-r--r--src/libzisp/value/rune.zig75
-rw-r--r--src/libzisp/value/sstr.zig5
13 files changed, 1079 insertions, 145 deletions
diff --git a/html/notes/compilation.md b/html/notes/compile.md
index 4d5fc6d..4d5fc6d 100644
--- a/html/notes/compilation.md
+++ b/html/notes/compile.md
diff --git a/html/notes/format.md b/html/notes/format.md
new file mode 100644
index 0000000..39da84a
--- /dev/null
+++ b/html/notes/format.md
@@ -0,0 +1,12 @@
+WIP WIP WIP
+
+(format "template" arg ...) ;sprintf
+(format obj) ;like write but returns string
+(format! "template" arg ...) ;printf
+(format! arg) ;write
+
+The ones with a string template are special forms and process the
+template string at compile time and ensure correct number of args.
+
+Need a way to let a special form's name also appear as an identifier
+like Guile does it with record accessors and shit.
diff --git a/html/notes/reader.md b/html/notes/reader.md
new file mode 100644
index 0000000..ebbe1ea
--- /dev/null
+++ b/html/notes/reader.md
@@ -0,0 +1,470 @@
+# Reader? Decoder? I barely know 'er!
+
+*This started from an expansion to the following, then became its own
+article:*
+
+[Symbols are strings are symbols](symbols.html)
+
+OK but hear me out... What if there were different reader modes, for
+code and (pure) data?
+
+I want Zisp to have various neat [syntactic extensions](sugar.html)
+for programming purposes anyway, like the lambda shorthand, and these
+shouldn't apply to configuration files, either. (Although they seem
+unlikely to occur by accident.)
+
+So what if the transform from string literal to quoted string literal
+only occurred in code reading mode?
+
+At least one problem remains, which is that `'(foo "bar")` would turn
+into `(quote (foo (quote bar)))` because the reader would be in code
+mode while reading it...
+
+This reminds me of the long-standing annoyance in Scheme that "quote
+is unhygienic" and maybe we can tackle this problem as well now.
+
+Also, expressions like `'(foo '(bar))` always seemed weird to me, and
+probably have no place in Scheme, because we don't generate code via
+quote; we generate it with macros that operate on explicit syntax
+objects rather than pure data.
+
+I want to experiment with an idea like this:
+
+ ;; "code reader mode" transformations
+
+ '(foo bar) -> (#quote foo bar)
+
+ '(foo 'bar) -> ERROR
+
+ "foo" -> (#quote . foo)
+
+ "foo bar" -> (#quote . "foo bar")
+
+ '(foo "bar") -> (#quote foo bar)
+
+ '(foo "x y") -> (#quote foo "x y")
+
+The right-hand side shows what you would get if you read the form on
+the left in code reader mode, then wrote it back out in data mode.
+
+The writer could also have a code writer mode, which applies the
+reverse transformations. There should be a one to one mapping,
+unambiguous, so this always works. A "hygienic" way to quote is
+imperative here, since the writer could otherwise not know whether
+some `quote` keyword in a list is *the* quote special form, or just
+part of some data.
+
+We've made quote into a special token, `#quote`, to solve that.
+Instead of adding a separate symbol data type that's a subtype of
+strings, I think I'll add something called a "rune" or such that's
+represented like `#foo` and allows for custom reader extensions, or
+rather, writer extensions.
+
+Essentially, these runes would be bound to a pair of the following:
+
+1. A procedure that accepts a datum and returns some type of value.
+
+2. A procedure that takes values of that type, and turns them back
+ into written format.
+
+For `#quote`, the reader procedure would be the identity function.
+The writer procedure would need to be a little more sophisticated.
+
+Note that the first procedure would not actually be called during
+reading of data. Somewhat confusingly, it would only be called in
+evaluation of code.
+
+Let's recap. Starting with pure data reading and writing:
+
+1. There is no special reader syntax. This s-expression format is a
+bare minimum of what's needed to represent sequential data i.e. lists
+and lists are the only compound data type recognized by the reader.
+Anything that isn't a list is either an atomic value, or a string
+which may or may not be considered atomic depending on how pedantic
+you want to be. Oh and runes are allowed.
+
+ A. This is basically "classic" s-expressions with runes added.
+ Only lists, numbers, and strings/symbols are recognized.
+
+ B. Heck, numbers may not be recognized. Or maybe they will be
+ limited to integers and floats, but no rationals or such, and
+ reading a float will guarantee no loss of precision?
+
+2. Writing data returned by the data reader back out, in data form,
+will produce exactly what was read, with the sole exception being
+whitespace differences. The data is not allowed to contain any
+non-atomic values other than proper lists.
+
+ A. It's important not to allow floats that IEEE 754 doubles can't
+ represent, since then differences between input and output would
+ occur. But what about numbers like "10.00"? That would also
+ become something else when written back out.
+
+ B. OK, maybe numbers represented in a non-canonical way are a
+ second source of difference between reading and writing back out,
+ but let's at least guarantee there's no loss of precision.
+
+(I've not considered comments. Maybe they will be preserved? Maybe
+they should be implemented as code reader syntax sugar as well??)
+
+And now code reading and writing:
+
+1. Various syntax sugar is internally transformed into runes, with
+non-list compound data literals (vectors, hash tables, etc.) needing
+this type of representation to appear in code.
+
+ A. Writing that data back out in data mode will reveal the inner
+ workings of the language, producing output containing runes.
+
+ B. Direct use of runes may be forbidden; not sure about this.
+
+ C. Evaluating this data containing runes will produce, in-memory,
+ the actual values being represented. The "reader procedure" tied
+ to the rune is responsible for this, though the fact that it's
+ evaluation and not reading that calls that procedure makes it
+ confusing so a better name is needed. Maybe just "decoder."
+
+2. For every data type that falls outside the pure data syntax, there
+is a procedure that turns it into a canonical data representation
+based on lists and atomics, always using the format `(#rune ...)`.
+
+ A. Another procedure is capable of turning that back into reader
+ sugar, but this is not terribly important. Although it would be
+ neat to be able to write out code that looks like hand-written
+ program code, this really is just a bonus feature.
+
+ B. For some types, turning them back into code without any runes
+ may be highly complicated; procedures, in particular, would need
+ decompilation to make this work.
+
+
+## Recap (or not?)
+
+Wow, that was a long "recap." I actually came up with new ideas in
+writing that. Let's recap the recap. I'll represent the mechanisms
+as different pipelines that can happen using the various features.
+
+Typical pipeline when reading and evaluating code:
+
+ code-file --[code-reader]--> code-data --[eval]--> values
+ ^^^^^^^^^^^ ^^^^
+ turns sugar into calls rune decoders
+ rune calls to produce values
+ i.e. desugars code & compiles code
+
+Reading in a [serialized program](compile.html):
+
+ data-file --[data-reader]--> data --[eval]--> values
+ ^^^^
+ fairly trivial
+ (no lambdas, only runes)
+
+Reading pure and simple data like a config file:
+
+ data-file --[data-reader]--> data (no runes to eval)
+
+Note that "data" is a subset of "values" basically. And the term
+"code-data" which was used above just means data that is meant to be
+evaluated as code, but is totally valid as pure data. This is not to
+be confused with the "data" that existed in the intermediate step
+while we were reading a serialized program; that was absent of any
+forms like lambdas that need compilation.
+
+OK, that last bit was a bit confusing, and I realize it stems from
+conflating rune decoding with code compilation, so let's split that
+further up. Above, "eval" is "decode + compile" basically, but it's
+possible to separate them, for example if we want to read a file of
+serialized values that should not contain any code:
+
+ values-file --[data-reader]--> values-data --[decode]--> values
+
+This is a secure way to read complex data even if it comes from an
+untrusted source. It may contain runes that represent code, such as
+in the form of `(#program "binary")` (compiled procedure) or even
+`(#lambda (x) (do-things))` but so long as you don't actually call
+those things after having decoded them, they can't do anything.
+Decoding runes can't define macros or register new rune decoders,
+meaning there's no way to achieve arbitrary code execution.
+
+Heck, although `#lambda` exists to represent the desugaring of the
+`{...}` convenience syntax, it wouldn't actually work here because
+decoding runes would happen in a null-environment without any bound
+identifiers, meaning that e.g. `(#lambda (x) (+ x x))` would just
+raise an error during decoding, because the compiler would consider
+`+` unbound.
+
+Alternatively, instead of calling the compiler, the `#lambda` decoder
+could just be a no-op that returns the same form back, but without the
+rune, like `(lambda (x) (+ x x))`, because the compiler will take care
+of that later. Yeah, I think this makes more sense. Why doesn't the
+code reader directly give `(lambda ...)` for the `{...}` sugar? Well,
+actually, the `#lambda` decoder may yield a syntax object where the
+first element specifically refers to the binding of `lambda` in the
+default environment, so you could use `{...}` in an environment where
+`lambda` is bound to something else, and you would still hygienically
+get the default lambda behavior from `{...}`. Yay!
+
+(Wow, it's rabbit hole after rabbit hole today. This is good though.
+I'm coming up with some crazy stuff.)
+
+It would be possible to decode "code-data" and get an internal memory
+representation of an uncompiled program which however already has
+various data structure literals turned into values. This is super
+obscure but for sake of completeness:
+
+ code-file --[code-reader]--> code-data --[decode]--> code-values
+
+(These so-called "code-values" would only ever be useful for piping
+them into the compiler. By the way, I initially used "eval" in the
+example of reading a serialized program, but "decode" would have been
+sufficient there.)
+
+
+## Here's a well-deserved break
+
+(There wasn't a new header in a while. This seemed a good spot.)
+
+Now writing pipelines. Let's reverse the above pipelines, from the
+bottom back towards eventually the first...
+
+The reverse of the super obscure thing above:
+
+ code-values --[encode]--> code-data --[code-writer]--> code-file
+
+That would only ever be useful for debugging things. Now writing a
+data structure into a serialized file, without unnecessarily invoking
+the decompiler:
+
+ values --[encode]--> values-data --[data-writer]--> data-file
+
+That gives you a file containing only data, but the data is the
+encoded format of various data structures Zisp recognizes...
+Actually, that may include compiled procedures as well.
+
+Now the simple config file case, being serialized:
+
+ data -[data writer]-> data-file
+
+Now serializing a compiled program to a file, without decompilation:
+
+ values --[encode]--> values-data --[data-writer]--> data-file
+ ^^^^^^ ^^^^^^^^^^^
+ data structures no decompilation
+ become rune calls or "re-sugaring"
+
+Oh, look at that. It's the same as writing out data structures, as
+we've already seen previously... This recap of a recap will need
+another recap for sure.
+
+And now, the full decompiler:
+
+ values --[uneval]--> code-data --[code-writer]--> code-file
+ ^^^^^^
+ decompilation
+
+Actually, just like "eval" is "decode + compile", the "uneval" here
+really is "decompile + encode".
+
+
+## The Revised Recap of the Recap
+
+The following exist:
+
+1. Readers:
+
+ 1. Data reader: Reads lists, strings/symbols, runes, integers, and
+ IEEE 754 double-precision floats without loss of precision.
+
+ 2. Code reader: Reads code that can contain various syntax sugar,
+ all of which has an equivalent representation with runes.
+
+2. In-memory transformers:
+
+ 1. Decoder: Calls decoders for runes in data, to yield values.
+
+ 2. Evaluator: [Executes aka compiles](compile.html) decoded values
+ into other values.[*]
+
+3. Reverse in-memory transformers:
+
+ 1. Encoder: Reverse of the decoder. (Lossless?)
+
+ 2. Unevaluator: Reverse of the evaluator. (Lossy.)
+
+4. Writers:
+
+ 1. Data writer: Reverse of data reader. (Lossless.)
+
+ 2. Code writer: Reverse of code reader. (Lossy?)
+
+(*) This needs decoding to run first, because otherwise it wouldn't
+ realize that you're e.g. calling `+` on a pair of rational number
+ constants represented through runes, so constant folding wouldn't
+ work. Same with `vector-ref` on a vector literal represented as a
+ rune, and so on.
+
+
+## How in the seven hells did I arrive at this point?
+
+Jesus Christ!
+
+This was about symbols and strings being the same thing.
+
+But I love these rabbit holes. They're mind expanding and you find
+out so many new things you never thought about.
+
+Did you notice, by the way, that the code reader/writer above is
+essentially a parser (and unparser) you would have in a regular
+programming language, where syntax becomes an AST? The pure data
+format is basically our AST!
+
+But this doesn't mean we lost homoiconicity. No, we merely expanded
+upon it by providing a more detailed explanation of the relationship
+between textual representation of code and in-memory data that exists
+at various stages before ultimate compilation.
+
+Oh, and did we achieve our strategy of strings = symbols now, or does
+it need to be dropped? I think we achieved it. The code reader, as
+described all the way up where the section "Reconsidering AGAIN"
+begins --in the original article; see top-- will desugar string
+literals into:
+
+ "foo" -> (#quote foo)
+
+(As already described originally...)
+
+And the `#quote` rune? Well, it will not actually just return its
+operand verbatim, no! It will return a syntax object that's a list
+with the first element specifically refers to the binding of `quote`
+from the standard library. In other words, it's the evaluator that
+actually implements quote, not the decoder.
+
+Oh yes, this is very satisfying. Everything is coming together.
+
+Syntax objects, by the way, will also have a rune-based external
+representation, so you can inspect the result of macro expansion.
+
+And yes, I think using runes directly in code mode should be illegal,
+because it allows referring to bindings in the standard library, or
+even bindings in arbitrary libraries by crafting syntax objects
+represented via runes, to bypass environment limits.
+
+That bug actually existed in Guile at some point, where one could
+craft syntax objects, represented as vector literals, to refer to
+bindings in other modules, making it impossible to run code in a
+sandboxed environment. (It was fixed long ago, I believe.)
+
+Oh, but what about `#true` and `#false`? OK, maybe there will be a
+whitelist of runes that are allowed in code. That makes sense.
+
+We will see. Still more details to be fleshed out.
+
+In any case, some runes must be able to declare that they don't take
+arguments, in which case `(#rune ...)` isn't decoded by passing the
+entire form to the decoder of `#rune`, but rather treated as a normal
+list whose first element is decoded as a nullary rune. That's how
+boolean literals in code will be implemented.
+
+
+## Looking at more of the initial problems
+
+What happened to `'(quote "foo")` in code mode being weird? Well,
+encountering an apostrophe tells the code reader that the next
+expression is a datum, so it switches to data mode for that.
+
+Wow, that was easy.
+
+This also means you can't use syntax sugar inside it, which is good
+because as we said previously, we don't want to use quoting to create
+code; we want to use syntax objects for that.
+
+This is really orthogonal to the whole runes issue, and could have
+been solved without that mechanism, but I'm happy I came up with it
+because it resolves hygiene issues.
+
+The syntax `#'(quote "foo")` would be sugar for a different rune, and
+the reader would remain in code mode, further desugaring any sugar
+found within, so this works: `#'{x (+ x x)}`
+
+Oh and I mentioned reader extensions (for code mode) but then didn't
+expand on that. Well, whenever the code reader encounters this:
+
+ #foo(blah blah blah)
+
+It will turn that into:
+
+ (#foo blah blah blah)
+
+After which the decoder for `#foo` will be invoked, which could have
+been registered by the programmer.
+
+Can that registration be done in the same file though? Normally, the
+execution step comes after decoding, and we decided that we don't want
+to allow arbitrary code execution to happen just when reading a data
+file and decoding it. So something exceptional would need to happen
+for this to work. Or maybe not.
+
+Remember that [compilation is execution](compile.html) in Zisp,
+meaning that compiling a file looks like this in pseudo-Scheme:
+
+ (define env (null-environment)) ;start out empty
+
+ (while (not (eof? input))
+ (let* ((datum (read-code input)) ;desugar
+ (value (decode datum))) ;decode
+ (eval! value env))) ;eval in mutable env
+
+ (write (env-lookup env 'main)) ;serialize
+
+I've called eval `eval!` to indicate that it can mutate the env it
+receives, which is what import statements and defines would do.
+
+Let's modify that a little further to indicate the fact that reader
+macros, or in our terms, custom rune decoders, can be defined in the
+middle of the code file by affecting the environment:
+
+ (define env (null-environment)) ;start out empty
+
+ (while (not (eof? input))
+ (let* ((datum (read-code input)) ;desugar
+ (value (decode datum env))) ;decode in env
+ (eval! value env))) ;eval in mutable env
+
+ (write (env-lookup env 'main)) ;serialize
+
+Since the `decode` procedure is given an environment, it will look up
+decoders from therein. So, after the evaluation of each top-level
+expression, the expressions coming after it could be using a custom
+decoder.
+
+What our reader macros cannot do is completely affect the lexical
+syntax of the language, as in, add more sugar. You must rely on the
+global desugaring feature of `#x(...) -> (#x ...)` which, now that I
+think about it, is completely useless because a regular macro could
+have achieved exactly the same thing.
+
+OK, let's try that again. The global desugaring wouldn't work on
+lists only, it would work on a number of things:
+
+ #x"foo" -> (#x #string . foo)
+
+ #x[foo] -> (#x #square . foo)
+
+ #x{foo} -> (#x #braces . foo)
+
+You get the idea!
+
+(I've changed my mind that `"foo"` should desugar into a call to the
+regular `#quote` rune; it should be `#string` instead to disambiguate
+from the apostrophe if needed.)
+
+Also, all those would work without a rune as well, to allow a file to
+change the meaning of some of the default syntax sugar if desired:
+
+ "foo" -> (#string . foo)
+
+ [foo bar] -> (#square foo bar)
+
+ {foo bar} -> (#braces foo bar)
+
+Or something like that. I'm making this all up as I go.
diff --git a/html/notes/serialize.md b/html/notes/serialize.md
index e35177e..fb9963a 100644
--- a/html/notes/serialize.md
+++ b/html/notes/serialize.md
@@ -1,7 +1,6 @@
# Everything can be serialized
-Let's look at the code mentioned in [compilation](compilation.html)
-again:
+Let's look at the code mentioned in [compilation](compile.html) again:
```scheme
diff --git a/html/notes/symbols.md b/html/notes/symbols.md
index aa3c448..f45f9cf 100644
--- a/html/notes/symbols.md
+++ b/html/notes/symbols.md
@@ -18,6 +18,7 @@ Instead of `string->symbol` we will have `string-intern` which
basically does the same thing. Dynamically generated strings that
aren't passed to this function will be uninterned.
+
## But but but
(Late addition because I didn't even notice this problem at first.
@@ -66,3 +67,46 @@ That prints: "base to us"
I'm not married to the syntax `#"string"` and may end up using the
simpler `|foo|` in the end. It doesn't really matter.
+
+
+## More problems
+
+Dangit, couldn't have been so easy could it?
+
+What if you have a configuration file with these contents:
+
+ ((job-name "Clear /tmp directory")
+ (interval reboot)
+ (command "find /tmp -mindepth 1 -delete"))
+
+Now all those "string constants" will turn into lists with the string
+"quote" at its head. Terrible. One could write it with the explicit
+string literal syntax `#"foo"` for strings, but that's also terrible.
+
+
+## Salvageable
+
+I'm not yet done with this idea. What if strings simply have a flag
+that says whether they are intended as a symbol or not?
+
+While reading, it would be set automatically. Instead of `intern`,
+one would call a function like `symbol`, which would return a string
+with the flag set, after interning it if necessary; it would simply
+return the original string if it already had the flag set.
+
+Another way to look at this is that strings and symbols are sort of
+"polymorphic" and can be used interchangeably. I don't want to get
+into JavaScript style automatic type conversions (yuck) but this may
+simply be a flag that's set on a string, which makes it a subtype of
+regular strings.
+
+Yes yes, I think that's good. I even still have enough space left in
+the NaN-packing strategy to put a tag on "short strings" which are our
+6-byte immediate strings.
+
+
+## Reconsidering AGAIN
+
+*This got too long and off-topic so it continues here:*
+
+[Reader? Decoder? I barely know 'er!](reader.html)
diff --git a/html/style.css b/html/style.css
index f1b474b..4725089 100644
--- a/html/style.css
+++ b/html/style.css
@@ -1,6 +1,6 @@
body {
margin: 20px auto;
- padding: 0 20px;
+ padding: 10px 20px;
max-width: 80ch;
background: #eee;
diff --git a/src/libzisp.zig b/src/libzisp.zig
index f5ad6af..174cd40 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -48,13 +48,6 @@ test "ptr" {
const val: [*]Bucket = @ptrFromInt(256);
const tag = ptr.Tag.string;
- const f = ptr.packForeign(val);
- try std.testing.expect(ptr.checkForeign(f));
- try std.testing.expectEqual(
- @intFromPtr(val),
- @intFromPtr(ptr.unpackForeign(f)),
- );
-
const p = ptr.pack(val, tag);
try std.testing.expect(ptr.check(p));
try std.testing.expect(ptr.checkZisp(p, tag));
@@ -88,8 +81,35 @@ test "ptr" {
try std.testing.expectEqual(false, value.boole.unpack(ptr.getWeak(w)));
}
+test "fptr" {
+ const ptr = value.ptr;
+
+ const int1: u50 = 0;
+ const int2: u50 = std.math.maxInt(u50);
+
+ const f1 = ptr.packForeign(int1);
+ try std.testing.expect(ptr.checkForeign(f1));
+ try std.testing.expectEqual(int1, ptr.unpackForeign(f1));
+
+ const f2 = ptr.packForeign(int2);
+ try std.testing.expect(ptr.checkForeign(f2));
+ try std.testing.expectEqual(int2, ptr.unpackForeign(f2));
+}
+
+test "rune" {
+ const r1 = value.rune.pack("test");
+ try std.testing.expect(value.rune.check(r1));
+
+ const s1, const l1 = value.rune.unpack(r1);
+ try std.testing.expectEqualStrings("test", s1[0..l1]);
+}
+
+const SstrImpl = struct { SstrPack, SstrUnpack };
+const SstrPack = *const fn ([]const u8) Value;
+const SstrUnpack = *const fn (Value) struct { [6]u8, u3 };
+
test "sstr" {
- const impls = .{
+ const impls = [_]SstrImpl{
.{ value.sstr.pack, value.sstr.unpack },
// .{ value.sstr.pack1, value.sstr.unpack1 },
// .{ value.sstr.pack2, value.sstr.unpack2 },
@@ -97,63 +117,82 @@ test "sstr" {
// .{ value.sstr.pack4, value.sstr.unpack4 },
};
- inline for (impls, 0..) |impl, i| {
- const pack, const unpack = impl;
+ for (impls) |impl| {
+ try testSstr(impl);
+ }
+
+ if (impls.len > 1) {
+ const iters = switch (@import("builtin").mode) {
+ .Debug, .ReleaseSmall => 10_000_000,
+ .ReleaseSafe => 100_000_000,
+ .ReleaseFast => 1_000_000_000,
+ };
+ std.debug.print("Benchmarking with {} iters.\n", .{iters});
+ inline for (impls, 0..) |impl, i| {
+ try benchmarkSstr(impl, i, iters);
+ }
+ }
+}
+
+fn testSstr(impl: SstrImpl) !void {
+ const pack, const unpack = impl;
- const ss1 = pack("1");
- const ss2 = pack("123");
- const ss3 = pack("123456");
+ const ss1 = pack("1");
+ const ss2 = pack("123");
+ const ss3 = pack("123456");
- const s1, const l1 = unpack(ss1);
- const s2, const l2 = unpack(ss2);
- const s3, const l3 = unpack(ss3);
+ const s1, const l1 = unpack(ss1);
+ const s2, const l2 = unpack(ss2);
+ const s3, const l3 = unpack(ss3);
- try std.testing.expect(value.sstr.check(ss1));
- try std.testing.expect(value.sstr.check(ss2));
- try std.testing.expect(value.sstr.check(ss3));
+ try std.testing.expect(value.sstr.check(ss1));
+ try std.testing.expect(value.sstr.check(ss2));
+ try std.testing.expect(value.sstr.check(ss3));
- try std.testing.expectEqual(1, l1);
- try std.testing.expectEqualStrings("1", s1[0..l1]);
+ try std.testing.expectEqual(1, l1);
+ try std.testing.expectEqualStrings("1", s1[0..l1]);
- try std.testing.expectEqual(3, l2);
- try std.testing.expectEqualStrings("123", s2[0..l2]);
+ try std.testing.expectEqual(3, l2);
+ try std.testing.expectEqualStrings("123", s2[0..l2]);
- try std.testing.expectEqual(6, l3);
- try std.testing.expectEqualStrings("123456", s3[0..l3]);
+ try std.testing.expectEqual(6, l3);
+ try std.testing.expectEqualStrings("123456", s3[0..l3]);
+}
- var timer = try std.time.Timer.start();
- var ns: f64 = undefined;
- var secs: f64 = undefined;
+fn benchmarkSstr(impl: SstrImpl, id: usize, iters: usize) !void {
+ const pack, const unpack = impl;
- const iters = 1;
- // const iters = 10_000_000; // standard
- // const iters = 1_000_000_000; // ReleaseFast
- if (iters > 1) {
- for (0..iters) |_i| {
- _ = _i;
- std.mem.doNotOptimizeAway(pack("1"));
- std.mem.doNotOptimizeAway(pack("123"));
- std.mem.doNotOptimizeAway(pack("123456"));
- }
+ var timer = try std.time.Timer.start();
+ var ns: f64 = undefined;
+ var secs: f64 = undefined;
- ns = @floatFromInt(timer.lap());
- secs = ns / 1_000_000_000;
+ var ss1: Value = undefined;
+ var ss2: Value = undefined;
+ var ss3: Value = undefined;
- std.debug.print("pack{}: {d:.3}s\t", .{ i, secs });
+ for (0..iters) |_i| {
+ _ = _i;
+ ss1 = pack("1");
+ ss2 = pack("123");
+ ss3 = pack("123456");
+ }
- for (0..iters) |_i| {
- _ = _i;
- std.mem.doNotOptimizeAway(unpack(ss1));
- std.mem.doNotOptimizeAway(unpack(ss2));
- std.mem.doNotOptimizeAway(unpack(ss3));
- }
+ ns = @floatFromInt(timer.lap());
+ secs = ns / 1_000_000_000;
- ns = @floatFromInt(timer.lap());
- secs = ns / 1_000_000_000;
+ std.debug.print("pack{}: {d:.3}s\t", .{ id, secs });
- std.debug.print("unpack{}: {d:.3}s\n", .{ i, secs });
- }
+ for (0..iters) |_i| {
+ _ = _i;
+ std.mem.doNotOptimizeAway(unpack(ss1));
+ std.mem.doNotOptimizeAway(unpack(ss2));
+ std.mem.doNotOptimizeAway(unpack(ss3));
}
+
+ ns = @floatFromInt(timer.lap());
+ secs = ns / 1_000_000_000;
+
+ std.debug.print("unpack{}: {d:.3}s\n", .{ id, secs });
}
test "char" {
@@ -213,7 +252,23 @@ test "pair" {
test "read" {
const val = read.read("\"foo\"");
- const s, const l = value.sstr.unpack(value.pair.car(value.pair.cdr(val)));
- try std.testing.expectEqualStrings("foo", s[0..l]);
- try std.testing.expectEqual(3, l);
+ const r, const rl = value.rune.unpack(value.pair.car(val));
+ const s, const sl = value.sstr.unpack(value.pair.cdr(val));
+ try std.testing.expectEqualStrings("string", r[0..rl]);
+ try std.testing.expectEqualStrings("foo", s[0..sl]);
+}
+
+test "read2" {
+ const val = read.read("#\"foo\"");
+
+ const r, const rl = value.rune.unpack(value.pair.car(val));
+ try std.testing.expectEqualStrings("hash", r[0..rl]);
+
+ const cdr = value.pair.cdr(val);
+
+ const s, const sl = value.rune.unpack(value.pair.car(cdr));
+ try std.testing.expectEqualStrings("string", s[0..sl]);
+
+ const f, const fl = value.sstr.unpack(value.pair.cdr(cdr));
+ try std.testing.expectEqualStrings("foo", f[0..fl]);
}
diff --git a/src/libzisp/read.zig b/src/libzisp/read.zig
index 9ef9891..812b7c7 100644
--- a/src/libzisp/read.zig
+++ b/src/libzisp/read.zig
@@ -5,101 +5,369 @@ const value = @import("value.zig");
const Value = value.Value;
+const Next = enum {
+ start,
+ datum,
+ hash_end,
+ rune_datum_end,
+ quote_end,
+ list,
+ list_end,
+ done,
+};
+
const State = struct {
alloc: std.mem.Allocator,
+
input: []const u8,
pos: usize = 0,
- next: enum {
- start,
+ mode: enum { code, data } = .code,
- list,
- list_end,
+ next: Next = .start,
- err,
+ parent: ?*State = null,
- done,
- } = .start,
+ last_rune: ?Value = null,
+ list_tail: ?Value = null,
retval: Value = value.eof.eof,
- parent: ?*State = null,
+ fn eof(self: *State) bool {
+ return self.pos >= self.input.len;
+ }
+
+ fn peek(self: *State) u8 {
+ return self.input[self.pos];
+ }
+
+ fn getc(self: *State) u8 {
+ const c = self.peek();
+ self.pos += 1;
+ return c;
+ }
+
+ fn isFinalNull(self: *State) bool {
+ return self.peek() == 0 and self.pos == self.input.len - 1;
+ }
+
+ fn newChild(self: *State, next: Next) *State {
+ const s = self.alloc.create(State) catch @panic("OOM");
+ s.* = State{ .alloc = self.alloc, .input = self.input };
+ s.pos = self.pos;
+ s.mode = self.mode;
+ s.next = next;
+ s.parent = self;
+ return s;
+ }
+
+ fn setReturn(self: *State, val: Value) *State {
+ self.retval = val;
+ self.next = .done;
+ return self;
+ }
+
+ fn finish(self: *State) ?*State {
+ if (self.parent) |p| {
+ p.retval = self.retval;
+ p.pos = self.pos;
+ p.alloc.destroy(self);
+ return p;
+ } else {
+ return null;
+ }
+ }
};
pub fn read(input: []const u8) Value {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
var top = State{ .alloc = gpa.allocator(), .input = input };
var s = &top;
- while (s.pos <= s.input.len) : (s.pos += 1) {
- s = switch (s.next) {
- .start => start(s),
-
- .list => list(s),
- .list_end => list(s),
-
- .err => err(s),
-
- .done => ret: {
- if (s.parent) |parent| {
- s.alloc.destroy(s);
- break :ret parent;
- } else {
- return s.retval;
- }
- },
- };
+ while (true) s = switch (s.next) {
+ .start => start(s),
+ .datum => datum(s),
+ .hash_end => hashEnd(s),
+ .rune_datum_end => runeDatumEnd(s),
+ .quote_end => quoteEnd(s),
+ .list => list(s),
+ .list_end => list(s),
+ .done => s.finish() orelse break,
+ };
+ if (s.eof() or s.isFinalNull()) {
+ return s.retval;
+ } else {
+ @panic("unconsumed input");
}
- unreachable;
}
fn start(s: *State) *State {
- switch (s.input[s.pos]) {
- 0...8 => s.next = .err,
-
- '\t', '\n' => {},
+ while (true) {
+ if (s.eof()) {
+ s.next = .done;
+ return s;
+ }
+ const c = s.getc();
+ if (isWhitespace(c)) {
+ continue;
+ }
+ return switch (c) {
+ // whitespace checked above
+ 0...31, 127...255 => err(s, "invalid character"),
+ ')', ']', '}' => err(s, "unexpected closing bracket"),
+ ';' => semi(s),
+ else => ret: {
+ // backtrack; let other function handle it
+ s.pos -= 1;
+ break :ret datum(s);
+ },
+ };
+ }
+}
- 11...31 => s.next = .err,
+fn semi(s: *State) *State {
+ while (true) {
+ if (s.eof()) {
+ s.next = .done;
+ return s;
+ }
+ const c = s.getc();
+ if (c == '\n') {
+ break;
+ }
+ }
+ return s;
+}
- ' ' => {},
+fn datum(s: *State) *State {
+ const c = s.getc();
+ if (isWhitespace(c)) {
+ return err(s, "expected datum, got whitespace");
+ }
+ return switch (c) {
+ // whitespace checked above
+ 0...31, 127...255 => err(s, "invalid character"),
+ ')', ']', '}' => err(s, "unexpected closing bracket"),
+ ';' => err(s, "expected datum, got semicolon"),
+ '"' => string(s),
+ '#' => hash(s),
+ '\'' => quote(s),
+ '(' => list(s),
+ '+' => plus(s),
+ ',' => comma(s),
+ '.' => dot(s),
+ '0'...'9' => number(s),
+ '[' => square(s),
+ '`' => backtick(s),
+ '{' => brace(s),
+ else => symbol(s),
+ };
+}
- '!' => s.next = .err,
+fn isWhitespace(c: u8) bool {
+ return switch (c) {
+ '\t', '\n', ' ' => true,
+ else => false,
+ };
+}
- '"' => quotedString(s),
+// Whitespace, semicolon, and closing brackets of any kind
+fn isEndDelimiter(c: u8) bool {
+ return switch (c) {
+ '\t', '\n', ' ', ';' => true,
+ ')', ']', '}' => true,
+ else => false,
+ };
+}
- else => s.next = .err,
+fn string(s: *State) *State {
+ const str = readString(s) catch return err(s, "unclosed string");
+ if (s.mode == .code) {
+ // "foo bar" => (#string . "foo bar")
+ const rune = value.rune.pack("string");
+ const pair = value.pair.cons(rune, str);
+ return s.setReturn(pair);
+ } else {
+ return s.setReturn(str);
}
- return s;
}
-fn quotedString(s: *State) void {
- var buf: [6]u8 = .{0} ** 6;
- const len = readString(&buf, s);
- s.retval = value.pair.cons(
- value.sstr.pack("quote"),
- value.pair.cons(
- value.sstr.pack(buf[0..len]),
- value.nil.nil,
- ),
- );
- s.next = .done;
+const StringReadError = enum { UnclosedString };
+
+fn readString(s: *State) error{UnclosedString}!Value {
+ return try tryReadSstr(s) orelse readLongString(s);
}
-fn readString(buf: []u8, s: *State) usize {
- s.pos += 1; // skip opening quote
- for (s.input[s.pos..], 0..) |c, i| {
+fn tryReadSstr(s: *State) error{UnclosedString}!?Value {
+ // We will reset to this position if we fail.
+ const start_pos = s.pos;
+
+ var buf: [6]u8 = undefined;
+ var i: usize = 0;
+ while (!s.eof()) {
+ const c = s.getc();
if (c == '"') {
- s.pos += i;
- return i;
+ // ok, return what we accumulated
+ return value.sstr.pack(buf[0..i]);
}
+ if (i == 6) {
+ // failed; reset and bail out
+ s.pos = start_pos;
+ return null;
+ }
+ // ok, save this byte and go on
buf[i] = c;
+ i += 1;
+ }
+ return error.UnclosedString;
+}
+
+fn readLongString(s: *State) Value {
+ _ = s;
+ @panic("not implemented");
+}
+
+fn hash(s: *State) *State {
+ if (isWhitespace(s.peek())) {
+ return err(s, "whitespace after hash sign");
}
- unreachable;
+
+ // is it a datum comment?
+ if (s.peek() == ';') {
+ // consume semicolon
+ _ = s.getc();
+ // Just ignore value and return to starting state after reading it.
+ s.next = .start;
+ } else {
+ s.next = .hash_end;
+ }
+
+ // No whitespace or anything; hash must be immediately followed by datum,
+ // including if it's a datum comment. Note that if it's actually a rune
+ // we're reading, like #foo, we abuse our ability to reading an sstr here
+ // and later turn it into a rune instead, since they're the same length.
+ return s.newChild(.datum);
+}
+
+fn hashEnd(s: *State) *State {
+ // It's not actually a sstr but a rune, like: #foo or #foo(...)
+ if (value.sstr.check(s.retval)) {
+ return hashRuneEnd(s);
+ }
+
+ // Hash followed by an actual datum; becomes a (#hash ...) invocation:
+ //
+ // #(...) -> (#hash . (...))
+ //
+ // #"..." -> (#hash . "...")
+ //
+
+ // But data mode doesn't allow that.
+ if (s.mode == .data) {
+ return err(s, "invalid use of hash in data mode");
+ }
+
+ // Also, bare long strings are not OK here; too similar to a rune.
+ if (value.ptr.checkZisp(s.retval, .string)) {
+ return err(s, "long string after hash sign");
+ }
+
+ return s.setReturn(value.pair.cons(
+ value.rune.pack("hash"),
+ s.retval,
+ ));
+}
+
+// Note: Can only come here from hashEnd().
+fn hashRuneEnd(s: *State) *State {
+ // Convert the fake sstr that was meant to be a rune.
+ const rune = value.rune.make(s.retval);
+
+ // Maybe it's a stand-alone rune, like: #foo
+ if (isEndDelimiter(s.peek())) {
+ // Which is only allowed in data mode.
+ if (s.mode == .code) {
+ return err(s, "bare runes not allowed in code");
+ } else {
+ return s.setReturn(rune);
+ }
+ }
+
+ // Otherwise, it's followed by a datum, like: #foo(...)
+
+ // Which is only allowed in code mode.
+ if (s.mode == .data) {
+ return err(s, "invalid use of hash in data mode");
+ } else {
+ s.last_rune = rune;
+ s.next = .rune_datum_end;
+ return s.newChild(.datum);
+ }
+}
+
+fn runeDatumEnd(s: *State) *State {
+ if (s.last_rune) |rune| {
+ return s.setReturn(value.pair.cons(rune, s.retval));
+ } else {
+ unreachable;
+ }
+}
+
+fn quote(s: *State) *State {
+ // Allowed in Scheme, but why? Not in Zisp.
+ if (isWhitespace(s.peek())) {
+ return err(s, "whitespace after apostrophe");
+ }
+ s.next = .quote_end;
+ const c = s.newChild(.datum);
+ c.mode = .data;
+ return c;
+}
+
+fn quoteEnd(s: *State) *State {
+ return s.setReturn(value.pair.cons(
+ value.rune.pack("quote"),
+ s.retval,
+ ));
}
fn list(s: *State) *State {
return s;
}
-fn err(s: *State) *State {
+fn plus(s: *State) *State {
+ return s;
+}
+
+fn comma(s: *State) *State {
+ return s;
+}
+
+fn dot(s: *State) *State {
+ return s;
+}
+
+fn number(s: *State) *State {
+ return s;
+}
+
+fn square(s: *State) *State {
+ return s;
+}
+
+fn backtick(s: *State) *State {
+ return s;
+}
+
+fn brace(s: *State) *State {
+ return s;
+}
+
+fn symbol(s: *State) *State {
return s;
}
+
+fn err(s: *State, msg: []const u8) *State {
+ _ = s;
+ std.debug.print("{s}\n", .{msg});
+ @panic("reader error");
+}
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
index dd9df3c..46ba9fa 100644
--- a/src/libzisp/value.zig
+++ b/src/libzisp/value.zig
@@ -65,9 +65,9 @@
//
// 001 :: Weak pointer to Zisp heap object
//
-// 01. :: Undefined
+// 01. :: Undefined (may be used by GC to flag pointers for some reason?)
//
-// 1.. :: Undefined
+// 1.. :: Foreign pointer (basically, a 50-bit fixnum of another type)
//
// This means pointers to the Zisp heap are 48 bits. Of those, we only really
// need 45, since 64-bit platforms are in practice limited to 48-bit addresses,
@@ -79,12 +79,15 @@
// regular Zisp heap pointer can never be null. Weak pointers, which can be
// null, avoid stepping on that forbidden value thanks to bit 49 being set.
//
+// Foreign pointers allow storing arbitrary pointers, or integers basically, of
+// up to 50 bits, without any further definition in Zisp of what they mean.
+//
//
// === Other values ===
//
-// This 51-bit range is divided as follows, based on the initial bits:
+// This 51-bit range is divided as follows, based on the high bits:
//
-// 000 :: Undefined
+// 000 :: Runes
//
// 001 :: Short string
//
@@ -94,9 +97,12 @@
//
// 1.. :: Undefined
//
-// Zisp strings are immutable and always encoded in UTF-8. Any string fitting
-// into 6 bytes or less will be stored as an immediate value, not requiring any
-// heap allocation or interning. (It's implicitly interned.)
+// Runes are symbols of 1 to 6 ASCII letters, used to implement reader syntax;
+// both built-in and extensions.
+//
+// Zisp strings are immutable. Any string fitting into 6 bytes or less will be
+// stored as an immediate value, not requiring any heap allocation or interning.
+// It's implicitly interned, so to speak. This includes the empty string.
//
// The null byte serves as a terminator and cannot appear in these strings; a
// string that short but actually containing a null byte will need to be heap
@@ -115,15 +121,19 @@
// 8 bits are allowed to be set, with the other 40 being reserved, so there's a
// limit of 256 singleton values that can be defined.
//
-// And on top of all that we still have a 48-bit and a 50-bit range left!
+// And on top of all that we still have a 50-bit range left!
//
-// The forbidden value 4, Positive Infinity, is in the 48-bit undefined value
-// range starting with the 000 tag.
+// The forbidden value 4, Positive Infinity, would be the "empty string rune"
+// but that isn't allowed anyway, so all is fine.
//
// Here's the original article explaining the strategy:
//
-// https://tkammer.de/zisp/notes/nan.html
+// https://tkammer.de/zisp/notes/nan.html
+//
+// More about runes:
+//
+// https://tkammer.de/zisp/notes/symbols.html
//
// Note: Packed structs are least-to-most significant, so the order of fields
// must be reversed relative to a typical big-endian illustration of the bit
@@ -136,6 +146,7 @@ pub const fixnum = @import("value/fixnum.zig");
pub const ptr = @import("value/ptr.zig");
+pub const rune = @import("value/rune.zig");
pub const sstr = @import("value/sstr.zig");
pub const char = @import("value/char.zig");
pub const boole = @import("value/boole.zig");
@@ -144,7 +155,7 @@ pub const eof = @import("value/eof.zig");
pub const pair = @import("value/pair.zig");
-/// To fill up the u11 exponent part of a NaN.
+// To fill up the u11 exponent part of a NaN.
const FILL = 0x7ff;
/// Represents a Zisp value/object.
@@ -214,6 +225,16 @@ pub const Value = packed union {
_is_ifxnum: bool,
},
+ /// For initializing and reading runes.
+ rune: packed struct {
+ // actually [6]u8 but packed struct cannot contain arrays
+ name: u48,
+ _tag: OtherTag = .rune,
+ _is_ptr: bool = false,
+ _: u11 = FILL,
+ _is_fixnum: bool = false,
+ },
+
/// For initializing and reading short strings.
sstr: packed struct {
// actually [6]u8 but packed struct cannot contain arrays
@@ -244,7 +265,7 @@ pub const Value = packed union {
_is_fixnum: bool = false,
},
- const OtherTag = enum(u3) { sstr = 1, char = 2, misc = 3 };
+ const OtherTag = enum(u3) { rune, sstr, char, misc };
const Self = @This();
@@ -282,6 +303,7 @@ pub const Value = packed union {
return self.isPacked() and !self.ieee.sign and !self.ieee.quiet;
}
+ /// Checks for any "other" type of value.
pub fn isOther(self: Self, tag: OtherTag) bool {
return self._isOther() and self.other.tag == tag;
}
diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig
index 888dd3a..c705880 100644
--- a/src/libzisp/value/fixnum.zig
+++ b/src/libzisp/value/fixnum.zig
@@ -28,11 +28,11 @@ pub fn isValidRange(int: i64) bool {
fn assertValidRange(int: i64) void {
if (int < fixnum_min) {
- std.debug.print("int too small for fixnum: {}", .{int});
+ std.debug.print("int too small for fixnum: {}\n", .{int});
@panic("int too small for fixnum");
}
if (int > fixnum_max) {
- std.debug.print("int too large for fixnum: {}", .{int});
+ std.debug.print("int too large for fixnum: {}\n", .{int});
@panic("int too large for fixnum");
}
}
diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig
index fe13af5..e1fadf2 100644
--- a/src/libzisp/value/ptr.zig
+++ b/src/libzisp/value/ptr.zig
@@ -31,27 +31,13 @@ pub fn assertForeign(v: Value) void {
}
}
-pub fn checkForeignRange(ptr: *anyopaque) bool {
- const int = @intFromPtr(ptr);
- return int <= std.math.maxInt(u50);
-}
-
-fn assertForeignRange(ptr: *anyopaque) void {
- if (!checkForeignRange(ptr)) {
- std.debug.print("foreign pointer out of range: {}\n", .{ptr});
- @panic("foreign pointer out of range");
- }
-}
-
-pub fn packForeign(ptr: *anyopaque) Value {
- assertForeignRange(ptr);
- const int: u50 = @intCast(@intFromPtr(ptr));
+pub fn packForeign(int: u50) Value {
return .{ .fptr = .{ .value = int } };
}
-pub fn unpackForeign(v: Value) *anyopaque {
+pub fn unpackForeign(v: Value) u50 {
assertForeign(v);
- return @ptrFromInt(v.fptr.value);
+ return v.fptr.value;
}
// Zisp Pointers
diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig
new file mode 100644
index 0000000..ab251b4
--- /dev/null
+++ b/src/libzisp/value/rune.zig
@@ -0,0 +1,75 @@
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const Value = value.Value;
+
+// Zig API
+
+pub fn check(v: Value) bool {
+ return v.isOther(.rune);
+}
+
+pub fn assert(v: Value) void {
+ if (!check(v)) {
+ v.dump();
+ @panic("not rune");
+ }
+}
+
+pub fn isValidRune(s: []const u8) bool {
+ if (s.len == 0 or s.len > 6) {
+ return false;
+ }
+ for (s) |c| {
+ switch (c) {
+ 'A'...'Z' => {},
+ 'a'...'z' => {},
+ else => return false,
+ }
+ }
+ return true;
+}
+
+fn assertValidRune(s: []const u8) void {
+ if (!isValidRune(s)) {
+ std.debug.print("invalid rune: '{s}'\n", .{s});
+ @panic("invalid rune");
+ }
+}
+
+// See sstr.zig which uses equivalent code; probably good to keep in sync.
+
+pub fn pack(s: []const u8) Value {
+ assertValidRune(s);
+ var v = Value{ .rune = .{ .name = 0 } };
+ const dest: [*]u8 = @ptrCast(&v.rune.name);
+ @memcpy(dest, s);
+ return v;
+}
+
+pub fn unpack(v: Value) struct { [6]u8, u3 } {
+ const s: [6]u8 = @bitCast(v.rune.name);
+ inline for (0..6) |i| {
+ if (s[i] == 0) return .{ s, i };
+ }
+ return .{ s, 6 };
+}
+
+// Zisp API
+
+pub fn pred(v: Value) Value {
+ return value.boole.pack(check(v));
+}
+
+pub fn make(v: Value) Value {
+ const s, const l = value.sstr.unpack(v);
+ return pack(s[0..l]);
+}
+
+pub fn getName(v: Value) Value {
+ const s, const l = unpack(v);
+ return value.sstr.pack(s[0..l]);
+}
+
+// TODO: Registering decoders
diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig
index 896b8d7..2be2647 100644
--- a/src/libzisp/value/sstr.zig
+++ b/src/libzisp/value/sstr.zig
@@ -31,7 +31,7 @@ pub fn isValidSstr(s: []const u8) bool {
fn assertValidSstr(s: []const u8) void {
if (!isValidSstr(s)) {
- std.debug.print("invalid sstr: {s}", .{s});
+ std.debug.print("invalid sstr: {s}\n", .{s});
@panic("invalid sstr");
}
}
@@ -40,6 +40,8 @@ fn assertValidSstr(s: []const u8) void {
// shifting and bit masking, but memcpy always wins easily according to our
// micro-benchmarks, under both ReleaseSafe and ReleaseFast.
+// Note: rune.zig uses equivalent code; probably good to keep in sync.
+
pub fn pack(s: []const u8) Value {
assertValidSstr(s);
var v = Value{ .sstr = .{ .string = 0 } };
@@ -49,6 +51,7 @@ pub fn pack(s: []const u8) Value {
}
pub fn unpack(v: Value) struct { [6]u8, u3 } {
+ assert(v);
const s: [6]u8 = @bitCast(v.sstr.string);
inline for (0..6) |i| {
if (s[i] == 0) return .{ s, i };