summaryrefslogtreecommitdiff
path: root/src/libzisp/value.zig
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-02-16 22:07:26 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-02-16 22:07:26 +0100
commite8ee011bf530ce8c9fc8b55ebc05e4258ac2dd21 (patch)
treeb04abcfb3c3cc26e7dbbcf99a16c0111bae2d9a5 /src/libzisp/value.zig
parentdd3d8f9d768479df36e51d402adf55afad1aff07 (diff)
update
Diffstat (limited to 'src/libzisp/value.zig')
-rw-r--r--src/libzisp/value.zig221
1 files changed, 221 insertions, 0 deletions
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
new file mode 100644
index 0000000..62807be
--- /dev/null
+++ b/src/libzisp/value.zig
@@ -0,0 +1,221 @@
+//
+// Here's a summary of our packing strategy.
+//
+// Format of a double, from most to least significant bits (the reverse of
+// the order in which a Zig packed struct declares its fields):
+//
+// { sign: u1, exponent: u11, fraction: u52 }
+//
+// When the exponent bits are all set, it's either a NaN or an Infinity.
+//
+// For value packing, almost all remaining 53 bits are available, giving us
+// about 2^53 values, except for the four following bit patterns:
+//
+// *** FORBIDDEN VALUES ***
+//
+// 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 }
+//
+// 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 }
+//
+// 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 }
+//
+// 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 }
+//
+// The abbreviation "cqNaN" stands for canonical quiet NaN.
+//
+// Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest
+// being zero. The fraction MSb is also called the is_quiet flag, because it
+// demarcates quiet NaNs. The rest being zero makes it the canonical qNaN.
+//
+// The positive and negative cqNaN are the *only* NaN values that can actually
+// be returned by any FP operations, which is why we don't use them to pack
+// values; we want to be able to represent NaN in Zisp as a double.
+//
+// Beyond those four bit patterns, all values with a maximum exponent (all bits
+// set) are fair game for representing other values, so 2^53 - 4 possibilities.
+//
+// We split those 2^53 - 4 available values into four groups, each allowing for
+// 2^51 - 1 different values to be encoded in them:
+//
+// sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1
+//
+// sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2
+//
+// sign = 0, quiet = 1 :: Pointers
+//
+// sign = 0, quiet = 0 :: Others
+//
+//
+// === Fixnums ===
+//
+// Negative fixnums actually represent themselves without needing to go through
+// any transformation. Only the smallest 52-bit signed negative, -2^51, cannot
+// be represented, as it would step on forbidden value 1, Negative cqNaN.
+//
+// Positive fixnums go through bitwise NOT (implemented via an XOR mask here to
+// make it one operation together with the NaN masking) to avoid the all-zero
+// payload value, which would step on forbidden value 2, Negative Infinity.
+//
+//
+// === Pointers ===
+//
+// Pointers are further subdivided as follows based on the remaining 51 bits:
+//
+// MSb = 1 :: Foreign Pointer (or a "special 50-bit fixnum")
+//
+// MSb = 0, SSb = 0 :: Pointer to heap object (string, vector, etc.)
+//
+// MSb = 0, SSb = 1 :: Weak pointer to heap object
+//
+// (SSb = Second-most significant bit)
+//
+// This means regular pointers to the Zisp heap are 49 bits. Of these, we only
+// really need 45, since 64-bit platforms are in practice limited to 48-bit
+// addresses, and allocations happen at 8-byte boundaries, meaning the least
+// significant 3 bits are always 0. Thus, we are able to store 4-bit tags in
+// those 49-bit pointers alongside the actual, multiple-of-8, 48-bit address.
+//
+// Note that foreign pointers avoid stepping on any forbidden value, thanks to
+// bit 51 being set.
+//
+// The forbidden value 3, Positive cqNaN, is avoided thanks to the fact that a
+// regular Zisp heap pointer can never be null. Weak pointers, which can be
+// null, avoid stepping on that forbidden value thanks to bit 50 being set.
+//
+//
+// === Other values ===
+//
+// This 51-bit range is divided as follows, based on the initial bits:
+//
+// 000 :: Undefined
+//
+// 001 :: Small string
+//
+// 010 :: Unicode code point
+//
+// 011 :: Singleton values
+//
+// 1.. :: Undefined
+//
+// Zisp strings are immutable and always encoded in UTF-8. Any string fitting
+// into 6 bytes or less will be stored as an immediate value, not requiring any
+// heap allocation or interning. (It's implicitly interned.)
+//
+// There may still be uninterned strings on the heap that are just as short.
+// Calling intern on them will return the equivalent small string.
+//
+// Unicode code points need a maximum of 21 bits, yet we have 48 available.
+// This may be exploited for a future extension.
+//
+// Similarly, it's extremely unlikely that we will ever need more than a few
+// dozen singleton values (false, true, null, and so on). As such, this range
+// of bit patterns may be subdivided further in the future.
+//
+// And on top of all that we still have two undefined ranges left: the 48-bit
+// range 000 and the 50-bit range 1..!
+//
+// The forbidden value 4, Positive Infinity, is in one of the two undefined
+// value ranges.
+//
+
+// Here's the original article explaining the strategy:
+//
+// https://tkammer.de/zisp/notes/nan.html
+//
+// Note: Packed structs are least-to-most significant, so the order of fields
+// must be reversed relative to a typical big-endian illustration of the bit
+// patterns of IEEE 754 double-precision floating point numbers.
+
+const std = @import("std");
+
+pub const double = @import("value/double.zig");
+pub const fixnum = @import("value/fixnum.zig");
+
+pub const ptr = @import("value/ptr.zig");
+
+pub const sstr = @import("value/sstr.zig");
+pub const char = @import("value/char.zig");
+pub const misc = @import("value/misc.zig");
+pub const boole = @import("value/boole.zig");
+
+/// All-ones pattern of the u11 exponent field; set in every NaN and Infinity.
+const FILL = 0x7ff;
+
+/// Represents a Zisp value/object.
+///
+/// Every variant is a different view of the same 64 bits. Fields are listed
+/// least-to-most significant, and each view names `u64` as its backing integer
+/// so that a miscounted field width is a compile error instead of a silently
+/// shifted layout.
+pub const Value = packed union {
+    /// The plain IEEE 754 double.
+    double: f64,
+
+    /// Generic view: 51 low fraction bits, quiet flag, exponent, sign.
+    nan: packed struct(u64) {
+        rest: u51,
+        quiet: u1,
+        exp: u11 = FILL,
+        sign: u1,
+    },
+
+    /// Fixnum view; `is_fixnum` occupies the sign bit and `negative` the
+    /// quiet bit.
+    fixnum: packed struct(u64) {
+        code: u51,
+        negative: bool,
+        _: u11 = FILL,
+        is_fixnum: bool = true,
+    },
+
+    /// Pointer view; `is_ptr` occupies the quiet bit.
+    ptr: packed struct(u64) {
+        // if foreign, we don't actually use value and weak
+        value: u49,
+        weak: bool = false,
+        foreign: bool = false,
+        is_ptr: bool = true,
+        _: u11 = FILL,
+        _fixnum: bool = false,
+    },
+
+    /// Foreign pointer view; `_foreign` overlays `ptr.foreign`, leaving 50
+    /// payload bits.
+    fptr: packed struct(u64) {
+        value: u50,
+        _foreign: bool = true,
+        _ptr: bool = true,
+        _: u11 = FILL,
+        _fixnum: bool = false,
+    },
+
+    /// Small string view (tag 1).
+    sstr: packed struct(u64) {
+        // packed struct cannot contain array
+        value: u48,
+        tag: Tag = .str,
+        ptr: bool = false,
+        _: u11 = FILL,
+        fixnum: bool = false,
+    },
+
+    /// Unicode code point view (tag 2).
+    char: packed struct(u64) {
+        value: u48,
+        // The field stays a plain u3; only its default is taken from Tag.
+        tag: u3 = @intFromEnum(Tag.char),
+        ptr: bool = false,
+        _: u11 = FILL,
+        fixnum: bool = false,
+    },
+
+    /// Singleton values view (tag 3).
+    misc: packed struct(u64) {
+        value: u48,
+        // The field stays a plain u3; only its default is taken from Tag.
+        tag: u3 = @intFromEnum(Tag.misc),
+        ptr: bool = false,
+        _: u11 = FILL,
+        fixnum: bool = false,
+    },
+
+    /// Values of the 3-bit tag stored directly above the 48-bit payload.
+    const Tag = enum(u3) { str = 1, char = 2, misc = 3 };
+
+    const Self = @This();
+
+    /// Hexdumps the bytes of the value via `std.debug.dumpHex`.
+    pub fn dump(self: Self) void {
+        std.debug.dumpHex(std.mem.asBytes(&self));
+    }
+
+    /// Checks for an all-ones exponent.
+    ///
+    /// True for every IEEE 754 NaN, and also for positive and negative
+    /// Infinity, which share that exponent.
+    pub fn isNan(self: Self) bool {
+        return self.nan.exp == FILL;
+    }
+
+    /// Checks for a Zisp value (non-double) packed into a NaN.
+    ///
+    /// Both Infinities and both canonical quiet NaNs have `rest == 0`, so
+    /// they are reported as not packed.
+    pub fn isPacked(self: Self) bool {
+        return self.isNan() and self.nan.rest != 0;
+    }
+};