summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2026-06-10 15:49:31 +0200
committerTaylan Kammer <taylan.kammer@gmail.com>2026-06-10 15:49:31 +0200
commitb27809517b26089ad3359c1212caf9c3ffbf247d (patch)
tree0bb51c2fcd28fd5e2af434bea5e5784d495d386b
parent320c5aafff0499512475686feb374bdfde2a4123 (diff)
NaN-packing changes for interpreter.
-rw-r--r--src/test/values.zig23
-rw-r--r--src/zisp/value.zig167
2 files changed, 125 insertions, 65 deletions
diff --git a/src/test/values.zig b/src/test/values.zig
index 968a5bc..a276b2e 100644
--- a/src/test/values.zig
+++ b/src/test/values.zig
@@ -42,6 +42,29 @@ test "fixnum" {
try testing.expectEqual(int1 + int2, result);
}
+test "snan" {
+    // Can speculative execution screw up sNaN handling?
+    const v1 = value.fixnum.pack(1234);
+    const v2 = value.fixnum.pack(1234);
+
+    // Even executing this unconditionally doesn't seem to cause issues on my
+    // machine, so it's not clear how to best test this.
+    //const f = v1.double + v2.double;
+    //std.debug.print("oh no! {}\n", .{f});
+
+    // Both operands are packed fixnums, yet the double branch is the one hinted
+    // as likely -- presumably on purpose, to invite the CPU to speculate into
+    // the floating-point add on NaN-boxed bit patterns.
+    if (v1.isDouble() and v2.isDouble()) {
+        @branchHint(.likely);
+        // This actually doesn't even cause any issues if you execute it
+        // unconditionally. It just prints the NaN value.
+        const fl = v1.double + v2.double;
+        std.debug.print("oh no! {}\n", .{fl});
+    } else {
+        @branchHint(.cold);
+        const v3 = value.fixnum.add(v1, v2);
+        // expectEqual takes (expected, actual); keeping that order makes a
+        // failure report "expected 2468, found ..." rather than the reverse.
+        try testing.expectEqual(2468, value.fixnum.unpack(v3));
+    }
+}
+
test "ptr" {
const ptr = value.ptr;
diff --git a/src/zisp/value.zig b/src/zisp/value.zig
index d157165..9fd9384 100644
--- a/src/zisp/value.zig
+++ b/src/zisp/value.zig
@@ -1,5 +1,5 @@
//!
-//! === NaN Packing Strategy ===
+//! == NaN Packing Strategy ==
//!
//! Format of a double, in most to least significant field order:
//!
@@ -43,9 +43,9 @@
//!
//! sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2
//!
-//! sign = 0, quiet = 1 :: Pointers
+//! sign = 0, quiet = 1 :: Pointers and various immediates
//!
-//! sign = 0, quiet = 0 :: Others
+//! sign = 0, quiet = 0 :: Internal use by interpreter
//!
//!
//! === Fixnums ===
@@ -54,40 +54,25 @@
//! any transformation. Only the smallest 52-bit signed negative, -2^51, cannot
//! be represented, as it would step on Forbidden Value #1, Negative cqNaN.
//!
-//! Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to
-//! make it one operation together with the NaN masking) to avoid the all-zero
+//! Positive fixnums go through a bitwise NOT (implemented as an XOR mask to
+//! combine it with removal of NaN-related high bits) to avoid the all-zero
//! payload value, which would step on Forbidden Value #2, Negative Infinity.
//!
//!
-//! === Pointers ===
+//! === Pointers and immediates ===
//!
-//! Pointers are further subdivided as follows based on the remaining 51 bits,
-//! with the first three bits used as a sort of tag:
+//! This region of 51-bit values is divided as follows based on the three high
+//! bits, providing a payload value of 48 bits for each.
//!
-//! 000 :: Regular pointer to Zisp heap object
+//! 000 :: Regular pointer to Zisp heap (type tagged)
//!
-//! 001 :: Weak pointer to Zisp heap object
+//! 001 :: Weak pointer to Zisp heap (type tagged)
//!
-//! 01. :: Undefined
+//! 010 :: List segment array pointer (length tagged)
//!
-//! 1.. :: Undefined
+//! 011 :: Interned string pointer (untagged)
//!
-//! This means Zisp heap pointers are 48 bits. This is sufficient, since the
-//! address space of user-land applications is effectively 48 bits on 64-bit
-//! systems. Further, Zisp heap objects are allocated at 16-byte boundaries,
-//! meaning the lowest 4 bits are always zero; this is used for type tagging,
-//! providing immediate information about the type of object pointed to.
-//!
-//! Forbidden Value #3, Positive cqNaN, is avoided thanks to the fact that a
-//! regular Zisp heap pointer can never be null. Weak pointers, which can be
-//! null, avoid stepping on that forbidden value thanks to one bit being set.
-//!
-//!
-//! === Other values ===
-//!
-//! This 51-bit range is divided as follows:
-//!
-//! 000 :: Subdivided as follows:
+//! 100 :: Subdivided as follows:
//!
//! 0....... 0....... 0....... (etc.) :: Rune
//!
@@ -99,13 +84,32 @@
//!
//! (etc.)
//!
-//! 001 :: Short string
+//! 101 :: Short immediate string
+//!
+//! 11s :: Small rational (signed)
+//!
+//! ==== Type-tagged pointers ====
+//!
+//! Zisp heap pointers use 48-bit addresses. This is sufficient, since the
+//! address space of user-land applications is effectively 48 bits on 64-bit
+//! systems. Further, Zisp heap objects are allocated at 16-byte boundaries,
+//! meaning the lowest 4 bits are always zero; this is used for type tagging,
+//! providing immediate information about the type of object pointed to.
+//!
+//! Forbidden Value #3, Positive cqNaN, is avoided thanks to the fact that a
+//! regular Zisp heap pointer can never be null.
+//!
+//! ==== Segmented lists ====
//!
-//! 01. :: Small rational
+//! List segment arrays are also allocated at 16-byte boundaries, but use their
+//! low 4 bits for segment length; see documentation of this API for details.
//!
-//! 1.. :: Undefined
+//! ==== Interned strings ====
//!
-//! ==== Runes and Small Values ====
+//! Interned string pointers don't use any low tag bits. Unlike other heap
+//! objects, they can be allocated at any address.
+//!
+//! ==== Runes and misc. small values ====
//!
//! Runes are symbols up to 6 ASCII characters used to implement reader syntax.
//! They are NUL-terminated if shorter than six characters, meaning they cannot
@@ -116,9 +120,6 @@
//! characters will be in "reverse" order, with the first character in lowest
//! position, so the terminating NUL has to be searched from low to high.
//!
-//! Forbidden Value #4, Positive Infinity, would denote a rune of length zero
-//! (all NUL bytes) which isn't allowed, so we avoid stepping on it.
-//!
//! The fact that runes are limited to ASCII opens up a lot of space for other
//! small values to co-inhabit the same 48-bit range. We subdivide this space
//! into increasingly many potential types, with smaller and smaller payloads,
@@ -130,9 +131,10 @@
//!
//! Unicode code points need 21 bits, so we use a 24-bit type for Characters.
//! Miscellaneous values like true, false, nil, eof, etc. are placed into an
-//! 8-bit type, since there will never be that many of them.
+//! 8-bit type, since there will never be that many of them. A virtually
+//! unlimited number of user-defined enum types can fit here.
//!
-//! ==== Strings ====
+//! ==== Short strings ====
//!
//! Another 48-bit space is used for strings of zero to six bytes. Like runes,
//! these are NUL-terminated if shorter than six bytes, meaning that NUL cannot
@@ -144,6 +146,25 @@
//! being a two's complement 25-bit signed integer, and denominator a 24-bit
//! unsigned integer.
//!
+//!
+//! === Internal use values ===
+//!
+//! The final 51-bit range is used for various internal purposes by the Zisp
+//! interpreter, mostly related to transparent code optimization:
+//!
+//! 000 :: Direct function pointer aka CALL instruction
+//!
+//! 001 :: Lexically scoped variable reference by index
+//!
+//! 010 :: Pointer to heap object as constant data
+//!
+//! 011 :: Short immediate string as constant data
+//!
+//! 1.. :: TBD
+//!
+//! Forbidden Value #4, Positive Infinity, is avoided thanks to the fact that
+//! direct function pointers cannot be null.
+//!
const builtin = @import("builtin");
const std = @import("std");
@@ -228,8 +249,10 @@ pub const PtrTag = enum(u4) {
}
};
-// Non-pointer high bits (sign=0,exp=MAX,quiet=0) but as a u13 field.
-const non_ptr: u13 = max(u11) << 1;
+pub const PtrTagUIntType = @typeInfo(PtrTag).@"enum".tag_type;
+
+// "Pointer etc." high bits (sign=0,exp=MAX,quiet=1) as a u13 field.
+// Within the u13, the lowest bit is the quiet bit and the highest bit is the
+// sign, so the pattern is twelve ones under a clear sign bit: 0b0111111111111.
+// These are the top 13 bits of the 0b01111111111110 prefix tested by getPtr().
+const ptr_etc: u13 = max(u12);
/// Represents a Zisp value/object.
pub const Value = packed union {
@@ -267,35 +290,47 @@ pub const Value = packed union {
ptr: packed struct {
tagged_value: u48,
is_weak: bool = false,
- _unused1: bool = false,
- _unused2: bool = false,
- _is_ptr: bool = true,
- _: u11 = max(u11),
- _is_fixnum: bool = false,
+ _1: bool = false,
+ _2: bool = false,
+ _: u13 = ptr_etc,
+ },
+
+ /// List segment array pointers
+ lsa: packed struct {
+ tagged_value: u48,
+ _id: u3 = 0b010,
+ _: u13 = ptr_etc,
+ },
+
+ /// Interned string pointer
+ isp: packed struct {
+ pointer: u48,
+ _id: u3 = 0b011,
+ _: u13 = ptr_etc,
},
/// For initializing and reading runes.
rune: packed struct {
// actually [6]u8 but packed struct cannot contain arrays
name: u48,
- _tag: u3 = 0b000,
- _: u13 = non_ptr,
+ _id: u3 = 0b100,
+ _: u13 = ptr_etc,
},
/// For initializing and reading short strings.
sstr: packed struct {
// actually [6]u8 but packed struct cannot contain arrays
bytes: u48,
- _tag: u3 = 0b001,
- _: u13 = non_ptr,
+ _id: u3 = 0b101,
+ _: u13 = ptr_etc,
},
/// For initializing and reading small rats (rational numbers).
srat: packed struct {
q: u24,
- p: u25,
- _tag: u2 = 0b01,
- _: u13 = non_ptr,
+ p: i25,
+ _tag: u2 = 0b11,
+ _: u13 = ptr_etc,
},
// TODO: Use a general Small Value type registration mechanism.
@@ -303,8 +338,8 @@ pub const Value = packed union {
char: packed struct {
value: u24,
_sv_tag: u24 = 0x000080,
- _tag: u3 = 0b000,
- _: u13 = non_ptr,
+ _id: u3 = 0b100,
+ _: u13 = ptr_etc,
},
// TODO: Use a general Small Value type registration mechanism.
@@ -312,8 +347,8 @@ pub const Value = packed union {
misc: packed struct {
value: MiscValue,
_sv_tag: u40 = 0x0000000080,
- _tag: u3 = 0b000,
- _: u13 = non_ptr,
+ _id: u3 = 0b100,
+ _: u13 = ptr_etc,
},
// Disjoint masks where a specific bit or bit-group are set.
@@ -325,7 +360,7 @@ pub const Value = packed union {
// zig fmt: on
// Mask for pointer type tag bits
- const mask_ptr_tag: u48 = max(@typeInfo(PtrTag).@"enum".tag_type);
+ const mask_ptr_tag: u48 = max(PtrTagUIntType);
/// Dumps the value for inspection.
pub fn dump(v: Value) void {
@@ -431,10 +466,10 @@ pub const Value = packed union {
// that for example bits 50 and 51 must be zero, or 51 must be zero
// while 50 is still ignored, and so on, as appropriate.
//
- const hi_bits: u13 = @intCast(v.bits >> 51);
+ const hi_bits: u14 = @intCast(v.bits >> 50);
const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag);
- const tag_bits: u4 = @intCast(v.bits & mask_ptr_tag);
- const is_ptr = hi_bits == 0b0111111111111;
+ const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag);
+ const is_ptr = hi_bits == 0b01111111111110;
const is_tag = tag_bits == @intFromEnum(tag);
return if (is_ptr and is_tag) @ptrFromInt(ptr_val) else null;
}
@@ -444,10 +479,10 @@ pub const Value = packed union {
/// table based on type.
pub fn getPtrAny(v: Value) ?struct { Zptr, PtrTag } {
// See last function, which is almost identical.
- const hi_bits: u13 = @intCast(v.bits >> 51);
+ const hi_bits: u14 = @intCast(v.bits >> 50);
const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag);
- const tag_bits: u4 = @intCast(v.bits & mask_ptr_tag);
- const is_ptr = hi_bits == 0b0111111111111;
+ const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag);
+ const is_ptr = hi_bits == 0b01111111111110;
const zptr: Zptr = @ptrFromInt(ptr_val);
const tag: PtrTag = @enumFromInt(tag_bits);
return if (is_ptr) .{ zptr, tag } else null;
@@ -469,11 +504,13 @@ pub const Value = packed union {
/// mis-identify +cqNaN as a null pointer with zero property bits, which
/// makes this a bit slower than `isPtrProps()`.
pub fn isPtrAny(v: Value) bool {
- const hi: u13 = @intCast(v.bits >> 51);
- const lo: u51 = @truncate(v.bits);
- return hi == 0b0111111111111 and lo != 0;
+ const hi: u14 = @intCast(v.bits >> 50);
+ const lo: u50 = @truncate(v.bits);
+ return hi == 0b01111111111110 and lo != 0;
}
+ // TODO: Needs update from here on, and test the above
+
/// Checks for a rune.
pub fn isRune(v: Value) bool {
//