diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2026-06-10 15:49:31 +0200 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2026-06-10 15:49:31 +0200 |
| commit | b27809517b26089ad3359c1212caf9c3ffbf247d (patch) | |
| tree | 0bb51c2fcd28fd5e2af434bea5e5784d495d386b | |
| parent | 320c5aafff0499512475686feb374bdfde2a4123 (diff) | |
NaN-packing changes for interpreter.
| -rw-r--r-- | src/test/values.zig | 23 | ||||
| -rw-r--r-- | src/zisp/value.zig | 167 |
2 files changed, 125 insertions, 65 deletions
diff --git a/src/test/values.zig b/src/test/values.zig index 968a5bc..a276b2e 100644 --- a/src/test/values.zig +++ b/src/test/values.zig @@ -42,6 +42,29 @@ test "fixnum" { try testing.expectEqual(int1 + int2, result); } +test "snan" { + // Can speculative execution screw up sNaN handling? + const v1 = value.fixnum.pack(1234); + const v2 = value.fixnum.pack(1234); + + // Even executing this unconditionally doesn't seem to cause issues on my + // machine, so it's not clear how to best test this. + //const f = v1.double + v2.double; + //std.debug.print("oh no! {}\n", .{f}); + + if (v1.isDouble() and v2.isDouble()) { + @branchHint(.likely); + // This actually doesn't even cause any issues if you execute it + // unconditionally. It just prints the NaN value. + const fl = v1.double + v2.double; + std.debug.print("oh no! {}\n", .{fl}); + } else { + @branchHint(.cold); + const v3 = value.fixnum.add(v1, v2); + try testing.expectEqual(value.fixnum.unpack(v3), 2468); + } +} + test "ptr" { const ptr = value.ptr; diff --git a/src/zisp/value.zig b/src/zisp/value.zig index d157165..9fd9384 100644 --- a/src/zisp/value.zig +++ b/src/zisp/value.zig @@ -1,5 +1,5 @@ //! -//! === NaN Packing Strategy === +//! == NaN Packing Strategy == //! //! Format of a double, in most to least significant field order: //! @@ -43,9 +43,9 @@ //! //! sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2 //! -//! sign = 0, quiet = 1 :: Pointers +//! sign = 0, quiet = 1 :: Pointers and various immediates //! -//! sign = 0, quiet = 0 :: Others +//! sign = 0, quiet = 0 :: Internal use by interpreter //! //! //! === Fixnums === @@ -54,40 +54,25 @@ //! any transformation. Only the smallest 52-bit signed negative, -2^51, cannot //! be represented, as it would step on Forbidden Value #1, Negative cqNaN. //! -//! Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to -//! make it one operation together with the NaN masking) to avoid the all-zero +//! 
Positive fixnums go through a bitwise NOT (implemented as an XOR mask to +//! combine it with removal of NaN-related high bits) to avoid the all-zero //! payload value, which would step on Forbidden Value #2, Negative Infinity. //! //! -//! === Pointers === +//! === Pointers and immediates === //! -//! Pointers are further subdivided as follows based on the remaining 51 bits, -//! with the first three bits used as a sort of tag: +//! This region of 51-bit values is divided as follows based on the three high +//! bits, providing a payload value of 48 bits for each. //! -//! 000 :: Regular pointer to Zisp heap object +//! 000 :: Regular pointer to Zisp heap (type tagged) //! -//! 001 :: Weak pointer to Zisp heap object +//! 001 :: Weak pointer to Zisp heap (type tagged) //! -//! 01. :: Undefined +//! 010 :: List segment array pointer (length tagged) //! -//! 1.. :: Undefined +//! 011 :: Interned string pointer (untagged) //! -//! This means Zisp heap pointers are 48 bits. This is sufficient, since the -//! address space of user-land applications is effectively 48 bits on 64-bit -//! systems. Further, Zisp heap objects are allocated at 16-byte boundaries, -//! meaning the lowest 4 bits are always zero; this is used for type tagging, -//! providing immediate information about the type of object pointed to. -//! -//! Forbidden Value #3, Positive cqNaN, is avoided thanks to the fact that a -//! regular Zisp heap pointer can never be null. Weak pointers, which can be -//! null, avoid stepping on that forbidden value thanks to one bit being set. -//! -//! -//! === Other values === -//! -//! This 51-bit range is divided as follows: -//! -//! 000 :: Subdivided as follows: +//! 100 :: Subdivided as follows: //! //! 0....... 0....... 0....... (etc.) :: Rune //! @@ -99,13 +84,32 @@ //! //! (etc.) //! -//! 001 :: Short string +//! 101 :: Short immediate string +//! +//! 11s :: Small rational (signed) +//! +//! ==== Type-tagged pointers ==== +//! +//! 
Zisp heap pointers use 48-bit addresses. This is sufficient, since the +//! address space of user-land applications is effectively 48 bits on 64-bit +//! systems. Further, Zisp heap objects are allocated at 16-byte boundaries, +//! meaning the lowest 4 bits are always zero; this is used for type tagging, +//! providing immediate information about the type of object pointed to. +//! +//! Forbidden Value #3, Positive cqNaN, is avoided thanks to the fact that a +//! regular Zisp heap pointer can never be null. +//! +//! ==== Segmented lists ==== //! -//! 01. :: Small rational +//! List segment arrays are also allocated at 16-byte boundaries, but use their +//! low 4 bits for segment length; see documentation of this API for details. //! -//! 1.. :: Undefined +//! ==== Interned strings ==== //! -//! ==== Runes and Small Values ==== +//! Interned string pointers don't use any low tag bits. Unlike other heap +//! objects, they can be allocated at any address. +//! +//! ==== Runes and misc. small values ==== //! //! Runes are symbols up to 6 ASCII characters used to implement reader syntax. //! They are NUL-terminated if shorter than six characters, meaning they cannot @@ -116,9 +120,6 @@ //! characters will be in "reverse" order, with the first character in lowest //! position, so the terminating NUL has to be searched from low to high. //! -//! Forbidden Value #4, Positive Infinity, would denote a rune of length zero -//! (all NUL bytes) which isn't allowed, so we avoid stepping on it. -//! //! The fact that runes are limited to ASCII opens up a lot of space for other //! small values to co-inhabit the same 48-bit range. We subdivide this space //! into increasingly many potential types, with smaller and smaller payloads, @@ -130,9 +131,10 @@ //! //! Unicode code points need 21 bits, so we use a 24-bit type for Characters. //! Miscellaneous values like true, false, nil, eof, etc. are placed into an -//! 8-bit type, since there will never be that many of them. +//! 
8-bit type, since there will never be that many of them. A virtually +//! unlimited number of user-defined enum types can fit here. //! -//! ==== Strings ==== +//! ==== Short strings ==== //! //! Another 48-bit space is used for strings of zero to six bytes. Like runes, //! these are NUL-terminated if shorter than six bytes, meaning that NUL cannot @@ -144,6 +146,25 @@ //! being a two's complement 25-bit signed integer, and denominator a 24-bit //! unsigned integer. //! +//! +//! === Internal use values === +//! +//! The final 51-bit range is used for various internal purposes by the Zisp +//! interpreter, mostly related to transparent code optimization: +//! +//! 000 :: Direct function pointer aka CALL instruction +//! +//! 001 :: Lexically scoped variable reference by index +//! +//! 010 :: Pointer to heap object as constant data +//! +//! 011 :: Short immediate string as constant data +//! +//! 1.. :: TBD +//! +//! Forbidden Value #4, Positive Infinity, is avoided thanks to the fact that +//! direct function pointers cannot be null. +//! const builtin = @import("builtin"); const std = @import("std"); @@ -228,8 +249,10 @@ pub const PtrTag = enum(u4) { } }; -// Non-pointer high bits (sign=0,exp=MAX,quiet=0) but as a u13 field. -const non_ptr: u13 = max(u11) << 1; +pub const PtrTagUIntType = @typeInfo(PtrTag).@"enum".tag_type; + +// "Pointer etc." high bits (sign=0,exp=MAX,quiet=1) as a u13 field. +const ptr_etc: u13 = max(u13) - 1; /// Represents a Zisp value/object. 
pub const Value = packed union { @@ -267,35 +290,47 @@ pub const Value = packed union { ptr: packed struct { tagged_value: u48, is_weak: bool = false, - _unused1: bool = false, - _unused2: bool = false, - _is_ptr: bool = true, - _: u11 = max(u11), - _is_fixnum: bool = false, + _1: bool = false, + _2: bool = false, + _: u13 = ptr_etc, + }, + + /// List segment array pointers + lsa: packed struct { + tagged_value: u48, + _id: u3 = 0b010, + _: u13 = ptr_etc, + }, + + /// Interned string pointer + isp: packed struct { + pointer: u48, + _id: u3 = 0b011, + _: u13 = ptr_etc, }, /// For initializing and reading runes. rune: packed struct { // actually [6]u8 but packed struct cannot contain arrays name: u48, - _tag: u3 = 0b000, - _: u13 = non_ptr, + _id: u3 = 0b100, + _: u13 = ptr_etc, }, /// For initializing and reading short strings. sstr: packed struct { // actually [6]u8 but packed struct cannot contain arrays bytes: u48, - _tag: u3 = 0b001, - _: u13 = non_ptr, + _id: u3 = 0b101, + _: u13 = ptr_etc, }, /// For initializing and reading small rats (rational numbers). srat: packed struct { q: u24, - p: u25, - _tag: u2 = 0b01, - _: u13 = non_ptr, + p: i25, + _tag: u2 = 0b11, + _: u13 = ptr_etc, }, // TODO: Use a general Small Value type registration mechanism. @@ -303,8 +338,8 @@ pub const Value = packed union { char: packed struct { value: u24, _sv_tag: u24 = 0x000080, - _tag: u3 = 0b000, - _: u13 = non_ptr, + _id: u3 = 0b100, + _: u13 = ptr_etc, }, // TODO: Use a general Small Value type registration mechanism. @@ -312,8 +347,8 @@ pub const Value = packed union { misc: packed struct { value: MiscValue, _sv_tag: u40 = 0x0000000080, - _tag: u3 = 0b000, - _: u13 = non_ptr, + _id: u3 = 0b100, + _: u13 = ptr_etc, }, // Disjoint masks where a specific bit or bit-group are set. 
@@ -325,7 +360,7 @@ pub const Value = packed union { // zig fmt: on // Mask for pointer type tag bits - const mask_ptr_tag: u48 = max(@typeInfo(PtrTag).@"enum".tag_type); + const mask_ptr_tag: u48 = max(PtrTagUIntType); /// Dumps the value for inspection. pub fn dump(v: Value) void { @@ -431,10 +466,10 @@ pub const Value = packed union { // that for example bits 50 and 51 must be zero, or 51 must be zero // while 50 is still ignored, and so on, as appropriate. // - const hi_bits: u13 = @intCast(v.bits >> 51); + const hi_bits: u14 = @intCast(v.bits >> 50); const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag); - const tag_bits: u4 = @intCast(v.bits & mask_ptr_tag); - const is_ptr = hi_bits == 0b0111111111111; + const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag); + const is_ptr = hi_bits == 0b01111111111110; const is_tag = tag_bits == @intFromEnum(tag); return if (is_ptr and is_tag) @ptrFromInt(ptr_val) else null; } @@ -444,10 +479,10 @@ pub const Value = packed union { /// table based on type. pub fn getPtrAny(v: Value) ?struct { Zptr, PtrTag } { // See last function, which is almost identical. - const hi_bits: u13 = @intCast(v.bits >> 51); + const hi_bits: u14 = @intCast(v.bits >> 50); const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag); - const tag_bits: u4 = @intCast(v.bits & mask_ptr_tag); - const is_ptr = hi_bits == 0b0111111111111; + const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag); + const is_ptr = hi_bits == 0b01111111111110; const zptr: Zptr = @ptrFromInt(ptr_val); const tag: PtrTag = @enumFromInt(tag_bits); return if (is_ptr) .{ zptr, tag } else null; @@ -469,11 +504,13 @@ pub const Value = packed union { /// mis-identify +cqNaN as a null pointer with zero property bits, which /// makes this a bit slower than `isPtrProps()`. 
pub fn isPtrAny(v: Value) bool { - const hi: u13 = @intCast(v.bits >> 51); - const lo: u51 = @truncate(v.bits); - return hi == 0b0111111111111 and lo != 0; + const hi: u14 = @intCast(v.bits >> 50); + const lo: u50 = @truncate(v.bits); + return hi == 0b01111111111110 and lo != 0; } + // TODO: Needs update from here on, and test the above + /// Checks for a rune. pub fn isRune(v: Value) bool { // |
