diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-16 22:07:26 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-16 22:07:26 +0100 |
| commit | e8ee011bf530ce8c9fc8b55ebc05e4258ac2dd21 (patch) | |
| tree | b04abcfb3c3cc26e7dbbcf99a16c0111bae2d9a5 /src/libzisp | |
| parent | dd3d8f9d768479df36e51d402adf55afad1aff07 (diff) | |
update
Diffstat (limited to 'src/libzisp')
| -rw-r--r-- | src/libzisp/value.zig | 221 | ||||
| -rw-r--r-- | src/libzisp/value/boole.zig | 10 | ||||
| -rw-r--r-- | src/libzisp/value/char.zig | 24 | ||||
| -rw-r--r-- | src/libzisp/value/double.zig | 37 | ||||
| -rw-r--r-- | src/libzisp/value/fixnum.zig | 87 | ||||
| -rw-r--r-- | src/libzisp/value/misc.zig | 6 | ||||
| -rw-r--r-- | src/libzisp/value/ptr.zig | 176 | ||||
| -rw-r--r-- | src/libzisp/value/sstr.zig | 3 |
8 files changed, 564 insertions, 0 deletions
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig new file mode 100644 index 0000000..62807be --- /dev/null +++ b/src/libzisp/value.zig @@ -0,0 +1,221 @@ +// +// Here's a summary of our packing strategy. +// +// Format of a double, in Zig least-to-most significant field order: +// +// { sign: u1, exponent: u11, fraction: u52 } +// +// When the exponent bits are all set, it's either a NaN or an Infinity. +// +// For value packing, almost all remaining 53 bits are available, giving us +// about 2^53 values, except for the four following bit patterns: +// +// *** FORBIDDEN VALUES *** +// +// 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 } +// +// 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 } +// +// 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 } +// +// 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 } +// +// The abbreviation "cqNaN" stands for canonical quiet NaN. +// +// Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest +// being zero. Th fraction MSb is also called the is_quiet flag, because it +// demarcates quiet NaNs. The rest being zero makes it the canonical qNaN. +// +// The positive and negative cqNaN are the *only* NaN values that can actually +// be returned by any FP operations, which is why we don't use them to pack +// values; we want to be able to represent NaN in Zisp as a double. +// +// Beyond those four bit patterns, all values with a maximum exponent (all bits +// set) are fair game for representing other values, so 2^53 - 4 possibilities. +// +// We split those 2^53 - 4 available values into four groups, each allowing for +// 2^51 - 1 different values to be encoded in them: +// +// sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1 +// +// sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2 +// +// sign = 0, quiet = 1 :: Pointers +// +// sign = 0, quiet = 0 :: Others +// +// +// === Fixnums === +// +// Negative fixnums actually represent themselves without needing to go through +// any transformation. Only the smallest 52-bit signed negative, -2^51, cannot +// be represented, as it would step on forbidden value 1, Negative cqNaN. +// +// Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to +// make it one operation together with the NaN masking) to avoid the all-zero +// payload value, which would step on forbidden value 2, Negative Infinity. +// +// +// === Pointers === +// +// Pointers are further subdivided as follows based on the remaining 51 bits: +// +// MSb = 1 :: Foreign Pointer (or a "special 50-bit fixnum") +// +// MSb = 0, SSb = 0 :: Pointer to heap object (string, vector, etc.) +// +// MSb = 0, SSb = 1 :: Weak pointer to heap object +// +// (SSb = Second-most significant bit) +// +// This means regular pointers to the Zisp heap are 49 bits. Of these, we only +// really need 45, since 64-bit platforms are in practice limited to 48-bit +// addresses, and allocations happen at 8-byte boundaries, meaning the least +// significant 3 bit are always 0. Thus, we are able to store 4-bit tags in +// those 49-bit pointers alongside the actual, multiple-of-8, 48-bit address. +// +// Note that foreign pointers avoid stepping on any forbidden value, thanks to +// bit 51 being set. +// +// The forbidden value 3, Positive cqNaN, is avoided thanks to the fact that a +// regular Zisp heap pointer can never be null. Weak pointers, which can be +// null, avoid stepping on that forbidden value thanks to bit 50 being set. +// +// +// === Other values === +// +// This 51-bit range is divided as follows, based on the initial bits: +// +// 000 :: Undefined +// +// 001 :: Small string +// +// 010 :: Unicode code point +// +// 011 :: Singleton values +// +// 1.. :: Undefined +// +// Zisp strings are immutable and always encoded in UTF-8. Any string fitting +// into 6 bytes or less will be stored as an immediate value, not requiring any +// heap allocation or interning. (It's implicitly interned.) +// +// There may still be uninterned strings on the heap that are just as short. +// Calling intern on them will return the equivalent small string. +// +// Unicode code points need a maximum of 21 bits, yet we have 48 available. +// This may be exploited for a future extension. +// +// Similarly, it's extremely unlikely that we will ever need more than a few +// dozen singleton values (false, true, null, and so on). As such, this range +// of bit patterns may be subdivided further in the future. +// +// And on top of all that we still have two 50-bit ranges left! +// +// The forbidden value 4, Positive Infinity, is in one of the two undefined +// value ranges. +// + +// Here's the original article explaining the strategy: +// +// https://tkammer.de/zisp/notes/nan.html +// +// Note: Packed structs are least-to-most significant, so the order of fields +// must be reversed relative to a typical big-endian illustration of the bit +// patterns of IEEE 754 double-precision floating point numbers. + +const std = @import("std"); + +pub const double = @import("value/double.zig"); +pub const fixnum = @import("value/fixnum.zig"); + +pub const ptr = @import("value/ptr.zig"); + +pub const sstr = @import("value/sstr.zig"); +pub const char = @import("value/char.zig"); +pub const misc = @import("value/misc.zig"); +pub const boole = @import("value/boole.zig"); + +/// To fill up the u11 exponent part of a NaN. +const FILL = 0x7ff; + +/// Represents a Zisp value/object. +pub const Value = packed union { + double: f64, + + nan: packed struct { + rest: u51, + quiet: u1, + exp: u11 = FILL, + sign: u1, + }, + + fixnum: packed struct { + code: u51, + negative: bool, + _: u11 = FILL, + is_fixnum: bool = true, + }, + + ptr: packed struct { + // if foreign, we don't actually use value and is_weak + value: u49, + weak: bool = false, + foreign: bool = false, + is_ptr: bool = true, + _: u11 = FILL, + _fixnum: bool = false, + }, + + fptr: packed struct { + value: u50, + _foreign: bool = true, + _ptr: bool = true, + _: u11 = FILL, + _fixnum: bool = false, + }, + + sstr: packed struct { + // packed struct cannot contain array + value: u48, + tag: Tag = .str, + ptr: bool = false, + _: u11 = FILL, + fixnum: bool = false, + }, + + char: packed struct { + value: u48, + tag: u3 = 2, + ptr: bool = false, + _: u11 = FILL, + fixnum: bool = false, + }, + + misc: packed struct { + value: u48, + tag: u3 = 3, + ptr: bool = false, + _: u11 = FILL, + fixnum: bool = false, + }, + + const Tag = enum(u3) { str = 1, char = 2, misc = 3 }; + + const Self = @This(); + + /// Hexdumps the value. + pub fn dump(self: Self) void { + std.debug.dumpHex(std.mem.asBytes(&self)); + } + + /// Checks for any IEEE 754 NaN. + pub fn isNan(self: Self) bool { + return self.nan.exp == FILL; + } + + /// Checks for a Zisp value (non-double) packed into a NaN. + pub fn isPacked(self: Self) bool { + return self.isNan() and self.nan.rest != 0; + } +}; diff --git a/src/libzisp/value/boole.zig b/src/libzisp/value/boole.zig new file mode 100644 index 0000000..d4fbd28 --- /dev/null +++ b/src/libzisp/value/boole.zig @@ -0,0 +1,10 @@ +const Value = @import("../value.zig").Value; +const misc = @import("misc.zig"); + +// These can be accessed from either namespace. +pub const f = misc.f; +pub const t = misc.t; + +pub fn pack(b: bool) Value { + return if (b) f else t; +} diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig new file mode 100644 index 0000000..7034128 --- /dev/null +++ b/src/libzisp/value/char.zig @@ -0,0 +1,24 @@ +const Value = @import("../value.zig").Value; + +pub fn check(v: Value) bool { + return v.isPacked() and + !v.char.fixnum and + !v.char.ptr and + v.char.tag == .char; +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not char"); + } +} + +pub fn pack(c: u21) Value { + return .{ .char = .{c} }; +} + +pub fn unpack(v: Value) u21 { + assert(v); + return v.char.value; +} diff --git a/src/libzisp/value/double.zig b/src/libzisp/value/double.zig new file mode 100644 index 0000000..5c98324 --- /dev/null +++ b/src/libzisp/value/double.zig @@ -0,0 +1,37 @@ +const Value = @import("../value.zig").Value; + +// Zig API + +/// Checks for a Zisp double (double, +inf, -inf, or canonical NaN). +pub fn check(v: Value) bool { + return !v.isPacked(); +} + +/// Asserts check(). +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not double"); + } +} + +pub fn pack(d: f64) Value { + return .{ .double = d }; +} + +pub fn unpack(v: Value) f64 { + assert(v); + return v.double; +} + +// Zisp API + +pub fn pred(v: Value) Value { + return Value.boole.pack(check(v)); +} + +pub fn add(v1: Value, v2: Value) Value { + const d1 = unpack(v1); + const d2 = unpack(v2); + return pack(d1 + d2); +} diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig new file mode 100644 index 0000000..60b4239 --- /dev/null +++ b/src/libzisp/value/fixnum.zig @@ -0,0 +1,87 @@ +const std = @import("std"); + +const Value = @import("../value.zig").Value; + +// Zig API + +/// Checks for a Zisp fixnum. +pub fn check(v: Value) bool { + return v.isPacked() and v.fixnum.is_fixnum; +} + +/// Asserts check(). +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not fixnum"); + } +} + +// See detailed NaN packing docs for why the +/- 1. +const fixnum_min = std.math.minInt(i52) + 1; +const fixnum_max = std.math.maxInt(i52) - 1; + +fn isValidRange(int: i64) bool { + return fixnum_min < int and int < fixnum_max; +} + +fn assertValidRange(int: i64) void { + if (int < fixnum_min) { + std.debug.print("int too small for fixnum: {}", .{int}); + @panic("int too small for fixnum"); + } + if (int > fixnum_max) { + std.debug.print("int too large for fixnum: {}", .{int}); + @panic("int too large for fixnum"); + } +} + +fn packNegative(int: i64) Value { + return @bitCast(int); +} + +fn unpackNegative(v: Value) i64 { + return @bitCast(v); +} + +const positive_mask: u64 = 0xfff7ffffffffffff; + +fn packPositive(int: i64) Value { + const uint: u64 = @bitCast(int); + return @bitCast(uint ^ positive_mask); +} + +fn unpackPositive(v: Value) i64 { + const uint: u64 = @bitCast(v); + return @bitCast(uint ^ positive_mask); +} + +pub fn pack(int: i64) Value { + assertValidRange(int); + if (int < 0) { + return packNegative(int); + } else { + return packPositive(int); + } +} + +pub fn unpack(v: Value) i64 { + assert(v); + if (v.fixnum.negative) { + return unpackNegative(v); + } else { + return unpackPositive(v); + } +} + +// Zisp API + +pub fn pred(v: Value) Value { + return Value.boole.pack(check(v)); +} + +pub fn add(v1: Value, v2: Value) Value { + const int1 = unpack(v1); + const int2 = unpack(v2); + return pack(int1 + int2); +} diff --git a/src/libzisp/value/misc.zig b/src/libzisp/value/misc.zig new file mode 100644 index 0000000..2570644 --- /dev/null +++ b/src/libzisp/value/misc.zig @@ -0,0 +1,6 @@ +const Value = @import("../value.zig").Value; + +pub const f = Value{ .misc = .{0} }; +pub const t = Value{ .misc = .{1} }; +pub const nil = Value{ .misc = .{2} }; +pub const eof = Value{ .misc = .{3} }; diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig new file mode 100644 index 0000000..4bf92b6 --- /dev/null +++ b/src/libzisp/value/ptr.zig @@ -0,0 +1,176 @@ +const std = @import("std"); + +const Value = @import("../value.zig").Value; + +// Zig API + +pub fn check(v: Value) bool { + return v.isPacked() and v.ptr.is_ptr; +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not a pointer"); + } +} + +// Foreign Pointers + +pub fn checkForeign(v: Value) bool { + return check(v) and v.ptr.foreign; +} + +pub fn assertForeign(v: Value) void { + if (!checkForeign(v)) { + v.dump(); + @panic("not foreign pointer"); + } +} + +pub fn packForeign(int: u50) Value { + return .{ .fptr = .{int} }; +} + +pub fn unpackForeign(v: Value) u64 { + assertForeign(v); + return v.ptr.value.foreign; +} + +// Zisp Pointers + +pub fn checkZisp(v: Value) bool { + return check(v) and !v.ptr.foreign; +} + +pub fn assertZisp(v: Value) void { + if (!checkZisp(v)) { + v.dump(); + @panic("not zisp pointer"); + } +} + +pub fn checkWeak(v: Value) bool { + return checkZisp(v) and v.ptr.weak; +} + +pub fn assertWeak(v: Value) void { + if (!checkWeak(v)) { + v.dump(); + @panic("not weak zisp pointer"); + } +} + +pub fn checkNormal(v: Value) bool { + return checkZisp(v) and !v.ptr.weak; +} + +pub fn assertNormal(v: Value) void { + if (!checkNormal(v)) { + v.dump(); + @panic("not normal zisp pointer"); + } +} + +pub fn packZisp(ptr: *anyopaque, tag: Tag, weak: bool) Value { + return .{ .ptr = .{ + .value = tagPtr(ptr, tag), + .weak = weak, + } }; +} + +pub fn pack(ptr: *anyopaque, tag: Tag) Value { + return packZisp(ptr, tag, false); +} + +pub fn packWeak(ptr: *anyopaque, tag: Tag) Value { + return packZisp(ptr, tag, true); +} + +// Unpacks weak as well; no need for a separate fn. +pub fn unpack(v: Value) struct { ptr: *anyopaque, tag: Tag } { + assertZisp(v); + return untagPtr(v.ptr.value.tagged); +} + +// Weak pointers may be null. +pub fn isNull(v: Value) bool { + assertWeak(v); + const ptr, _ = untagPtr(v.ptr.value.tagged); + return @intFromPtr(ptr) == 0; +} + +pub fn tagPtr(ptr: *anyopaque, tag: Tag) u49 { + const int: u64 = @intFromPtr(ptr); + const untagged: u49 = @truncate(int); + return untagged << 1 | @intFromEnum(tag); +} + +pub fn untagPtr(tagged: 49) struct { ptr: *anyopaque, tag: Tag } { + const untagged: u49 = tagged >> 1 & 0xfffffffffff0; + const ptr: *anyopaque = @ptrFromInt(untagged); + const int: u4 = @truncate(tagged); + const tag: Tag = @enumFromInt(int); + return .{ .ptr = ptr, .tag = tag }; +} + +pub const Tag = enum(u4) { + /// 1. Strings / Symbols + string, + /// 2. Bignums / Ratnums + number, + /// 3. Pairs ([2]Value) + pair, + /// 4. Vector, bytevector, etc. + array, + /// 5. Ordered hash table + table, + /// 6. String buffer + text, + /// 7. Class, interface, etc. + role, + /// 8. Instance, basically + actor, + /// 9. I/O Port + port, + /// 10. Procedure + proc, + /// 11. Continuation + cont, + /// Other + other = 15, +}; + +// Zisp API + +pub fn predForeign(v: Value) Value { + return Value.boole.pack(checkForeign(v)); +} + +pub fn makeWeak(v: Value) Value { + assertNormal(v); + var copy = v; + copy.ptr.weak = true; + return copy; +} + +pub fn predWeak(v: Value) Value { + const isWeak = checkWeak(v); + return Value.boole.pack(isWeak); +} + +pub fn predWeakNull(v: Value) Value { + assertWeak(v); + return Value.boole.pack(v.ptr.weak); +} + +pub fn getWeak(v: Value) Value { + assertWeak(v); + if (isNull(v)) { + return Value.boole.pack(false); + } else { + var copy = v; + copy.ptr.weak = false; + return copy; + } +} diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig new file mode 100644 index 0000000..3c0755d --- /dev/null +++ b/src/libzisp/value/sstr.zig @@ -0,0 +1,3 @@ +const Value = @import("../value.zig").Value; + +// stub |
