// Origin: commit 78e08799d53d03d9972fbcdd1e8640574fb9c31e ("seq -> array"),
// file src/zisp/value/array.zig (replaces src/zisp/value/seq.zig).

const builtin = @import("builtin");
const std = @import("std");

const value = @import("../value.zig");
const gc = @import("../gc.zig");

const Value = value.Value;

/// One-bit byte-order tag stored in array headers.
const Endian = enum(u1) {
    little,
    big,

    /// Byte order of the compilation target.
    const native: Endian = switch (builtin.target.cpu.arch.endian()) {
        .little => .little,
        .big => .big,
    };
};

/// Pointer to header for an array of various element types and sizes.
///
/// The low 48 bits are either the length (element count, not buffer size) of
/// the array contents that follow immediately, or a pointer to the header of
/// another array whose memory is shared with this one, or a null pointer that
/// has special meaning; see below.
///
/// NOTE: For strings, the length is in fact the size in bytes of the buffer.
/// To get the "length" of a string according to other definitions of length,
/// such as count of Unicode Scalar Values, or count of Grapheme Clusters,
/// different encoding-specific string APIs must be used.
///
/// If this is a pointer (`is_ptr` is set) it means it's a re-interpretation of
/// the contents of the array that is being pointed to.  (But see below.)
///
/// If this is a slice (`is_slice` is set) then two more u64 values follow this
/// one, marking the start and end of the slice of the pointed-to array that
/// this one represents.  (But see next paragraph.)
///
/// If this is a pointer or slice, but the pointer value is null, then another
/// u64 follows this one immediately, and points directly to a memory buffer
/// (not array head) whose contents are used.  In this case, if it's a slice,
/// the start and end u64 values come after that pointer instead.
///
/// If this header encodes a non-slice, non-array, direct buffer pointer, then
/// there is no length information, so the count of elements is unknown and
/// bounds checks cannot be performed.
///
/// Having `is_slice` set implies `is_ptr` being set.  Having `is_slice` set
/// while `is_ptr` is unset is an undefined state.
///
/// Only one layer of indirection is allowed; a pointer or slice must not point
/// to another pointer or slice header.  Given that the pointer points to the
/// header of another array, rather than its contents, it's always possible to
/// reach the original array and create another pointer to or slice of it, so
/// additional layers of indirection are never needed.  Alternatively, if the
/// array pointer is null, then the buffer pointer can be reused to create
/// another array with the same buffer pointer and different type info.
///
/// Other remaining bits provide information about element type and size.
pub const Array = *align(8) packed struct(u64) {
    /// Element count (byte count for strings) when `is_ptr` is unset;
    /// otherwise the address of another array header, or 0 for "direct
    /// buffer pointer stored in the next word".
    len_or_ptr: u48,
    /// Set when start/end words follow the header; implies `is_ptr`.
    is_slice: bool,
    /// Set when `len_or_ptr` is an address rather than a length.
    is_ptr: bool,
    /// Element kind; selects the active member of `info`.
    type: enum(u2) { int, float, value, string },
    /// Per-kind details packed into the remaining 12 bits.
    info: packed union {
        int: packed struct(u12) {
            size: u10,
            signed: bool,
            endian: Endian = .native,
        },
        float: packed struct(u12) {
            size: u10,
            _: bool,
            endian: Endian = .native,
        },
        value: packed struct(u12) {
            weak: bool,
            _: u11,
        },
        string: packed struct(u12) {
            encoding: enum(u10) {
                utf8,
                utf16,
                utf24,
                utf32,
            } = .utf8,
            interned: bool = false,
            endian: Endian = .native,
        },
    },

    /// Views the header and whatever follows it as a run of 64-bit words.
    /// Word 0 is the header itself.
    fn bufU64(self: *@This()) [*]u64 {
        return @ptrCast(self);
    }

    /// Returns the inline contents, which start right after the header word.
    /// Only valid for arrays that own their contents (`is_ptr` unset).
    fn bufContent(self: *@This()) [*]u8 {
        std.debug.assert(!self.is_ptr);
        return @ptrCast(self.bufU64() + 1);
    }

    /// Returns the direct buffer pointer stored in the word after the header.
    /// Only valid when `is_ptr` is set and the array pointer is null.
    fn bufPointer(self: *@This()) [*]u8 {
        std.debug.assert(self.is_ptr);
        std.debug.assert(self.len_or_ptr == 0);
        // The word holds a raw address, so it must be converted with
        // @ptrFromInt; @ptrCast only accepts pointer operands.
        const address: usize = @intCast(self.bufU64()[1]);
        return @ptrFromInt(address);
    }

    /// Returns the size in bytes of one element.
    /// Only valid for arrays that own their contents (`is_ptr` unset).
    fn eltSize(self: *@This()) u16 {
        std.debug.assert(!self.is_ptr);
        return switch (self.type) {
            // String lengths are stored as byte counts (see the note in the
            // type's documentation), so the unit is one byte regardless of
            // the encoding.
            .string => 1,
            // NOTE(review): the unit of `info.int.size` / `info.float.size`
            // (bits vs. bytes) and the in-memory size of a value element are
            // not established in this file -- confirm before implementing.
            .int, .float, .value => @panic("eltSize() is not implemented for this element type."),
        };
    }

    /// Returns the size in bytes of the inline contents.
    /// Only valid for arrays that own their contents (`is_ptr` unset).
    fn size(self: *@This()) usize {
        std.debug.assert(!self.is_ptr);
        // Widen before multiplying so the product is not computed in u48.
        const count: usize = @intCast(self.len_or_ptr);
        return count * self.eltSize();
    }

    /// Returns the header of the array this one refers to, or null when the
    /// stored address is 0 (direct buffer pointer case).
    fn arrPointer(self: *@This()) ?Array {
        std.debug.assert(self.is_ptr);
        const address: usize = @intCast(self.len_or_ptr);
        if (address == 0) return null;
        return @as(Array, @ptrFromInt(address));
    }

    /// Returns the `{ start, end }` words of a slice header.
    ///
    /// They are words 1 and 2 when the header points at another array, and
    /// words 2 and 3 when the array pointer is null, because word 1 is then
    /// taken by the direct buffer pointer.
    fn sliceInfo(self: *@This()) [2]u64 {
        std.debug.assert(self.is_slice);
        // `is_slice` without `is_ptr` is documented as an undefined state.
        std.debug.assert(self.is_ptr);
        const words = self.bufU64();
        const first: usize = if (self.len_or_ptr != 0) 1 else 2;
        return .{ words[first], words[first + 1] };
    }

    /// Returns a pointer to the first byte of the underlying contents,
    /// following at most one level of indirection.  No length is attached.
    pub fn buf(self: *@This()) [*]u8 {
        if (!self.is_ptr) return self.bufContent();
        if (self.arrPointer()) |target| return target.bufContent();
        return self.bufPointer();
    }

    /// Returns the contents as a byte slice.
    ///
    /// For slice headers the stored start/end words are applied directly as
    /// byte offsets into the underlying buffer.  Panics for a non-slice
    /// header that holds a direct buffer pointer, since no length is known.
    pub fn str(self: *@This()) []u8 {
        if (self.is_slice) {
            const bytes = self.buf();
            const bounds = self.sliceInfo();
            const start: usize = @intCast(bounds[0]);
            const end: usize = @intCast(bounds[1]);
            return bytes[start..end];
        }
        const arr: *@This() = if (self.is_ptr)
            self.arrPointer() orelse
                @panic("Called str() on array with direct buffer pointer.")
        else
            self;
        return arr.bufContent()[0..arr.size()];
    }
};