diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-30 18:34:00 +0200 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-30 18:34:00 +0200 |
| commit | 3d05c94b9d8aa964e4ff848c95d5999cec170e04 (patch) | |
| tree | 2864753404e8c9cb9fa8bce537772a29b90d1cbd /src/libzisp | |
| parent | 9340f3c44ca0d9d4fdee905fc6e8b428824b9185 (diff) | |
Big cleanup.
Diffstat (limited to 'src/libzisp')
| -rw-r--r-- | src/libzisp/gc.zig | 54 | ||||
| -rw-r--r-- | src/libzisp/io.zig | 12 | ||||
| -rw-r--r-- | src/libzisp/io/Parser.zig | 870 | ||||
| -rw-r--r-- | src/libzisp/io/decoder.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/io/encoder.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/io/parser.zig | 80 | ||||
| -rw-r--r-- | src/libzisp/io/reader.zig | 10 | ||||
| -rw-r--r-- | src/libzisp/io/unparser.zig | 122 | ||||
| -rw-r--r-- | src/libzisp/io/writer.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/lib.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/lib/list.zig | 20 | ||||
| -rw-r--r-- | src/libzisp/value.zig | 325 | ||||
| -rw-r--r-- | src/libzisp/value/boole.zig | 33 | ||||
| -rw-r--r-- | src/libzisp/value/char.zig | 31 | ||||
| -rw-r--r-- | src/libzisp/value/double.zig | 38 | ||||
| -rw-r--r-- | src/libzisp/value/eof.zig | 28 | ||||
| -rw-r--r-- | src/libzisp/value/fixnum.zig | 84 | ||||
| -rw-r--r-- | src/libzisp/value/istr.zig | 65 | ||||
| -rw-r--r-- | src/libzisp/value/nil.zig | 28 | ||||
| -rw-r--r-- | src/libzisp/value/pair.zig | 55 | ||||
| -rw-r--r-- | src/libzisp/value/ptr.zig | 174 | ||||
| -rw-r--r-- | src/libzisp/value/rune.zig | 82 | ||||
| -rw-r--r-- | src/libzisp/value/seq.zig | 57 | ||||
| -rw-r--r-- | src/libzisp/value/sstr.zig | 78 |
24 files changed, 0 insertions, 2250 deletions
diff --git a/src/libzisp/gc.zig b/src/libzisp/gc.zig deleted file mode 100644 index 815e203..0000000 --- a/src/libzisp/gc.zig +++ /dev/null @@ -1,54 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("value.zig"); - -const seq = value.seq; - -const Value = value.Value; - -/// A "heap value" that could be a Value or object header. -pub const Hval = union { - value: Value, - seq_header: seq.Header, -}; - -var _debugAlloc = switch (builtin.mode) { - .Debug => std.heap.DebugAllocator(.{}).init, - else => undefined, -}; -const alloc = switch (builtin.mode) { - .Debug => _debugAlloc.allocator(), - else => std.heap.smp_allocator, -}; - -// Cons cells - -var cons_pool = std.heap.MemoryPool([2]Value).init(alloc); - -pub fn cons(v1: Value, v2: Value) *[2]Value { - const mem = cons_pool.create() catch @panic("OOM"); - mem[0] = v1; - mem[1] = v2; - return mem; -} - -// Interned strings - -var istr_pool = std.hash_map.StringHashMap(void).init(alloc); - -pub fn intern(header: seq.Header, str: []const u8) *seq.Header { - const hs = @sizeOf(seq.Header); - const size = str.len + hs; - const copy = alloc.alloc(u8, size) catch @panic("OOM"); - const header_bytes: [hs]u8 = @bitCast(header); - @memcpy(copy[0..hs], &header_bytes); - @memcpy(copy[hs..size], str); - const entry = istr_pool.getOrPutValue(copy, {}) catch @panic("OOM"); - return @ptrCast(entry.key_ptr); -} - -pub fn istrHeader(ptr: *Hval) *seq.Header { - const entry_key: *[]u8 = @ptrCast(ptr); - return @alignCast(@ptrCast(entry_key.ptr)); -} diff --git a/src/libzisp/io.zig b/src/libzisp/io.zig deleted file mode 100644 index 8adf495..0000000 --- a/src/libzisp/io.zig +++ /dev/null @@ -1,12 +0,0 @@ -pub const parser = @import("io/parser.zig"); -pub const unparser = @import("io/unparser.zig"); - -pub const decoder = @import("io/decoder.zig"); -pub const encoder = @import("io/encoder.zig"); - -pub const reader = @import("io/reader.zig"); -pub const writer = @import("io/writer.zig"); - -pub const parse = parser.parse; - -// pub const defaultUnparser = ... diff --git a/src/libzisp/io/Parser.zig b/src/libzisp/io/Parser.zig deleted file mode 100644 index 6684104..0000000 --- a/src/libzisp/io/Parser.zig +++ /dev/null @@ -1,870 +0,0 @@ -// === Syntax === -// -// See docs/parser.md and spec/syntax.bnf to understand the syntax. -// -// -// === Trampolining strategy === -// -// Instead of using recursion directly, the parser is written in a "trampoline" -// style, which ensures that parse depth is not limited by stack size. -// -// All state and context is passed around via a struct pointer. The parser has -// a main loop, which calls a function as dictated by parser.context.next, and -// the function may update the state to have another function called next. -// -// If a function wants to call a recursive subroutine, it pushes some of the -// current context onto a stack, including what function the subroutine should -// return to, and then updates the state to instruct the main loop to call one -// of the entry point subroutines. -// -// If a function wants to make the parser return, either from a subroutine or -// from the main loop, it sets the .result field, and tries to pop the saved -// context. If the context stack was empty, the main loop returns. -// -// -// === Buffering === -// -// For efficiency, call the parser on an input stream with implicit buffering. -// -// The parser does not use its own buffer, beyond one character that may be -// written back into the unread_char field, which is checked at the end to -// ensure it's nothing other than a trailing blank or comment. -// -// This lack of buffering is to ensure that the parser never reads more bytes -// from the input than what it needs to parse a datum. Consider the input: -// -// (a b c) (x y z) -// -// If we used proper buffering, like reading up to 4K bytes per read, then the -// whole stream would be consumed at once before it's parsed. Then, the parser -// would return the first datum, and the rest of the stream would be lost. The -// parser would need some way to reset the input stream's read head to the end -// of the first datum, but not all stream types may support this. -// -// Although in a scenario like this it would be wise to create a single Parser -// instance to call multiple times (in which case it could continue using its -// internal buffer from where it left), this may not always be practical. -// -// One could even have an input stream with Zisp s-expressions and other data -// intertwined, in which case this lack of buffering is also crucial, since one -// needs to alternate between calling the Zisp parser and some other parser on -// the same input stream. -// - -const builtin = @import("builtin"); -const std = @import("std"); - -const lib = @import("../lib.zig"); -const value = @import("../value.zig"); - -const List = std.ArrayListUnmanaged; - -const Value = value.Value; - -const Parser = @This(); - -const is_test = builtin.is_test; -const is_debug = builtin.mode == .Debug; -const detailed_debug = false; - -// zig fmt: off -const DOT = value.rune.pack("DOT"); -const COLON = value.rune.pack("COLON"); -const JOIN = value.rune.pack("JOIN"); -const LABEL = value.rune.pack("LABEL"); -const HASH = value.rune.pack("HASH"); -const QUOTE = value.rune.pack("QUOTE"); -const GRAVE = value.rune.pack("GRAVE"); -const COMMA = value.rune.pack("COMMA"); -const SQUARE = value.rune.pack("SQUARE"); -const BRACE = value.rune.pack("BRACE"); -const VOID = value.rune.packForced(""); -const LSTAIL = value.sstr.pack("."); -// zig fmt: on - -// We could implement an optimization where we swap in a dummy cons when the -// parser is handling a commented-out datum, but this would require changes to -// the algorithm and doesn't seem very important, so it's not implemented. - -const Cons = *const fn (v1: Value, v2: Value) Value; - -fn dummyCons(v1: Value, v2: Value) Value { - _ = v1; - _ = v2; - return value.undef; -} - -const real_cons = &value.pair.cons; -const fake_cons = &dummyCons; - -pub const ParseError = enum { - InvalidCharacter, - UnclosedString, - UnexpectedEof, - UnicodeError, - RuneTooLong, - OutOfMemory, - OutOfRange, - ReadError, -}; - -pub const Context = struct { - // What to do next. - next: ?Fn = undefined, - // For storing a context value, like accumulated list elements. - val: Value = undefined, - // For storing a context char, like list opening bracket. - char: u8 = undefined, -}; - -// parameters -stack_alloc: std.mem.Allocator, -chars_alloc: std.mem.Allocator, - -// state -input: std.io.AnyReader = undefined, -context: Context = .{}, -stack: List(Context) = undefined, -chars: List(u8) = undefined, -cons: Cons = real_cons, -result: Value = undefined, -unread_char: ?u8 = null, -err_msg: []const u8 = undefined, - -pub fn init( - stack_alloc: std.mem.Allocator, - chars_alloc: std.mem.Allocator, - init_stack_mem: usize, - init_chars_mem: usize, -) !Parser { - var p: Parser = .{ - .stack_alloc = stack_alloc, - .chars_alloc = chars_alloc, - }; - const csize = @sizeOf(Context); - p.stack = try .initCapacity(stack_alloc, init_stack_mem / csize); - p.chars = try .initCapacity(chars_alloc, init_chars_mem); - return p; -} - -pub fn deinit(p: *Parser) void { - p.stack.deinit(p.stack_alloc); - p.chars.deinit(p.chars_alloc); -} - -// -// Input reading & unreading -// - -fn read(p: *Parser) !?u8 { - if (is_debug and p.unread_char != null) { - @panic("Called read() while there was an unused character!"); - } - const c = p.input.readByte() catch |e| switch (e) { - error.EndOfStream => return null, - else => return p.err(.ReadError, "???"), - }; - if (detailed_debug) { - std.debug.print("{c}", .{c}); - } - return c; -} - -fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { - return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg); -} - -fn unread(p: *Parser, c: u8) void { - p.unread_char = c; -} - -fn getUnread(p: *Parser) ?u8 { - const c = p.unread_char orelse return null; - p.unread_char = null; - return c; -} - -// -// String accumulation & creation -// - -fn addChar(p: *Parser, c: u8) !void { - try p.chars.append(p.chars_alloc, c); -} - -fn getBareString(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return if (p.chars.items.len <= 6) - value.sstr.pack(p.chars.items) - else - value.istr.intern(p.chars.items, false); -} - -fn getQuotedString(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return if (p.chars.items.len <= 6) - value.sstr.packQuoted(p.chars.items) - else - value.istr.intern(p.chars.items, true); -} - -fn getRune(p: *Parser) Value { - defer p.chars.clearRetainingCapacity(); - return value.rune.pack(p.chars.items); -} - -// -// Stack management & control flow -// - -const Fn = enum { - parseUnit, - parseDatum, - endUnit, - returnContext, - endFirstDatum, - endJoinDatum, - parseJoin, - parseHashDatum, - endHashDatum, - parseRuneDatum, - endRuneDatum, - endLabelDatum, - continueList, - endImproperList, - closeImproperList, - endQuoteExpr, -}; - -fn call(p: *Parser, f: Fn) !void { - try switch (f) { - .parseUnit => parseUnit(p), - .parseDatum => parseDatum(p), - .endUnit => endUnit(p), - .returnContext => returnContext(p), - .endFirstDatum => endFirstDatum(p), - .endJoinDatum => endJoinDatum(p), - .parseJoin => parseJoin(p), - .parseHashDatum => parseHashDatum(p), - .endHashDatum => endHashDatum(p), - .parseRuneDatum => parseRuneDatum(p), - .endRuneDatum => endRuneDatum(p), - .endLabelDatum => endLabelDatum(p), - .continueList => continueList(p), - .endImproperList => endImproperList(p), - .closeImproperList => closeImproperList(p), - .endQuoteExpr => endQuoteExpr(p), - }; -} - -pub fn run(p: *Parser, input: std.io.AnyReader) !Value { - p.input = input; - p.context.next = .parseUnit; - while (p.context.next) |next| { - if (detailed_debug) printStack(p); - try p.call(next); - } - if (p.unread_char) |_| { - return p.err(.InvalidCharacter, "top-level"); - } - return p.result; -} - -fn printStack(p: *Parser) void { - const stack = p.stack.items; - std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{ - stack.len, - p.context.next, - p.context.char, - p.unread_char orelse '_', - }); - if (stack.len > 0) { - var i = stack.len; - while (i > 0) : (i -= 1) { - const prev = stack[i - 1]; - std.debug.print("{}:{any} ctx:'{c}'\n", .{ - i - 1, - prev.next, - prev.char, - }); - } - } -} - -fn err( - p: *Parser, - comptime e: ParseError, - comptime msg: []const u8, -) error{ParseError} { - p.err_msg = @tagName(e) ++ " at: " ++ msg; - return error.ParseError; -} - -fn push(p: *Parser, next: Fn) !void { - try p.stack.append(p.stack_alloc, .{ .next = next }); -} - -fn pushContext(p: *Parser, next: Fn) !void { - try p.stack.append(p.stack_alloc, .{ - .next = next, - .val = p.context.val, - .char = p.context.char, - }); -} - -fn ret(p: *Parser) void { - if (p.stack.pop()) |c| { - p.context = c; - } else { - p.context.next = null; - } -} - -fn subr(p: *Parser, start: Fn, next: Fn) !void { - try p.pushContext(next); - p.context.next = start; -} - -fn jump(p: *Parser, next: Fn, val: ?Value) void { - if (val) |v| p.result = v; - p.context.next = next; -} - -fn abort(p: *Parser, next: Fn, unread_c: u8) void { - p.result = VOID; - p.unread_char = unread_c; - p.context.next = next; -} - -fn retval(p: *Parser, val: Value) void { - p.result = val; - p.ret(); -} - -// -// Start parser functions -// - -fn parseUnit(p: *Parser) !void { - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => try p.push(.parseUnit), - .no => { - p.unread(c); - return p.subr(.parseDatum, .endUnit); - }, - } - } - return p.retval(value.eof.eof); -} - -fn endUnit(p: *Parser) !void { - const c = p.getUnread() orelse return p.ret(); - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => return skipUnitAndReturn(p), - .no => p.unread(c), - } - return p.ret(); -} - -fn skipUnitAndReturn(p: *Parser) !void { - p.context.val = p.result; - return p.subr(.parseUnit, .returnContext); -} - -fn returnContext(p: *Parser) !void { - return p.retval(p.context.val); -} - -fn parseDatum(p: *Parser) !void { - return parseOneDatum(p, p.getUnread().?, .endFirstDatum); -} - -fn endFirstDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.ret(); - } - return parseJoin(p); -} - -fn parseJoin(p: *Parser) !void { - const c = p.getUnread() orelse try p.read() orelse return p.ret(); - switch (c) { - '.', ':' => { - p.context.char = c; - p.unread(try p.readNoEof("join datum")); - }, - else => { - p.context.char = 0; - p.unread(c); - }, - } - p.context.val = p.result; - return p.subr(.parseDatum, .endJoinDatum); -} - -fn endJoinDatum(p: *Parser) !void { - const prev = p.context.val; - const join = p.context.char; - if (p.result.eq(VOID)) { - if (join == 0) { - return p.retval(prev); - } else { - return p.err(.InvalidCharacter, "join datum"); - } - } - const rune = switch (join) { - 0 => JOIN, - '.' => DOT, - ':' => COLON, - else => unreachable, - }; - const joined = p.cons(rune, p.cons(prev, p.result)); - return p.jump(.parseJoin, joined); -} - -fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void { - if (isBareChar(c) or c == '.') { - return p.jump(next, try parseBareString(p, c)); - } - return parseCladDatum(p, c, next); -} - -fn parseBareString(p: *Parser, c: u8) !Value { - const allow_dots = std.ascii.isDigit(c) or switch (c) { - '.', '+', '-' => true, - else => false, - }; - try p.addChar(c); - return parseBareStringRest(p, allow_dots); -} - -fn parseBareEscString(p: *Parser) !Value { - try p.addChar(try parseBareEsc(p)); - return parseBareStringRest(p, false); -} - -fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value { - while (try p.read()) |c| { - if (isBareChar(c) or (allow_dots and c == '.')) { - try p.addChar(c); - } else if (c == '\\') { - try p.addChar(try parseBareEsc(p)); - } else { - p.unread(c); - break; - } - } - return p.getBareString(); -} - -fn parseBareEsc(p: *Parser) !u8 { - const c = try p.readNoEof("bare escape"); - if (isBareEsc(c)) { - return c; - } else { - return p.err(.InvalidCharacter, "bare escape"); - } -} - -fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { - if (c == '\\') { - return p.jump(next, try parseBareEscString(p)); - } - if (c == '"') { - return p.jump(next, try parseQuotedString(p, '"')); - } - if (c == '|') { - return p.jump(next, try parseQuotedString(p, '|')); - } - return switch (c) { - '#' => parseHashExpression(p, next), - '(', '[', '{' => parseList(p, c, next), - '\'', '`', ',' => parseQuoteExpr(p, c, next), - else => p.abort(next, c), - }; -} - -fn parseQuotedString(p: *Parser, close: u8) !Value { - while (try p.read()) |c| { - if (c == close) { - return p.getQuotedString(); - } - if (c != '\\') { - try p.addChar(c); - } else { - try parseQuotedEsc(p, close); - } - } - return error.UnclosedString; -} - -fn parseQuotedEsc(p: *Parser, close: u8) !void { - const c = try p.readNoEof("quoted escape"); - if (c == close) { - return p.addChar(close); - } - if (c == 'u') { - return parseUniHexHandleErrors(p); - } - try p.addChar(switch (c) { - '\\' => c, - '0' => 0, - 'a' => 7, - 'b' => 8, - 't' => 9, - 'n' => 10, - 'v' => 11, - 'f' => 12, - 'r' => 13, - 'e' => 27, - 'x' => try parseHexByte(p, "hex escape"), - else => return p.err(.InvalidCharacter, "quoted escape"), - }); -} - -fn parseUniHexHandleErrors(p: *Parser) !void { - return parseUniHex(p) catch |e| switch (e) { - error.Utf8CannotEncodeSurrogateHalf => p.err( - .UnicodeError, - "unicode escape", - ), - else => e, - }; -} - -fn parseUniHex(p: *Parser) !void { - const msg = "unicode escape"; - - if (try p.readNoEof(msg) != '{') { - return p.err(.InvalidCharacter, msg); - } - - const uc = try parseHex(p, u21, msg); - const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); - if (c != '}') { - return p.err(.InvalidCharacter, msg); - } - - const n = try std.unicode.utf8CodepointSequenceLength(uc); - const buf = try p.chars.addManyAsSlice(p.chars_alloc, n); - _ = try std.unicode.utf8Encode(uc, buf); -} - -fn parseHashExpression(p: *Parser, next: Fn) !void { - const c = try p.readNoEof("hash expression"); - if (try checkBlanks(p, c) != .no) { - return p.err(.InvalidCharacter, "hash expression"); - } - if (std.ascii.isAlphabetic(c)) { - const r = try parseRune(p, c); - return parseRuneEnd(p, r, next); - } - if (c == '%') { - const l = try parseLabel(p); - return parseLabelEnd(p, l, next); - } - if (isBareChar(c)) { - // Reserved for future extensions to syntax sugar. - return p.err(.InvalidCharacter, "hash expression"); - } - // fast-path to avoid subr - if (c == '\\') { - return p.jump(next, p.cons(HASH, try parseBareEscString(p))); - } - if (c == '"') { - return p.jump(next, p.cons(HASH, try parseQuotedString(p, '"'))); - } - if (c == '|') { - return p.jump(next, p.cons(HASH, try parseQuotedString(p, '|'))); - } - p.unread(c); - return p.subr(.parseHashDatum, next); -} - -fn parseHashDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endHashDatum); -} - -fn endHashDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "hash datum"); - } - return p.retval(p.cons(HASH, p.result)); -} - -fn parseRune(p: *Parser, c1: u8) !Value { - try p.addChar(c1); - var len: usize = 1; - while (try p.read()) |c| : (len += 1) { - if (len == 6 or !std.ascii.isAlphanumeric(c)) { - p.unread(c); - return p.getRune(); - } - try p.addChar(c); - } - return p.getRune(); -} - -fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { - const c = p.getUnread() orelse return p.jump(next, r); - if (c == '\\') { - return p.jump(next, p.cons(r, try parseBareString(p, c))); - } - if (c == '"') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '"'))); - } - if (c == '|') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '|'))); - } - p.unread(c); - switch (c) { - '#', '(', '[', '{', '\'', '`', ',' => { - try p.push(next); - p.context.val = r; - // Use jump to prevent recursion. - return p.jump(.parseRuneDatum, null); - }, - else => return p.jump(next, r), - } -} - -fn parseRuneDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endRuneDatum); -} - -fn endRuneDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - p.retval(p.context.val); - } - return p.retval(p.cons(p.context.val, p.result)); -} - -fn parseLabel(p: *Parser) !Value { - const label = try parseHex(p, u48, "datum label"); - return value.fixnum.pack(label); -} - -fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void { - const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label"); - if (c == '%') { - return p.jump(next, p.cons(LABEL, l)); - } - if (c == '=') { - try p.push(next); - p.context.val = l; - return p.subr(.parseDatum, .endLabelDatum); - } - return p.err(.InvalidCharacter, "datum label"); -} - -fn endLabelDatum(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "label datum"); - } - return p.retval(p.cons(LABEL, p.cons(p.context.val, p.result))); -} - -fn parseList(p: *Parser, open: u8, next: Fn) !void { - const head = switch (open) { - '(' => value.nil.nil, - '[' => p.cons(SQUARE, value.nil.nil), - '{' => p.cons(BRACE, value.nil.nil), - else => unreachable, - }; - const close: u8 = switch (open) { - '(' => ')', - '[' => ']', - '{' => '}', - else => unreachable, - }; - while (try p.read()) |c| { - if (c == close) { - return p.jump(next, head); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => { - try listParserSetup(p, head, close, next); - return p.subr(.parseUnit, .parseUnit); - }, - .no => { - try listParserSetup(p, head, close, next); - p.unread(c); - return p.jump(.parseDatum, null); - }, - } - } - return p.err(.UnexpectedEof, "list"); -} - -fn listParserSetup(p: *Parser, head: Value, close: u8, next: Fn) !void { - try p.push(next); - p.context.val = head; - p.context.char = close; - try p.pushContext(.continueList); -} - -fn continueList(p: *Parser) !void { - const close = p.context.char; - - if (p.result.eq(VOID)) { - const c = p.getUnread().?; - if (c == close) { - return endList(p); - } - return p.err(.InvalidCharacter, "list"); - } - - if (p.result.eq(LSTAIL)) { - return p.subr(.parseUnit, .endImproperList); - } - - p.context.val = p.cons(p.result, p.context.val); - - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - if (c == close) { - return endList(p); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => { - try p.pushContext(.continueList); - return p.subr(.parseUnit, .parseUnit); - }, - .no => { - p.unread(c); - return p.subr(.parseDatum, .continueList); - }, - } - } - return p.err(.UnexpectedEof, "list"); -} - -fn endList(p: *Parser) !void { - return p.retval(lib.list.reverse(p.context.val)); -} - -fn endImproperList(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "list tail"); - } - p.context.val = lib.list.reverseWithTail(p.context.val, p.result); - return closeImproperList(p); -} - -fn closeImproperList(p: *Parser) !void { - const result = p.context.val; - const close = p.context.char; - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - if (c == close) { - return p.retval(result); - } - switch (try checkBlanks(p, c)) { - .yes => {}, - .skip_unit => return p.subr(.parseUnit, .closeImproperList), - .no => return p.err(.InvalidCharacter, "after list tail"), - } - } - return p.err(.UnexpectedEof, "after list tail"); -} - -fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void { - const q = switch (c1) { - '\'' => QUOTE, - '`' => GRAVE, - ',' => COMMA, - else => unreachable, - }; - - // fast-path to avoid subr - const c = try p.readNoEof("quote expression"); - if (isBareChar(c) or c == '\\') { - return p.jump(next, p.cons(q, try parseBareString(p, c))); - } - - try p.push(next); - p.context.val = q; - p.unread(c); - return p.subr(.parseDatum, .endQuoteExpr); -} - -fn endQuoteExpr(p: *Parser) !void { - if (p.result.eq(VOID)) { - return p.err(.InvalidCharacter, "quote expression datum"); - } - return p.retval(p.cons(p.context.val, p.result)); -} - -// Helpers - -fn checkBlanks(p: *Parser, c: u8) !enum { yes, skip_unit, no } { - switch (c) { - '\t'...'\r', ' ' => return .yes, - ';' => { - const c2 = try p.read() orelse return .yes; - if (c2 == '\n') return .yes; - if (c2 == '~') return .skip_unit; - while (try p.read()) |c3| if (c3 == '\n') break; - return .yes; - }, - else => return .no, - } -} - -fn isBareChar(c: u8) bool { - return switch (c) { - // zig fmt: off - 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' , - '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true, - // zig fmt: on - else => false, - }; -} - -fn isBareEsc(c: u8) bool { - return switch (c) { - 33...126 => true, - else => false, - }; -} - -fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { - var uc: T = undefined; - - const c1 = try p.readNoEof(emsg); - uc = try parseHexDigit(p, c1, emsg); - - while (try p.read()) |c| { - if (!std.ascii.isHex(c)) { - p.unread(c); - return uc; - } - const shl = std.math.shlExact; - uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg); - uc |= try parseHexDigit(p, c, emsg); - } - return uc; -} - -fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 { - const h1 = try p.readNoEof(emsg); - const h2 = try p.readNoEof(emsg); - const hi = try parseHexDigit(p, h1, emsg); - const lo = try parseHexDigit(p, h2, emsg); - return hi << 4 | lo; -} - -fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 { - return switch (c) { - '0'...'9' => c - '0', - 'A'...'F' => c - 'A' + 10, - 'a'...'f' => c - 'a' + 10, - else => p.err(.InvalidCharacter, emsg), - }; -} diff --git a/src/libzisp/io/decoder.zig b/src/libzisp/io/decoder.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/decoder.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/io/encoder.zig b/src/libzisp/io/encoder.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/encoder.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig deleted file mode 100644 index d004c91..0000000 --- a/src/libzisp/io/parser.zig +++ /dev/null @@ -1,80 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("../value.zig"); - -const Parser = @import("Parser.zig"); - -const Value = value.Value; - -const is_test = builtin.is_test; -const is_debug = builtin.mode == .Debug; - -// In debug, we want to see if we leak, so very small numbers. -const def_init_stack_mem = (if (is_debug) 2 else 32) * @sizeOf(Parser.Context); -const def_init_chars_mem = (if (is_debug) 2 else 2048); - -pub const DefaultStackSfa = std.heap.StackFallbackAllocator(def_init_stack_mem); -pub const DefaultCharsSfa = std.heap.StackFallbackAllocator(def_init_chars_mem); - -pub fn default( - fb_alloc: *std.mem.Allocator, - stack_sfa: *DefaultStackSfa, - chars_sfa: *DefaultCharsSfa, -) !Parser { - if (!is_test and is_debug) { - fb_alloc.* = std.heap.DebugAllocator(.{}).init; - } - - const alloc = if (is_test) - std.testing.allocator - else if (is_debug) - fb_alloc.allocator() - else - std.heap.smp_allocator; - - stack_sfa.* = std.heap.stackFallback(def_init_stack_mem, alloc); - chars_sfa.* = std.heap.stackFallback(def_init_chars_mem, alloc); - - return Parser.init( - stack_sfa.get(), - chars_sfa.get(), - def_init_stack_mem, - def_init_chars_mem, - ); -} - -pub fn deinit(fb_alloc: *std.mem.Allocator) void { - if (!is_test and is_debug) { - if (fb_alloc.?.deinit() == .leak) { - @panic("parser leaked"); - } - } -} - -pub fn parse(input: std.io.AnyReader) Value { - var fb_alloc: std.mem.Allocator = undefined; - var stack_sfa: DefaultStackSfa = undefined; - var chars_sfa: DefaultCharsSfa = undefined; - var p = default(&fb_alloc, &stack_sfa, &chars_sfa) catch @panic("OOM"); - defer p.deinit(); - return p.run(input) catch { - if (p.unread_char) |c| { - std.debug.panic( - "Parse error: {s}, unread_char: 0x{x}\n", - .{ p.err_msg, c }, - ); - } else { - std.debug.panic("Parse error: {s}\n", .{p.err_msg}); - } - }; -} - -pub fn _parse(input: std.io.AnyReader) !Value { - var fb_alloc: std.mem.Allocator = undefined; - var stack_sfa: DefaultStackSfa = undefined; - var chars_sfa: DefaultCharsSfa = undefined; - var p = try default(&fb_alloc, &stack_sfa, &chars_sfa); - defer p.deinit(); - return p.run(input); -} diff --git a/src/libzisp/io/reader.zig b/src/libzisp/io/reader.zig deleted file mode 100644 index 3465cb3..0000000 --- a/src/libzisp/io/reader.zig +++ /dev/null @@ -1,10 +0,0 @@ -// See parse.zig for details. - -const parser = @import("parser.zig"); -const decoder = @import("decoder.zig"); - -const Value = @import("../value.zig").Value; - -pub fn readCode(input: []const u8) Value { - return decoder.decode(parser.parse(input, .code)); -} diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig deleted file mode 100644 index d703182..0000000 --- a/src/libzisp/io/unparser.zig +++ /dev/null @@ -1,122 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const istr = value.istr; -const seq = value.seq; - -const ShortString = value.ShortString; -const OtherTag = value.OtherTag; -const Value = value.Value; - -pub fn unparse(w: anytype, v: Value) anyerror!void { - try if (value.double.check(v)) - unparseDouble(w, v) - else if (value.fixnum.check(v)) - unparseFixnum(w, v) - else if (value.ptr.checkZisp(v)) - unparseHeap(w, v) - else - unparseOther(w, v); -} - -fn unparseDouble(w: anytype, v: Value) !void { - _ = w; - _ = v; - @panic("not implemented"); -} - -fn unparseFixnum(w: anytype, v: Value) !void { - _ = w; - _ = v; - @panic("not implemented"); -} - -fn unparseHeap(w: anytype, v: Value) !void { - const p, const t = value.ptr.unpack(v); - try switch (t) { - .pair => unparsePair(w, @ptrCast(p)), - .seq => unparseSeq(w, @ptrCast(p)), - else => @panic("not implemented"), - }; -} - -fn unparseOther(w: anytype, v: Value) !void { - try switch (v.other.tag) { - .rune => unparseRune(w, v), - .sstr => unparseSstr(w, v), - .qstr => unparseQstr(w, v), - .char => unparseChar(w, v), - .misc => unparseMisc(w, v), - }; -} - -fn unparseRune(w: anytype, v: Value) !void { - const name = value.rune.unpack(v); - try w.writeByte('#'); - try w.writeAll(name.constSlice()); -} - -fn unparseSstr(w: anytype, v: Value) !void { - const str = value.sstr.unpack(v); - try w.writeAll(str.constSlice()); -} - -fn unparseQstr(w: anytype, v: Value) !void { - const str = value.sstr.unpack(v); - try w.writeByte('"'); - try w.writeAll(str.constSlice()); - try w.writeByte('"'); -} - -fn unparseChar(w: anytype, v: Value) !void { - var buf: [4]u8 = undefined; - const len = try std.unicode.utf8Encode(v.char.value, &buf); - try w.writeAll(buf[0..len]); -} - -fn unparseMisc(w: anytype, v: Value) !void { - try switch (v.misc.value) { - .f => w.writeAll("#f"), - .t => w.writeAll("#t"), - .nil => w.writeAll("()"), - .eof => w.writeAll("#eof"), - .undef => w.writeAll("#undef"), - }; -} - -fn unparsePair(w: anytype, p: *[2]Value) !void { - try w.writeByte('('); - try unparse(w, p[0]); - var cdr = p[1]; - while (value.pair.check(cdr)) : (cdr = value.pair.cdr(cdr)) { - try w.writeByte(' '); - try unparse(w, value.pair.car(cdr)); - } - if (!value.nil.check(cdr)) { - try w.writeByte(' '); - try w.writeByte('.'); - try w.writeByte(' '); - try unparse(w, cdr); - } - try w.writeByte(')'); -} - -fn unparseSeq(w: anytype, p: *seq.Header) !void { - const h = istr.getHeaderFromPtr(@ptrCast(p)); - switch (h.type) { - .string => try unparseString(w, h), - else => @panic("not implemented"), - } -} - -fn unparseString(w: anytype, h: *seq.Header) !void { - const info = h.info.string; - if (info.quoted) { - try w.writeByte('"'); - } - try w.writeAll(h.bytes()); - if (info.quoted) { - try w.writeByte('"'); - } -} diff --git a/src/libzisp/io/writer.zig b/src/libzisp/io/writer.zig deleted file mode 100644 index eb27e20..0000000 --- a/src/libzisp/io/writer.zig +++ /dev/null @@ -1 +0,0 @@ -// wip diff --git a/src/libzisp/lib.zig b/src/libzisp/lib.zig deleted file mode 100644 index 7752110..0000000 --- a/src/libzisp/lib.zig +++ /dev/null @@ -1 +0,0 @@ -pub const list = @import("lib/list.zig"); diff --git a/src/libzisp/lib/list.zig b/src/libzisp/lib/list.zig deleted file mode 100644 index 9d6a6bc..0000000 --- a/src/libzisp/lib/list.zig +++ /dev/null @@ -1,20 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub fn reverse(list: Value) Value { - return reverseWithTail(list, value.nil.nil); -} - -pub fn reverseWithTail(list: Value, tail: Value) Value { - var head = list; - var result = tail; - while (!value.nil.check(head)) { - value.pair.assert(head); - const car = value.pair.car(head); - const cdr = value.pair.cdr(head); - result = value.pair.cons(car, result); - head = cdr; - } - return result; -} diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig deleted file mode 100644 index fbe907a..0000000 --- a/src/libzisp/value.zig +++ /dev/null @@ -1,325 +0,0 @@ -// -// === NaN Packing Strategy === -// -// Format of a double, in Zig least-to-most significant field order: -// -// { fraction: u52, exponent: u11, sign: u1 } -// -// When the exponent bits are all set, it's either a NaN or an Infinity. -// -// For value packing, almost all remaining 53 bits are available, giving us -// about 2^53 values, except for the four following bit patterns: -// -// *** FORBIDDEN VALUES *** -// -// 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 } -// -// 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 } -// -// 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 } -// -// 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 } -// -// The abbreviation "cqNaN" stands for canonical quiet NaN. -// -// Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest -// being zero. The fraction MSb is also called the is_quiet flag, because it -// demarcates quiet NaNs. The rest being zero makes it the canonical qNaN. -// -// The positive and negative cqNaN are the *only* NaN values that can actually -// be returned by any FP operations, which is why we don't use them to pack -// values; we want to be able to represent NaN in Zisp as a double. -// -// Beyond those four bit patterns, all values with a maximum exponent (all bits -// set) are fair game for representing other values, so 2^53 - 4 possibilities. -// -// We split those 2^53 - 4 available values into four groups, each allowing for -// 2^51 - 1 different values to be encoded in them: -// -// sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1 -// -// sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2 -// -// sign = 0, quiet = 1 :: Pointers -// -// sign = 0, quiet = 0 :: Others -// -// -// === Fixnums === -// -// Negative fixnums actually represent themselves without needing to go through -// any transformation. Only the smallest 52-bit signed negative, -2^51, cannot -// be represented, as it would step on forbidden value 1, Negative cqNaN. -// -// Positive fixnums go through bitsiwe NOT (implemented via an XOR mask here to -// make it one operation together with the NaN masking) to avoid the all-zero -// payload value, which would step on forbidden value 2, Negative Infinity. -// -// -// === Pointers === -// -// Pointers are further subdivided as follows based on the remaining 51 bits, -// with the first three bits used as a sort of tag: -// -// 000 :: Pointer to Zisp heap object (string, vector, etc.) -// -// 001 :: Weak pointer to Zisp heap object -// -// 01. :: Undefined (may be used by GC to flag pointers for some reason?) -// -// 1.. :: Foreign pointer (basically, a 50-bit fixnum of another type) -// -// This means pointers to the Zisp heap are 48 bits. Of those, we only really -// need 45, since 64-bit platforms are in practice limited to 48-bit addresses, -// and allocations happen at 8-byte boundaries, meaning the least significant 3 -// bits are always unset. Thus, we are able to store yet another 3-bit tag in -// those 48-bit pointers alongside the actual, multiple-of-8, 48-bit address. -// -// The forbidden value 3, Positive cqNaN, is avoided thanks to the fact that a -// regular Zisp heap pointer can never be null. Weak pointers, which can be -// null, avoid stepping on that forbidden value thanks to bit 49 being set. -// -// Foreign pointers allow storing arbitrary pointers, or integers basically, of -// up to 50 bits, without any further definition in Zisp of what they mean. -// -// -// === Other values === -// -// This 51-bit range is divided as follows, based on the high bits: -// -// 000 :: Rune -// -// 001 :: Short string -// -// 010 :: Short string literal -// -// 011 :: Unicode code point -// -// 100 :: Singleton values -// -// 101, 110, 111 :: Undefined -// -// Runes are symbols of 1 to 6 ASCII characters used to implement reader syntax. -// -// Zisp strings are immutable. Any string fitting into 6 bytes or less will be -// stored as an immediate value, not requiring any heap allocation or interning. -// It's implicitly interned, so to speak. This includes the empty string. -// -// The null byte serves as a terminator for strings shorter than 6 bytes, and -// therefore cannot appear in these strings; a string that short but actually -// containing a null byte will need to be heap allocated like other strings. -// -// There may also be strings that are this short, but ended up on the heap due -// to being uninterned. Interning them will return the equivalent short string -// as an immediate. -// -// The separate type for a short string *literal* is for an efficiency hack in -// the parser; see commentary there. -// -// Unicode code points need a maximum of 21 bits, yet we have 48 available. -// This may be exploited for a future extension. -// -// Similarly, it's very unlikely that we will ever need more than a handful of -// singleton values (false, true, nil, and so on). As such, this range of bit -// patterns may be subdivided in the future. Right now, only the lowest 8 bits -// are allowed to be set, with the other 40 being reserved, so there's a limit -// of 256 singleton values that can be defined. -// -// And top of that, we have three more 48-bit value ranges that are unused! -// -// The forbidden value 4, Positive Infinity, would be the "empty string rune" -// but that isn't allowed anyway, so all is fine. -// - -// Here's the original article explaining the strategy: -// -// https://tkammer.de/zisp/notes/nan.html -// -// More about runes: -// -// https://tkammer.de/zisp/notes/symbols.html -// -// Note: Packed structs are least-to-most significant, so the order of fields -// must be reversed relative to a typical big-endian illustration of the bit -// patterns of IEEE 754 double-precision floating point numbers. - -const std = @import("std"); - -pub const double = @import("value/double.zig"); -pub const fixnum = @import("value/fixnum.zig"); - -pub const ptr = @import("value/ptr.zig"); -pub const seq = @import("value/seq.zig"); - -pub const rune = @import("value/rune.zig"); -pub const sstr = @import("value/sstr.zig"); -pub const char = @import("value/char.zig"); -pub const boole = @import("value/boole.zig"); -pub const nil = @import("value/nil.zig"); -pub const eof = @import("value/eof.zig"); - -pub const pair = @import("value/pair.zig"); -pub const istr = @import("value/istr.zig"); - -// To fill up the u11 exponent part of a NaN. -const FILL = 0x7ff; - -// Used when dealing with runes and short strings. -pub const ShortString = std.BoundedArray(u8, 6); - -pub const OtherTag = enum(u3) { rune, sstr, qstr, char, misc }; - -pub const MiscValue = enum(u8) { f, t, nil, eof, undef = 255 }; - -pub const undef = Value{ .misc = .{ .value = .undef } }; - -/// Represents a Zisp value/object. -pub const Value = packed union { - /// To get the value as a regular double. - double: f64, - - /// To get an agnostic value for direct comparison with == i.e. eq?. - bits: u64, - - // Some of the structs below are just for inspection, whereas others are to - // initialize a new value of that category as well as read it that way. - - /// Inspection through the lens of the general IEEE 754 double layout. - ieee: packed struct { - rest: u51, - quiet: bool, - exp: u11, - sign: bool, - }, - - /// For initializing and reading fixnums. - fixnum: packed struct { - code: u51, - negative: bool, - _: u11 = FILL, - _is_fixnum: bool = true, - }, - - /// Inspection through the lens of the ptr category. - ptr: packed struct { - _value: u48, - is_weak: bool, - _unused: bool, - is_foreign: bool, - _is_ptr: bool, - _: u11, - _is_fixnum: bool, - }, - - /// For initializing and reading foreign pointers. - fptr: packed struct { - value: u50, - _is_foreign: bool = true, - _is_ptr: bool = true, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading Zisp heap pointers. - zptr: packed struct { - tagged_value: u48, - is_weak: bool = false, - _unused: bool = false, - _is_foreign: bool = false, - _is_ptr: bool = true, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// Inspection as an other (non-fixnum, non-pointer) packed value. - other: packed struct { - _value: u48, - tag: OtherTag, - _is_ptr: bool, - _: u11, - _is_ifxnum: bool, - }, - - /// For initializing and reading runes. - rune: packed struct { - // actually [6]u8 but packed struct cannot contain arrays - name: u48, - _tag: OtherTag = .rune, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading short strings. - sstr: packed struct { - // actually [6]u8 but packed struct cannot contain arrays - string: u48, - tag: OtherTag, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading characters. - char: packed struct { - value: u21, - _reserved: u27 = 0, - _tag: OtherTag = .char, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// For initializing and reading misc values aka singletons. - misc: packed struct { - value: MiscValue, - _reserved: u40 = 0, - _tag: OtherTag = .misc, - _is_ptr: bool = false, - _: u11 = FILL, - _is_fixnum: bool = false, - }, - - /// Hexdumps the value. - pub inline fn dump(v: Value) void { - std.debug.dumpHex(std.mem.asBytes(&v)); - } - - pub fn eq(v1: Value, v2: Value) bool { - return v1.bits == v2.bits; - } - - // The following aren't type predicates per se, but rather determine which - // general category the value is in. The exceptions are fixnum and double, - // since those aren't sub-categorized into further types. - - /// Checks for a Zisp double, including: +nan.0, -nan.0, +inf.0, -inf.0 - pub inline fn isDouble(v: Value) bool { - return v.ieee.exp != FILL or v.ieee.rest == 0; - } - - /// Checks for a non-double Zisp value packed into a NaN. - pub inline fn isPacked(v: Value) bool { - return !v.isDouble(); - } - - /// Checks for a fixnum. - pub inline fn isFixnum(v: Value) bool { - return v.isPacked() and v.ieee.sign; - } - - /// Checks for any kind of pointer. - pub inline fn isPtr(v: Value) bool { - return v.isPacked() and !v.ieee.sign and v.ieee.quiet; - } - - /// Checks for a non-double, non-fixnum, non-pointer Zisp value. - pub inline fn isOther(v: Value) bool { - return v.isPacked() and !v.ieee.sign and !v.ieee.quiet; - } - - /// Checks for an other type of value based on tag. - pub inline fn isOtherTag(v: Value, tag: OtherTag) bool { - return v.isOther() and v.other.tag == tag; - } -}; diff --git a/src/libzisp/value/boole.zig b/src/libzisp/value/boole.zig deleted file mode 100644 index 2e94e4d..0000000 --- a/src/libzisp/value/boole.zig +++ /dev/null @@ -1,33 +0,0 @@ -const Value = @import("../value.zig").Value; - -pub const f = Value{ .misc = .{ .value = .f } }; -pub const t = Value{ .misc = .{ .value = .t } }; - -// Zig API - -/// Checks if the value is a boole. -pub fn check(v: Value) bool { - return v.bits == f.bits or v.bits == t.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -pub fn pack(b: bool) Value { - return if (b) t else f; -} - -pub fn unpack(v: Value) bool { - assert(v); - return v.bits == t.bits; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return pack(check(v)); -} diff --git a/src/libzisp/value/char.zig b/src/libzisp/value/char.zig deleted file mode 100644 index 09a3034..0000000 --- a/src/libzisp/value/char.zig +++ /dev/null @@ -1,31 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.char); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not char"); - } -} - -pub fn pack(c: u21) Value { - return .{ .char = .{ .value = c } }; -} - -pub fn unpack(v: Value) u21 { - assert(v); - return @truncate(v.char.value); -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/double.zig b/src/libzisp/value/double.zig deleted file mode 100644 index 5cfe6ee..0000000 --- a/src/libzisp/value/double.zig +++ /dev/null @@ -1,38 +0,0 @@ -const value = @import("../value.zig"); -const Value = value.Value; - -// Zig API - -/// Checks for a Zisp double (double, +inf, -inf, or canonical NaN). -pub fn check(v: Value) bool { - return v.isDouble(); -} - -/// Asserts check(). -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not double"); - } -} - -pub fn pack(d: f64) Value { - return .{ .double = d }; -} - -pub fn unpack(v: Value) f64 { - assert(v); - return v.double; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn add(v1: Value, v2: Value) Value { - const d1 = unpack(v1); - const d2 = unpack(v2); - return pack(d1 + d2); -} diff --git a/src/libzisp/value/eof.zig b/src/libzisp/value/eof.zig deleted file mode 100644 index 4b16669..0000000 --- a/src/libzisp/value/eof.zig +++ /dev/null @@ -1,28 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub const eof = Value{ .misc = .{ .value = .eof } }; - -// Zig API - -pub fn check(v: Value) bool { - return v.bits == eof.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -// Zisp API - -pub fn get() Value { - return eof; -} - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig deleted file mode 100644 index 80fb4ae..0000000 --- a/src/libzisp/value/fixnum.zig +++ /dev/null @@ -1,84 +0,0 @@ -const std = @import("std"); -const value = @import("../value.zig"); - -const Value = value.Value; - -// Zig API - -/// Checks for a Zisp fixnum. -pub fn check(v: Value) bool { - return v.isFixnum(); -} - -/// Asserts check(). -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not fixnum"); - } -} - -// See detailed NaN packing docs for why the +/- 1. -pub const min = std.math.minInt(i52) + 1; -pub const max = std.math.maxInt(i52) - 1; - -fn assertValidRange(int: i64) void { - if (int < min) { - std.debug.print("int too small for fixnum: {}\n", .{int}); - @panic("int too small for fixnum"); - } - if (int > max) { - std.debug.print("int too large for fixnum: {}\n", .{int}); - @panic("int too large for fixnum"); - } -} - -fn packNegative(int: i64) Value { - return @bitCast(int); -} - -fn unpackNegative(v: Value) i64 { - return @bitCast(v); -} - -const positive_mask: u64 = 0xfff7ffffffffffff; - -fn packPositive(int: i64) Value { - const uint: u64 = @bitCast(int); - return @bitCast(uint ^ positive_mask); -} - -fn unpackPositive(v: Value) i64 { - const uint: u64 = @bitCast(v); - return @bitCast(uint ^ positive_mask); -} - -pub fn pack(int: i64) Value { - assertValidRange(int); - if (int < 0) { - return packNegative(int); - } else { - return packPositive(int); - } -} - -pub fn unpack(v: Value) i64 { - assert(v); - if (v.fixnum.negative) { - return unpackNegative(v); - } else { - return unpackPositive(v); - } -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn add(v1: Value, v2: Value) Value { - const int1 = unpack(v1); - const int2 = unpack(v2); - return pack(int1 + int2); -} diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig deleted file mode 100644 index abd0447..0000000 --- a/src/libzisp/value/istr.zig +++ /dev/null @@ -1,65 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const ptr = @import("ptr.zig"); -const seq = @import("seq.zig"); - -const Hval = gc.Hval; - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return ptr.checkZispTag(v, .seq); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not istr"); - } -} - -pub fn intern(str: []const u8, quoted: bool) Value { - if (str.len > value.fixnum.max) { - @panic("String length out of fixnum range."); - } - const header: seq.Header = .{ - .type = .string, - .info = .{ .string = .{ - .enc = .utf8, - .quoted = quoted, - .interned = true, - } }, - .size = @intCast(str.len), - }; - const header_ptr = gc.intern(header, str); - return ptr.pack(@ptrCast(header_ptr), .seq); -} - -pub fn getHeader(v: Value) *seq.Header { - assert(v); - const header_ptr, _ = ptr.unpack(v); - return gc.istrHeader(header_ptr); -} - -pub fn getHeaderFromPtr(p: *Hval) *seq.Header { - return gc.istrHeader(p); -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn len(v: Value) Value { - const l = getHeader(v).size; - if (l > value.fixnum.max) { - @panic("string length out of range"); - } - return value.fixnum.pack(@intCast(l)); -} diff --git a/src/libzisp/value/nil.zig b/src/libzisp/value/nil.zig deleted file mode 100644 index f95ecad..0000000 --- a/src/libzisp/value/nil.zig +++ /dev/null @@ -1,28 +0,0 @@ -const value = @import("../value.zig"); - -const Value = value.Value; - -pub const nil = Value{ .misc = .{ .value = .nil } }; - -// Zig API - -pub fn check(v: Value) bool { - return v.bits == nil.bits; -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not bool"); - } -} - -// Zisp API - -pub fn get() Value { - return nil; -} - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} diff --git a/src/libzisp/value/pair.zig b/src/libzisp/value/pair.zig deleted file mode 100644 index 6ea1edf..0000000 --- a/src/libzisp/value/pair.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const ptr = @import("ptr.zig"); - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return ptr.checkZispTag(v, .pair); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not pair"); - } -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn cons(v1: Value, v2: Value) Value { - return ptr.pack(@ptrCast(gc.cons(v1, v2)), .pair); -} - -fn getMem(v: Value) *[2]Value { - return @ptrCast(ptr.unpack(v).@"0"); -} - -pub fn car(v: Value) Value { - assert(v); - return getMem(v)[0]; -} - -pub fn cdr(v: Value) Value { - assert(v); - return getMem(v)[1]; -} - -pub fn setcar(v: Value, new: Value) void { - assert(v); - getMem(v)[0] = new; -} - -pub fn setcdr(v: Value, new: Value) void { - assert(v); - getMem(v)[1] = new; -} diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig deleted file mode 100644 index 2ed3765..0000000 --- a/src/libzisp/value/ptr.zig +++ /dev/null @@ -1,174 +0,0 @@ -const std = @import("std"); - -const gc = @import("../gc.zig"); -const value = @import("../value.zig"); - -const Hval = gc.Hval; - -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isPtr(); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not a pointer"); - } -} - -// Foreign Pointers - -pub fn checkForeign(v: Value) bool { - return check(v) and v.ptr.is_foreign; -} - -pub fn assertForeign(v: Value) void { - if (!checkForeign(v)) { - v.dump(); - @panic("not foreign pointer"); - } -} - -pub fn packForeign(int: u50) Value { - return .{ .fptr = .{ .value = int } }; -} - -pub fn unpackForeign(v: Value) u50 { - assertForeign(v); - return v.fptr.value; -} - -// Zisp Pointers - -pub fn checkZisp(v: Value) bool { - return check(v) and !v.ptr.is_foreign; -} - -pub fn assertZisp(v: Value) void { - if (!checkZisp(v)) { - v.dump(); - @panic("not zisp pointer"); - } -} - -pub fn checkWeak(v: Value) bool { - return checkZisp(v) and v.zptr.is_weak; -} - -pub fn assertWeak(v: Value) void { - if (!checkWeak(v)) { - v.dump(); - @panic("not zisp weak pointer"); - } -} - -pub fn checkZispTag(v: Value, tag: Tag) bool { - return checkZisp(v) and unpack(v).@"1" == tag; -} - -pub fn assertZispTag(v: Value, tag: Tag) void { - if (!checkZispTag(v, tag)) { - v.dump(); - @panic("not zisp pointer or wrong tag"); - } -} - -pub fn checkStrong(v: Value) bool { - return checkZisp(v) and !v.zptr.is_weak; -} - -pub fn assertStrong(v: Value) void { - if (!checkStrong(v)) { - v.dump(); - @panic("not zisp strong pointer"); - } -} - -pub fn packZisp(ptr: *Hval, tag: Tag, is_weak: bool) Value { - return .{ .zptr = .{ - .tagged_value = tagPtr(ptr, tag), - .is_weak = is_weak, - } }; -} - -pub fn pack(ptr: *Hval, tag: Tag) Value { - return packZisp(ptr, tag, false); -} - -pub fn packWeak(ptr: *Hval, tag: Tag) Value { - return packZisp(ptr, tag, true); -} - -// Unpacks weak as well; no need for a separate fn. -pub fn unpack(v: Value) struct { *Hval, Tag } { - assertZisp(v); - return untagPtr(v.zptr.tagged_value); -} - -pub fn setWeakNull(v: *Value) void { - assertWeak(v.*); - v.zptr.tagged_value = 0; -} - -pub fn isWeakNull(v: Value) bool { - assertWeak(v); - return v.zptr.tagged_value == 0; -} - -fn tagPtr(ptr: *Hval, tag: Tag) u48 { - const int: usize = @intFromPtr(ptr); - const untagged: u48 = @intCast(int); - return untagged | @intFromEnum(tag); -} - -fn untagPtr(tagged: u48) struct { *Hval, Tag } { - const untagged: u48 = tagged & 0xfffffffffff8; - const ptr: *Hval = @ptrFromInt(untagged); - const int: u3 = @truncate(tagged); - const tag: Tag = @enumFromInt(int); - return .{ ptr, tag }; -} - -pub const Tag = enum(u3) { - /// Pair aka cons cell aka *[2]Value - pair, - /// Sequence of various kinds (16-bit meta, 48-bit length, then data) - seq, - /// Procedure - proc, -}; - -// Zisp API - -pub fn predForeign(v: Value) Value { - return value.boole.pack(checkForeign(v)); -} - -pub fn makeWeak(v: Value) Value { - assertStrong(v); - var copy = v; - copy.zptr.is_weak = true; - return copy; -} - -pub fn predWeak(v: Value) Value { - return value.boole.pack(checkWeak(v)); -} - -pub fn predWeakNull(v: Value) Value { - return value.boole.pack(isWeakNull(v)); -} - -pub fn getWeak(v: Value) Value { - if (isWeakNull(v)) { - return value.boole.f; - } else { - var copy = v; - copy.zptr.is_weak = false; - return copy; - } -} diff --git a/src/libzisp/value/rune.zig b/src/libzisp/value/rune.zig deleted file mode 100644 index 195210e..0000000 --- a/src/libzisp/value/rune.zig +++ /dev/null @@ -1,82 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const ShortString = value.ShortString; -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.rune); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not rune"); - } -} - -pub fn isValidRune(s: []const u8) bool { - if (s.len == 0 or s.len > 6) { - return false; - } - if (!std.ascii.isAlphabetic(s[0])) { - return false; - } - for (s[1..]) |c| { - if (!std.ascii.isAlphanumeric(c)) { - return false; - } - } - return true; -} - -fn assertValidRune(s: []const u8) void { - if (!isValidRune(s)) { - std.debug.print("invalid rune: '{s}'\n", .{s}); - @panic("invalid rune"); - } -} - -// See sstr.zig which uses equivalent code; probably good to keep in sync. - -pub fn pack(s: []const u8) Value { - assertValidRune(s); - return packForced(s); -} - -pub fn packForced(s: []const u8) Value { - var v = Value{ .rune = .{ .name = 0 } }; - const dest: [*]u8 = @ptrCast(&v.rune.name); - @memcpy(dest, s); - return v; -} - -pub fn unpack(v: Value) ShortString { - assert(v); - const s: [6]u8 = @bitCast(v.rune.name); - inline for (0..6) |i| { - if (s[i] == 0) return .{ .buffer = s, .len = i }; - } - return .{ .buffer = s, .len = 6 }; -} - -// Zisp API - -pub fn pred(v: Value) Value { - return value.boole.pack(check(v)); -} - -pub fn make(v: Value) Value { - const s, const l = value.sstr.unpack(v); - return pack(s[0..l]); -} - -pub fn getName(v: Value) Value { - const s, const l = unpack(v); - return value.sstr.pack(s[0..l]); -} - -// TODO: Registering decoders diff --git a/src/libzisp/value/seq.zig b/src/libzisp/value/seq.zig deleted file mode 100644 index 3418a5a..0000000 --- a/src/libzisp/value/seq.zig +++ /dev/null @@ -1,57 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -const value = @import("../value.zig"); -const gc = @import("../gc.zig"); - -const Value = value.Value; - -const Endian = enum(u1) { - little, - big, - - const native: Endian = switch (builtin.target.cpu.arch.endian()) { - .little => .little, - .big => .big, - }; -}; - -pub const Header = packed struct(u64) { - type: enum(u2) { - values, - string, - ints, - floats, - }, - info: packed union { - values: packed struct(u14) { - weak: bool = false, - _: u13 = 0, - }, - string: packed struct(u14) { - enc: enum(u4) { utf8, utf16, utf24, utf32 }, - endian: Endian = .native, - quoted: bool, - interned: bool, - _: u7 = 0, - }, - ints: packed struct(u14) { - signed: bool, - endian: Endian = .native, - size: u12, - }, - floats: packed struct(u14) { - double: bool, - endian: Endian = .native, - _: u12 = 0, - }, - }, - size: u48, - - pub fn bytes(self: *Header) []u8 { - const hs = @sizeOf(Header); - const ptr: [*]u8 = @ptrCast(self); - const end = hs + self.size; - return ptr[hs..end]; - } -}; diff --git a/src/libzisp/value/sstr.zig b/src/libzisp/value/sstr.zig deleted file mode 100644 index b02fd3d..0000000 --- a/src/libzisp/value/sstr.zig +++ /dev/null @@ -1,78 +0,0 @@ -const std = @import("std"); - -const value = @import("../value.zig"); - -const ShortString = value.ShortString; -const OtherTag = value.OtherTag; -const Value = value.Value; - -// Zig API - -pub fn check(v: Value) bool { - return v.isOtherTag(.sstr) or v.isOtherTag(.qstr); -} - -pub fn assert(v: Value) void { - if (!check(v)) { - v.dump(); - @panic("not sstr"); - } -} - -pub fn checkQuoted(v: Value) bool { - return v.isOtherTag(.qstr); -} - -// For now, ignore encoding, just treat it as []u8. - -pub fn isValidSstr(s: []const u8) bool { - if (s.len > 6) { - return false; - } - for (s) |c| { - if (c == 0) { - return false; - } - } - return true; -} - -fn assertValidSstr(s: []const u8) void { - if (!isValidSstr(s)) { - std.debug.print("invalid sstr: {s}\n", .{s}); - @panic("invalid sstr"); - } -} - -// Different ways of doing the following have been tested, including manual -// shifting and bit masking, but memcpy always wins easily according to our -// micro-benchmarks, under both ReleaseSafe and ReleaseFast. - -// Note: rune.zig uses equivalent code; probably good to keep in sync. - -pub fn pack(s: []const u8) Value { - return _pack(s, .sstr); -} - -pub fn packQuoted(s: []const u8) Value { - return _pack(s, .qstr); -} - -fn _pack(s: []const u8, tag: OtherTag) Value { - assertValidSstr(s); - var v = Value{ .sstr = .{ .string = 0, .tag = tag } }; - const dest: [*]u8 = @ptrCast(&v.sstr.string); - @memcpy(dest, s); - return v; -} - -pub fn unpack(v: Value) ShortString { - assert(v); - const s: [6]u8 = @bitCast(v.sstr.string); - inline for (0..6) |i| { - if (s[i] == 0) return .{ .buffer = s, .len = i }; - } - return .{ .buffer = s, .len = 6 }; -} - -// No Zisp API for sstr specifically, since it's a string. See string.zig. |
