Big cleanup.

author: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-30 18:34:00 +0200
committer: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-30 18:34:00 +0200
commit: 3d05c94b9d8aa964e4ff848c95d5999cec170e04 (patch)
tree: 2864753404e8c9cb9fa8bce537772a29b90d1cbd /src/zisp/io
parent: 9340f3c44ca0d9d4fdee905fc6e8b428824b9185 (diff)
7 files changed, 1057 insertions, 0 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
new file mode 100644
index 0000000..08bf8ba
--- /dev/null
+++ b/src/zisp/io/Parser.zig
@@ -0,0 +1,847 @@
+// === Syntax ===
+//
+// See docs/parser.md and spec/syntax.bnf to understand the syntax.
+//
+//
+// === Trampolining strategy ===
+//
+// Instead of using recursion directly, the parser is written in a "trampoline"
+// style, which ensures that parse depth is not limited by stack size.
+//
+// All state and context is passed around via a struct pointer.  The parser has
+// a main loop, which calls a function as dictated by parser.context.next, and
+// the function may update the state to have another function called next.
+//
+// If a function wants to call a recursive subroutine, it pushes some of the
+// current context onto a stack, including what function the subroutine should
+// return to, and then updates the state to instruct the main loop to call one
+// of the entry point subroutines.
+//
+// If a function wants to make the parser return, either from a subroutine or
+// from the main loop, it sets the .result field, and tries to pop the saved
+// context.  If the context stack was empty, the main loop returns.
+//
+//
+// === Buffering ===
+//
+// For efficiency, call the parser on an input stream with implicit buffering.
+//
+// The parser does not use its own buffer, beyond one character that may be
+// written back into the unread_char field, which is checked at the end to
+// ensure it's nothing other than a trailing blank or comment.
+//
+// This lack of buffering is to ensure that the parser never reads more bytes
+// from the input than what it needs to parse a datum.  Consider the input:
+//
+//   (a b c) (x y z)
+//
+// If we used proper buffering, like reading up to 4K bytes per read, then the
+// whole stream would be consumed at once before it's parsed.  Then, the parser
+// would return the first datum, and the rest of the stream would be lost.  The
+// parser would need some way to reset the input stream's read head to the end
+// of the first datum, but not all stream types may support this.
+//
+// Although in a scenario like this it would be wise to create a single Parser
+// instance to call multiple times (in which case it could continue using its
+// internal buffer from where it left), this may not always be practical.
+//
+// One could even have an input stream with Zisp s-expressions and other data
+// intertwined, in which case this lack of buffering is also crucial, since one
+// needs to alternate between calling the Zisp parser and some other parser on
+// the same input stream.
+//
+
+const builtin = @import("builtin");
+const std = @import("std");
+
+const lib = @import("../lib.zig");
+const value = @import("../value.zig");
+
+const List = std.ArrayListUnmanaged;
+
+const Value = value.Value;
+
+const Parser = @This();
+
+const is_test = builtin.is_test;
+const is_debug = builtin.mode == .Debug;
+const detailed_debug = false;
+
+// zig fmt: off
+const DOT    = value.rune.pack("DOT");
+const COLON  = value.rune.pack("COLON");
+const JOIN   = value.rune.pack("JOIN");
+const LABEL  = value.rune.pack("LABEL");
+const HASH   = value.rune.pack("HASH");
+const QUOTE  = value.rune.pack("QUOTE");
+const GRAVE  = value.rune.pack("GRAVE");
+const COMMA  = value.rune.pack("COMMA");
+const SQUARE = value.rune.pack("SQUARE");
+const BRACE  = value.rune.pack("BRACE");
+const VOID   = value.rune.packForced("");
+const LSTAIL = value.sstr.pack(".");
+// zig fmt: on
+
+// We could implement an optimization where we swap in a dummy cons when the
+// parser is handling a commented-out datum, but this would require changes to
+// the algorithm and doesn't seem very important, so it's not implemented.
+
+const Cons = *const fn (v1: Value, v2: Value) Value;
+
+fn dummyCons(v1: Value, v2: Value) Value {
+    _ = v1;
+    _ = v2;
+    return value.undef;
+}
+
+const real_cons = &value.pair.cons;
+const fake_cons = &dummyCons;
+
+pub const Error = enum {
+    InvalidCharacter,
+    UnclosedString,
+    UnexpectedEof,
+    UnicodeError,
+    OutOfRange,
+    ReadError,
+};
+
+pub const Context = struct {
+    // What to do next.
+    next: ?Fn = undefined,
+    // For storing a context value, like accumulated list elements.
+    val: Value = undefined,
+    // For storing a context char, like list opening bracket.
+    char: u8 = undefined,
+};
+
+pub const Alloc = struct {
+    stack: std.mem.Allocator,
+    chars: std.mem.Allocator,
+};
+
+alloc: Alloc,
+input: std.io.AnyReader = undefined,
+context: Context = .{},
+stack: List(Context) = undefined,
+chars: List(u8) = undefined,
+cons: Cons = real_cons,
+result: Value = undefined,
+unread_char: ?u8 = null,
+err_msg: []const u8 = undefined,
+
+pub fn init(
+    alloc: Alloc,
+    init_stack_capacity: usize,
+    init_chars_capacity: usize,
+) !Parser {
+    var p: Parser = .{ .alloc = alloc };
+    p.stack = try .initCapacity(alloc.stack, init_stack_capacity);
+    p.chars = try .initCapacity(alloc.chars, init_chars_capacity);
+    return p;
+}
+
+pub fn deinit(p: *Parser) void {
+    p.stack.deinit(p.alloc.stack);
+    p.chars.deinit(p.alloc.chars);
+}
+
+//
+// Input reading & unreading
+//
+
+fn read(p: *Parser) !?u8 {
+    if (is_debug and p.unread_char != null) {
+        std.debug.panic(
+            "Called read() while there was an unused character! '{c}'",
+            .{p.unread_char.?},
+        );
+    }
+    const c = p.input.readByte() catch |e| switch (e) {
+        error.EndOfStream => return null,
+        else => return p.err(.ReadError, "???"),
+    };
+    if (detailed_debug) {
+        std.debug.print("{c}", .{c});
+    }
+    return c;
+}
+
+fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
+    return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg);
+}
+
+fn unread(p: *Parser, c: u8) void {
+    p.unread_char = c;
+}
+
+fn getUnread(p: *Parser) ?u8 {
+    const c = p.unread_char orelse return null;
+    p.unread_char = null;
+    return c;
+}
+
+//
+// String accumulation & creation
+//
+
+fn addChar(p: *Parser, c: u8) !void {
+    try p.chars.append(p.alloc.chars, c);
+}
+
+fn getBareString(p: *Parser) Value {
+    defer p.chars.clearRetainingCapacity();
+    return if (p.chars.items.len <= 6)
+        value.sstr.pack(p.chars.items)
+    else
+        value.istr.intern(p.chars.items, false);
+}
+
+fn getQuotedString(p: *Parser) Value {
+    defer p.chars.clearRetainingCapacity();
+    return if (p.chars.items.len <= 6)
+        value.sstr.packQuoted(p.chars.items)
+    else
+        value.istr.intern(p.chars.items, true);
+}
+
+fn getRune(p: *Parser) Value {
+    defer p.chars.clearRetainingCapacity();
+    return value.rune.pack(p.chars.items);
+}
+
+//
+// Stack management & control flow
+//
+
+const Fn = enum {
+    parseUnit,
+    parseDatum,
+    endUnit,
+    returnContext,
+    endFirstDatum,
+    endJoinDatum,
+    parseJoin,
+    parseHashDatum,
+    endHashDatum,
+    parseRuneDatum,
+    endRuneDatum,
+    endLabelDatum,
+    continueList,
+    endImproperList,
+    closeImproperList,
+    endQuoteExpr,
+};
+
+fn call(p: *Parser, f: Fn) !void {
+    try switch (f) {
+        .parseUnit => parseUnit(p),
+        .parseDatum => parseDatum(p),
+        .endUnit => endUnit(p),
+        .returnContext => returnContext(p),
+        .endFirstDatum => endFirstDatum(p),
+        .endJoinDatum => endJoinDatum(p),
+        .parseJoin => parseJoin(p),
+        .parseHashDatum => parseHashDatum(p),
+        .endHashDatum => endHashDatum(p),
+        .parseRuneDatum => parseRuneDatum(p),
+        .endRuneDatum => endRuneDatum(p),
+        .endLabelDatum => endLabelDatum(p),
+        .continueList => continueList(p),
+        .endImproperList => endImproperList(p),
+        .closeImproperList => closeImproperList(p),
+        .endQuoteExpr => endQuoteExpr(p),
+    };
+}
+
+pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
+    p.input = input;
+    p.context.next = .parseUnit;
+    while (p.context.next) |next| {
+        if (detailed_debug) printStack(p);
+        try p.call(next);
+    }
+    if (p.unread_char) |_| {
+        return p.err(.InvalidCharacter, "top-level");
+    }
+    return p.result;
+}
+
+fn printStack(p: *Parser) void {
+    const stack = p.stack.items;
+    std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{
+        stack.len,
+        p.context.next,
+        p.context.char,
+        p.unread_char orelse '_',
+    });
+    if (stack.len > 0) {
+        var i = stack.len;
+        while (i > 0) : (i -= 1) {
+            const prev = stack[i - 1];
+            std.debug.print("{}:{any} ctx:'{c}'\n", .{
+                i - 1,
+                prev.next,
+                prev.char,
+            });
+        }
+    }
+}
+
+fn err(
+    p: *Parser,
+    comptime e: Error,
+    comptime msg: []const u8,
+) error{ParseError} {
+    p.err_msg = @tagName(e) ++ " at: " ++ msg;
+    return error.ParseError;
+}
+
+fn push(p: *Parser, next: Fn) !void {
+    try p.stack.append(p.alloc.stack, .{ .next = next });
+}
+
+fn pushContext(p: *Parser, next: Fn) !void {
+    try p.stack.append(p.alloc.stack, .{
+        .next = next,
+        .val = p.context.val,
+        .char = p.context.char,
+    });
+}
+
+fn ret(p: *Parser) void {
+    if (p.stack.pop()) |c| {
+        p.context = c;
+    } else {
+        p.context.next = null;
+    }
+}
+
+fn subr(p: *Parser, start: Fn, next: Fn) !void {
+    try p.pushContext(next);
+    p.context.next = start;
+}
+
+fn jump(p: *Parser, next: Fn, val: ?Value) void {
+    if (val) |v| p.result = v;
+    p.context.next = next;
+}
+
+fn abort(p: *Parser, next: Fn, unread_c: u8) void {
+    p.result = VOID;
+    p.unread_char = unread_c;
+    p.context.next = next;
+}
+
+fn retval(p: *Parser, val: Value) void {
+    p.result = val;
+    p.ret();
+}
+
+//
+// Start parser functions
+//
+
+fn parseUnit(p: *Parser) !void {
+    var c1 = p.getUnread() orelse try p.read();
+    while (c1) |c| : (c1 = try p.read()) {
+        switch (try checkBlanks(p, c)) {
+            .yes => {},
+            .skip_unit => try p.push(.parseUnit),
+            .no => {
+                p.unread(c);
+                return p.subr(.parseDatum, .endUnit);
+            },
+        }
+    }
+    return p.retval(value.eof.eof);
+}
+
+fn endUnit(p: *Parser) !void {
+    const c = p.getUnread() orelse return p.ret();
+    switch (try checkBlanks(p, c)) {
+        .yes => {},
+        .skip_unit => return skipUnitAndReturn(p),
+        .no => p.unread(c),
+    }
+    return p.ret();
+}
+
+fn skipUnitAndReturn(p: *Parser) !void {
+    p.context.val = p.result;
+    return p.subr(.parseUnit, .returnContext);
+}
+
+fn returnContext(p: *Parser) !void {
+    return p.retval(p.context.val);
+}
+
+fn parseDatum(p: *Parser) !void {
+    return parseOneDatum(p, p.getUnread().?, .endFirstDatum);
+}
+
+fn endFirstDatum(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        return p.ret();
+    }
+    return parseJoin(p);
+}
+
+fn parseJoin(p: *Parser) !void {
+    const c = p.getUnread() orelse try p.read() orelse return p.ret();
+    switch (c) {
+        '.', ':' => {
+            p.context.char = c;
+            p.unread(try p.readNoEof("join datum"));
+        },
+        else => {
+            p.context.char = 0;
+            p.unread(c);
+        },
+    }
+    p.context.val = p.result;
+    return p.subr(.parseDatum, .endJoinDatum);
+}
+
+fn endJoinDatum(p: *Parser) !void {
+    const prev = p.context.val;
+    const join = p.context.char;
+    if (p.result.eq(VOID)) {
+        if (join == 0) {
+            return p.retval(prev);
+        } else {
+            return p.err(.InvalidCharacter, "join datum");
+        }
+    }
+    const rune = switch (join) {
+        0 => JOIN,
+        '.' => DOT,
+        ':' => COLON,
+        else => unreachable,
+    };
+    const joined = p.cons(rune, p.cons(prev, p.result));
+    return p.jump(.parseJoin, joined);
+}
+
+fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void {
+    if (isBareChar(c) or c == '.') {
+        return p.jump(next, try parseBareString(p, c));
+    }
+    return parseCladDatum(p, c, next);
+}
+
+fn parseBareString(p: *Parser, c: u8) !Value {
+    const allow_dots = std.ascii.isDigit(c) or switch (c) {
+        '.', '+', '-' => true,
+        else => false,
+    };
+    try p.addChar(c);
+    return parseBareStringRest(p, allow_dots);
+}
+
+fn parseBareEscString(p: *Parser) !Value {
+    try p.addChar(try parseBareEsc(p));
+    return parseBareStringRest(p, false);
+}
+
+fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value {
+    while (try p.read()) |c| {
+        if (isBareChar(c) or (allow_dots and c == '.')) {
+            try p.addChar(c);
+        } else if (c == '\\') {
+            try p.addChar(try parseBareEsc(p));
+        } else {
+            p.unread(c);
+            break;
+        }
+    }
+    return p.getBareString();
+}
+
+fn parseBareEsc(p: *Parser) !u8 {
+    const c = try p.readNoEof("bare escape");
+    if (isBareEsc(c)) {
+        return c;
+    } else {
+        return p.err(.InvalidCharacter, "bare escape");
+    }
+}
+
+fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
+    if (c == '\\') {
+        return p.jump(next, try parseBareEscString(p));
+    }
+    if (c == '"') {
+        return p.jump(next, try parseQuotedString(p, '"'));
+    }
+    if (c == '|') {
+        return p.jump(next, try parseQuotedString(p, '|'));
+    }
+    return switch (c) {
+        '#' => parseHashExpression(p, next),
+        '(', '[', '{' => parseList(p, c, next),
+        '\'', '`', ',' => parseQuoteExpr(p, c, next),
+        else => p.abort(next, c),
+    };
+}
+
+fn parseQuotedString(p: *Parser, close: u8) !Value {
+    while (try p.read()) |c| {
+        if (c == close) {
+            return p.getQuotedString();
+        }
+        if (c != '\\') {
+            try p.addChar(c);
+        } else {
+            try parseQuotedEsc(p, close);
+        }
+    }
+    return error.UnclosedString;
+}
+
+fn parseQuotedEsc(p: *Parser, close: u8) !void {
+    const c = try p.readNoEof("quoted escape");
+    if (c == close) {
+        return p.addChar(close);
+    }
+    if (c == 'u') {
+        return parseUniHexHandleErrors(p);
+    }
+    try p.addChar(switch (c) {
+        '\\' => c,
+        '0' => 0,
+        'a' => 7,
+        'b' => 8,
+        't' => 9,
+        'n' => 10,
+        'v' => 11,
+        'f' => 12,
+        'r' => 13,
+        'e' => 27,
+        'x' => try parseHexByte(p, "hex escape"),
+        else => return p.err(.InvalidCharacter, "quoted escape"),
+    });
+}
+
+fn parseUniHexHandleErrors(p: *Parser) !void {
+    return parseUniHex(p) catch |e| switch (e) {
+        error.Utf8CannotEncodeSurrogateHalf => p.err(
+            .UnicodeError,
+            "unicode escape",
+        ),
+        else => e,
+    };
+}
+
+fn parseUniHex(p: *Parser) !void {
+    const msg = "unicode escape";
+
+    if (try p.readNoEof(msg) != '{') {
+        return p.err(.InvalidCharacter, msg);
+    }
+
+    const uc = try parseHex(p, u21, msg);
+    const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
+    if (c != '}') {
+        return p.err(.InvalidCharacter, msg);
+    }
+
+    const n = try std.unicode.utf8CodepointSequenceLength(uc);
+    const buf = try p.chars.addManyAsSlice(p.alloc.chars, n);
+    _ = try std.unicode.utf8Encode(uc, buf);
+}
+
+fn parseHashExpression(p: *Parser, next: Fn) !void {
+    const c = try p.readNoEof("hash expression");
+    if (try checkBlanks(p, c) != .no) {
+        return p.err(.InvalidCharacter, "hash expression");
+    }
+    if (std.ascii.isAlphabetic(c)) {
+        const r = try parseRune(p, c);
+        return parseRuneEnd(p, r, next);
+    }
+    if (c == '%') {
+        const l = try parseLabel(p);
+        return parseLabelEnd(p, l, next);
+    }
+    p.unread(c);
+    return p.subr(.parseHashDatum, next);
+}
+
+fn parseHashDatum(p: *Parser) !void {
+    return parseCladDatum(p, p.getUnread().?, .endHashDatum);
+}
+
+fn endHashDatum(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        return p.err(.InvalidCharacter, "hash datum");
+    }
+    return p.retval(p.cons(HASH, p.result));
+}
+
+fn parseRune(p: *Parser, c1: u8) !Value {
+    try p.addChar(c1);
+    var len: usize = 1;
+    while (try p.read()) |c| : (len += 1) {
+        if (len == 6 or !std.ascii.isAlphanumeric(c)) {
+            p.unread(c);
+            return p.getRune();
+        }
+        try p.addChar(c);
+    }
+    return p.getRune();
+}
+
+fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
+    const c = p.getUnread() orelse return p.jump(next, r);
+    if (c == '\\') {
+        return p.jump(next, p.cons(r, try parseBareString(p, c)));
+    }
+    if (c == '"') {
+        return p.jump(next, p.cons(r, try parseQuotedString(p, '"')));
+    }
+    if (c == '|') {
+        return p.jump(next, p.cons(r, try parseQuotedString(p, '|')));
+    }
+    p.unread(c);
+    switch (c) {
+        '#', '(', '[', '{', '\'', '`', ',' => {
+            try p.push(next);
+            p.context.val = r;
+            // Use jump to prevent recursion.
+            return p.jump(.parseRuneDatum, null);
+        },
+        else => return p.jump(next, r),
+    }
+}
+
+fn parseRuneDatum(p: *Parser) !void {
+    return parseCladDatum(p, p.getUnread().?, .endRuneDatum);
+}
+
+fn endRuneDatum(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        p.retval(p.context.val);
+    }
+    return p.retval(p.cons(p.context.val, p.result));
+}
+
+fn parseLabel(p: *Parser) !Value {
+    const label = try parseHex(p, u48, "datum label");
+    return value.fixnum.pack(label);
+}
+
+fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void {
+    const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label");
+    if (c == '%') {
+        return p.jump(next, p.cons(LABEL, l));
+    }
+    if (c == '=') {
+        try p.push(next);
+        p.context.val = l;
+        return p.subr(.parseDatum, .endLabelDatum);
+    }
+    return p.err(.InvalidCharacter, "datum label");
+}
+
+fn endLabelDatum(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        return p.err(.InvalidCharacter, "label datum");
+    }
+    return p.retval(p.cons(LABEL, p.cons(p.context.val, p.result)));
+}
+
+fn parseList(p: *Parser, open: u8, next: Fn) !void {
+    const head = switch (open) {
+        '(' => value.nil.nil,
+        '[' => p.cons(SQUARE, value.nil.nil),
+        '{' => p.cons(BRACE, value.nil.nil),
+        else => unreachable,
+    };
+    const close: u8 = switch (open) {
+        '(' => ')',
+        '[' => ']',
+        '{' => '}',
+        else => unreachable,
+    };
+    while (try p.read()) |c| {
+        if (c == close) {
+            return p.jump(next, head);
+        }
+        switch (try checkBlanks(p, c)) {
+            .yes => {},
+            .skip_unit => {
+                try listParserSetup(p, head, close, next);
+                return p.subr(.parseUnit, .parseUnit);
+            },
+            .no => {
+                try listParserSetup(p, head, close, next);
+                p.unread(c);
+                return p.jump(.parseDatum, null);
+            },
+        }
+    }
+    return p.err(.UnexpectedEof, "list");
+}
+
+fn listParserSetup(p: *Parser, head: Value, close: u8, next: Fn) !void {
+    try p.push(next);
+    p.context.val = head;
+    p.context.char = close;
+    try p.pushContext(.continueList);
+}
+
+fn continueList(p: *Parser) !void {
+    const close = p.context.char;
+
+    if (p.result.eq(VOID)) {
+        const c = p.getUnread().?;
+        if (c == close) {
+            return endList(p);
+        }
+        return p.err(.InvalidCharacter, "list");
+    }
+
+    if (p.result.eq(LSTAIL)) {
+        return p.subr(.parseUnit, .endImproperList);
+    }
+
+    p.context.val = p.cons(p.result, p.context.val);
+
+    var c1 = p.getUnread() orelse try p.read();
+    while (c1) |c| : (c1 = try p.read()) {
+        if (c == close) {
+            return endList(p);
+        }
+        switch (try checkBlanks(p, c)) {
+            .yes => {},
+            .skip_unit => {
+                try p.pushContext(.continueList);
+                return p.subr(.parseUnit, .parseUnit);
+            },
+            .no => {
+                p.unread(c);
+                return p.subr(.parseDatum, .continueList);
+            },
+        }
+    }
+    return p.err(.UnexpectedEof, "list");
+}
+
+fn endList(p: *Parser) !void {
+    return p.retval(lib.list.reverse(p.context.val));
+}
+
+fn endImproperList(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        return p.err(.InvalidCharacter, "list tail");
+    }
+    p.context.val = lib.list.reverseWithTail(p.context.val, p.result);
+    return closeImproperList(p);
+}
+
+fn closeImproperList(p: *Parser) !void {
+    const result = p.context.val;
+    const close = p.context.char;
+    var c1 = p.getUnread() orelse try p.read();
+    while (c1) |c| : (c1 = try p.read()) {
+        if (c == close) {
+            return p.retval(result);
+        }
+        switch (try checkBlanks(p, c)) {
+            .yes => {},
+            .skip_unit => return p.subr(.parseUnit, .closeImproperList),
+            .no => return p.err(.InvalidCharacter, "after list tail"),
+        }
+    }
+    return p.err(.UnexpectedEof, "after list tail");
+}
+
+fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
+    const q = switch (c1) {
+        '\'' => QUOTE,
+        '`' => GRAVE,
+        ',' => COMMA,
+        else => unreachable,
+    };
+
+    try p.push(next);
+    p.context.val = q;
+    p.unread(try p.readNoEof("quote expression"));
+    return p.subr(.parseDatum, .endQuoteExpr);
+}
+
+fn endQuoteExpr(p: *Parser) !void {
+    if (p.result.eq(VOID)) {
+        return p.err(.InvalidCharacter, "quote expression datum");
+    }
+    return p.retval(p.cons(p.context.val, p.result));
+}
+
+// Helpers
+
+fn checkBlanks(p: *Parser, c: u8) !enum { yes, skip_unit, no } {
+    switch (c) {
+        '\t'...'\r', ' ' => return .yes,
+        ';' => {
+            const c2 = try p.read() orelse return .yes;
+            if (c2 == '\n') return .yes;
+            if (c2 == '~') return .skip_unit;
+            while (try p.read()) |c3| if (c3 == '\n') break;
+            return .yes;
+        },
+        else => return .no,
+    }
+}
+
+fn isBareChar(c: u8) bool {
+    return switch (c) {
+        // zig fmt: off
+        'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' ,
+        '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true,
+        // zig fmt: on
+        else => false,
+    };
+}
+
+fn isBareEsc(c: u8) bool {
+    return switch (c) {
+        33...126 => true,
+        else => false,
+    };
+}
+
+fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
+    var uc: T = undefined;
+
+    const c1 = try p.readNoEof(emsg);
+    uc = try parseHexDigit(p, c1, emsg);
+
+    while (try p.read()) |c| {
+        if (!std.ascii.isHex(c)) {
+            p.unread(c);
+            return uc;
+        }
+        const shl = std.math.shlExact;
+        uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg);
+        uc |= try parseHexDigit(p, c, emsg);
+    }
+    return uc;
+}
+
+fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 {
+    const h1 = try p.readNoEof(emsg);
+    const h2 = try p.readNoEof(emsg);
+    const hi = try parseHexDigit(p, h1, emsg);
+    const lo = try parseHexDigit(p, h2, emsg);
+    return hi << 4 | lo;
+}
+
+fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 {
+    return switch (c) {
+        '0'...'9' => c - '0',
+        'A'...'F' => c - 'A' + 10,
+        'a'...'f' => c - 'a' + 10,
+        else => p.err(.InvalidCharacter, emsg),
+    };
+}
diff --git a/src/zisp/io/decoder.zig b/src/zisp/io/decoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/decoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/zisp/io/encoder.zig b/src/zisp/io/encoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/encoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/zisp/io/parser.zig b/src/zisp/io/parser.zig
new file mode 100644
index 0000000..cb96e44
--- /dev/null
+++ b/src/zisp/io/parser.zig
@@ -0,0 +1,75 @@
+const builtin = @import("builtin");
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const Parser = @import("Parser.zig");
+
+const Value = value.Value;
+
+const is_test = builtin.is_test;
+const is_debug = builtin.mode == .Debug;
+
+const ctx_size = @sizeOf(Parser.Context);
+
+pub fn Sfa(init_stack: usize, init_chars: usize) type {
+    return struct {
+        const Self = @This();
+
+        pub const init_stack_capacity: usize = init_stack;
+        pub const init_chars_capacity: usize = init_chars;
+
+        pub const init_stack_mem: usize = init_stack * ctx_size;
+        pub const init_chars_mem: usize = init_chars;
+
+        stack: std.heap.StackFallbackAllocator(init_stack_mem),
+        chars: std.heap.StackFallbackAllocator(init_chars_mem),
+
+        pub fn init(fallback: std.mem.Allocator) Self {
+            return .{
+                .stack = std.heap.stackFallback(init_stack_mem, fallback),
+                .chars = std.heap.stackFallback(init_chars_mem, fallback),
+            };
+        }
+
+        pub fn allocator(s: *Self) Parser.Alloc {
+            return .{ .stack = s.stack.get(), .chars = s.chars.get() };
+        }
+    };
+}
+
+pub const DefaultSfa = Sfa(32, 2048);
+
+pub fn initSfa(alloc: anytype) !Parser {
+    const SfaType = @typeInfo(@TypeOf(alloc)).pointer.child;
+    return Parser.init(
+        alloc.allocator(),
+        SfaType.init_stack_capacity,
+        SfaType.init_chars_capacity,
+    );
+}
+
+pub fn parse(input: std.io.AnyReader) Value {
+    return _parse(input, true);
+}
+
+pub fn _parse(
+    input: std.io.AnyReader,
+    comptime panic: bool,
+) if (panic) Value else error{ParseError}!Value {
+    const alloc = std.heap.smp_allocator;
+    var sfa = DefaultSfa.init(alloc);
+    var p = initSfa(&sfa) catch |e| if (panic) @panic("OOM") else return e;
+    defer p.deinit();
+    return p.run(input) catch |e| if (panic) {
+        const format = "Parse error: {s}, unread_char: {s}\n";
+        const unread: [4]u8 =
+            if (p.unread_char) |c|
+                "0x".* ++ std.fmt.hex(c)
+            else
+                "none".*;
+        std.debug.panic(format, .{ p.err_msg, unread });
+    } else {
+        return e;
+    };
+}
diff --git a/src/zisp/io/reader.zig b/src/zisp/io/reader.zig
new file mode 100644
index 0000000..3465cb3
--- /dev/null
+++ b/src/zisp/io/reader.zig
@@ -0,0 +1,10 @@
+// See parse.zig for details.
+
+const parser = @import("parser.zig");
+const decoder = @import("decoder.zig");
+
+const Value = @import("../value.zig").Value;
+
+pub fn readCode(input: []const u8) Value {
+    return decoder.decode(parser.parse(input, .code));
+}
diff --git a/src/zisp/io/unparser.zig b/src/zisp/io/unparser.zig
new file mode 100644
index 0000000..d703182
--- /dev/null
+++ b/src/zisp/io/unparser.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const istr = value.istr;
+const seq = value.seq;
+
+const ShortString = value.ShortString;
+const OtherTag = value.OtherTag;
+const Value = value.Value;
+
+pub fn unparse(w: anytype, v: Value) anyerror!void {
+    try if (value.double.check(v))
+        unparseDouble(w, v)
+    else if (value.fixnum.check(v))
+        unparseFixnum(w, v)
+    else if (value.ptr.checkZisp(v))
+        unparseHeap(w, v)
+    else
+        unparseOther(w, v);
+}
+
+fn unparseDouble(w: anytype, v: Value) !void {
+    _ = w;
+    _ = v;
+    @panic("not implemented");
+}
+
+fn unparseFixnum(w: anytype, v: Value) !void {
+    _ = w;
+    _ = v;
+    @panic("not implemented");
+}
+
+fn unparseHeap(w: anytype, v: Value) !void {
+    const p, const t = value.ptr.unpack(v);
+    try switch (t) {
+        .pair => unparsePair(w, @ptrCast(p)),
+        .seq => unparseSeq(w, @ptrCast(p)),
+        else => @panic("not implemented"),
+    };
+}
+
+fn unparseOther(w: anytype, v: Value) !void {
+    try switch (v.other.tag) {
+        .rune => unparseRune(w, v),
+        .sstr => unparseSstr(w, v),
+        .qstr => unparseQstr(w, v),
+        .char => unparseChar(w, v),
+        .misc => unparseMisc(w, v),
+    };
+}
+
+fn unparseRune(w: anytype, v: Value) !void {
+    const name = value.rune.unpack(v);
+    try w.writeByte('#');
+    try w.writeAll(name.constSlice());
+}
+
+fn unparseSstr(w: anytype, v: Value) !void {
+    const str = value.sstr.unpack(v);
+    try w.writeAll(str.constSlice());
+}
+
+fn unparseQstr(w: anytype, v: Value) !void {
+    const str = value.sstr.unpack(v);
+    try w.writeByte('"');
+    try w.writeAll(str.constSlice());
+    try w.writeByte('"');
+}
+
+fn unparseChar(w: anytype, v: Value) !void {
+    var buf: [4]u8 = undefined;
+    const len = try std.unicode.utf8Encode(v.char.value, &buf);
+    try w.writeAll(buf[0..len]);
+}
+
+fn unparseMisc(w: anytype, v: Value) !void {
+    try switch (v.misc.value) {
+        .f => w.writeAll("#f"),
+        .t => w.writeAll("#t"),
+        .nil => w.writeAll("()"),
+        .eof => w.writeAll("#eof"),
+        .undef => w.writeAll("#undef"),
+    };
+}
+
+fn unparsePair(w: anytype, p: *[2]Value) !void {
+    try w.writeByte('(');
+    try unparse(w, p[0]);
+    var cdr = p[1];
+    while (value.pair.check(cdr)) : (cdr = value.pair.cdr(cdr)) {
+        try w.writeByte(' ');
+        try unparse(w, value.pair.car(cdr));
+    }
+    if (!value.nil.check(cdr)) {
+        try w.writeByte(' ');
+        try w.writeByte('.');
+        try w.writeByte(' ');
+        try unparse(w, cdr);
+    }
+    try w.writeByte(')');
+}
+
+fn unparseSeq(w: anytype, p: *seq.Header) !void {
+    const h = istr.getHeaderFromPtr(@ptrCast(p));
+    switch (h.type) {
+        .string => try unparseString(w, h),
+        else => @panic("not implemented"),
+    }
+}
+
+fn unparseString(w: anytype, h: *seq.Header) !void {
+    const info = h.info.string;
+    if (info.quoted) {
+        try w.writeByte('"');
+    }
+    try w.writeAll(h.bytes());
+    if (info.quoted) {
+        try w.writeByte('"');
+    }
+}
diff --git a/src/zisp/io/writer.zig b/src/zisp/io/writer.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/writer.zig
@@ -0,0 +1 @@
+// wip
author	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-30 18:34:00 +0200
committer	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-30 18:34:00 +0200
commit	3d05c94b9d8aa964e4ff848c95d5999cec170e04 (patch)
tree	2864753404e8c9cb9fa8bce537772a29b90d1cbd /src/zisp/io
parent	9340f3c44ca0d9d4fdee905fc6e8b428824b9185 (diff)