summaryrefslogtreecommitdiff
path: root/src/zisp/io
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-03-30 18:34:00 +0200
committerTaylan Kammer <taylan.kammer@gmail.com>2025-03-30 18:34:00 +0200
commit3d05c94b9d8aa964e4ff848c95d5999cec170e04 (patch)
tree2864753404e8c9cb9fa8bce537772a29b90d1cbd /src/zisp/io
parent9340f3c44ca0d9d4fdee905fc6e8b428824b9185 (diff)
Big cleanup.
Diffstat (limited to 'src/zisp/io')
-rw-r--r--src/zisp/io/Parser.zig847
-rw-r--r--src/zisp/io/decoder.zig1
-rw-r--r--src/zisp/io/encoder.zig1
-rw-r--r--src/zisp/io/parser.zig75
-rw-r--r--src/zisp/io/reader.zig10
-rw-r--r--src/zisp/io/unparser.zig122
-rw-r--r--src/zisp/io/writer.zig1
7 files changed, 1057 insertions, 0 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
new file mode 100644
index 0000000..08bf8ba
--- /dev/null
+++ b/src/zisp/io/Parser.zig
@@ -0,0 +1,847 @@
+// === Syntax ===
+//
+// See docs/parser.md and spec/syntax.bnf to understand the syntax.
+//
+//
+// === Trampolining strategy ===
+//
+// Instead of using recursion directly, the parser is written in a "trampoline"
+// style, which ensures that parse depth is not limited by stack size.
+//
+// All state and context is passed around via a struct pointer. The parser has
+// a main loop, which calls a function as dictated by parser.context.next, and
+// the function may update the state to have another function called next.
+//
+// If a function wants to call a recursive subroutine, it pushes some of the
+// current context onto a stack, including what function the subroutine should
+// return to, and then updates the state to instruct the main loop to call one
+// of the entry point subroutines.
+//
+// If a function wants to make the parser return, either from a subroutine or
+// from the main loop, it sets the .result field, and tries to pop the saved
+// context. If the context stack was empty, the main loop returns.
+//
+//
+// === Buffering ===
+//
+// For efficiency, call the parser on an input stream with implicit buffering.
+//
+// The parser does not use its own buffer, beyond one character that may be
+// written back into the unread_char field, which is checked at the end to
+// ensure it's nothing other than a trailing blank or comment.
+//
+// This lack of buffering is to ensure that the parser never reads more bytes
+// from the input than what it needs to parse a datum. Consider the input:
+//
+// (a b c) (x y z)
+//
+// If we used proper buffering, like reading up to 4K bytes per read, then the
+// whole stream would be consumed at once before it's parsed. Then, the parser
+// would return the first datum, and the rest of the stream would be lost. The
+// parser would need some way to reset the input stream's read head to the end
+// of the first datum, but not all stream types may support this.
+//
+// Although in a scenario like this it would be wise to create a single Parser
+// instance to call multiple times (in which case it could continue using its
+// internal buffer from where it left), this may not always be practical.
+//
+// One could even have an input stream with Zisp s-expressions and other data
+// intertwined, in which case this lack of buffering is also crucial, since one
+// needs to alternate between calling the Zisp parser and some other parser on
+// the same input stream.
+//
+
+const builtin = @import("builtin");
+const std = @import("std");
+
+const lib = @import("../lib.zig");
+const value = @import("../value.zig");
+
+const List = std.ArrayListUnmanaged;
+
+const Value = value.Value;
+
+const Parser = @This();
+
+const is_test = builtin.is_test;
+const is_debug = builtin.mode == .Debug;
+const detailed_debug = false;
+
+// zig fmt: off
+const DOT = value.rune.pack("DOT");
+const COLON = value.rune.pack("COLON");
+const JOIN = value.rune.pack("JOIN");
+const LABEL = value.rune.pack("LABEL");
+const HASH = value.rune.pack("HASH");
+const QUOTE = value.rune.pack("QUOTE");
+const GRAVE = value.rune.pack("GRAVE");
+const COMMA = value.rune.pack("COMMA");
+const SQUARE = value.rune.pack("SQUARE");
+const BRACE = value.rune.pack("BRACE");
+const VOID = value.rune.packForced("");
+const LSTAIL = value.sstr.pack(".");
+// zig fmt: on
+
+// We could implement an optimization where we swap in a dummy cons when the
+// parser is handling a commented-out datum, but this would require changes to
+// the algorithm and doesn't seem very important, so it's not implemented.
+
+const Cons = *const fn (v1: Value, v2: Value) Value;
+
+fn dummyCons(v1: Value, v2: Value) Value {
+ _ = v1;
+ _ = v2;
+ return value.undef;
+}
+
+const real_cons = &value.pair.cons;
+const fake_cons = &dummyCons;
+
+pub const Error = enum {
+ InvalidCharacter,
+ UnclosedString,
+ UnexpectedEof,
+ UnicodeError,
+ OutOfRange,
+ ReadError,
+};
+
+pub const Context = struct {
+ // What to do next.
+ next: ?Fn = undefined,
+ // For storing a context value, like accumulated list elements.
+ val: Value = undefined,
+ // For storing a context char, like list opening bracket.
+ char: u8 = undefined,
+};
+
+pub const Alloc = struct {
+ stack: std.mem.Allocator,
+ chars: std.mem.Allocator,
+};
+
+alloc: Alloc,
+input: std.io.AnyReader = undefined,
+context: Context = .{},
+stack: List(Context) = undefined,
+chars: List(u8) = undefined,
+cons: Cons = real_cons,
+result: Value = undefined,
+unread_char: ?u8 = null,
+err_msg: []const u8 = undefined,
+
+pub fn init(
+ alloc: Alloc,
+ init_stack_capacity: usize,
+ init_chars_capacity: usize,
+) !Parser {
+ var p: Parser = .{ .alloc = alloc };
+ p.stack = try .initCapacity(alloc.stack, init_stack_capacity);
+ p.chars = try .initCapacity(alloc.chars, init_chars_capacity);
+ return p;
+}
+
+pub fn deinit(p: *Parser) void {
+ p.stack.deinit(p.alloc.stack);
+ p.chars.deinit(p.alloc.chars);
+}
+
+//
+// Input reading & unreading
+//
+
+fn read(p: *Parser) !?u8 {
+ if (is_debug and p.unread_char != null) {
+ std.debug.panic(
+ "Called read() while there was an unused character! '{c}'",
+ .{p.unread_char.?},
+ );
+ }
+ const c = p.input.readByte() catch |e| switch (e) {
+ error.EndOfStream => return null,
+ else => return p.err(.ReadError, "???"),
+ };
+ if (detailed_debug) {
+ std.debug.print("{c}", .{c});
+ }
+ return c;
+}
+
+fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
+ return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg);
+}
+
+fn unread(p: *Parser, c: u8) void {
+ p.unread_char = c;
+}
+
+fn getUnread(p: *Parser) ?u8 {
+ const c = p.unread_char orelse return null;
+ p.unread_char = null;
+ return c;
+}
+
+//
+// String accumulation & creation
+//
+
+fn addChar(p: *Parser, c: u8) !void {
+ try p.chars.append(p.alloc.chars, c);
+}
+
+fn getBareString(p: *Parser) Value {
+ defer p.chars.clearRetainingCapacity();
+ return if (p.chars.items.len <= 6)
+ value.sstr.pack(p.chars.items)
+ else
+ value.istr.intern(p.chars.items, false);
+}
+
+fn getQuotedString(p: *Parser) Value {
+ defer p.chars.clearRetainingCapacity();
+ return if (p.chars.items.len <= 6)
+ value.sstr.packQuoted(p.chars.items)
+ else
+ value.istr.intern(p.chars.items, true);
+}
+
+fn getRune(p: *Parser) Value {
+ defer p.chars.clearRetainingCapacity();
+ return value.rune.pack(p.chars.items);
+}
+
+//
+// Stack management & control flow
+//
+
+const Fn = enum {
+ parseUnit,
+ parseDatum,
+ endUnit,
+ returnContext,
+ endFirstDatum,
+ endJoinDatum,
+ parseJoin,
+ parseHashDatum,
+ endHashDatum,
+ parseRuneDatum,
+ endRuneDatum,
+ endLabelDatum,
+ continueList,
+ endImproperList,
+ closeImproperList,
+ endQuoteExpr,
+};
+
+fn call(p: *Parser, f: Fn) !void {
+ try switch (f) {
+ .parseUnit => parseUnit(p),
+ .parseDatum => parseDatum(p),
+ .endUnit => endUnit(p),
+ .returnContext => returnContext(p),
+ .endFirstDatum => endFirstDatum(p),
+ .endJoinDatum => endJoinDatum(p),
+ .parseJoin => parseJoin(p),
+ .parseHashDatum => parseHashDatum(p),
+ .endHashDatum => endHashDatum(p),
+ .parseRuneDatum => parseRuneDatum(p),
+ .endRuneDatum => endRuneDatum(p),
+ .endLabelDatum => endLabelDatum(p),
+ .continueList => continueList(p),
+ .endImproperList => endImproperList(p),
+ .closeImproperList => closeImproperList(p),
+ .endQuoteExpr => endQuoteExpr(p),
+ };
+}
+
+pub fn run(p: *Parser, input: std.io.AnyReader) !Value {
+ p.input = input;
+ p.context.next = .parseUnit;
+ while (p.context.next) |next| {
+ if (detailed_debug) printStack(p);
+ try p.call(next);
+ }
+ if (p.unread_char) |_| {
+ return p.err(.InvalidCharacter, "top-level");
+ }
+ return p.result;
+}
+
+fn printStack(p: *Parser) void {
+ const stack = p.stack.items;
+ std.debug.print("\n\n{}:{any} ctx:'{c}' unread:'{c}' \n", .{
+ stack.len,
+ p.context.next,
+ p.context.char,
+ p.unread_char orelse '_',
+ });
+ if (stack.len > 0) {
+ var i = stack.len;
+ while (i > 0) : (i -= 1) {
+ const prev = stack[i - 1];
+ std.debug.print("{}:{any} ctx:'{c}'\n", .{
+ i - 1,
+ prev.next,
+ prev.char,
+ });
+ }
+ }
+}
+
+fn err(
+ p: *Parser,
+ comptime e: Error,
+ comptime msg: []const u8,
+) error{ParseError} {
+ p.err_msg = @tagName(e) ++ " at: " ++ msg;
+ return error.ParseError;
+}
+
+fn push(p: *Parser, next: Fn) !void {
+ try p.stack.append(p.alloc.stack, .{ .next = next });
+}
+
+fn pushContext(p: *Parser, next: Fn) !void {
+ try p.stack.append(p.alloc.stack, .{
+ .next = next,
+ .val = p.context.val,
+ .char = p.context.char,
+ });
+}
+
+fn ret(p: *Parser) void {
+ if (p.stack.pop()) |c| {
+ p.context = c;
+ } else {
+ p.context.next = null;
+ }
+}
+
+fn subr(p: *Parser, start: Fn, next: Fn) !void {
+ try p.pushContext(next);
+ p.context.next = start;
+}
+
+fn jump(p: *Parser, next: Fn, val: ?Value) void {
+ if (val) |v| p.result = v;
+ p.context.next = next;
+}
+
+fn abort(p: *Parser, next: Fn, unread_c: u8) void {
+ p.result = VOID;
+ p.unread_char = unread_c;
+ p.context.next = next;
+}
+
+fn retval(p: *Parser, val: Value) void {
+ p.result = val;
+ p.ret();
+}
+
+//
+// Start parser functions
+//
+
+fn parseUnit(p: *Parser) !void {
+ var c1 = p.getUnread() orelse try p.read();
+ while (c1) |c| : (c1 = try p.read()) {
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => try p.push(.parseUnit),
+ .no => {
+ p.unread(c);
+ return p.subr(.parseDatum, .endUnit);
+ },
+ }
+ }
+ return p.retval(value.eof.eof);
+}
+
+fn endUnit(p: *Parser) !void {
+ const c = p.getUnread() orelse return p.ret();
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => return skipUnitAndReturn(p),
+ .no => p.unread(c),
+ }
+ return p.ret();
+}
+
+fn skipUnitAndReturn(p: *Parser) !void {
+ p.context.val = p.result;
+ return p.subr(.parseUnit, .returnContext);
+}
+
+fn returnContext(p: *Parser) !void {
+ return p.retval(p.context.val);
+}
+
+fn parseDatum(p: *Parser) !void {
+ return parseOneDatum(p, p.getUnread().?, .endFirstDatum);
+}
+
+fn endFirstDatum(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ return p.ret();
+ }
+ return parseJoin(p);
+}
+
+fn parseJoin(p: *Parser) !void {
+ const c = p.getUnread() orelse try p.read() orelse return p.ret();
+ switch (c) {
+ '.', ':' => {
+ p.context.char = c;
+ p.unread(try p.readNoEof("join datum"));
+ },
+ else => {
+ p.context.char = 0;
+ p.unread(c);
+ },
+ }
+ p.context.val = p.result;
+ return p.subr(.parseDatum, .endJoinDatum);
+}
+
+fn endJoinDatum(p: *Parser) !void {
+ const prev = p.context.val;
+ const join = p.context.char;
+ if (p.result.eq(VOID)) {
+ if (join == 0) {
+ return p.retval(prev);
+ } else {
+ return p.err(.InvalidCharacter, "join datum");
+ }
+ }
+ const rune = switch (join) {
+ 0 => JOIN,
+ '.' => DOT,
+ ':' => COLON,
+ else => unreachable,
+ };
+ const joined = p.cons(rune, p.cons(prev, p.result));
+ return p.jump(.parseJoin, joined);
+}
+
+fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void {
+ if (isBareChar(c) or c == '.') {
+ return p.jump(next, try parseBareString(p, c));
+ }
+ return parseCladDatum(p, c, next);
+}
+
+fn parseBareString(p: *Parser, c: u8) !Value {
+ const allow_dots = std.ascii.isDigit(c) or switch (c) {
+ '.', '+', '-' => true,
+ else => false,
+ };
+ try p.addChar(c);
+ return parseBareStringRest(p, allow_dots);
+}
+
+fn parseBareEscString(p: *Parser) !Value {
+ try p.addChar(try parseBareEsc(p));
+ return parseBareStringRest(p, false);
+}
+
+fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value {
+ while (try p.read()) |c| {
+ if (isBareChar(c) or (allow_dots and c == '.')) {
+ try p.addChar(c);
+ } else if (c == '\\') {
+ try p.addChar(try parseBareEsc(p));
+ } else {
+ p.unread(c);
+ break;
+ }
+ }
+ return p.getBareString();
+}
+
+fn parseBareEsc(p: *Parser) !u8 {
+ const c = try p.readNoEof("bare escape");
+ if (isBareEsc(c)) {
+ return c;
+ } else {
+ return p.err(.InvalidCharacter, "bare escape");
+ }
+}
+
+fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
+ if (c == '\\') {
+ return p.jump(next, try parseBareEscString(p));
+ }
+ if (c == '"') {
+ return p.jump(next, try parseQuotedString(p, '"'));
+ }
+ if (c == '|') {
+ return p.jump(next, try parseQuotedString(p, '|'));
+ }
+ return switch (c) {
+ '#' => parseHashExpression(p, next),
+ '(', '[', '{' => parseList(p, c, next),
+ '\'', '`', ',' => parseQuoteExpr(p, c, next),
+ else => p.abort(next, c),
+ };
+}
+
+fn parseQuotedString(p: *Parser, close: u8) !Value {
+ while (try p.read()) |c| {
+ if (c == close) {
+ return p.getQuotedString();
+ }
+ if (c != '\\') {
+ try p.addChar(c);
+ } else {
+ try parseQuotedEsc(p, close);
+ }
+ }
+ return error.UnclosedString;
+}
+
+fn parseQuotedEsc(p: *Parser, close: u8) !void {
+ const c = try p.readNoEof("quoted escape");
+ if (c == close) {
+ return p.addChar(close);
+ }
+ if (c == 'u') {
+ return parseUniHexHandleErrors(p);
+ }
+ try p.addChar(switch (c) {
+ '\\' => c,
+ '0' => 0,
+ 'a' => 7,
+ 'b' => 8,
+ 't' => 9,
+ 'n' => 10,
+ 'v' => 11,
+ 'f' => 12,
+ 'r' => 13,
+ 'e' => 27,
+ 'x' => try parseHexByte(p, "hex escape"),
+ else => return p.err(.InvalidCharacter, "quoted escape"),
+ });
+}
+
+fn parseUniHexHandleErrors(p: *Parser) !void {
+ return parseUniHex(p) catch |e| switch (e) {
+ error.Utf8CannotEncodeSurrogateHalf => p.err(
+ .UnicodeError,
+ "unicode escape",
+ ),
+ else => e,
+ };
+}
+
+fn parseUniHex(p: *Parser) !void {
+ const msg = "unicode escape";
+
+ if (try p.readNoEof(msg) != '{') {
+ return p.err(.InvalidCharacter, msg);
+ }
+
+ const uc = try parseHex(p, u21, msg);
+ const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
+ if (c != '}') {
+ return p.err(.InvalidCharacter, msg);
+ }
+
+ const n = try std.unicode.utf8CodepointSequenceLength(uc);
+ const buf = try p.chars.addManyAsSlice(p.alloc.chars, n);
+ _ = try std.unicode.utf8Encode(uc, buf);
+}
+
+fn parseHashExpression(p: *Parser, next: Fn) !void {
+ const c = try p.readNoEof("hash expression");
+ if (try checkBlanks(p, c) != .no) {
+ return p.err(.InvalidCharacter, "hash expression");
+ }
+ if (std.ascii.isAlphabetic(c)) {
+ const r = try parseRune(p, c);
+ return parseRuneEnd(p, r, next);
+ }
+ if (c == '%') {
+ const l = try parseLabel(p);
+ return parseLabelEnd(p, l, next);
+ }
+ p.unread(c);
+ return p.subr(.parseHashDatum, next);
+}
+
+fn parseHashDatum(p: *Parser) !void {
+ return parseCladDatum(p, p.getUnread().?, .endHashDatum);
+}
+
+fn endHashDatum(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ return p.err(.InvalidCharacter, "hash datum");
+ }
+ return p.retval(p.cons(HASH, p.result));
+}
+
+fn parseRune(p: *Parser, c1: u8) !Value {
+ try p.addChar(c1);
+ var len: usize = 1;
+ while (try p.read()) |c| : (len += 1) {
+ if (len == 6 or !std.ascii.isAlphanumeric(c)) {
+ p.unread(c);
+ return p.getRune();
+ }
+ try p.addChar(c);
+ }
+ return p.getRune();
+}
+
+fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
+ const c = p.getUnread() orelse return p.jump(next, r);
+ if (c == '\\') {
+ return p.jump(next, p.cons(r, try parseBareString(p, c)));
+ }
+ if (c == '"') {
+ return p.jump(next, p.cons(r, try parseQuotedString(p, '"')));
+ }
+ if (c == '|') {
+ return p.jump(next, p.cons(r, try parseQuotedString(p, '|')));
+ }
+ p.unread(c);
+ switch (c) {
+ '#', '(', '[', '{', '\'', '`', ',' => {
+ try p.push(next);
+ p.context.val = r;
+ // Use jump to prevent recursion.
+ return p.jump(.parseRuneDatum, null);
+ },
+ else => return p.jump(next, r),
+ }
+}
+
+fn parseRuneDatum(p: *Parser) !void {
+ return parseCladDatum(p, p.getUnread().?, .endRuneDatum);
+}
+
+fn endRuneDatum(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ p.retval(p.context.val);
+ }
+ return p.retval(p.cons(p.context.val, p.result));
+}
+
+fn parseLabel(p: *Parser) !Value {
+ const label = try parseHex(p, u48, "datum label");
+ return value.fixnum.pack(label);
+}
+
+fn parseLabelEnd(p: *Parser, l: Value, next: Fn) !void {
+ const c = p.getUnread() orelse return p.err(.UnexpectedEof, "datum label");
+ if (c == '%') {
+ return p.jump(next, p.cons(LABEL, l));
+ }
+ if (c == '=') {
+ try p.push(next);
+ p.context.val = l;
+ return p.subr(.parseDatum, .endLabelDatum);
+ }
+ return p.err(.InvalidCharacter, "datum label");
+}
+
+fn endLabelDatum(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ return p.err(.InvalidCharacter, "label datum");
+ }
+ return p.retval(p.cons(LABEL, p.cons(p.context.val, p.result)));
+}
+
+fn parseList(p: *Parser, open: u8, next: Fn) !void {
+ const head = switch (open) {
+ '(' => value.nil.nil,
+ '[' => p.cons(SQUARE, value.nil.nil),
+ '{' => p.cons(BRACE, value.nil.nil),
+ else => unreachable,
+ };
+ const close: u8 = switch (open) {
+ '(' => ')',
+ '[' => ']',
+ '{' => '}',
+ else => unreachable,
+ };
+ while (try p.read()) |c| {
+ if (c == close) {
+ return p.jump(next, head);
+ }
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => {
+ try listParserSetup(p, head, close, next);
+ return p.subr(.parseUnit, .parseUnit);
+ },
+ .no => {
+ try listParserSetup(p, head, close, next);
+ p.unread(c);
+ return p.jump(.parseDatum, null);
+ },
+ }
+ }
+ return p.err(.UnexpectedEof, "list");
+}
+
+fn listParserSetup(p: *Parser, head: Value, close: u8, next: Fn) !void {
+ try p.push(next);
+ p.context.val = head;
+ p.context.char = close;
+ try p.pushContext(.continueList);
+}
+
+fn continueList(p: *Parser) !void {
+ const close = p.context.char;
+
+ if (p.result.eq(VOID)) {
+ const c = p.getUnread().?;
+ if (c == close) {
+ return endList(p);
+ }
+ return p.err(.InvalidCharacter, "list");
+ }
+
+ if (p.result.eq(LSTAIL)) {
+ return p.subr(.parseUnit, .endImproperList);
+ }
+
+ p.context.val = p.cons(p.result, p.context.val);
+
+ var c1 = p.getUnread() orelse try p.read();
+ while (c1) |c| : (c1 = try p.read()) {
+ if (c == close) {
+ return endList(p);
+ }
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => {
+ try p.pushContext(.continueList);
+ return p.subr(.parseUnit, .parseUnit);
+ },
+ .no => {
+ p.unread(c);
+ return p.subr(.parseDatum, .continueList);
+ },
+ }
+ }
+ return p.err(.UnexpectedEof, "list");
+}
+
+fn endList(p: *Parser) !void {
+ return p.retval(lib.list.reverse(p.context.val));
+}
+
+fn endImproperList(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ return p.err(.InvalidCharacter, "list tail");
+ }
+ p.context.val = lib.list.reverseWithTail(p.context.val, p.result);
+ return closeImproperList(p);
+}
+
+fn closeImproperList(p: *Parser) !void {
+ const result = p.context.val;
+ const close = p.context.char;
+ var c1 = p.getUnread() orelse try p.read();
+ while (c1) |c| : (c1 = try p.read()) {
+ if (c == close) {
+ return p.retval(result);
+ }
+ switch (try checkBlanks(p, c)) {
+ .yes => {},
+ .skip_unit => return p.subr(.parseUnit, .closeImproperList),
+ .no => return p.err(.InvalidCharacter, "after list tail"),
+ }
+ }
+ return p.err(.UnexpectedEof, "after list tail");
+}
+
+fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
+ const q = switch (c1) {
+ '\'' => QUOTE,
+ '`' => GRAVE,
+ ',' => COMMA,
+ else => unreachable,
+ };
+
+ try p.push(next);
+ p.context.val = q;
+ p.unread(try p.readNoEof("quote expression"));
+ return p.subr(.parseDatum, .endQuoteExpr);
+}
+
+fn endQuoteExpr(p: *Parser) !void {
+ if (p.result.eq(VOID)) {
+ return p.err(.InvalidCharacter, "quote expression datum");
+ }
+ return p.retval(p.cons(p.context.val, p.result));
+}
+
+// Helpers
+
+fn checkBlanks(p: *Parser, c: u8) !enum { yes, skip_unit, no } {
+ switch (c) {
+ '\t'...'\r', ' ' => return .yes,
+ ';' => {
+ const c2 = try p.read() orelse return .yes;
+ if (c2 == '\n') return .yes;
+ if (c2 == '~') return .skip_unit;
+ while (try p.read()) |c3| if (c3 == '\n') break;
+ return .yes;
+ },
+ else => return .no,
+ }
+}
+
+fn isBareChar(c: u8) bool {
+ return switch (c) {
+ // zig fmt: off
+ 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' ,
+ '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' => true,
+ // zig fmt: on
+ else => false,
+ };
+}
+
+fn isBareEsc(c: u8) bool {
+ return switch (c) {
+ 33...126 => true,
+ else => false,
+ };
+}
+
+fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
+ var uc: T = undefined;
+
+ const c1 = try p.readNoEof(emsg);
+ uc = try parseHexDigit(p, c1, emsg);
+
+ while (try p.read()) |c| {
+ if (!std.ascii.isHex(c)) {
+ p.unread(c);
+ return uc;
+ }
+ const shl = std.math.shlExact;
+ uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg);
+ uc |= try parseHexDigit(p, c, emsg);
+ }
+ return uc;
+}
+
+fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 {
+ const h1 = try p.readNoEof(emsg);
+ const h2 = try p.readNoEof(emsg);
+ const hi = try parseHexDigit(p, h1, emsg);
+ const lo = try parseHexDigit(p, h2, emsg);
+ return hi << 4 | lo;
+}
+
+fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 {
+ return switch (c) {
+ '0'...'9' => c - '0',
+ 'A'...'F' => c - 'A' + 10,
+ 'a'...'f' => c - 'a' + 10,
+ else => p.err(.InvalidCharacter, emsg),
+ };
+}
diff --git a/src/zisp/io/decoder.zig b/src/zisp/io/decoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/decoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/zisp/io/encoder.zig b/src/zisp/io/encoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/encoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/zisp/io/parser.zig b/src/zisp/io/parser.zig
new file mode 100644
index 0000000..cb96e44
--- /dev/null
+++ b/src/zisp/io/parser.zig
@@ -0,0 +1,75 @@
+const builtin = @import("builtin");
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const Parser = @import("Parser.zig");
+
+const Value = value.Value;
+
+const is_test = builtin.is_test;
+const is_debug = builtin.mode == .Debug;
+
+const ctx_size = @sizeOf(Parser.Context);
+
+pub fn Sfa(init_stack: usize, init_chars: usize) type {
+ return struct {
+ const Self = @This();
+
+ pub const init_stack_capacity: usize = init_stack;
+ pub const init_chars_capacity: usize = init_chars;
+
+ pub const init_stack_mem: usize = init_stack * ctx_size;
+ pub const init_chars_mem: usize = init_chars;
+
+ stack: std.heap.StackFallbackAllocator(init_stack_mem),
+ chars: std.heap.StackFallbackAllocator(init_chars_mem),
+
+ pub fn init(fallback: std.mem.Allocator) Self {
+ return .{
+ .stack = std.heap.stackFallback(init_stack_mem, fallback),
+ .chars = std.heap.stackFallback(init_chars_mem, fallback),
+ };
+ }
+
+ pub fn allocator(s: *Self) Parser.Alloc {
+ return .{ .stack = s.stack.get(), .chars = s.chars.get() };
+ }
+ };
+}
+
+pub const DefaultSfa = Sfa(32, 2048);
+
+pub fn initSfa(alloc: anytype) !Parser {
+ const SfaType = @typeInfo(@TypeOf(alloc)).pointer.child;
+ return Parser.init(
+ alloc.allocator(),
+ SfaType.init_stack_capacity,
+ SfaType.init_chars_capacity,
+ );
+}
+
+pub fn parse(input: std.io.AnyReader) Value {
+ return _parse(input, true);
+}
+
+pub fn _parse(
+ input: std.io.AnyReader,
+ comptime panic: bool,
+) if (panic) Value else error{ParseError}!Value {
+ const alloc = std.heap.smp_allocator;
+ var sfa = DefaultSfa.init(alloc);
+ var p = initSfa(&sfa) catch |e| if (panic) @panic("OOM") else return e;
+ defer p.deinit();
+ return p.run(input) catch |e| if (panic) {
+ const format = "Parse error: {s}, unread_char: {s}\n";
+ const unread: [4]u8 =
+ if (p.unread_char) |c|
+ "0x".* ++ std.fmt.hex(c)
+ else
+ "none".*;
+ std.debug.panic(format, .{ p.err_msg, unread });
+ } else {
+ return e;
+ };
+}
diff --git a/src/zisp/io/reader.zig b/src/zisp/io/reader.zig
new file mode 100644
index 0000000..3465cb3
--- /dev/null
+++ b/src/zisp/io/reader.zig
@@ -0,0 +1,10 @@
+// See parse.zig for details.
+
+const parser = @import("parser.zig");
+const decoder = @import("decoder.zig");
+
+const Value = @import("../value.zig").Value;
+
+pub fn readCode(input: []const u8) Value {
+ return decoder.decode(parser.parse(input, .code));
+}
diff --git a/src/zisp/io/unparser.zig b/src/zisp/io/unparser.zig
new file mode 100644
index 0000000..d703182
--- /dev/null
+++ b/src/zisp/io/unparser.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+
+const value = @import("../value.zig");
+
+const istr = value.istr;
+const seq = value.seq;
+
+const ShortString = value.ShortString;
+const OtherTag = value.OtherTag;
+const Value = value.Value;
+
+pub fn unparse(w: anytype, v: Value) anyerror!void {
+ try if (value.double.check(v))
+ unparseDouble(w, v)
+ else if (value.fixnum.check(v))
+ unparseFixnum(w, v)
+ else if (value.ptr.checkZisp(v))
+ unparseHeap(w, v)
+ else
+ unparseOther(w, v);
+}
+
+fn unparseDouble(w: anytype, v: Value) !void {
+ _ = w;
+ _ = v;
+ @panic("not implemented");
+}
+
+fn unparseFixnum(w: anytype, v: Value) !void {
+ _ = w;
+ _ = v;
+ @panic("not implemented");
+}
+
+fn unparseHeap(w: anytype, v: Value) !void {
+ const p, const t = value.ptr.unpack(v);
+ try switch (t) {
+ .pair => unparsePair(w, @ptrCast(p)),
+ .seq => unparseSeq(w, @ptrCast(p)),
+ else => @panic("not implemented"),
+ };
+}
+
+fn unparseOther(w: anytype, v: Value) !void {
+ try switch (v.other.tag) {
+ .rune => unparseRune(w, v),
+ .sstr => unparseSstr(w, v),
+ .qstr => unparseQstr(w, v),
+ .char => unparseChar(w, v),
+ .misc => unparseMisc(w, v),
+ };
+}
+
+fn unparseRune(w: anytype, v: Value) !void {
+ const name = value.rune.unpack(v);
+ try w.writeByte('#');
+ try w.writeAll(name.constSlice());
+}
+
+fn unparseSstr(w: anytype, v: Value) !void {
+ const str = value.sstr.unpack(v);
+ try w.writeAll(str.constSlice());
+}
+
+fn unparseQstr(w: anytype, v: Value) !void {
+ const str = value.sstr.unpack(v);
+ try w.writeByte('"');
+ try w.writeAll(str.constSlice());
+ try w.writeByte('"');
+}
+
+fn unparseChar(w: anytype, v: Value) !void {
+ var buf: [4]u8 = undefined;
+ const len = try std.unicode.utf8Encode(v.char.value, &buf);
+ try w.writeAll(buf[0..len]);
+}
+
+fn unparseMisc(w: anytype, v: Value) !void {
+ try switch (v.misc.value) {
+ .f => w.writeAll("#f"),
+ .t => w.writeAll("#t"),
+ .nil => w.writeAll("()"),
+ .eof => w.writeAll("#eof"),
+ .undef => w.writeAll("#undef"),
+ };
+}
+
+fn unparsePair(w: anytype, p: *[2]Value) !void {
+ try w.writeByte('(');
+ try unparse(w, p[0]);
+ var cdr = p[1];
+ while (value.pair.check(cdr)) : (cdr = value.pair.cdr(cdr)) {
+ try w.writeByte(' ');
+ try unparse(w, value.pair.car(cdr));
+ }
+ if (!value.nil.check(cdr)) {
+ try w.writeByte(' ');
+ try w.writeByte('.');
+ try w.writeByte(' ');
+ try unparse(w, cdr);
+ }
+ try w.writeByte(')');
+}
+
+fn unparseSeq(w: anytype, p: *seq.Header) !void {
+ const h = istr.getHeaderFromPtr(@ptrCast(p));
+ switch (h.type) {
+ .string => try unparseString(w, h),
+ else => @panic("not implemented"),
+ }
+}
+
+fn unparseString(w: anytype, h: *seq.Header) !void {
+ const info = h.info.string;
+ if (info.quoted) {
+ try w.writeByte('"');
+ }
+ try w.writeAll(h.bytes());
+ if (info.quoted) {
+ try w.writeByte('"');
+ }
+}
diff --git a/src/zisp/io/writer.zig b/src/zisp/io/writer.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/writer.zig
@@ -0,0 +1 @@
+// wip