From c922361115c8ee398ec4e26bb0af8cca4dcb9667 Mon Sep 17 00:00:00 2001
From: Taylan Kammer <taylan.kammer@gmail.com>
Date: Sat, 22 Feb 2025 15:04:05 +0100
Subject: update

---
 src/libzisp.zig            |   2 +-
 src/libzisp/io/decoder.zig |   1 +
 src/libzisp/io/encoder.zig |   1 +
 src/libzisp/io/parser.zig  | 683 +++++++++++++++++++++++++++++++++++++++
 src/libzisp/io/reader.zig  |  10 +
 src/libzisp/io/writer.zig  |   1 +
 src/libzisp/parser.zig     | 773 ---------------------------------------------
 7 files changed, 697 insertions(+), 774 deletions(-)
 create mode 100644 src/libzisp/io/decoder.zig
 create mode 100644 src/libzisp/io/encoder.zig
 create mode 100644 src/libzisp/io/parser.zig
 create mode 100644 src/libzisp/io/reader.zig
 create mode 100644 src/libzisp/io/writer.zig
 delete mode 100644 src/libzisp/parser.zig

diff --git a/src/libzisp.zig b/src/libzisp.zig
index be8b8b7..8141994 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -6,8 +6,8 @@ const builtin = @import("builtin");
 const testing = std.testing;
 
 pub const gc = @import("libzisp/gc.zig");
-pub const parser = @import("libzisp/parser.zig");
 pub const value = @import("libzisp/value.zig");
+pub const parser = @import("libzisp/io/parser.zig");
 
 pub const Value = value.Value;
 pub const Bucket = gc.Bucket;
diff --git a/src/libzisp/io/decoder.zig b/src/libzisp/io/decoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/libzisp/io/decoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/libzisp/io/encoder.zig b/src/libzisp/io/encoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/libzisp/io/encoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
new file mode 100644
index 0000000..71c6946
--- /dev/null
+++ b/src/libzisp/io/parser.zig
@@ -0,0 +1,683 @@
+//
+// === Parser for Code & Data ===
+//
+// Zisp s-expressions come in two flavors: code (sugar) and data (no sugar).
+//
+// However, code expressions are parsed into the same data types which the data
+// expressions can represent, so homoiconicity is preserved.
+//
+// The "sugar" used in code expressions is merely shorthand for more complex
+// data expressions, which could have been written by hand.
+//
+// Data expressions have a very simple format, and are only able to express a
+// minimal set of data types:
+//
+//   string -> foo , "foo bar"   ;symbols and strings are the same data type
+//
+//   rune   -> #foo              ;limited to 6 ASCII letters (a - z, A - Z)
+//
+//   pair   -> (DATUM . DATUM)   ;the only composite data type supported
+//
+//   nil    -> ()                ;we prefer the term nil over null
+//
+// The list short-hand syntax may be considered the only "syntax sugar" that is
+// supported by the data parser:
+//
+//   (DATUM DATUM DATUM)  ->  (DATUM . (DATUM . (DATUM . ())))
+//
+// We may use terms like "code parser" and "data parser" out of convenience,
+// although there may only be a single parser that implements both formats by
+// switching between modes.
+//
+// When the code parser encounters syntax sugar, it always transforms it into a
+// list starting with a rune, like in the following examples:
+//
+//   #(...)   -> (#HASH ...)
+//
+//   [...]    -> (#SQUARE ...)
+//
+//   'foo     -> (#QUOTE . foo)
+//
+// These can combine arbitrarily:
+//
+//   #{...}   -> (#HASH #BRACE ...)
+//
+//   #'foo    -> (#HASH #QUOTE . foo)
+//
+//   ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
+//
+// As a specialty, double-quoted strings are actually considered sugar by the
+// code parser, and are transformed as follows into data:
+//
+//   "..."    -> (#STRING . "...")
+//
+// (Otherwise, all string literals would be identifiers, or all identifiers
+// would be string literals, because Zisp doesn't differentiate strings and
+// symbols like traditional lisps.  Also, note that although we could reuse
+// #QUOTE here, instead of using #STRING, this would make it impossible to
+// differentiate between the code expressions #'foo and #"foo".)
+//
+// Runes are case-sensitive, and the code parser only emits runes using
+// upper-case letters, so lower-case runes are free for user extensions.
+//
+// You may be wondering about numbers.  As far as the parser is concerned,
+// numbers are strings.  It's the decoder (see below) that will turn bare
+// strings (those not marked with #STRING) into numbers.
+//
+// Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp.
+// The operand of #QUOTE is the entire cdr.  The same principle is used when
+// parsing other sugar:
+//
+//          Incorrect                              Correct
+//
+//   #(x y z) -> (#HASH (x y z))            #(x y z) -> (#HASH x y z)
+//
+//   [x y z]  -> (#SQUARE (x y z))          [x y z]  -> (#SQUARE x y z)
+//
+//   #{x}     -> (#HASH (#BRACE (x)))       #{x}     -> (#HASH #BRACE x)
+//
+//
+// === Decoder ===
+//
+// A separate process called "decoding" can transform simple data structures,
+// consisting of only the above types, into a richer set of Zisp data types.
+//
+// For example, the decoder may turn (#HASH ...) into a vector, as one would
+// expect a vector literal like #(...) to work in Scheme.
+//
+// Runes may be decoded in isolation as well, rather than transforming a list
+// whose head they appear in.  This is how #true and #false are implemented.
+//
+// The decoder may also perform arbitrary transforms on any type; for example,
+// it may turn bare strings (those not marked with #STRING) into numbers when
+// it's decoding data representing code.  This is how number literals are
+// implemented in Zisp.
+//
+// The decoder recognizes (#QUOTE ...) to implement the traditional quoting
+// mechanism, but in a better way:
+//
+// Traditional quote is "unhygienic" in Scheme terms.  An expression such as
+// '(foo bar) will always be read as (quote (foo bar)) regardless of what sort
+// of lexical context it appears in, so the semantics will depend on whatever
+// the identifier "quote" is bound to in that lexical context, meaning that the
+// expression may end up evaluating to something other than the list (foo bar).
+//
+// The Zisp decoder, which transforms not text to text, but objects to objects,
+// can turn (#QUOTE ...) into an abstract object which encapsulates the notion
+// of quoting, which the Zisp evaluator can recognize and act upon, ensuring
+// that an expression like '(foo bar) always turns into the list (foo bar).
+//
+// One way to think about this, in Scheme (R6RS / syntax-case) terms, is that
+// expressions like '(foo bar) turn directly into a *syntax object* when read,
+// rather than a regular list object.
+//
+// The decoder is, of course, configurable and extensible.  The transformations
+// mentioned above would be performed only when it's told to decode data which
+// represents Zisp code.  The decoder may be given a different configuration,
+// telling it to decode, for example, data which represents a different kind of
+// domain-specific data, such as application settings, build system commands,
+// complex data records with non-standard data types, and so on.
+//
+//
+// === Implementation details ===
+//
+// Instead of using recursion directly, the parser is written in something akin
+// to a manual continuation-passing style, which ensures that parsing depth is
+// not limited by the stack size.
+//
+// All state/context is passed around via a struct pointer.  The parser has a
+// main loop which calls a function, passing it the state, and also expects a
+// state pointer in return.
+//
+// The state has a .next field, which indicates which function the main loop
+// should call next.  It also has a .parent field, used as follows:
+//
+// If a function wants to call the parser recursively, it sets the .next field
+// of the current state to where the recursive call should return to, then it
+// creates a new state with a given starting point, sets its .parent field to
+// the current state, and returns the new state pointer.
+//
+// If a function wants to make the parser return, it sets the .retval field of
+// the current state, and sets .next to the .perform_return value.  This makes
+// the main loop either return to the parent state (after copying the .retval
+// field from child to parent), or if there is no parent state, it returns the
+// value as the top-level result.
+//
+// Note: While it's possible to simply set .next and return the current state,
+// to have another function be called next (possibly even setting .retval to
+// pass a value to it), this is completely unnecessary.  A few non-recursive
+// function calls will obviously not blow the stack.  It's only recursive
+// parsing that we use these fields for.
+//
+// Note 2: When calling the parser recursively, it may seem sensible to always
+// set the .next of the new state to .start_datum, because you already cleared
+// incoming whitespace and comments from the stream.  However, in some cases,
+// you must set it to .start_parse instead.  This is due to datum comments.
+// After a datum comment is parsed, the parser will ignore it and restore the
+// previous state, to try again what it was doing.  If the state was set to
+// .start_datum, this means no whitespace or comments would be tolerated after
+// the datum comment.
+//
+// Note 3: When it comes to pairs/lists, we mainly try to parse them as lists,
+// and a pair becomes a special-case of an improper list (two elements).  This
+// has the advantage of saving memory: If we implemented list parsing as pair
+// parsing, we would be calling the parser recursively, deeper and deeper, for
+// every pair that the list is made up of.  Although we're not limited by stack
+// space, thanks to the strategy described above, this would still waste memory
+// while parsing.
+//
+//
+// === Notation used in comments ===
+//
+// Some comments throughout the file give you an example of where the parser
+// currently might be in a stream.  These illustrations use the pipe symbol,
+// which looks like a cursor, to indicate the current position:
+//
+//   (foo| bar baz)   <- parser arrived at the end of the string foo
+//
+//   (foo bar baz)|   <- parser reached EOF (assuming no trailing spaces)
+//
+
+const std = @import("std");
+
+const gc = @import("../gc.zig");
+const list = @import("../list.zig");
+const value = @import("../value.zig");
+
+const Value = value.Value;
+
+const State = struct {
+    alloc: std.mem.Allocator,
+
+    input: []const u8,
+    pos: usize = 0,
+
+    mode: enum { code, data } = .code,
+
+    next: Fn = .start_parse,
+
+    parent: ?*State = null,
+
+    // Used to store various context, but most notably the stack of list
+    // elements parsed so far, so just initialize it to nil.
+    context: Value = value.nil.nil,
+
+    opening_bracket: u8 = 0,
+
+    retval: Value = value.eof.eof,
+
+    fn eof(self: *State) bool {
+        return self.pos >= self.input.len;
+    }
+
+    fn peek(self: *State) u8 {
+        return self.input[self.pos];
+    }
+
+    fn skip(self: *State) void {
+        self.pos += 1;
+    }
+
+    fn getc(self: *State) u8 {
+        const c = self.peek();
+        self.skip();
+        return c;
+    }
+
+    // Consumes whitespace and line comments.
+    fn consumeBlanks(self: *State) void {
+        while (!self.eof()) {
+            if (self.isWhitespace()) {
+                self.skip();
+            } else if (self.peek() == ';') {
+                self.skip();
+                self.consumeLineComment();
+            } else {
+                return;
+            }
+        }
+    }
+
+    fn consumeLineComment(self: *State) void {
+        while (!self.eof()) {
+            if (self.getc() == '\n') {
+                return;
+            }
+        }
+    }
+
+    fn isWhitespace(self: *State) bool {
+        return switch (self.peek()) {
+            '\t', '\n', ' ' => true,
+            else => false,
+        };
+    }
+
+    fn isFinalNull(self: *State) bool {
+        return self.peek() == 0 and self.pos == self.input.len - 1;
+    }
+
+    fn recurParse(self: *State, start_from: Fn, return_to: Fn) *State {
+        const sub = self.alloc.create(State) catch @panic("OOM");
+        sub.* = .{ .alloc = self.alloc, .input = self.input };
+        sub.pos = self.pos;
+        sub.mode = self.mode;
+        sub.next = start_from;
+        sub.parent = self;
+        self.next = return_to;
+        return sub;
+    }
+
+    fn returnDatum(self: *State, val: Value) *State {
+        self.retval = val;
+        self.next = .perform_return;
+        return self;
+    }
+
+    fn performReturn(self: *State) ?*State {
+        if (self.parent) |parent| {
+            parent.retval = self.retval;
+            parent.pos = self.pos;
+            parent.alloc.destroy(self);
+            return parent;
+        } else {
+            return null;
+        }
+    }
+};
+
+// Probably best *not* to use function pointers here, but rather dispatch to
+// functions manually based on enum value.  This should help the optimizer.
+
+const Fn = enum {
+    start_parse,
+    start_datum,
+    end_hash_datum,
+    end_rune_datum,
+    end_quote,
+    continue_list,
+    finalize_improper_list,
+    end_improper_list,
+    perform_return,
+};
+
+pub fn parse(input: []const u8) Value {
+    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+    var top = State{ .alloc = gpa.allocator(), .input = input };
+    var s = &top;
+    while (true) s = switch (s.next) {
+        .start_parse => startParse(s),
+        .start_datum => startDatum(s),
+        .end_hash_datum => endHashDatum(s),
+        .end_rune_datum => endRuneDatum(s),
+        .end_quote => endQuote(s),
+        .continue_list => continueList(s),
+        .finalize_improper_list => finalizeImproperList(s),
+        .end_improper_list => endImproperList(s),
+        .perform_return => s.performReturn() orelse return s.retval,
+    };
+}
+
+fn startParse(s: *State) *State {
+    s.consumeBlanks();
+    if (s.eof()) {
+        return s.returnDatum(value.eof.eof);
+    }
+    return switch (s.peek()) {
+        // whitespace already consumed
+        0...32, 127...255 => err(s, "invalid character"),
+        ')', ']', '}' => err(s, "unexpected closing bracket"),
+        else => startDatum(s),
+    };
+}
+
+// This is called when we *immediately* expect a datum and nothing else; for
+// example, no whitespace or comments, because they've already been consumed.
+fn startDatum(s: *State) *State {
+    if (s.eof()) {
+        return err(s, "expected datum, got EOF");
+    }
+    if (s.isWhitespace()) {
+        return err(s, "expected datum, got whitespace");
+    }
+    return switch (s.peek()) {
+        // whitespace checked above
+        0...32, 127...255 => err(s, "invalid character"),
+
+        ')', ']', '}' => err(s, "unexpected closing bracket"),
+
+        ';' => err(s, "expected datum, got semicolon"),
+
+        '#' => handleHash(s),
+
+        '"' => startQuotedString(s),
+
+        '\'', '`', ',' => startQuote(s),
+
+        '(', '[', '{' => startList(s),
+
+        // Periods are only allowed in the middle of a string, or to express
+        // improper lists, because the following look too much like typos:
+        //
+        //   (foo. bar)  (foo .bar)  (123. 456)  (123 .456)
+        //
+        '.' => err(s, "misplaced period"),
+
+        else => startBareString(s),
+    };
+}
+
+fn handleHash(s: *State) *State {
+    s.skip();
+    //
+    // We just consumed a hash.  Possibilities include:
+    //
+    //   #|foo       ;rune
+    //
+    //   #|;DATUM    ;datum comment
+    //
+    //   #|DATUM     ;hash-datum (code mode only)
+    //
+
+    if (s.eof()) {
+        return err(s, "EOF after hash");
+    }
+    if (s.isWhitespace()) {
+        return err(s, "whitespace after hash");
+    }
+
+    // Is it a rune?  #foo
+    switch (s.peek()) {
+        'a'...'z', 'A'...'Z' => return handleRune(s),
+        else => {},
+    }
+
+    // Is it a datum comment?  #;DATUM
+    if (s.peek() == ';') {
+        s.skip();
+        // Don't change s.next in this case.  Just let the parser try to redo
+        // what it was doing as soon as the commented-out datum has been read.
+        return s.recurParse(.start_datum, s.next);
+    }
+
+    // Otherwise, it must be a hash-datum.  #DATUM
+
+    // But data mode doesn't allow that.
+    if (s.mode == .data) {
+        return err(s, "use of hash-datum sequence not allowed in data mode");
+    }
+
+    return s.recurParse(.start_datum, .end_hash_datum);
+}
+
+fn handleRune(s: *State) *State {
+    const rune = readRune(s) orelse return err(s, "rune too long");
+    //
+    // Now we're at the end of the rune, but it could be a rune-datum:
+    //
+    //   #foo|(...)
+    //
+
+    if (isEndOfRune(s)) {
+        // Nope, just a stand-alone rune.
+        return s.returnDatum(rune);
+    }
+
+    // Otherwise, it's followed by a datum, like: #foo(...)
+
+    // Which is only allowed in code mode.
+    if (s.mode == .data) {
+        return err(s, "invalid use of hash in data mode");
+    }
+
+    s.context = rune;
+    return s.recurParse(.start_datum, .end_rune_datum);
+}
+
+fn readRune(s: *State) ?Value {
+    var buf: [6]u8 = undefined;
+    var i: u8 = 0;
+    while (!s.eof()) : (i += 1) switch (s.peek()) {
+        'a'...'z', 'A'...'Z' => {
+            if (i == buf.len) {
+                return null;
+            }
+            buf[i] = s.getc();
+        },
+        else => break,
+    };
+
+    // 'i' can't be 0 since this function is only called if at least one ASCII
+    // letter was seen after the hash.
+    std.debug.assert(i != 0);
+
+    return value.rune.pack(buf[0..i]);
+}
+
+fn isEndOfRune(s: *State) bool {
+    return s.eof() or switch (s.peek()) {
+        '\t', '\n', ' ', ')', ']', '}' => true,
+        else => false,
+    };
+}
+
+fn endRuneDatum(s: *State) *State {
+    return s.returnDatum(value.pair.cons(s.context, s.retval));
+}
+
+fn endHashDatum(s: *State) *State {
+    const rune = value.rune.pack("HASH");
+    return s.returnDatum(value.pair.cons(rune, s.retval));
+}
+
+fn startQuotedString(s: *State) *State {
+    // We're at |"..." so consume the opening quote before we start reading.
+    s.skip();
+
+    const str = readQuotedString(s) catch return err(s, "unclosed string");
+    if (s.mode == .code) {
+        // "foo bar" => (#STRING . "foo bar")
+        const rune = value.rune.pack("STRING");
+        const pair = value.pair.cons(rune, str);
+        return s.returnDatum(pair);
+    } else {
+        return s.returnDatum(str);
+    }
+}
+
+// RQS = Read Quoted String
+const RqsError = enum { Unclosed };
+
+fn readQuotedString(s: *State) !Value {
+    return try readQuotedSstr(s) orelse readQuotedLongString(s);
+}
+
+fn readQuotedSstr(s: *State) !?Value {
+    // We will reset to this position if we fail.
+    const start_pos = s.pos;
+
+    var buf: [6]u8 = undefined;
+    var i: u8 = 0;
+    while (!s.eof()) {
+        const c = s.getc();
+        if (c == '"') {
+            // ok, return what we accumulated
+            return value.sstr.pack(buf[0..i]);
+        }
+        if (i == 6) {
+            // failed; reset and bail out
+            s.pos = start_pos;
+            return null;
+        }
+        // ok, save this byte and go on
+        buf[i] = c;
+        i += 1;
+    }
+    return error.Unclosed;
+}
+
+fn readQuotedLongString(s: *State) Value {
+    return err(s, "NOT YET IMPLEMENTED");
+}
+
+fn startBareString(s: *State) *State {
+    // We're at |foo so start reading directly.
+    return readBareSstr(s) orelse readBareLongString(s);
+}
+
+fn readBareSstr(s: *State) ?*State {
+    // We will reset to this position if we fail.
+    const start_pos = s.pos;
+
+    var buf: [6]u8 = undefined;
+    var i: u8 = 0;
+    while (!s.eof()) : (i += 1) {
+        if (isBareStringEnd(s)) {
+            break;
+        }
+        if (i == buf.len) {
+            // failed; reset and bail out
+            s.pos = start_pos;
+            return null;
+        }
+        buf[i] = s.getc();
+    }
+
+    return s.returnDatum(value.sstr.pack(buf[0..i]));
+}
+
+fn isBareStringEnd(s: *State) bool {
+    // We will ignore illegal characters here, because they aren't consumed by
+    // this function; whatever code comes next must handle them.
+    return s.eof() or switch (s.peek()) {
+        0...32, 127...255 => true,
+        '(', ')', '[', ']', '{', '}', ';', '#', '"', '\'', '`', ',' => true,
+        else => false,
+    };
+}
+
+fn readBareLongString(s: *State) *State {
+    return err(s, "NOT YET IMPLEMENTED");
+}
+
+fn startQuote(s: *State) *State {
+    // We're at one of:  |'...  |`...  |,...
+    s.context = value.rune.pack(switch (s.getc()) {
+        '\'' => "QUOTE",
+        '`' => "GRAVE",
+        ',' => "COMMA",
+        else => unreachable,
+    });
+    return s.recurParse(.start_datum, .end_quote);
+}
+
+fn endQuote(s: *State) *State {
+    return s.returnDatum(value.pair.cons(s.context, s.retval));
+}
+
+// List processing is, unsurprisingly, the most complicated, and it's made even
+// more complicated by the possibility of datum comments in strange places...
+
+fn startList(s: *State) *State {
+    const open = s.getc();
+
+    if (s.mode == .data and open != '(') {
+        return err(s, "invalid opening bracket in data mode");
+    }
+
+    s.consumeBlanks();
+    if (s.eof()) {
+        return err(s, "unexpected EOF while parsing list");
+    }
+
+    s.opening_bracket = open;
+    return if (isEndOfList(s))
+        endList(s)
+    else
+        s.recurParse(.start_parse, .continue_list);
+}
+
+fn isEndOfList(s: *State) bool {
+    return switch (s.peek()) {
+        ')', ']', '}' => true,
+        else => false,
+    };
+}
+
+fn continueList(s: *State) *State {
+    s.context = value.pair.cons(s.retval, s.context);
+
+    s.consumeBlanks();
+    if (s.eof()) {
+        return err(s, "unexpected EOF while parsing list");
+    }
+
+    if (isEndOfList(s)) {
+        s.context = list.reverse(s.context);
+        return endList(s);
+    }
+
+    if (s.peek() == '.') {
+        s.skip();
+        // Scheme allows (foo .(bar)) but we don't.  Mind your spaces!
+        if (!s.isWhitespace()) {
+            return err(s, "misplaced period");
+        }
+        return s.recurParse(.start_parse, .finalize_improper_list);
+    }
+
+    return s.recurParse(.start_parse, .continue_list);
+}
+
+fn finalizeImproperList(s: *State) *State {
+    s.context = list.reverseWithTail(s.context, s.retval);
+    return endImproperList(s);
+}
+
+fn endImproperList(s: *State) *State {
+    s.consumeBlanks();
+    if (s.eof()) {
+        return err(s, "unexpected EOF while parsing list");
+    }
+
+    if (isEndOfList(s)) {
+        return endList(s);
+    }
+
+    if (s.getc() == '#') {
+        if (s.eof()) {
+            return err(s, "unexpected EOF after hash while parsing list");
+        }
+        if (s.getc() == ';') {
+            return s.recurParse(.start_datum, .end_improper_list);
+        }
+    }
+
+    return err(s, "malformed list / extra datum at end of improper list");
+}
+
+fn endList(s: *State) *State {
+    const open = s.opening_bracket;
+    const char = s.getc();
+
+    // Check for proper ending: (foo bar baz)
+    if (open == '(' and char == ')') {
+        return s.returnDatum(s.context);
+    }
+    if (open == '[' and char == ']') {
+        const rune = value.rune.pack("SQUARE");
+        return s.returnDatum(value.pair.cons(rune, s.context));
+    }
+    if (open == '{' and char == '}') {
+        const rune = value.rune.pack("BRACE");
+        return s.returnDatum(value.pair.cons(rune, s.context));
+    }
+
+    return err(s, "wrong closing bracket for list");
+}
+
+fn err(s: *State, msg: []const u8) noreturn {
+    std.debug.print("{s}\n", .{msg});
+    std.debug.print("pos: {}\n", .{s.pos});
+    @panic("parse error");
+}
diff --git a/src/libzisp/io/reader.zig b/src/libzisp/io/reader.zig
new file mode 100644
index 0000000..d6de79d
--- /dev/null
+++ b/src/libzisp/io/reader.zig
@@ -0,0 +1,10 @@
+// See parse.zig for details.
+
+const parser = @import("parser.zig");
+const decoder = @import("decoder.zig");
+
+const Value = @import("../value.zig").Value;
+
+pub fn readCode(input: []const u8) Value {
+    return decoder.decode(parser.parse(input));
+}
diff --git a/src/libzisp/io/writer.zig b/src/libzisp/io/writer.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/libzisp/io/writer.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/libzisp/parser.zig b/src/libzisp/parser.zig
deleted file mode 100644
index 1c58687..0000000
--- a/src/libzisp/parser.zig
+++ /dev/null
@@ -1,773 +0,0 @@
-//
-// === Parser for Code & Data ===
-//
-// Zisp s-expressions come in two flavors: code (sugar) and data (no sugar).
-//
-// However, code expressions are parsed into the same data types which the data
-// expressions can represent, so homoiconicity is preserved.
-//
-// The "sugar" used in code expressions is merely shorthand for more complex
-// data expressions, which could have been written by hand.
-//
-// Data expressions have a very simple format, and are only able to express a
-// minimal set of data types:
-//
-//   string -> foo , "foo bar"     ;symbols and strings are the same data type
-//
-//   number -> -1.2 , +nan.0 , ... ;this is the most complex type to represent
-//
-//   rune   -> #foo                ;limited to 6 ASCII letters (a - z, A - Z)
-//
-//   pair   -> (DATUM . DATUM)     ;the only composite data type supported
-//
-//   nil    -> ()                  ;we prefer the term nil over null
-//
-// The list short-hand syntax may be considered the only "syntax sugar" that is
-// supported by the data parser:
-//
-//   (DATUM DATUM DATUM)  ->  (DATUM . (DATUM . (DATUM . ())))
-//
-// We may use terms like "code parser" and "data parser" out of convenience,
-// although there may only be a single parser that implements both formats by
-// switching between modes.
-//
-// When the code parser encounters syntax sugar, it always transforms it into a
-// list starting with a rune, like in the following examples:
-//
-//   #(...)   -> (#HASH ...)
-//
-//   [...]    -> (#SQUARE ...)
-//
-//   'foo     -> (#QUOTE . foo)
-//
-// These can combine arbitrarily:
-//
-//   #{...}   -> (#HASH #BRACE ...)
-//
-//   #'foo    -> (#HASH #QUOTE . foo)
-//
-//   ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
-//
-// As a specialty, double-quoted strings are actually considered sugar by the
-// code parser, and are transformed as follows into data:
-//
-//   "..."    -> (#STRING . "...")
-//
-// (Otherwise, all string literals would be identifiers, or all identifiers
-// would be string literals, because Zisp doesn't differentiate strings and
-// symbols like traditional lisps.  Also, note that although we could reuse
-// #QUOTE here, instead of using #STRING, this would make it impossible to
-// differentiate between the expressions #'foo and #"foo" once parsing is
-// finished, even though we may want to give them different meanings.)
-//
-// Runes are case-sensitive, and the code parser only emits runes using
-// upper-case letters, so lower-case runes are free for user extensions.
-//
-// Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp.
-// The operand of #QUOTE is the entire cdr.  The same principle is used when
-// parsing other sugar:
-//
-//          Incorrect                              Correct
-//
-//   #(x y z) -> (#HASH (x y z))            #(x y z) -> (#HASH x y z)
-//
-//   [x y z]  -> (#SQUARE (x y z))          [x y z]  -> (#SQUARE x y z)
-//
-//   #{x}     -> (#HASH (#BRACE (x)))       #{x}     -> (#HASH #BRACE x)
-//
-//
-// === Decoder ===
-//
-// A separate process called "decoding" can transform simple data structures,
-// consisting of only the above types, into a richer set of Zisp data types.
-//
-// For example, the decoder may turn (#HASH ...) into a vector, as one would
-// expect a vector literal like #(...) to work in Scheme.
-//
-// Runes may be decoded in isolation as well, rather than transforming a list
-// whose head they appear in.  This is how #true and #false are implemented.
-//
-// The decoder recognizes (#QUOTE ...) to implement the traditional quoting
-// mechanism, but in a better way:
-//
-// Traditional quote is "unhygienic" in Scheme terms.  An expression such as
-// '(foo bar) will always be read as (quote (foo bar)) regardless of what sort
-// of lexical context it appears in, so the semantics will depend on whatever
-// the identifier "quote" is bound to in that lexical context.
-//
-// The Zisp decoder, which transforms not text to text, but objects to objects,
-// can turn (#QUOTE ...) into an abstract object which encapsulates the notion
-// of quoting, which the Zisp evaluator can recognize and act upon.
-//
-// One way to think about this, in Scheme (R6RS / syntax-case) terms, is that
-// expressions like '(foo bar) turn directly into a *syntax object* when read,
-// rather than a regular list object.
-//
-//
-// === Implementation details ===
-//
-// Instead of using recursion directly, the parser is written in something akin
-// to a manual continuation-passing style, which ensures that parsing depth is
-// not limited by the stack size.
-//
-// All state/context is passed around via a struct pointer.  The parser has a
-// main loop which calls a function, passing it the state, and also expects a
-// state pointer in return.
-//
-// The state has a .next field, which indicates which function the main loop
-// should call next.  It also has a .parent field, used as follows:
-//
-// If a function wants to call the parser recursively, it sets the .next field
-// of the current state to where the recursive call should return to, then it
-// creates a new state with a given starting point, sets its .parent field to
-// the current state, and returns the new state pointer.
-//
-// If a function wants to make the parser return, it sets the .retval field of
-// the current state, and sets .next to the .perform_return value.  This makes
-// the main loop either return to the parent state (after copying the .retval
-// field from child to parent), or if there is no parent state, it returns the
-// value as the top-level result.
-//
-// Note: While it's possible to simply set .next and return the current state,
-// to have another function be called next (possibly even setting .retval to
-// pass a value to it), this is completely unnecessary.  A few non-recursive
-// function calls will obviously not blow the stack.  It's only recursive
-// parsing that we use these fields for.
-//
-// Note 2: When calling the parser recursively, it may seem sensible to always
-// set the .next of the new state to .start_datum, because you already cleared
-// incoming whitespace and comments from the stream.  However, in some cases,
-// you must set it to .start_parse instead.  This is due to datum comments.
-// After a datum comment is parsed, the parser will ignore it and restore the
-// previous state, to try again what it was doing.  If the state was set to
-// .start_datum, this means no whitespace or comments would be tolerated after
-// the datum comment.
-//
-//
-// === Notation used in comments ===
-//
-// Some comments throughout the file give you an example of where the parser
-// currently might be in a stream.  These illustrations use the pipe symbol,
-// which looks like a cursor, to indicate the current position:
-//
-//   (foo| bar baz)   <- parser arrived at the end of the string foo
-//
-//   (foo bar baz)|   <- parser reached EOF (assuming no trailing spaces)
-//
-
-const std = @import("std");
-
-const gc = @import("gc.zig");
-const list = @import("list.zig");
-const value = @import("value.zig");
-
-const Value = value.Value;
-
-const State = struct {
-    alloc: std.mem.Allocator,
-
-    input: []const u8,
-    pos: usize = 0,
-
-    mode: enum { code, data } = .code,
-
-    next: Fn = .start_parse,
-
-    parent: ?*State = null,
-
-    datum_rune: Value = value.boole.f,
-    list_stack: Value = value.nil.nil,
-    opening_bracket: u8 = 0,
-    opening_quote: enum { quote, grave, comma } = .quote,
-
-    retval: Value = value.eof.eof,
-
-    fn eof(self: *State) bool {
-        return self.pos >= self.input.len;
-    }
-
-    fn peek(self: *State) u8 {
-        return self.input[self.pos];
-    }
-
-    fn skip(self: *State) void {
-        self.pos += 1;
-    }
-
-    fn getc(self: *State) u8 {
-        const c = self.peek();
-        self.skip();
-        return c;
-    }
-
-    // Consumes whitespace and line comments.
-    fn consumeBlanks(self: *State) void {
-        while (!self.eof()) {
-            if (self.isWhitespace()) {
-                self.skip();
-            } else if (self.peek() == ';') {
-                self.skip();
-                self.consumeLineComment();
-            } else {
-                return;
-            }
-        }
-    }
-
-    fn consumeLineComment(self: *State) void {
-        while (!self.eof()) {
-            if (self.getc() == '\n') {
-                return;
-            }
-        }
-    }
-
-    fn isWhitespace(self: *State) bool {
-        return switch (self.peek()) {
-            '\t', '\n', ' ' => true,
-            else => false,
-        };
-    }
-
-    fn isFinalNull(self: *State) bool {
-        return self.peek() == 0 and self.pos == self.input.len - 1;
-    }
-
-    fn recurParse(self: *State, start_from: Fn, return_to: Fn) *State {
-        const sub = self.alloc.create(State) catch @panic("OOM");
-        sub.* = .{ .alloc = self.alloc, .input = self.input };
-        sub.pos = self.pos;
-        sub.mode = self.mode;
-        sub.next = start_from;
-        sub.parent = self;
-        self.next = return_to;
-        return sub;
-    }
-
-    fn returnDatum(self: *State, val: Value) *State {
-        self.retval = val;
-        self.next = .perform_return;
-        return self;
-    }
-
-    fn performReturn(self: *State) ?*State {
-        if (self.parent) |parent| {
-            parent.retval = self.retval;
-            parent.pos = self.pos;
-            parent.alloc.destroy(self);
-            return parent;
-        } else {
-            return null;
-        }
-    }
-};
-
-// Probably best *not* to use function pointers here, but rather dispatch to
-// functions manually based on enum value.  This should help the optimizer.
-
-const Fn = enum {
-    start_parse,
-    start_datum,
-    end_hash_datum,
-    end_rune_datum,
-    end_quote,
-    continue_list,
-    end_improper_list,
-    handle_trailing_datum_comments,
-    perform_return,
-};
-
-pub fn parse(input: []const u8) Value {
-    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
-    var top = State{ .alloc = gpa.allocator(), .input = input };
-    var s = &top;
-    while (true) s = switch (s.next) {
-        .start_parse => startParse(s),
-        .start_datum => startDatum(s),
-        .end_hash_datum => endHashDatum(s),
-        .end_rune_datum => endRuneDatum(s),
-        .end_quote => endQuote(s),
-        .continue_list => continueList(s),
-        .end_improper_list => endImproperList(s),
-        .handle_trailing_datum_comments => handleTrailingDatumComments(s),
-        .perform_return => s.performReturn() orelse return s.retval,
-    };
-}
-
-fn startParse(s: *State) *State {
-    s.consumeBlanks();
-    if (s.eof()) {
-        return s.returnDatum(value.eof.eof);
-    }
-    return switch (s.peek()) {
-        // whitespace already consumed
-        0...31, 127...255 => err(s, "invalid character"),
-        ')', ']', '}' => err(s, "unexpected closing bracket"),
-        else => startDatum(s),
-    };
-}
-
-// This is called when we *immediately* expect a datum and nothing else; for
-// example, no whitespace or comments, because they've already been consumed.
-fn startDatum(s: *State) *State {
-    if (s.eof()) {
-        return err(s, "expected datum, got EOF");
-    }
-    if (s.isWhitespace()) {
-        return err(s, "expected datum, got whitespace");
-    }
-    return switch (s.peek()) {
-        // whitespace checked above
-        0...31, 127...255 => err(s, "invalid character"),
-
-        ')', ']', '}' => err(s, "unexpected closing bracket"),
-
-        ';' => err(s, "expected datum, got semicolon"),
-
-        '#' => handleHash(s),
-
-        '"' => startQuotedString(s),
-
-        '\'', '`', ',' => startQuote(s),
-
-        '(', '[', '{' => startList(s),
-
-        '+', '-' => handlePlusMinus(s),
-
-        '0'...'9' => handleDigit(s),
-
-        // Periods only allowed between digits, and to express improper lists.
-        // Things like the following look too much like it could be a typo:
-        //
-        //   (foo .5) (foo .bar)
-        //
-        '.' => err(s, "misplaced period"),
-
-        else => startBareString(s),
-    };
-}
-
-fn handleHash(s: *State) *State {
-    s.skip();
-    //
-    // We just consumed a hash.  Possibilities include:
-    //
-    //   #|foo       ;rune
-    //
-    //   #|;DATUM    ;datum comment
-    //
-    //   #|DATUM     ;hash-datum (code mode only)
-    //
-
-    if (s.eof()) {
-        return err(s, "EOF after hash");
-    }
-    if (s.isWhitespace()) {
-        return err(s, "whitespace after hash");
-    }
-
-    // Is it a rune?  #foo
-    switch (s.peek()) {
-        'A'...'Z', 'a'...'z' => return handleRune(s),
-        else => {},
-    }
-
-    // Is it a datum comment?  #;DATUM
-    if (s.peek() == ';') {
-        s.skip();
-        // Don't change s.next in this case.  Just let the parser try to redo
-        // what it was doing as soon as the commented-out datum has been read.
-        return s.recurParse(.start_datum, s.next);
-    }
-
-    // Otherwise, it must be a hash-datum.  #DATUM
-
-    // But data mode doesn't allow that.
-    if (s.mode == .data) {
-        return err(s, "use of hash-datum sequence not allowed in data mode");
-    }
-
-    return s.recurParse(.start_datum, .end_hash_datum);
-}
-
-fn handleRune(s: *State) *State {
-    const rune = readRune(s) orelse return err(s, "rune too long");
-
-    //
-    // Now we're at the end of the rune, but it could be a rune-datum:
-    //
-    //   #foo|(...)
-    //
-
-    if (isEndOfRune(s)) {
-        // Nope, just a stand-alone rune.
-        return s.returnDatum(rune);
-    }
-
-    // Otherwise, it's followed by a datum, like: #foo(...)
-
-    // Which is only allowed in code mode.
-    if (s.mode == .data) {
-        return err(s, "invalid use of hash in data mode");
-    }
-
-    s.datum_rune = rune;
-    return s.recurParse(.start_datum, .end_rune_datum);
-}
-
-fn readRune(s: *State) ?Value {
-    var buf: [6]u8 = undefined;
-    var i: u8 = 0;
-    while (!s.eof()) : (i += 1) switch (s.peek()) {
-        'a'...'z', 'A'...'Z' => {
-            if (i == buf.len) {
-                return null;
-            }
-            buf[i] = s.getc();
-        },
-        else => break,
-    };
-
-    // 'i' can't be 0 since this function is only called if at least one ASCII
-    // letter was seen after the hash.
-    std.debug.assert(i != 0);
-
-    return value.rune.pack(buf[0..i]);
-}
-
-fn isEndOfRune(s: *State) bool {
-    return s.eof() or switch (s.peek()) {
-        '\t', '\n', ' ', ')', ']', '}' => true,
-        else => false,
-    };
-}
-
-fn endRuneDatum(s: *State) *State {
-    return s.returnDatum(value.pair.cons(
-        s.datum_rune,
-        s.retval,
-    ));
-}
-
-fn endHashDatum(s: *State) *State {
-    return s.returnDatum(value.pair.cons(
-        value.rune.pack("HASH"),
-        s.retval,
-    ));
-}
-
-fn startQuotedString(s: *State) *State {
-    // Consume opening quote.
-    s.skip();
-
-    const str = readQuotedString(s) catch return err(s, "unclosed string");
-    if (s.mode == .code) {
-        // "foo bar" => (#STRING . "foo bar")
-        const rune = value.rune.pack("STRING");
-        const pair = value.pair.cons(rune, str);
-        return s.returnDatum(pair);
-    } else {
-        return s.returnDatum(str);
-    }
-}
-
-// RQS = Read Quoted String
-const RqsError = enum { Unclosed };
-
-fn readQuotedString(s: *State) !Value {
-    return try readQuotedSstr(s) orelse readQuotedLongString(s);
-}
-
-fn readQuotedSstr(s: *State) !?Value {
-    // We will reset to this position if we fail.
-    const start_pos = s.pos;
-
-    var buf: [6]u8 = undefined;
-    var i: u8 = 0;
-    while (!s.eof()) {
-        const c = s.getc();
-        if (c == '"') {
-            // ok, return what we accumulated
-            return value.sstr.pack(buf[0..i]);
-        }
-        if (i == 6) {
-            // failed; reset and bail out
-            s.pos = start_pos;
-            return null;
-        }
-        // ok, save this byte and go on
-        buf[i] = c;
-        i += 1;
-    }
-    return error.Unclosed;
-}
-
-fn readQuotedLongString(s: *State) Value {
-    return err(s, "NOT YET IMPLEMENTED");
-}
-
-fn startBareString(s: *State) *State {
-    return readBareSstr(s) orelse readBareLongString(s);
-}
-
-fn readBareSstr(s: *State) ?*State {
-    // We will reset to this position if we fail.
-    const start_pos = s.pos;
-
-    var buf: [6]u8 = undefined;
-    var i: u8 = 0;
-    while (!s.eof()) : (i += 1) {
-        if (isBareStringEnd(s)) {
-            break;
-        }
-        if (i == buf.len) {
-            // failed; reset and bail out
-            s.pos = start_pos;
-            return null;
-        }
-        buf[i] = s.getc();
-    }
-
-    return s.returnDatum(value.sstr.pack(buf[0..i]));
-}
-
-fn isBareStringEnd(s: *State) bool {
-    // We will ignore illegal characters here, because they aren't consumed by
-    // this function; whatever code comes next must handle them.
-    return s.eof() or switch (s.peek()) {
-        0...32, 127...255 => true,
-        '(', ')', '[', ']', '{', '}', ';', '#', '"', '\'', '`', ',' => true,
-        else => false,
-    };
-}
-
-fn readBareLongString(s: *State) *State {
-    return err(s, "NOT YET IMPLEMENTED");
-}
-
-fn startQuote(s: *State) *State {
-    s.opening_quote = switch (s.getc()) {
-        '\'' => .quote,
-        '`' => .grave,
-        ',' => .comma,
-        else => unreachable,
-    };
-    return s.recurParse(.start_datum, .end_quote);
-}
-
-fn endQuote(s: *State) *State {
-    const name = switch (s.opening_quote) {
-        .quote => "QUOTE",
-        .grave => "GRAVE",
-        .comma => "COMMA",
-    };
-    return s.returnDatum(value.pair.cons(
-        value.rune.pack(name),
-        s.retval,
-    ));
-}
-
-fn startList(s: *State) *State {
-    const open = s.getc();
-
-    if (s.mode == .data and open != '(') {
-        return err(s, "invalid opening bracket in data mode");
-    }
-
-    s.consumeBlanks();
-
-    if (s.eof()) {
-        return err(s, "unexpected EOF while parsing list");
-    }
-
-    const char = s.peek();
-
-    // Check for empty lists: (), [], {}
-    if (open == '(' and char == ')') {
-        s.skip();
-        return s.returnDatum(value.nil.nil);
-    }
-    if (open == '[' and char == ']') {
-        s.skip();
-        return s.returnDatum(value.pair.cons(
-            value.rune.pack("SQUARE"),
-            value.nil.nil,
-        ));
-    }
-    if (open == '{' and char == '}') {
-        s.skip();
-        return s.returnDatum(value.pair.cons(
-            value.rune.pack("BRACE"),
-            value.nil.nil,
-        ));
-    }
-
-    s.opening_bracket = open;
-
-    // Now we're facing the first element of the list.
-    return s.recurParse(.start_parse, .continue_list);
-}
-
-fn continueList(s: *State) *State {
-    //
-    // We now consumed a list element and are at its end.  Possibilities:
-    //
-    //   (... foo| )
-    //
-    //   (... foo| . bar)
-    //
-    //   (... foo| bar baz ...)
-    //
-    // (This may be the first element, or any other; doesn't matter.)
-    //
-
-    s.consumeBlanks();
-
-    if (s.eof()) {
-        return err(s, "unexpected EOF while parsing list");
-    }
-
-    const stack = value.pair.cons(s.retval, s.list_stack);
-
-    const open = s.opening_bracket;
-    const char = s.peek();
-
-    // Check for proper ending: (foo bar baz)
-    if (open == '(' and char == ')') {
-        s.skip();
-        return s.returnDatum(list.reverse(stack));
-    }
-    if (open == '[' and char == ']') {
-        s.skip();
-        return s.returnDatum(value.pair.cons(
-            value.rune.pack("SQUARE"),
-            list.reverse(stack),
-        ));
-    }
-    if (open == '{' and char == '}') {
-        s.skip();
-        return s.returnDatum(value.pair.cons(
-            value.rune.pack("BRACE"),
-            list.reverse(stack),
-        ));
-    }
-
-    s.list_stack = stack;
-
-    // Check for improper ending: (foo bar . baz)
-    if (char == '.') {
-        s.skip();
-
-        // We should now be at (... foo .| bar) and whitespace must follow.
-        // Scheme allows (foo .(bar)) but we don't.  Mind your spaces!
-        if (!s.isWhitespace()) {
-            return err(s, "misplaced period");
-        }
-
-        return s.recurParse(.start_parse, .end_improper_list);
-    }
-
-    // OK, we're at the next element now and the list is going on.
-    return s.recurParse(.start_parse, .continue_list);
-}
-
-fn endImproperList(s: *State) *State {
-    //
-    // We're at the very end of an improper list now:
-    //
-    //   (... foo . bar| )
-    //
-    // There's another sneaky possibility though: trailing datum comments!
-    //
-    //   (... foo . bar #;DATUM ... )
-    //
-    // These were being handled "automatically" while parsing regular list
-    // elements, but here we have to special-handle them; see below.
-    //
-
-    s.consumeBlanks();
-
-    if (s.eof()) {
-        return err(s, "unexpected EOF while parsing list");
-    }
-
-    const result = list.reverseWithTail(s.list_stack, s.retval);
-
-    const char = s.getc();
-    const open = s.opening_bracket;
-
-    if (open == '(' and char == ')') {
-        return s.returnDatum(result);
-    }
-    if (open == '[' and char == ']') {
-        const rune = value.rune.pack("SQUARE");
-        return s.returnDatum(value.pair.cons(rune, result));
-    }
-    if (open == '{' and char == '}') {
-        const rune = value.rune.pack("BRACE");
-        return s.returnDatum(value.pair.cons(rune, result));
-    }
-
-    // Handle trailing datum comments, but don't allow anything else!
-
-    if (char == '#' and s.peek() == ';') {
-        s.skip();
-
-        // We will "abuse" the list_stack field to save the result.
-        s.list_stack = result;
-        return s.recurParse(.start_datum, .handle_trailing_datum_comments);
-    }
-
-    return err(s, "malformed list / extra datum at end of improper list");
-}
-
-fn handleTrailingDatumComments(s: *State) *State {
-    s.consumeBlanks();
-
-    if (s.eof()) {
-        return err(s, "unexpected EOF while parsing list");
-    }
-
-    const result = s.list_stack;
-
-    const char = s.getc();
-    const open = s.opening_bracket;
-
-    if (open == '(' and char == ')') {
-        return s.returnDatum(result);
-    }
-    if (open == '[' and char == ']') {
-        const rune = value.rune.pack("SQUARE");
-        return s.returnDatum(value.pair.cons(rune, result));
-    }
-    if (open == '{' and char == '}') {
-        const rune = value.rune.pack("BRACE");
-        return s.returnDatum(value.pair.cons(rune, result));
-    }
-
-    // Handle trailing datum comments, but don't allow anything else!
-
-    if (char == '#' and s.peek() == ';') {
-        s.skip();
-
-        // We will "abuse" the list_stack field to save the result.
-        s.list_stack = result;
-        return s.recurParse(.start_datum, .handle_trailing_datum_comments);
-    }
-
-    return err(s, "malformed list / extra datum at end of improper list");
-}
-
-fn handlePlusMinus(s: *State) *State {
-    return s;
-}
-
-fn handleDigit(s: *State) *State {
-    return s;
-}
-
-fn err(s: *State, msg: []const u8) noreturn {
-    std.debug.print("{s}\n", .{msg});
-    std.debug.print("pos: {}\n", .{s.pos});
-    @panic("parse error");
-}
-- 
cgit v1.2.3