summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Taylan Kammer <taylan.kammer@gmail.com> 2025-02-22 18:01:33 +0100
committer Taylan Kammer <taylan.kammer@gmail.com> 2025-02-22 18:01:33 +0100
commit b7fb551ae61d26c30e6078f1f617862430141ce3 (patch)
tree 6ca7992432d11e8997def25671561454c7660c92
parent c922361115c8ee398ec4e26bb0af8cca4dcb9667 (diff)
update
-rw-r--r-- src/libzisp.zig 16
-rw-r--r-- src/libzisp/io.zig 8
-rw-r--r-- src/libzisp/io/parser.zig 136
-rw-r--r-- src/libzisp/io/reader.zig 2
-rw-r--r-- src/libzisp/io/unparser.zig 1
-rw-r--r-- src/libzisp/lib.zig 1
-rw-r--r-- src/libzisp/lib/list.zig (renamed from src/libzisp/list.zig) 2
7 files changed, 94 insertions, 72 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig
index 8141994..400f9fb 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -6,11 +6,11 @@ const builtin = @import("builtin");
const testing = std.testing;
pub const gc = @import("libzisp/gc.zig");
+pub const io = @import("libzisp/io.zig");
+pub const lib = @import("libzisp/lib.zig");
pub const value = @import("libzisp/value.zig");
-pub const parser = @import("libzisp/io/parser.zig");
pub const Value = value.Value;
-pub const Bucket = gc.Bucket;
test "double" {
const d1: f64 = 0.123456789;
@@ -45,7 +45,7 @@ test "fixnum" {
test "ptr" {
const ptr = value.ptr;
- const val: [*]Bucket = @ptrFromInt(256);
+ const val: [*]gc.Bucket = @ptrFromInt(256);
const tag = ptr.Tag.string;
const p = ptr.pack(val, tag);
@@ -251,7 +251,7 @@ test "pair" {
}
test "parse" {
- const val = parser.parse("\"foo\"");
+ const val = io.parser.parseCode("\"foo\"");
const r, const rl = value.rune.unpack(value.pair.car(val));
const s, const sl = value.sstr.unpack(value.pair.cdr(val));
try std.testing.expectEqualStrings("STRING", r[0..rl]);
@@ -259,7 +259,7 @@ test "parse" {
}
test "parse2" {
- const val = parser.parse(
+ const val = io.parser.parseCode(
\\ ;; Testing some crazy datum comments
\\ ##;"bar"#;([x #"y"]{##`,'z})"foo"
\\ ;; end
@@ -278,7 +278,9 @@ test "parse2" {
}
test "parse3" {
- const val = parser.parse("(foo #;x #;(x y) #;x #bar [#x #\"baz\"] 'bat)");
+ const val = io.parser.parseCode(
+ \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat)
+ );
const car = value.pair.car;
const cdr = value.pair.cdr;
@@ -292,7 +294,7 @@ test "parse3" {
}
test "parse4" {
- const val = parser.parse("(foo . #;x bar #;y)");
+ const val = io.parser.parseCode("(foo . #;x bar #;y)");
const s, const sl = value.sstr.unpack(value.pair.car(val));
try std.testing.expectEqualStrings("foo", s[0..sl]);
diff --git a/src/libzisp/io.zig b/src/libzisp/io.zig
new file mode 100644
index 0000000..3d6d384
--- /dev/null
+++ b/src/libzisp/io.zig
@@ -0,0 +1,8 @@
+pub const parser = @import("io/parser.zig");
+pub const unparser = @import("io/unparser.zig");
+
+pub const decoder = @import("io/decoder.zig");
+pub const encoder = @import("io/encoder.zig");
+
+pub const reader = @import("io/reader.zig");
+pub const writer = @import("io/writer.zig");
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index 71c6946..5162c2f 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -9,19 +9,21 @@
// The "sugar" used in code expressions is merely shorthand for more complex
// data expressions, which could have been written by hand.
//
-// Data expressions have a very simple format, and are only able to express a
-// minimal set of data types:
+// Data expressions have a very simple format, and are only able to express the
+// bare minimum set of data types needed to represent more complex data:
//
-// string -> foo , "foo bar" ;symbols and strings are the same data type
+// type format comment
+// ---- ------ -------
//
-// rune -> #foo ;limited to 6 ASCII letters (a - z, A - Z)
+// string foo , "foo bar" symbols and strings are the same data type
//
-// pair -> (DATUM . DATUM) ;the only composite data type supported
+// rune #name name is 1-6 ASCII letters (a - z, A - Z)
//
-// nil -> () ;we prefer the term nil over null
+// pair (DATUM . DATUM) the only composite data type supported
//
-// The list short-hand syntax may be considered the only "syntax sugar" that is
-// supported by the data parser:
+// nil () we prefer the term nil over null
+//
+// The list short-hand syntax is the only "syntax sugar" supported in data:
//
// (DATUM DATUM DATUM) -> (DATUM . (DATUM . (DATUM . ())))
//
@@ -62,7 +64,7 @@
//
// You may be wondering about numbers. As far as the parser is concerned,
// numbers are strings. It's the decoder (see below) that will turn bare
-// strings (those not marked with #STRING) into numbers.
+// strings (those not marked with #STRING) into numbers where appropriate.
//
// Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp.
// The operand of #QUOTE is the entire cdr. The same principle is used when
@@ -94,7 +96,7 @@
// implemented in Zisp.
//
// The decoder recognizes (#QUOTE ...) to implement the traditional quoting
-// mechanism, but in a better way:
+// mechanism, but with a significant difference:
//
// Traditional quote is "unhygienic" in Scheme terms. An expression such as
// '(foo bar) will always be read as (quote (foo bar)) regardless of what sort
@@ -163,7 +165,7 @@
// has the advantage of saving memory: If we implemented list parsing as pair
// parsing, we would be calling the parser recursively, deeper and deeper, for
// every pair that the list is made up of. Although we're not limited by stack
-// space, thanks to the strategy described above, this would still waste memory
+// space (thanks to the strategy described above) this would still waste memory
// while parsing.
//
//
@@ -180,31 +182,23 @@
const std = @import("std");
-const gc = @import("../gc.zig");
-const list = @import("../list.zig");
+const lib = @import("../lib.zig");
const value = @import("../value.zig");
const Value = value.Value;
+pub const Mode = enum { code, data };
+
const State = struct {
alloc: std.mem.Allocator,
-
input: []const u8,
pos: usize = 0,
-
- mode: enum { code, data } = .code,
-
+ mode: Mode = undefined,
next: Fn = .start_parse,
-
parent: ?*State = null,
-
- // Used to store various context, but most notably the stack of list
- // elements parsed so far, so just initialize it to nil.
- context: Value = value.nil.nil,
-
- opening_bracket: u8 = 0,
-
- retval: Value = value.eof.eof,
+ context: Value = undefined,
+ opening_bracket: u8 = undefined,
+ retval: Value = undefined,
fn eof(self: *State) bool {
return self.pos >= self.input.len;
@@ -258,14 +252,17 @@ const State = struct {
}
fn recurParse(self: *State, start_from: Fn, return_to: Fn) *State {
- const sub = self.alloc.create(State) catch @panic("OOM");
- sub.* = .{ .alloc = self.alloc, .input = self.input };
- sub.pos = self.pos;
- sub.mode = self.mode;
- sub.next = start_from;
- sub.parent = self;
+ const newState = self.alloc.create(State) catch @panic("OOM");
+ newState.* = .{
+ .alloc = self.alloc,
+ .input = self.input,
+ .pos = self.pos,
+ .mode = self.mode,
+ .next = start_from,
+ .parent = self,
+ };
self.next = return_to;
- return sub;
+ return newState;
}
fn returnDatum(self: *State, val: Value) *State {
@@ -296,14 +293,18 @@ const Fn = enum {
end_rune_datum,
end_quote,
continue_list,
- finalize_improper_list,
+ finish_improper_list,
end_improper_list,
perform_return,
};
-pub fn parse(input: []const u8) Value {
+pub fn parseCode(input: []const u8) Value {
+ return parse(input, .code);
+}
+
+pub fn parse(input: []const u8, mode: Mode) Value {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
- var top = State{ .alloc = gpa.allocator(), .input = input };
+ var top = State{ .alloc = gpa.allocator(), .input = input, .mode = mode };
var s = &top;
while (true) s = switch (s.next) {
.start_parse => startParse(s),
@@ -312,7 +313,7 @@ pub fn parse(input: []const u8) Value {
.end_rune_datum => endRuneDatum(s),
.end_quote => endQuote(s),
.continue_list => continueList(s),
- .finalize_improper_list => finalizeImproperList(s),
+ .finish_improper_list => finishImproperList(s),
.end_improper_list => endImproperList(s),
.perform_return => s.performReturn() orelse return s.retval,
};
@@ -578,6 +579,10 @@ fn endQuote(s: *State) *State {
// List processing is, unsurprisingly, the most complicated, and it's made even
// more complicated by the possibility of datum comments in strange places...
+// Make sure to use .start_parse instead of .start_datum to handle elements, so
+// that an arbitrary number of datum comments, separated by blanks (whitespace
+// and line comments) are handled automatically.
+
fn startList(s: *State) *State {
const open = s.getc();
@@ -590,6 +595,7 @@ fn startList(s: *State) *State {
return err(s, "unexpected EOF while parsing list");
}
+ s.context = value.nil.nil;
s.opening_bracket = open;
return if (isEndOfList(s))
endList(s)
@@ -604,7 +610,27 @@ fn isEndOfList(s: *State) bool {
};
}
+fn endList(s: *State) *State {
+ const open = s.opening_bracket;
+ const char = s.getc();
+
+ if (open == '(' and char == ')') {
+ return s.returnDatum(s.context);
+ }
+ if (open == '[' and char == ']') {
+ const rune = value.rune.pack("SQUARE");
+ return s.returnDatum(value.pair.cons(rune, s.context));
+ }
+ if (open == '{' and char == '}') {
+ const rune = value.rune.pack("BRACE");
+ return s.returnDatum(value.pair.cons(rune, s.context));
+ }
+
+ return err(s, "wrong closing bracket for list");
+}
+
fn continueList(s: *State) *State {
+ // Note that this accumulates list elements in reverse.
s.context = value.pair.cons(s.retval, s.context);
s.consumeBlanks();
@@ -613,7 +639,7 @@ fn continueList(s: *State) *State {
}
if (isEndOfList(s)) {
- s.context = list.reverse(s.context);
+ s.context = lib.list.reverse(s.context);
return endList(s);
}
@@ -623,21 +649,25 @@ fn continueList(s: *State) *State {
if (!s.isWhitespace()) {
return err(s, "misplaced period");
}
- return s.recurParse(.start_parse, .finalize_improper_list);
+ return s.recurParse(.start_parse, .finish_improper_list);
}
return s.recurParse(.start_parse, .continue_list);
}
-fn finalizeImproperList(s: *State) *State {
- s.context = list.reverseWithTail(s.context, s.retval);
+fn finishImproperList(s: *State) *State {
+ s.context = lib.list.reverseWithTail(s.context, s.retval);
return endImproperList(s);
}
+// Handling the end of an improper list is a bit awkward, because there may be
+// datum comments *after* the final cdr, where we don't actually want to parse
+// any further data. So we keep looping here just looking for datum comments.
+
fn endImproperList(s: *State) *State {
s.consumeBlanks();
if (s.eof()) {
- return err(s, "unexpected EOF while parsing list");
+ return err(s, "unexpected EOF at end of improper list");
}
if (isEndOfList(s)) {
@@ -646,7 +676,7 @@ fn endImproperList(s: *State) *State {
if (s.getc() == '#') {
if (s.eof()) {
- return err(s, "unexpected EOF after hash while parsing list");
+ return err(s, "unexpected hash and EOF at end of improper list");
}
if (s.getc() == ';') {
return s.recurParse(.start_datum, .end_improper_list);
@@ -656,26 +686,6 @@ fn endImproperList(s: *State) *State {
return err(s, "malformed list / extra datum at end of improper list");
}
-fn endList(s: *State) *State {
- const open = s.opening_bracket;
- const char = s.getc();
-
- // Check for proper ending: (foo bar baz)
- if (open == '(' and char == ')') {
- return s.returnDatum(s.context);
- }
- if (open == '[' and char == ']') {
- const rune = value.rune.pack("SQUARE");
- return s.returnDatum(value.pair.cons(rune, s.context));
- }
- if (open == '{' and char == '}') {
- const rune = value.rune.pack("BRACE");
- return s.returnDatum(value.pair.cons(rune, s.context));
- }
-
- return err(s, "wrong closing bracket for list");
-}
-
fn err(s: *State, msg: []const u8) noreturn {
std.debug.print("{s}\n", .{msg});
std.debug.print("pos: {}\n", .{s.pos});
diff --git a/src/libzisp/io/reader.zig b/src/libzisp/io/reader.zig
index d6de79d..3465cb3 100644
--- a/src/libzisp/io/reader.zig
+++ b/src/libzisp/io/reader.zig
@@ -6,5 +6,5 @@ const decoder = @import("decoder.zig");
const Value = @import("../value.zig").Value;
pub fn readCode(input: []const u8) Value {
- return decoder.decode(parser.parse(input));
+ return decoder.decode(parser.parse(input, .code));
}
diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/libzisp/io/unparser.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/libzisp/lib.zig b/src/libzisp/lib.zig
new file mode 100644
index 0000000..7752110
--- /dev/null
+++ b/src/libzisp/lib.zig
@@ -0,0 +1 @@
+pub const list = @import("lib/list.zig");
diff --git a/src/libzisp/list.zig b/src/libzisp/lib/list.zig
index a4ce7a8..9d6a6bc 100644
--- a/src/libzisp/list.zig
+++ b/src/libzisp/lib/list.zig
@@ -1,4 +1,4 @@
-const value = @import("value.zig");
+const value = @import("../value.zig");
const Value = value.Value;