diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-22 18:01:33 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-02-22 18:01:33 +0100 |
| commit | b7fb551ae61d26c30e6078f1f617862430141ce3 (patch) | |
| tree | 6ca7992432d11e8997def25671561454c7660c92 | |
| parent | c922361115c8ee398ec4e26bb0af8cca4dcb9667 (diff) | |
update
| -rw-r--r-- | src/libzisp.zig | 16 | ||||
| -rw-r--r-- | src/libzisp/io.zig | 8 | ||||
| -rw-r--r-- | src/libzisp/io/parser.zig | 136 | ||||
| -rw-r--r-- | src/libzisp/io/reader.zig | 2 | ||||
| -rw-r--r-- | src/libzisp/io/unparser.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/lib.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/lib/list.zig (renamed from src/libzisp/list.zig) | 2 |
7 files changed, 94 insertions, 72 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig index 8141994..400f9fb 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -6,11 +6,11 @@ const builtin = @import("builtin"); const testing = std.testing; pub const gc = @import("libzisp/gc.zig"); +pub const io = @import("libzisp/io.zig"); +pub const lib = @import("libzisp/lib.zig"); pub const value = @import("libzisp/value.zig"); -pub const parser = @import("libzisp/io/parser.zig"); pub const Value = value.Value; -pub const Bucket = gc.Bucket; test "double" { const d1: f64 = 0.123456789; @@ -45,7 +45,7 @@ test "fixnum" { test "ptr" { const ptr = value.ptr; - const val: [*]Bucket = @ptrFromInt(256); + const val: [*]gc.Bucket = @ptrFromInt(256); const tag = ptr.Tag.string; const p = ptr.pack(val, tag); @@ -251,7 +251,7 @@ test "pair" { } test "parse" { - const val = parser.parse("\"foo\""); + const val = io.parser.parseCode("\"foo\""); const r, const rl = value.rune.unpack(value.pair.car(val)); const s, const sl = value.sstr.unpack(value.pair.cdr(val)); try std.testing.expectEqualStrings("STRING", r[0..rl]); @@ -259,7 +259,7 @@ test "parse" { } test "parse2" { - const val = parser.parse( + const val = io.parser.parseCode( \\ ;; Testing some crazy datum comments \\ ##;"bar"#;([x #"y"]{##`,'z})"foo" \\ ;; end @@ -278,7 +278,9 @@ test "parse2" { } test "parse3" { - const val = parser.parse("(foo #;x #;(x y) #;x #bar [#x #\"baz\"] 'bat)"); + const val = io.parser.parseCode( + \\(foo #;x #;(x y) #;x #bar [#x #"baz"] 'bat) + ); const car = value.pair.car; const cdr = value.pair.cdr; @@ -292,7 +294,7 @@ test "parse3" { } test "parse4" { - const val = parser.parse("(foo . #;x bar #;y)"); + const val = io.parser.parseCode("(foo . 
#;x bar #;y)"); const s, const sl = value.sstr.unpack(value.pair.car(val)); try std.testing.expectEqualStrings("foo", s[0..sl]); diff --git a/src/libzisp/io.zig b/src/libzisp/io.zig new file mode 100644 index 0000000..3d6d384 --- /dev/null +++ b/src/libzisp/io.zig @@ -0,0 +1,8 @@ +pub const parser = @import("io/parser.zig"); +pub const unparser = @import("io/unparser.zig"); + +pub const decoder = @import("io/decoder.zig"); +pub const encoder = @import("io/encoder.zig"); + +pub const reader = @import("io/reader.zig"); +pub const writer = @import("io/writer.zig"); diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig index 71c6946..5162c2f 100644 --- a/src/libzisp/io/parser.zig +++ b/src/libzisp/io/parser.zig @@ -9,19 +9,21 @@ // The "sugar" used in code expressions is merely shorthand for more complex // data expressions, which could have been written by hand. // -// Data expressions have a very simple format, and are only able to express a -// minimal set of data types: +// Data expressions have a very simple format, and are only able to express the +// bare minimum set of data types needed to represent more complex data: // -// string -> foo , "foo bar" ;symbols and strings are the same data type +// type format comment +// ---- ------ ------- // -// rune -> #foo ;limited to 6 ASCII letters (a - z, A - Z) +// string foo , "foo bar" symbols and strings are the same data type // -// pair -> (DATUM . DATUM) ;the only composite data type supported +// rune #name name is 1-6 ASCII letters (a - z, A - Z) // -// nil -> () ;we prefer the term nil over null +// pair (DATUM . DATUM) the only composite data type supported // -// The list short-hand syntax may be considered the only "syntax sugar" that is -// supported by the data parser: +// nil () we prefer the term nil over null +// +// The list short-hand syntax is the only "syntax sugar" supported in data: // // (DATUM DATUM DATUM) -> (DATUM . (DATUM . (DATUM . 
()))) // @@ -62,7 +64,7 @@ // // You may be wondering about numbers. As far as the parser is concerned, // numbers are strings. It's the decoder (see below) that will turn bare -// strings (those not marked with #STRING) into numbers. +// strings (those not marked with #STRING) into numbers where appropriate. // // Note that 'foo becomes (quote foo) in Scheme, but (#QUOTE . foo) in Zisp. // The operand of #QUOTE is the entire cdr. The same principle is used when @@ -94,7 +96,7 @@ // implemented in Zisp. // // The decoder recognizes (#QUOTE ...) to implement the traditional quoting -// mechanism, but in a better way: +// mechanism, but with a significant difference: // // Traditional quote is "unhygienic" in Scheme terms. An expression such as // '(foo bar) will always be read as (quote (foo bar)) regardless of what sort @@ -163,7 +165,7 @@ // has the advantage of saving memory: If we implemented list parsing as pair // parsing, we would be calling the parser recursively, deeper and deeper, for // every pair that the list is made up of. Although we're not limited by stack -// space, thanks to the strategy described above, this would still waste memory +// space (thanks to the strategy described above) this would still waste memory // while parsing. // // @@ -180,31 +182,23 @@ const std = @import("std"); -const gc = @import("../gc.zig"); -const list = @import("../list.zig"); +const lib = @import("../lib.zig"); const value = @import("../value.zig"); const Value = value.Value; +pub const Mode = enum { code, data }; + const State = struct { alloc: std.mem.Allocator, - input: []const u8, pos: usize = 0, - - mode: enum { code, data } = .code, - + mode: Mode = undefined, next: Fn = .start_parse, - parent: ?*State = null, - - // Used to store various context, but most notably the stack of list - // elements parsed so far, so just initialize it to nil. 
- context: Value = value.nil.nil, - - opening_bracket: u8 = 0, - - retval: Value = value.eof.eof, + context: Value = undefined, + opening_bracket: u8 = undefined, + retval: Value = undefined, fn eof(self: *State) bool { return self.pos >= self.input.len; @@ -258,14 +252,17 @@ const State = struct { } fn recurParse(self: *State, start_from: Fn, return_to: Fn) *State { - const sub = self.alloc.create(State) catch @panic("OOM"); - sub.* = .{ .alloc = self.alloc, .input = self.input }; - sub.pos = self.pos; - sub.mode = self.mode; - sub.next = start_from; - sub.parent = self; + const newState = self.alloc.create(State) catch @panic("OOM"); + newState.* = .{ + .alloc = self.alloc, + .input = self.input, + .pos = self.pos, + .mode = self.mode, + .next = start_from, + .parent = self, + }; self.next = return_to; - return sub; + return newState; } fn returnDatum(self: *State, val: Value) *State { @@ -296,14 +293,18 @@ const Fn = enum { end_rune_datum, end_quote, continue_list, - finalize_improper_list, + finish_improper_list, end_improper_list, perform_return, }; -pub fn parse(input: []const u8) Value { +pub fn parseCode(input: []const u8) Value { + return parse(input, .code); +} + +pub fn parse(input: []const u8, mode: Mode) Value { var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - var top = State{ .alloc = gpa.allocator(), .input = input }; + var top = State{ .alloc = gpa.allocator(), .input = input, .mode = mode }; var s = &top; while (true) s = switch (s.next) { .start_parse => startParse(s), @@ -312,7 +313,7 @@ pub fn parse(input: []const u8) Value { .end_rune_datum => endRuneDatum(s), .end_quote => endQuote(s), .continue_list => continueList(s), - .finalize_improper_list => finalizeImproperList(s), + .finish_improper_list => finishImproperList(s), .end_improper_list => endImproperList(s), .perform_return => s.performReturn() orelse return s.retval, }; @@ -578,6 +579,10 @@ fn endQuote(s: *State) *State { // List processing is, unsurprisingly, the most complicated, 
and it's made even // more complicated by the possibility of datum comments in strange places... +// Make sure to use .start_parse instead of .start_datum to handle elements, so +// that an arbitrary number of datum comments, separated by blanks (whitespace +// and line comments) are handled automatically. + fn startList(s: *State) *State { const open = s.getc(); @@ -590,6 +595,7 @@ fn startList(s: *State) *State { return err(s, "unexpected EOF while parsing list"); } + s.context = value.nil.nil; s.opening_bracket = open; return if (isEndOfList(s)) endList(s) @@ -604,7 +610,27 @@ fn isEndOfList(s: *State) bool { }; } +fn endList(s: *State) *State { + const open = s.opening_bracket; + const char = s.getc(); + + if (open == '(' and char == ')') { + return s.returnDatum(s.context); + } + if (open == '[' and char == ']') { + const rune = value.rune.pack("SQUARE"); + return s.returnDatum(value.pair.cons(rune, s.context)); + } + if (open == '{' and char == '}') { + const rune = value.rune.pack("BRACE"); + return s.returnDatum(value.pair.cons(rune, s.context)); + } + + return err(s, "wrong closing bracket for list"); +} + fn continueList(s: *State) *State { + // Note that this accumulates list elements in reverse. 
s.context = value.pair.cons(s.retval, s.context); s.consumeBlanks(); @@ -613,7 +639,7 @@ fn continueList(s: *State) *State { } if (isEndOfList(s)) { - s.context = list.reverse(s.context); + s.context = lib.list.reverse(s.context); return endList(s); } @@ -623,21 +649,25 @@ fn continueList(s: *State) *State { if (!s.isWhitespace()) { return err(s, "misplaced period"); } - return s.recurParse(.start_parse, .finalize_improper_list); + return s.recurParse(.start_parse, .finish_improper_list); } return s.recurParse(.start_parse, .continue_list); } -fn finalizeImproperList(s: *State) *State { - s.context = list.reverseWithTail(s.context, s.retval); +fn finishImproperList(s: *State) *State { + s.context = lib.list.reverseWithTail(s.context, s.retval); return endImproperList(s); } +// Handling the end of an improper list is a bit awkward, because there may be +// datum comments *after* the final cdr, where we don't actually want to parse +// any further data. So we keep looping here just looking for datum comments. 
+ fn endImproperList(s: *State) *State { s.consumeBlanks(); if (s.eof()) { - return err(s, "unexpected EOF while parsing list"); + return err(s, "unexpected EOF at end of improper list"); } if (isEndOfList(s)) { @@ -646,7 +676,7 @@ fn endImproperList(s: *State) *State { if (s.getc() == '#') { if (s.eof()) { - return err(s, "unexpected EOF after hash while parsing list"); + return err(s, "unexpected hash and EOF at end of improper list"); } if (s.getc() == ';') { return s.recurParse(.start_datum, .end_improper_list); @@ -656,26 +686,6 @@ fn endImproperList(s: *State) *State { return err(s, "malformed list / extra datum at end of improper list"); } -fn endList(s: *State) *State { - const open = s.opening_bracket; - const char = s.getc(); - - // Check for proper ending: (foo bar baz) - if (open == '(' and char == ')') { - return s.returnDatum(s.context); - } - if (open == '[' and char == ']') { - const rune = value.rune.pack("SQUARE"); - return s.returnDatum(value.pair.cons(rune, s.context)); - } - if (open == '{' and char == '}') { - const rune = value.rune.pack("BRACE"); - return s.returnDatum(value.pair.cons(rune, s.context)); - } - - return err(s, "wrong closing bracket for list"); -} - fn err(s: *State, msg: []const u8) noreturn { std.debug.print("{s}\n", .{msg}); std.debug.print("pos: {}\n", .{s.pos}); diff --git a/src/libzisp/io/reader.zig b/src/libzisp/io/reader.zig index d6de79d..3465cb3 100644 --- a/src/libzisp/io/reader.zig +++ b/src/libzisp/io/reader.zig @@ -6,5 +6,5 @@ const decoder = @import("decoder.zig"); const Value = @import("../value.zig").Value; pub fn readCode(input: []const u8) Value { - return decoder.decode(parser.parse(input)); + return decoder.decode(parser.parse(input, .code)); } diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig new file mode 100644 index 0000000..eb27e20 --- /dev/null +++ b/src/libzisp/io/unparser.zig @@ -0,0 +1 @@ +// wip diff --git a/src/libzisp/lib.zig b/src/libzisp/lib.zig new file mode 100644 index 
0000000..7752110 --- /dev/null +++ b/src/libzisp/lib.zig @@ -0,0 +1 @@ +pub const list = @import("lib/list.zig"); diff --git a/src/libzisp/list.zig b/src/libzisp/lib/list.zig index a4ce7a8..9d6a6bc 100644 --- a/src/libzisp/list.zig +++ b/src/libzisp/lib/list.zig @@ -1,4 +1,4 @@ -const value = @import("value.zig"); +const value = @import("../value.zig"); const Value = value.Value; |
