summary | refs | log | tree | commit | diff
path: root/src/zisp/io/Parser.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/zisp/io/Parser.zig')
-rw-r--r-- src/zisp/io/Parser.zig 195
1 files changed, 103 insertions, 92 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index d4a0a68..f768468 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -1,7 +1,7 @@
//!
//! === Syntax ===
//!
-//! See doc/c1/1-parse.md to understand the implemented syntax.
+//! See /doc/0/1-parse.md to understand the implemented syntax.
//!
//!
//! === Trampolining strategy ===
@@ -43,8 +43,8 @@ const gc = @import("../gc.zig");
const lib = @import("../lib.zig");
const value = @import("../value.zig");
+const ListPool = gc.ListPool;
const IstrSet = gc.IstrSet;
-const PairPool = gc.PairPool;
const Value = value.Value;
const Parser = @This();
@@ -81,21 +81,22 @@ pub const Error = enum {
};
pub const Context = struct {
- // What to do next.
+ /// What to do next.
next: ?Fn = undefined,
- // For storing a context value, like datum to join in join syntax.
+ /// For storing a context value, like datum to join in join syntax.
val: Value = undefined,
- // For storing a context char, like list opening bracket.
+ /// For storing a context char, like list opening bracket.
char: u8 = undefined,
- // Count of list elements on current parse level.
- list_len: usize = undefined,
+ /// Start index of list elements on current parse level, within the global
+ /// list element accumulation array.
+ list_start: usize = undefined,
};
-alloc: Alloc,
+list_pool: ?*ListPool,
istr_set: ?*IstrSet,
-pair_pool: *PairPool,
+alloc: Alloc,
-input: *Reader = undefined,
+reader: *Reader = undefined,
context: Context = .{},
ctx_stack: List(Context) = undefined,
@@ -107,23 +108,23 @@ unread_char: ?u8 = null,
err_msg: []const u8 = undefined,
pub fn init(alloc: Alloc) !Parser {
+ const list_pool = gc.mainListPool();
const istr_set = gc.mainIstrSet();
- const pair_pool = gc.mainPairPool();
- return initCustom(alloc, 32, 2048, 32, istr_set, pair_pool);
+ return initCustom(list_pool, istr_set, alloc, 32, 2048, 32);
}
pub fn initCustom(
+ list_pool: ?*ListPool,
+ istr_set: ?*IstrSet,
alloc: Alloc,
init_ctx_stack_cap: usize,
init_str_chars_cap: usize,
init_list_elts_cap: usize,
- istr_set: ?*IstrSet,
- pair_pool: *PairPool,
) !Parser {
var p: Parser = .{
- .alloc = alloc,
+ .list_pool = list_pool,
.istr_set = istr_set,
- .pair_pool = pair_pool,
+ .alloc = alloc,
};
p.ctx_stack = try .initCapacity(alloc, init_ctx_stack_cap);
p.str_chars = try .initCapacity(alloc, init_str_chars_cap);
@@ -148,7 +149,7 @@ fn read(p: *Parser) !?u8 {
.{p.unread_char.?},
);
}
- const c = p.input.takeByte() catch |e| switch (e) {
+ const c = p.reader.takeByte() catch |e| switch (e) {
error.EndOfStream => return null,
else => return p.err(.ReadError, "???"),
};
@@ -158,6 +159,13 @@ fn read(p: *Parser) !?u8 {
return c;
}
+/// Fill `slice` completely with bytes taken directly from the reader.
+///
+/// If the reader hits end-of-stream first, the failure is reported through
+/// `p.err` as `.UnexpectedEof`; any other reader failure is reported as
+/// `.ReadError`.
+fn readIntoSlice(p: *Parser, slice: []u8) !void {
+    p.reader.readSliceAll(slice) catch |e| {
+        // End of input before the slice was full is the only case that
+        // gets its own diagnostic; everything else is a generic read error.
+        if (e == error.EndOfStream) {
+            return p.err(.UnexpectedEof, "reading into slice");
+        }
+        return p.err(.ReadError, "???");
+    };
+}
+
fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
return try p.read() orelse p.err(.UnexpectedEof, emsg);
}
@@ -222,25 +230,22 @@ fn getCharsAsRune(p: *Parser) Value {
}
//
-// Pair consing & list creation
+// List creation
//
-fn cons(p: *Parser, car: Value, cdr: Value) !Value {
- return value.pair.consInPool(p.pair_pool, car, cdr);
-}
-
fn addListElt(p: *Parser, elt: Value) !void {
try p.list_elts.append(p.alloc, elt);
- p.context.list_len += 1;
}
-fn getList(p: *Parser, tail: Value) !Value {
- var list = tail;
- for (0..p.context.list_len) |_| {
- const elt = p.list_elts.pop() orelse unreachable;
- list = try p.cons(elt, list);
- }
- return list;
+/// Turn the elements accumulated for the current parse level into a list.
+///
+/// The elements are those stored in `p.list_elts` from index
+/// `p.context.list_start` onward. With no such elements the result is
+/// `value.nil`; otherwise they are handed to `value.list.new` and then
+/// removed from the accumulator.
+fn getList(p: *Parser) !Value {
+    const start = p.context.list_start;
+    const elts = p.list_elts.items[start..];
+    if (elts.len == 0) return value.nil;
+    // Truncate the accumulator back to this level's start on the way out,
+    // whether or not creating the list succeeds.
+    defer p.list_elts.items.len = start;
+    return value.list.new(p.alloc, p.list_pool, elts);
+}
+
+/// Create a list value holding `vals`, using the parser's allocator and
+/// list pool. Thin convenience wrapper around `value.list.new`.
+fn makeList(p: *Parser, vals: []const Value) !Value {
+    const alloc = p.alloc;
+    const pool = p.list_pool;
+    return value.list.new(alloc, pool, vals);
+}
//
@@ -259,8 +264,6 @@ const Fn = enum {
endRuneDatum,
endLabelDatum,
continueList,
- endImproperList,
- closeImproperList,
endQuoteExpr,
};
@@ -277,14 +280,12 @@ inline fn call(p: *Parser, f: Fn) !void {
.endRuneDatum => p.endRuneDatum(),
.endLabelDatum => p.endLabelDatum(),
.continueList => p.continueList(),
- .endImproperList => p.endImproperList(),
- .closeImproperList => p.closeImproperList(),
.endQuoteExpr => p.endQuoteExpr(),
};
}
-pub fn run(p: *Parser, input: *Reader) !Value {
- p.input = input;
+pub fn run(p: *Parser, reader: *Reader) !Value {
+ p.reader = reader;
p.context.next = .parseUnit;
while (p.context.next) |next| {
if (detailed_debug) p.printStack();
@@ -336,7 +337,7 @@ fn pushContext(p: *Parser, next: Fn) !void {
.next = next,
.val = p.context.val,
.char = p.context.char,
- .list_len = p.context.list_len,
+ .list_start = p.context.list_start,
});
}
@@ -455,7 +456,7 @@ fn endJoinDatum(p: *Parser) !void {
':' => COLON,
else => unreachable,
};
- const joined = try p.cons(rune, try p.cons(prev, p.result));
+ const joined = try p.makeList(&.{ rune, prev, p.result });
return p.jump(.parseJoin, joined);
}
@@ -511,21 +512,44 @@ fn getString(p: *Parser, comptime close: u8) !Value {
};
const s = try p.getCharsAsString();
return switch (close) {
- '|' => try p.cons(PQSTR, s),
- '"' => try p.cons(DQSTR, s),
+ '|' => try p.makeList(&.{ PQSTR, s }),
+ '"' => try p.makeList(&.{ DQSTR, s }),
else => unreachable,
};
}
fn getAtString(p: *Parser) !Value {
const stop = try p.readNoEof("at-string");
+ return if (stop == 255) p.getAtLenStr() else p.getAtSentinelStr(stop);
+}
+
+/// Parse the length-prefixed form of an at-string.
+///
+/// Reads six bytes as a big-endian `u48` length, allocates room for an
+/// array header followed by that many bytes, fills the bytes straight from
+/// the reader, and returns the list `(ATSTR array)`.
+///
+/// Errors: propagates allocation failure and any error reported while
+/// reading (including unexpected end of input). The freshly allocated
+/// buffer is released again if anything fails after the allocation.
+fn getAtLenStr(p: *Parser) !Value {
+    // First byte read becomes the most significant byte of the length.
+    var len: u48 = 0;
+    inline for (0..6) |_| {
+        len <<= 8;
+        len += try p.readNoEof("at-length-string");
+    }
+    const AH = value.array.ArrayHeader;
+    // `std.mem.Alignment` stores log2 of the byte alignment, so the byte
+    // count from `@alignOf` must be converted rather than cast directly;
+    // a direct `@enumFromInt` would request 2^@alignOf(AH) bytes.
+    const aln = comptime std.mem.Alignment.fromByteUnits(@alignOf(AH));
+    const mem = try p.alloc.alignedAlloc(u8, aln, @sizeOf(AH) + len);
+    // Nothing else owns `mem` until the list is returned, so free it if
+    // reading the payload or building the list fails.
+    errdefer p.alloc.free(mem);
+    const arr: value.array.ArrayPtr = @ptrCast(mem);
+    arr.* = .{
+        .len_or_ptr = len,
+        .type = .str,
+        .info = .{ .str = .{} },
+    };
+    try p.readIntoSlice(arr.bytes());
+    return p.makeList(&.{ ATSTR, value.ptr.pack(.array, arr) });
+}
+
+fn getAtSentinelStr(p: *Parser, stop: u8) !Value {
while (try p.readNoEofOpt("at-string")) |c| {
if (c == stop) break;
try p.addChar(c);
}
const str = try p.getCharsAsString();
const byte = value.fixnum.pack(stop);
- return try p.cons(ATSTR, try p.cons(byte, str));
+ return p.makeList(&.{ ATSTR, byte, str });
}
fn skipStringLfEscape(p: *Parser) !u8 {
@@ -591,8 +615,9 @@ fn parseHashExpr(p: *Parser, next: Fn) !void {
},
'\\' => {
const c1 = try p.readNoEof("hash-backslash");
- const bs = try p.getBareString(c1);
- return p.jump(next, try p.cons(HASH, bs));
+ const str = try p.getBareString(c1);
+ const val = try p.makeList(&.{ HASH, str });
+ return p.jump(next, val);
},
'!' => return p.parseHashBang(next),
'%' => return p.parseLabel(next),
@@ -611,7 +636,7 @@ fn endHashDatum(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "hash datum");
}
- return p.retval(try p.cons(HASH, p.result));
+ return p.retval(try p.makeList(&.{ HASH, p.result }));
}
fn getRune(p: *Parser, c1: u8) !Value {
@@ -635,11 +660,25 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
switch (c) {
'\\' => {
const c1 = try p.readNoEof("rune-backslash");
- return p.jump(next, try p.cons(r, try p.getBareString(c1)));
+ const str = try p.getBareString(c1);
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '"' => {
+ const str = try p.getString('"');
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '|' => {
+ const str = try p.getString('|');
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '@' => {
+ const str = try p.getAtString();
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
},
- '"' => return p.jump(next, try p.cons(r, try p.getString('"'))),
- '|' => return p.jump(next, try p.cons(r, try p.getString('|'))),
- '@' => return p.jump(next, try p.cons(r, try p.getAtString())),
'#', '(', '[', '{', '\'', '`', ',' => {
p.unread(c);
try p.push(next);
@@ -654,31 +693,31 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
}
fn endRuneDatum(p: *Parser) !void {
- return p.retval(try p.cons(p.context.val, p.result));
+ return p.retval(try p.makeList(&.{ p.context.val, p.result }));
}
fn parseHashBang(p: *Parser, next: Fn) !void {
- const val = try p.getHashBangValue();
- return p.jump(next, try p.cons(SHBANG, val));
+ const interp, const arg_line = try p.getHashBangValue();
+ if (arg_line) |args| {
+ return p.jump(next, try p.makeList(&.{ SHBANG, interp, args }));
+ } else {
+ return p.jump(next, try p.makeList(&.{ SHBANG, interp }));
+ }
}
-fn getHashBangValue(p: *Parser) !Value {
+fn getHashBangValue(p: *Parser) !struct { Value, ?Value } {
while (try p.readNoEofOpt("hash-bang")) |c| switch (c) {
' ', '\t' => continue,
'\n' => return p.err(.InvalidCharacter, "hash-bang"),
else => {
try p.addChar(c);
while (try p.read()) |c2| switch (c2) {
- '\n' => return p.getCharsAsString(),
+ '\n' => return .{ try p.getCharsAsString(), null },
' ', '\t' => break,
else => try p.addChar(c2),
};
const interp = try p.getCharsAsString();
- if (try p.getHashBangArgLine()) |arg_line| {
- return try p.cons(interp, arg_line);
- } else {
- return interp;
- }
+ return .{ interp, try p.getHashBangArgLine() };
},
};
unreachable;
@@ -704,7 +743,7 @@ fn parseLabel(p: *Parser, next: Fn) !void {
const n = try p.parseHex(u48, "datum label");
const l = value.fixnum.pack(n);
switch (p.getUnread() orelse try p.readNoEof("datum label")) {
- '%' => return p.jump(next, try p.cons(LABEL, l)),
+ '%' => return p.jump(next, try p.makeList(&.{ LABEL, l })),
'=' => {
try p.push(next);
p.context.val = l;
@@ -718,7 +757,7 @@ fn endLabelDatum(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "label datum");
}
- return p.retval(try p.cons(LABEL, try p.cons(p.context.val, p.result)));
+ return p.retval(try p.makeList(&.{ LABEL, p.context.val, p.result }));
}
fn parseList(p: *Parser, open: u8, next: Fn) !void {
@@ -729,7 +768,7 @@ fn parseList(p: *Parser, open: u8, next: Fn) !void {
'{' => '}',
else => unreachable,
};
- p.context.list_len = 0;
+ p.context.list_start = p.list_elts.items.len;
switch (open) {
'(' => {},
'[' => try p.addListElt(SQUARE),
@@ -750,9 +789,6 @@ fn continueList(p: *Parser) !void {
if (c == close) {
return p.endList();
}
- if (c == '&') {
- return p.subr(.parseUnit, .endImproperList);
- }
return p.err(.InvalidCharacter, "list");
}
@@ -762,32 +798,7 @@ fn continueList(p: *Parser) !void {
}
fn endList(p: *Parser) !void {
- return p.retval(try p.getList(value.nil));
-}
-
-fn endImproperList(p: *Parser) !void {
- if (p.result.eq(value.none)) {
- return p.err(.InvalidCharacter, "list tail");
- }
- p.context.val = try p.getList(p.result);
- return p.closeImproperList();
-}
-
-fn closeImproperList(p: *Parser) !void {
- const result = p.context.val;
- const close = p.context.char;
- var c1 = p.getUnread() orelse try p.read();
- while (c1) |c| : (c1 = try p.read()) {
- if (c == close) {
- return p.retval(result);
- }
- switch (try p.checkBlank(c)) {
- .yes => {},
- .skip_unit => return p.subr(.parseUnit, .closeImproperList),
- .no => return p.err(.InvalidCharacter, "after list tail"),
- }
- }
- return p.err(.UnexpectedEof, "after list tail");
+ return p.retval(try p.getList());
}
fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
@@ -808,7 +819,7 @@ fn endQuoteExpr(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "quote expression datum");
}
- return p.retval(try p.cons(p.context.val, p.result));
+ return p.retval(try p.makeList(&.{ p.context.val, p.result }));
}
// Helpers
@@ -836,7 +847,7 @@ pub fn isSpecialBareChar(c: u8) bool {
pub fn isBareChar(c: u8) bool {
return switch (c) {
// zig fmt: off
- 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '*' ,
+ 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' ,
'+' , '-' , '/' , '<' , '=' , '>' , '?' , '^' , '_' , '~' , => true,
// zig fmt: on
else => false,