diff options
Diffstat (limited to 'src/zisp/io/Parser.zig')
| -rw-r--r-- | src/zisp/io/Parser.zig | 195 |
1 files changed, 103 insertions, 92 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index d4a0a68..f768468 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -1,7 +1,7 @@ //! //! === Syntax === //! -//! See doc/c1/1-parse.md to understand the implemented syntax. +//! See /doc/0/1-parse.md to understand the implemented syntax. //! //! //! === Trampolining strategy === @@ -43,8 +43,8 @@ const gc = @import("../gc.zig"); const lib = @import("../lib.zig"); const value = @import("../value.zig"); +const ListPool = gc.ListPool; const IstrSet = gc.IstrSet; -const PairPool = gc.PairPool; const Value = value.Value; const Parser = @This(); @@ -81,21 +81,22 @@ pub const Error = enum { }; pub const Context = struct { - // What to do next. + /// What to do next. next: ?Fn = undefined, - // For storing a context value, like datum to join in join syntax. + /// For storing a context value, like datum to join in join syntax. val: Value = undefined, - // For storing a context char, like list opening bracket. + /// For storing a context char, like list opening bracket. char: u8 = undefined, - // Count of list elements on current parse level. - list_len: usize = undefined, + /// Start index of list elements on current parse level, within the global + /// list element accumulation array. + list_start: usize = undefined, }; -alloc: Alloc, +list_pool: ?*ListPool, istr_set: ?*IstrSet, -pair_pool: *PairPool, +alloc: Alloc, -input: *Reader = undefined, +reader: *Reader = undefined, context: Context = .{}, ctx_stack: List(Context) = undefined, @@ -107,23 +108,23 @@ unread_char: ?u8 = null, err_msg: []const u8 = undefined, pub fn init(alloc: Alloc) !Parser { + const list_pool = gc.mainListPool(); const istr_set = gc.mainIstrSet(); - const pair_pool = gc.mainPairPool(); - return initCustom(alloc, 32, 2048, 32, istr_set, pair_pool); + return initCustom(list_pool, istr_set, alloc, 32, 2048, 32); } pub fn initCustom( + list_pool: ?*ListPool, + istr_set: ?*IstrSet, alloc: Alloc, init_ctx_stack_cap: usize, init_str_chars_cap: usize, init_list_elts_cap: usize, - istr_set: ?*IstrSet, - pair_pool: *PairPool, ) !Parser { var p: Parser = .{ - .alloc = alloc, + .list_pool = list_pool, .istr_set = istr_set, - .pair_pool = pair_pool, + .alloc = alloc, }; p.ctx_stack = try .initCapacity(alloc, init_ctx_stack_cap); p.str_chars = try .initCapacity(alloc, init_str_chars_cap); @@ -148,7 +149,7 @@ fn read(p: *Parser) !?u8 { .{p.unread_char.?}, ); } - const c = p.input.takeByte() catch |e| switch (e) { + const c = p.reader.takeByte() catch |e| switch (e) { error.EndOfStream => return null, else => return p.err(.ReadError, "???"), }; @@ -158,6 +159,13 @@ fn read(p: *Parser) !?u8 { return c; } +fn readIntoSlice(p: *Parser, slice: []u8) !void { + p.reader.readSliceAll(slice) catch |e| return switch (e) { + error.EndOfStream => p.err(.UnexpectedEof, "reading into slice"), + else => p.err(.ReadError, "???"), + }; +} + fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 { return try p.read() orelse p.err(.UnexpectedEof, emsg); } @@ -222,25 +230,22 @@ fn getCharsAsRune(p: *Parser) Value { } // -// Pair consing & list creation +// List creation // -fn cons(p: *Parser, car: Value, cdr: Value) !Value { - return value.pair.consInPool(p.pair_pool, car, cdr); -} - fn addListElt(p: *Parser, elt: Value) !void { try p.list_elts.append(p.alloc, elt); - p.context.list_len += 1; } -fn getList(p: *Parser, tail: Value) !Value { - var list = tail; - for (0..p.context.list_len) |_| { - const elt = p.list_elts.pop() orelse unreachable; - list = try p.cons(elt, list); - } - return list; +fn getList(p: *Parser) !Value { + if (p.list_elts.items.len == p.context.list_start) return value.nil; + defer p.list_elts.items.len = p.context.list_start; + const vals = p.list_elts.items[p.context.list_start..]; + return value.list.new(p.alloc, p.list_pool, vals); +} + +fn makeList(p: *Parser, vals: []const Value) !Value { + return value.list.new(p.alloc, p.list_pool, vals); } // @@ -259,8 +264,6 @@ const Fn = enum { endRuneDatum, endLabelDatum, continueList, - endImproperList, - closeImproperList, endQuoteExpr, }; @@ -277,14 +280,12 @@ inline fn call(p: *Parser, f: Fn) !void { .endRuneDatum => p.endRuneDatum(), .endLabelDatum => p.endLabelDatum(), .continueList => p.continueList(), - .endImproperList => p.endImproperList(), - .closeImproperList => p.closeImproperList(), .endQuoteExpr => p.endQuoteExpr(), }; } -pub fn run(p: *Parser, input: *Reader) !Value { - p.input = input; +pub fn run(p: *Parser, reader: *Reader) !Value { + p.reader = reader; p.context.next = .parseUnit; while (p.context.next) |next| { if (detailed_debug) p.printStack(); @@ -336,7 +337,7 @@ fn pushContext(p: *Parser, next: Fn) !void { .next = next, .val = p.context.val, .char = p.context.char, - .list_len = p.context.list_len, + .list_start = p.context.list_start, }); } @@ -455,7 +456,7 @@ fn endJoinDatum(p: *Parser) !void { ':' => COLON, else => unreachable, }; - const joined = try p.cons(rune, try p.cons(prev, p.result)); + const joined = try p.makeList(&.{ rune, prev, p.result }); return p.jump(.parseJoin, joined); } @@ -511,21 +512,44 @@ fn getString(p: *Parser, comptime close: u8) !Value { }; const s = try p.getCharsAsString(); return switch (close) { - '|' => try p.cons(PQSTR, s), - '"' => try p.cons(DQSTR, s), + '|' => try p.makeList(&.{ PQSTR, s }), + '"' => try p.makeList(&.{ DQSTR, s }), else => unreachable, }; } fn getAtString(p: *Parser) !Value { const stop = try p.readNoEof("at-string"); + return if (stop == 255) p.getAtLenStr() else p.getAtSentinelStr(stop); +} + +fn getAtLenStr(p: *Parser) !Value { + var len: u48 = 0; + inline for (0..6) |_| { + len <<= 8; + len += try p.readNoEof("at-length-string"); + } + const AH = value.array.ArrayHeader; + const aln: std.mem.Alignment = @enumFromInt(@alignOf(AH)); + const mem = try p.alloc.alignedAlloc(u8, aln, @sizeOf(AH) + len); + const arr: value.array.ArrayPtr = @ptrCast(mem); + arr.* = .{ + .len_or_ptr = len, + .type = .str, + .info = .{ .str = .{} }, + }; + try p.readIntoSlice(arr.bytes()); + return p.makeList(&.{ ATSTR, value.ptr.pack(.array, arr) }); +} + +fn getAtSentinelStr(p: *Parser, stop: u8) !Value { while (try p.readNoEofOpt("at-string")) |c| { if (c == stop) break; try p.addChar(c); } const str = try p.getCharsAsString(); const byte = value.fixnum.pack(stop); - return try p.cons(ATSTR, try p.cons(byte, str)); + return p.makeList(&.{ ATSTR, byte, str }); } fn skipStringLfEscape(p: *Parser) !u8 { @@ -591,8 +615,9 @@ fn parseHashExpr(p: *Parser, next: Fn) !void { }, '\\' => { const c1 = try p.readNoEof("hash-backslash"); - const bs = try p.getBareString(c1); - return p.jump(next, try p.cons(HASH, bs)); + const str = try p.getBareString(c1); + const val = try p.makeList(&.{ HASH, str }); + return p.jump(next, val); }, '!' => return p.parseHashBang(next), '%' => return p.parseLabel(next), @@ -611,7 +636,7 @@ fn endHashDatum(p: *Parser) !void { if (p.result.eq(value.none)) { return p.err(.InvalidCharacter, "hash datum"); } - return p.retval(try p.cons(HASH, p.result)); + return p.retval(try p.makeList(&.{ HASH, p.result })); } fn getRune(p: *Parser, c1: u8) !Value { @@ -635,11 +660,25 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { switch (c) { '\\' => { const c1 = try p.readNoEof("rune-backslash"); - return p.jump(next, try p.cons(r, try p.getBareString(c1))); + const str = try p.getBareString(c1); + const val = try p.makeList(&.{ r, str }); + return p.jump(next, val); + }, + '"' => { + const str = try p.getString('"'); + const val = try p.makeList(&.{ r, str }); + return p.jump(next, val); + }, + '|' => { + const str = try p.getString('|'); + const val = try p.makeList(&.{ r, str }); + return p.jump(next, val); + }, + '@' => { + const str = try p.getAtString(); + const val = try p.makeList(&.{ r, str }); + return p.jump(next, val); }, - '"' => return p.jump(next, try p.cons(r, try p.getString('"'))), - '|' => return p.jump(next, try p.cons(r, try p.getString('|'))), - '@' => return p.jump(next, try p.cons(r, try p.getAtString())), '#', '(', '[', '{', '\'', '`', ',' => { p.unread(c); try p.push(next); @@ -654,31 +693,31 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { } fn endRuneDatum(p: *Parser) !void { - return p.retval(try p.cons(p.context.val, p.result)); + return p.retval(try p.makeList(&.{ p.context.val, p.result })); } fn parseHashBang(p: *Parser, next: Fn) !void { - const val = try p.getHashBangValue(); - return p.jump(next, try p.cons(SHBANG, val)); + const interp, const arg_line = try p.getHashBangValue(); + if (arg_line) |args| { + return p.jump(next, try p.makeList(&.{ SHBANG, interp, args })); + } else { + return p.jump(next, try p.makeList(&.{ SHBANG, interp })); + } } -fn getHashBangValue(p: *Parser) !Value { +fn getHashBangValue(p: *Parser) !struct { Value, ?Value } { while (try p.readNoEofOpt("hash-bang")) |c| switch (c) { ' ', '\t' => continue, '\n' => return p.err(.InvalidCharacter, "hash-bang"), else => { try p.addChar(c); while (try p.read()) |c2| switch (c2) { - '\n' => return p.getCharsAsString(), + '\n' => return .{ try p.getCharsAsString(), null }, ' ', '\t' => break, else => try p.addChar(c2), }; const interp = try p.getCharsAsString(); - if (try p.getHashBangArgLine()) |arg_line| { - return try p.cons(interp, arg_line); - } else { - return interp; - } + return .{ interp, try p.getHashBangArgLine() }; }, }; unreachable; @@ -704,7 +743,7 @@ fn parseLabel(p: *Parser, next: Fn) !void { const n = try p.parseHex(u48, "datum label"); const l = value.fixnum.pack(n); switch (p.getUnread() orelse try p.readNoEof("datum label")) { - '%' => return p.jump(next, try p.cons(LABEL, l)), + '%' => return p.jump(next, try p.makeList(&.{ LABEL, l })), '=' => { try p.push(next); p.context.val = l; @@ -718,7 +757,7 @@ fn endLabelDatum(p: *Parser) !void { if (p.result.eq(value.none)) { return p.err(.InvalidCharacter, "label datum"); } - return p.retval(try p.cons(LABEL, try p.cons(p.context.val, p.result))); + return p.retval(try p.makeList(&.{ LABEL, p.context.val, p.result })); } fn parseList(p: *Parser, open: u8, next: Fn) !void { @@ -729,7 +768,7 @@ fn parseList(p: *Parser, open: u8, next: Fn) !void { '{' => '}', else => unreachable, }; - p.context.list_len = 0; + p.context.list_start = p.list_elts.items.len; switch (open) { '(' => {}, '[' => try p.addListElt(SQUARE), @@ -750,9 +789,6 @@ fn continueList(p: *Parser) !void { if (c == close) { return p.endList(); } - if (c == '&') { - return p.subr(.parseUnit, .endImproperList); - } return p.err(.InvalidCharacter, "list"); } @@ -762,32 +798,7 @@ fn continueList(p: *Parser) !void { } fn endList(p: *Parser) !void { - return p.retval(try p.getList(value.nil)); -} - -fn endImproperList(p: *Parser) !void { - if (p.result.eq(value.none)) { - return p.err(.InvalidCharacter, "list tail"); - } - p.context.val = try p.getList(p.result); - return p.closeImproperList(); -} - -fn closeImproperList(p: *Parser) !void { - const result = p.context.val; - const close = p.context.char; - var c1 = p.getUnread() orelse try p.read(); - while (c1) |c| : (c1 = try p.read()) { - if (c == close) { - return p.retval(result); - } - switch (try p.checkBlank(c)) { - .yes => {}, - .skip_unit => return p.subr(.parseUnit, .closeImproperList), - .no => return p.err(.InvalidCharacter, "after list tail"), - } - } - return p.err(.UnexpectedEof, "after list tail"); + return p.retval(try p.getList()); } fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void { @@ -808,7 +819,7 @@ fn endQuoteExpr(p: *Parser) !void { if (p.result.eq(value.none)) { return p.err(.InvalidCharacter, "quote expression datum"); } - return p.retval(try p.cons(p.context.val, p.result)); + return p.retval(try p.makeList(&.{ p.context.val, p.result })); } // Helpers @@ -836,7 +847,7 @@ pub fn isSpecialBareChar(c: u8) bool { pub fn isBareChar(c: u8) bool { return switch (c) { // zig fmt: off - 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '*' , + 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' , '+' , '-' , '/' , '<' , '=' , '>' , '?' , '^' , '_' , '~' , => true, // zig fmt: on else => false, |
