From b80fd02194693700697921bfe37b30b52f78559f Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Fri, 29 May 2026 17:50:45 +0200 Subject: Parser cleanup. --- src/zisp/io/Parser.zig | 74 ++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index dbcd6ad..ce61817 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -83,7 +83,6 @@ pub const Error = enum { ReadError, UnexpectedEof, InvalidCharacter, - UnclosedString, UnicodeLengthError, UnicodeEncodeError, RuneTooLong, @@ -174,12 +173,26 @@ fn addChar(p: *Parser, c: u8) !void { try p.chars.append(p.alloc, c); } -fn getCharsAsString(p: *Parser) Value { +fn addUnicode(p: *Parser, uc: u21) !void { + const n = std.unicode.utf8CodepointSequenceLength(uc) catch { + return p.err(.UnicodeLengthError, "UTF-8 injection"); + }; + const buf = try p.chars.addManyAsSlice(p.alloc, n); + const n2 = std.unicode.utf8Encode(uc, buf) catch { + return p.err(.UnicodeEncodeError, "UTF-8 injection"); + }; + std.debug.assert(n == n2); +} + +fn getCharsAsString(p: *Parser) !Value { defer p.chars.clearRetainingCapacity(); - return if (value.sstr.isValidSstr(p.chars.items)) - value.sstr.pack(p.chars.items) + const s = p.chars.items; + if (value.sstr.isValidSstr(s)) + return value.sstr.pack(s); + else if (value.istr.isValidIstr(s)) + return value.istr.intern(s); else - value.istr.intern(p.chars.items); + return @panic("not implemented"); // TODO } fn getCharsAsRune(p: *Parser) Value { @@ -431,15 +444,9 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { } fn getString(p: *Parser, comptime close: u8) !Value { - while (try p.read()) |c| sw: switch (c) { - close => { - const s = p.getCharsAsString(); - return switch (close) { - '|' => p.cons(PQSTR, s), - '"' => p.cons(DQSTR, s), - else => unreachable, - }; - }, + const msg = "string(" ++ .{close} ++ ")"; + while (try p.readNoEof(msg)) |c| sw: switch (c) { + close => break, '\\' => switch (try p.readNoEof("string backslash escape")) { '\\', '|', '"' => |c2| try p.addChar(c2), '\t', ' ' => { @@ -458,68 +465,59 @@ fn getString(p: *Parser, comptime close: u8) !Value { // continue statement passing a new char directly to the switch. else => |c2| try p.addChar(c2), }; - return p.err(.UnclosedString, .{close} ++ " string"); + const s = try p.getCharsAsString(); + return switch (close) { + '|' => p.cons(PQSTR, s), + '"' => p.cons(DQSTR, s), + else => unreachable, + }; } fn getAtString(p: *Parser) !Value { const sentinel = try p.readNoEof("at-string"); - while (try p.read()) |c| { - if (c == sentinel) { - const s = p.getCharsAsString(); - return p.cons(ATSTR, s); - } + while (try p.readNoEof("at-string")) |c| { + if (c == sentinel) break; try p.addChar(c); } - return p.err(.UnclosedString, "at-string"); + const s = p.getCharsAsString(); + return p.cons(ATSTR, s); } fn skipStringLfEscape(p: *Parser) !u8 { const msg = "string linefeed escape"; - while (try p.read()) |c| switch (c) { + while (try p.readNoEof(msg)) |c| switch (c) { '\t', ' ' => {}, '\n' => return p.skipStringIndent(), else => return p.err(.InvalidCharacter, msg), }; - return p.err(.UnclosedString, msg); } fn skipStringIndent(p: *Parser) !u8 { - while (try p.read()) |c| switch (c) { + while (try p.readNoEof("string newline escape")) |c| switch (c) { '\t', ' ' => {}, else => return c, }; - return p.err(.UnclosedString, "string linefeed escape"); } fn parseStringRawHexEsc(p: *Parser) !void { const msg = "string raw hex escape"; - while (try p.read()) |c1| { - if (c1 == ';') return; + while (try p.readNoEof(msg)) |c1| { + if (c1 == ';') break; const c2 = try p.readNoEof(msg); const hi = try p.parseHexDigit(c1, msg); const lo = try p.parseHexDigit(c2, msg); try p.addChar(hi << 4 | lo); } - return p.err(.UnclosedString, msg); } fn parseStringUniHexEsc(p: *Parser) !void { const msg = "string unicode escape"; - const uc = try p.parseHex(u21, msg); const c = p.getUnread() orelse try p.readNoEof(msg); if (c != ';') { return p.err(.InvalidCharacter, msg); } - - const n = std.unicode.utf8CodepointSequenceLength(uc) catch { - return p.err(.UnicodeLengthError, msg); - }; - const buf = try p.chars.addManyAsSlice(p.alloc, n); - const n2 = std.unicode.utf8Encode(uc, buf) catch { - return p.err(.UnicodeEncodeError, msg); - }; - std.debug.assert(n == n2); + try p.addUnicode(uc); } fn parseStringCharEsc(p: *Parser, c: u8) !void { -- cgit v1.2.3