diff options
Diffstat (limited to 'src/zisp/io/Parser.zig')
| -rw-r--r-- | src/zisp/io/Parser.zig | 160 |
1 files changed, 85 insertions, 75 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index 14db959..4a2ed35 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -188,7 +188,7 @@ fn addChar(p: *Parser, c: u8) !void { try p.chars.append(p.alloc.chars, c); } -fn getString(p: *Parser) Value { +fn getCharsAsString(p: *Parser) Value { defer p.chars.clearRetainingCapacity(); return if (value.sstr.isValidSstr(p.chars.items)) value.sstr.pack(p.chars.items) @@ -196,7 +196,7 @@ fn getString(p: *Parser) Value { value.istr.intern(p.chars.items); } -fn getRune(p: *Parser) Value { +fn getCharsAsRune(p: *Parser) Value { defer p.chars.clearRetainingCapacity(); return value.rune.pack(p.chars.items); } @@ -433,84 +433,111 @@ fn parseBareString(p: *Parser, c1: u8) !Value { break; } } - return p.getString(); + return p.getCharsAsString(); } fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { return switch (c) { - '|' => p.jump(next, try p.parseEscapedString('|')), - '"' => p.jump(next, try p.parseEscapedString('"')), - '#' => p.parseHashExpression(next), + '|' => p.jump(next, try p.parseString('|')), + '"' => p.jump(next, try p.parseString('"')), + '#' => p.parseHashExpr(next), '(', '[', '{' => p.parseList(c, next), '\'', '`', ',' => p.parseQuoteExpr(c, next), else => p.abort(next, c), }; } -fn parseEscapedString(p: *Parser, close: u8) !Value { - while (try p.read()) |c| { - if (c == close) { - const s = p.getString(); +fn parseString(p: *Parser, comptime close: u8) !Value { + while (try p.read()) |c| sw: switch (c) { + close => { + const s = p.getCharsAsString(); return if (close == '"') p.cons(QUOTE, s) else s; - } - if (c != '\\') { - try p.addChar(c); - } else { - try p.parseQuotedEsc(close); - } - } - return error.UnclosedString; + }, + '\\' => switch (try p.readNoEof("string backslash escape")) { + '\\', '|', '"' => |c2| try p.addChar(c2), + '\t', ' ' => { + const c2 = try p.skipStringLfEscape(); + continue :sw c2; + }, + '\n' => { + const c2 = try p.skipStringIndent(); + continue :sw c2; + }, + 'x' => try p.parseStringRawHexEsc(), + 'u' => try p.parseStringUniHexEsc(), + else => |c2| try p.parseStringCharEsc(c2), + }, + // Important to use a capture here, since it may come from a labeled + // continue statement passing a new char directly to the switch. + else => |c2| try p.addChar(c2), + }; + return p.err(.UnclosedString, .{close} ++ " string"); } -fn parseQuotedEsc(p: *Parser, close: u8) !void { - const c = try p.readNoEof("quoted escape"); - if (c == close) return p.addChar(close); - if (c == 'u') return p.parseUniHexHandleErrors(); - try p.addChar(switch (c) { - '\\' => c, - '0' => 0, - 'a' => 7, - 'b' => 8, - 't' => 9, - 'n' => 10, - 'v' => 11, - 'f' => 12, - 'r' => 13, - 'e' => 27, - 'x' => try p.parseHexByte("hex escape"), - else => return p.err(.InvalidCharacter, "quoted escape"), - }); +fn skipStringLfEscape(p: *Parser) !u8 { + const msg = "string linefeed escape"; + while (try p.read()) |c| switch (c) { + '\t', ' ' => {}, + '\n' => return p.skipStringIndent(), + else => return p.err(.InvalidCharacter, msg), + }; + return p.err(.UnclosedString, msg); } -fn parseUniHexHandleErrors(p: *Parser) !void { - return p.parseUniHex() catch |e| switch (e) { - error.Utf8CannotEncodeSurrogateHalf => p.err( - .UnicodeError, - "unicode escape", - ), - else => e, +fn skipStringIndent(p: *Parser) !u8 { + while (try p.read()) |c| switch (c) { + '\t', ' ' => {}, + else => return c, }; + return p.err(.UnclosedString, "string linefeed escape"); } -fn parseUniHex(p: *Parser) !void { - const msg = "unicode escape"; - - if (try p.readNoEof(msg) != '{') { - return p.err(.InvalidCharacter, msg); +fn parseStringRawHexEsc(p: *Parser) !void { + const msg = "string raw hex escape"; + while (try p.read()) |c1| { + if (c1 == ';') return; + const c2 = try p.readNoEof(msg); + const hi = try p.parseHexDigit(c1, msg); + const lo = try p.parseHexDigit(c2, msg); + try p.addChar(hi << 4 | lo); } + return p.err(.UnclosedString, msg); +} + +fn parseStringUniHexEsc(p: *Parser) !void { + const msg = "string unicode escape"; const uc = try p.parseHex(u21, msg); - const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); - if (c != '}') { + const c = p.getUnread() orelse try p.readNoEof(msg); + if (c != ';') { return p.err(.InvalidCharacter, msg); } - const n = try std.unicode.utf8CodepointSequenceLength(uc); + const n = std.unicode.utf8CodepointSequenceLength(uc) catch { + return p.err(.UnicodeError, msg); + }; const buf = try p.chars.addManyAsSlice(p.alloc.chars, n); - _ = try std.unicode.utf8Encode(uc, buf); + const n2 = std.unicode.utf8Encode(uc, buf) catch { + return p.err(.UnicodeError, msg); + }; + std.debug.assert(n == n2); +} + +fn parseStringCharEsc(p: *Parser, c: u8) !void { + try p.addChar(switch (c) { + 'a' => 7, + 'b' => 8, + 't' => 9, + 'n' => 10, + 'v' => 11, + 'f' => 12, + 'r' => 13, + 'e' => 27, + else => return p.err(.InvalidCharacter, "string backslash escape"), + }); } -fn parseHashExpression(p: *Parser, next: Fn) !void { +fn parseHashExpr(p: *Parser, next: Fn) !void { const c = try p.readNoEof("hash expression"); if (std.ascii.isAlphabetic(c)) { const r = try p.parseRune(c); @@ -544,11 +571,11 @@ fn parseRune(p: *Parser, c1: u8) !Value { while (try p.read()) |c| : (len += 1) { if (len == 6 or !std.ascii.isAlphanumeric(c)) { p.unread(c); - return p.getRune(); + return p.getCharsAsRune(); } try p.addChar(c); } - return p.getRune(); + return p.getCharsAsRune(); } fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { @@ -558,10 +585,10 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { return p.jump(next, p.cons(r, try p.parseBareString(c1))); } if (c == '"') { - return p.jump(next, p.cons(r, try p.parseEscapedString('"'))); + return p.jump(next, p.cons(r, try p.parseString('"'))); } if (c == '|') { - return p.jump(next, p.cons(r, try p.parseEscapedString('|'))); + return p.jump(next, p.cons(r, try p.parseString('|'))); } p.unread(c); switch (c) { @@ -752,18 +779,9 @@ fn isBareChar(c: u8) bool { }; } -fn isBareEsc(c: u8) bool { - return switch (c) { - 33...126 => true, - else => false, - }; -} - fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { - var uc: T = undefined; - const c1 = try p.readNoEof(emsg); - uc = try p.parseHexDigit(c1, emsg); + var uc: T = try p.parseHexDigit(c1, emsg); while (try p.read()) |c| { if (!std.ascii.isHex(c)) { @@ -777,14 +795,6 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { return uc; } -fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 { - const h1 = try p.readNoEof(emsg); - const h2 = try p.readNoEof(emsg); - const hi = try p.parseHexDigit(h1, emsg); - const lo = try p.parseHexDigit(h2, emsg); - return hi << 4 | lo; -} - fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 { return switch (c) { '0'...'9' => c - '0', |
