summary | refs | log | tree | commit | diff
path: root/src/zisp/io/Parser.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/zisp/io/Parser.zig')
-rw-r--r-- src/zisp/io/Parser.zig 160
1 files changed, 85 insertions, 75 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index 14db959..4a2ed35 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -188,7 +188,7 @@ fn addChar(p: *Parser, c: u8) !void {
try p.chars.append(p.alloc.chars, c);
}
-fn getString(p: *Parser) Value {
+fn getCharsAsString(p: *Parser) Value {
defer p.chars.clearRetainingCapacity();
return if (value.sstr.isValidSstr(p.chars.items))
value.sstr.pack(p.chars.items)
@@ -196,7 +196,7 @@ fn getString(p: *Parser) Value {
value.istr.intern(p.chars.items);
}
-fn getRune(p: *Parser) Value {
+fn getCharsAsRune(p: *Parser) Value {
defer p.chars.clearRetainingCapacity();
return value.rune.pack(p.chars.items);
}
@@ -433,84 +433,111 @@ fn parseBareString(p: *Parser, c1: u8) !Value {
break;
}
}
- return p.getString();
+ return p.getCharsAsString();
}
fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
return switch (c) {
- '|' => p.jump(next, try p.parseEscapedString('|')),
- '"' => p.jump(next, try p.parseEscapedString('"')),
- '#' => p.parseHashExpression(next),
+ '|' => p.jump(next, try p.parseString('|')),
+ '"' => p.jump(next, try p.parseString('"')),
+ '#' => p.parseHashExpr(next),
'(', '[', '{' => p.parseList(c, next),
'\'', '`', ',' => p.parseQuoteExpr(c, next),
else => p.abort(next, c),
};
}
-fn parseEscapedString(p: *Parser, close: u8) !Value {
- while (try p.read()) |c| {
- if (c == close) {
- const s = p.getString();
+fn parseString(p: *Parser, comptime close: u8) !Value {
+ while (try p.read()) |c| sw: switch (c) {
+ close => {
+ const s = p.getCharsAsString();
return if (close == '"') p.cons(QUOTE, s) else s;
- }
- if (c != '\\') {
- try p.addChar(c);
- } else {
- try p.parseQuotedEsc(close);
- }
- }
- return error.UnclosedString;
+ },
+ '\\' => switch (try p.readNoEof("string backslash escape")) {
+ '\\', '|', '"' => |c2| try p.addChar(c2),
+ '\t', ' ' => {
+ const c2 = try p.skipStringLfEscape();
+ continue :sw c2;
+ },
+ '\n' => {
+ const c2 = try p.skipStringIndent();
+ continue :sw c2;
+ },
+ 'x' => try p.parseStringRawHexEsc(),
+ 'u' => try p.parseStringUniHexEsc(),
+ else => |c2| try p.parseStringCharEsc(c2),
+ },
+ // Important to use a capture here, since it may come from a labeled
+ // continue statement passing a new char directly to the switch.
+ else => |c2| try p.addChar(c2),
+ };
+ return p.err(.UnclosedString, .{close} ++ " string");
}
-fn parseQuotedEsc(p: *Parser, close: u8) !void {
- const c = try p.readNoEof("quoted escape");
- if (c == close) return p.addChar(close);
- if (c == 'u') return p.parseUniHexHandleErrors();
- try p.addChar(switch (c) {
- '\\' => c,
- '0' => 0,
- 'a' => 7,
- 'b' => 8,
- 't' => 9,
- 'n' => 10,
- 'v' => 11,
- 'f' => 12,
- 'r' => 13,
- 'e' => 27,
- 'x' => try p.parseHexByte("hex escape"),
- else => return p.err(.InvalidCharacter, "quoted escape"),
- });
+fn skipStringLfEscape(p: *Parser) !u8 {
+ const msg = "string linefeed escape";
+ while (try p.read()) |c| switch (c) {
+ '\t', ' ' => {},
+ '\n' => return p.skipStringIndent(),
+ else => return p.err(.InvalidCharacter, msg),
+ };
+ return p.err(.UnclosedString, msg);
}
-fn parseUniHexHandleErrors(p: *Parser) !void {
- return p.parseUniHex() catch |e| switch (e) {
- error.Utf8CannotEncodeSurrogateHalf => p.err(
- .UnicodeError,
- "unicode escape",
- ),
- else => e,
+fn skipStringIndent(p: *Parser) !u8 {
+ while (try p.read()) |c| switch (c) {
+ '\t', ' ' => {},
+ else => return c,
};
+ return p.err(.UnclosedString, "string linefeed escape");
}
-fn parseUniHex(p: *Parser) !void {
- const msg = "unicode escape";
-
- if (try p.readNoEof(msg) != '{') {
- return p.err(.InvalidCharacter, msg);
+fn parseStringRawHexEsc(p: *Parser) !void {
+ const msg = "string raw hex escape";
+ while (try p.read()) |c1| {
+ if (c1 == ';') return;
+ const c2 = try p.readNoEof(msg);
+ const hi = try p.parseHexDigit(c1, msg);
+ const lo = try p.parseHexDigit(c2, msg);
+ try p.addChar(hi << 4 | lo);
}
+ return p.err(.UnclosedString, msg);
+}
+
+fn parseStringUniHexEsc(p: *Parser) !void {
+ const msg = "string unicode escape";
const uc = try p.parseHex(u21, msg);
- const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
- if (c != '}') {
+ const c = p.getUnread() orelse try p.readNoEof(msg);
+ if (c != ';') {
return p.err(.InvalidCharacter, msg);
}
- const n = try std.unicode.utf8CodepointSequenceLength(uc);
+ const n = std.unicode.utf8CodepointSequenceLength(uc) catch {
+ return p.err(.UnicodeError, msg);
+ };
const buf = try p.chars.addManyAsSlice(p.alloc.chars, n);
- _ = try std.unicode.utf8Encode(uc, buf);
+ const n2 = std.unicode.utf8Encode(uc, buf) catch {
+ return p.err(.UnicodeError, msg);
+ };
+ std.debug.assert(n == n2);
+}
+
+fn parseStringCharEsc(p: *Parser, c: u8) !void {
+ try p.addChar(switch (c) {
+ 'a' => 7,
+ 'b' => 8,
+ 't' => 9,
+ 'n' => 10,
+ 'v' => 11,
+ 'f' => 12,
+ 'r' => 13,
+ 'e' => 27,
+ else => return p.err(.InvalidCharacter, "string backslash escape"),
+ });
}
-fn parseHashExpression(p: *Parser, next: Fn) !void {
+fn parseHashExpr(p: *Parser, next: Fn) !void {
const c = try p.readNoEof("hash expression");
if (std.ascii.isAlphabetic(c)) {
const r = try p.parseRune(c);
@@ -544,11 +571,11 @@ fn parseRune(p: *Parser, c1: u8) !Value {
while (try p.read()) |c| : (len += 1) {
if (len == 6 or !std.ascii.isAlphanumeric(c)) {
p.unread(c);
- return p.getRune();
+ return p.getCharsAsRune();
}
try p.addChar(c);
}
- return p.getRune();
+ return p.getCharsAsRune();
}
fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
@@ -558,10 +585,10 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
return p.jump(next, p.cons(r, try p.parseBareString(c1)));
}
if (c == '"') {
- return p.jump(next, p.cons(r, try p.parseEscapedString('"')));
+ return p.jump(next, p.cons(r, try p.parseString('"')));
}
if (c == '|') {
- return p.jump(next, p.cons(r, try p.parseEscapedString('|')));
+ return p.jump(next, p.cons(r, try p.parseString('|')));
}
p.unread(c);
switch (c) {
@@ -752,18 +779,9 @@ fn isBareChar(c: u8) bool {
};
}
-fn isBareEsc(c: u8) bool {
- return switch (c) {
- 33...126 => true,
- else => false,
- };
-}
-
fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
- var uc: T = undefined;
-
const c1 = try p.readNoEof(emsg);
- uc = try p.parseHexDigit(c1, emsg);
+ var uc: T = try p.parseHexDigit(c1, emsg);
while (try p.read()) |c| {
if (!std.ascii.isHex(c)) {
@@ -777,14 +795,6 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
return uc;
}
-fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 {
- const h1 = try p.readNoEof(emsg);
- const h2 = try p.readNoEof(emsg);
- const hi = try p.parseHexDigit(h1, emsg);
- const lo = try p.parseHexDigit(h2, emsg);
- return hi << 4 | lo;
-}
-
fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 {
return switch (c) {
'0'...'9' => c - '0',