1 file changed, 85 insertions, 75 deletions
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index 14db959..4a2ed35 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -188,7 +188,7 @@ fn addChar(p: *Parser, c: u8) !void {
     try p.chars.append(p.alloc.chars, c);
 }
 
-fn getString(p: *Parser) Value {
+fn getCharsAsString(p: *Parser) Value {
     defer p.chars.clearRetainingCapacity();
     return if (value.sstr.isValidSstr(p.chars.items))
         value.sstr.pack(p.chars.items)
@@ -196,7 +196,7 @@ fn getString(p: *Parser) Value {
         value.istr.intern(p.chars.items);
 }
 
-fn getRune(p: *Parser) Value {
+fn getCharsAsRune(p: *Parser) Value {
     defer p.chars.clearRetainingCapacity();
     return value.rune.pack(p.chars.items);
 }
@@ -433,84 +433,111 @@ fn parseBareString(p: *Parser, c1: u8) !Value {
             break;
         }
     }
-    return p.getString();
+    return p.getCharsAsString();
 }
 
 fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
     return switch (c) {
-        '|' => p.jump(next, try p.parseEscapedString('|')),
-        '"' => p.jump(next, try p.parseEscapedString('"')),
-        '#' => p.parseHashExpression(next),
+        '|' => p.jump(next, try p.parseString('|')),
+        '"' => p.jump(next, try p.parseString('"')),
+        '#' => p.parseHashExpr(next),
         '(', '[', '{' => p.parseList(c, next),
         '\'', '`', ',' => p.parseQuoteExpr(c, next),
         else => p.abort(next, c),
     };
 }
 
-fn parseEscapedString(p: *Parser, close: u8) !Value {
-    while (try p.read()) |c| {
-        if (c == close) {
-            const s = p.getString();
+fn parseString(p: *Parser, comptime close: u8) !Value {
+    while (try p.read()) |c| sw: switch (c) {
+        close => {
+            const s = p.getCharsAsString();
             return if (close == '"') p.cons(QUOTE, s) else s;
-        }
-        if (c != '\\') {
-            try p.addChar(c);
-        } else {
-            try p.parseQuotedEsc(close);
-        }
-    }
-    return error.UnclosedString;
+        },
+        '\\' => switch (try p.readNoEof("string backslash escape")) {
+            '\\', '|', '"' => |c2| try p.addChar(c2),
+            '\t', ' ' => {
+                const c2 = try p.skipStringLfEscape();
+                continue :sw c2;
+            },
+            '\n' => {
+                const c2 = try p.skipStringIndent();
+                continue :sw c2;
+            },
+            'x' => try p.parseStringRawHexEsc(),
+            'u' => try p.parseStringUniHexEsc(),
+            else => |c2| try p.parseStringCharEsc(c2),
+        },
+        // Important to use a capture here, since it may come from a labeled
+        // continue statement passing a new char directly to the switch.
+        else => |c2| try p.addChar(c2),
+    };
+    return p.err(.UnclosedString, .{close} ++ " string");
 }
 
-fn parseQuotedEsc(p: *Parser, close: u8) !void {
-    const c = try p.readNoEof("quoted escape");
-    if (c == close) return p.addChar(close);
-    if (c == 'u') return p.parseUniHexHandleErrors();
-    try p.addChar(switch (c) {
-        '\\' => c,
-        '0' => 0,
-        'a' => 7,
-        'b' => 8,
-        't' => 9,
-        'n' => 10,
-        'v' => 11,
-        'f' => 12,
-        'r' => 13,
-        'e' => 27,
-        'x' => try p.parseHexByte("hex escape"),
-        else => return p.err(.InvalidCharacter, "quoted escape"),
-    });
+/// Consumes blanks up to a newline, then hands off to skipStringIndent.
+/// Any other byte is an InvalidCharacter error; a null read is UnclosedString.
+fn skipStringLfEscape(p: *Parser) !u8 {
+    while (try p.read()) |c| {
+        if (c == '\n') return p.skipStringIndent();
+        if (c != '\t' and c != ' ') return p.err(.InvalidCharacter, "string linefeed escape");
+    }
+    return p.err(.UnclosedString, "string linefeed escape");
 }
 
-fn parseUniHexHandleErrors(p: *Parser) !void {
-    return p.parseUniHex() catch |e| switch (e) {
-        error.Utf8CannotEncodeSurrogateHalf => p.err(
-            .UnicodeError,
-            "unicode escape",
-        ),
-        else => e,
+fn skipStringIndent(p: *Parser) !u8 {
+    while (try p.read()) |c| switch (c) {
+        '\t', ' ' => {},
+        else => return c,
     };
+    return p.err(.UnclosedString, "string linefeed escape");
 }
 
-fn parseUniHex(p: *Parser) !void {
-    const msg = "unicode escape";
-
-    if (try p.readNoEof(msg) != '{') {
-        return p.err(.InvalidCharacter, msg);
+fn parseStringRawHexEsc(p: *Parser) !void {
+    const msg = "string raw hex escape";
+    while (try p.read()) |c1| {
+        if (c1 == ';') return;
+        const c2 = try p.readNoEof(msg);
+        const hi = try p.parseHexDigit(c1, msg);
+        const lo = try p.parseHexDigit(c2, msg);
+        try p.addChar(hi << 4 | lo);
     }
+    return p.err(.UnclosedString, msg);
+}
+
+/// Parses the rest of a `\u` escape (hex digits, then `;`) and appends the
+/// code point's UTF-8 encoding to p.chars.
+fn parseStringUniHexEsc(p: *Parser) !void {
+    const msg = "string unicode escape";
 
     const uc = try p.parseHex(u21, msg);
-    const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg);
-    if (c != '}') {
+    const c = p.getUnread() orelse try p.readNoEof(msg);
+    if (c != ';') {
         return p.err(.InvalidCharacter, msg);
     }
 
-    const n = try std.unicode.utf8CodepointSequenceLength(uc);
+    // Validate first so a bad code point never leaves unwritten bytes in p.chars.
+    if (!std.unicode.utf8ValidCodepoint(uc)) return p.err(.UnicodeError, msg);
+    const n = std.unicode.utf8CodepointSequenceLength(uc) catch unreachable;
     const buf = try p.chars.addManyAsSlice(p.alloc.chars, n);
-    _ = try std.unicode.utf8Encode(uc, buf);
+    const n2 = std.unicode.utf8Encode(uc, buf) catch unreachable;
+    std.debug.assert(n == n2);
+}
+
+/// Handles a single-letter backslash escape inside a string.
+///
+/// Appends the control byte selected by `c` (one of a b t n v f r e) via
+/// addChar; any other `c` is reported through p.err as InvalidCharacter.
+fn parseStringCharEsc(p: *Parser, c: u8) !void {
+    // Parallel tables: names[i] is the escape letter for bytes[i].
+    const names = "abtnvfre";
+    const bytes = [names.len]u8{ 7, 8, 9, 10, 11, 12, 13, 27 };
+    const i = std.mem.indexOfScalar(u8, names, c) orelse {
+        return p.err(.InvalidCharacter, "string backslash escape");
+    };
+    return p.addChar(bytes[i]);
 }
 
-fn parseHashExpression(p: *Parser, next: Fn) !void {
+fn parseHashExpr(p: *Parser, next: Fn) !void {
     const c = try p.readNoEof("hash expression");
     if (std.ascii.isAlphabetic(c)) {
         const r = try p.parseRune(c);
@@ -544,11 +571,11 @@ fn parseRune(p: *Parser, c1: u8) !Value {
     while (try p.read()) |c| : (len += 1) {
         if (len == 6 or !std.ascii.isAlphanumeric(c)) {
             p.unread(c);
-            return p.getRune();
+            return p.getCharsAsRune();
         }
         try p.addChar(c);
     }
-    return p.getRune();
+    return p.getCharsAsRune();
 }
 
 fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
@@ -558,10 +585,10 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
         return p.jump(next, p.cons(r, try p.parseBareString(c1)));
     }
     if (c == '"') {
-        return p.jump(next, p.cons(r, try p.parseEscapedString('"')));
+        return p.jump(next, p.cons(r, try p.parseString('"')));
     }
     if (c == '|') {
-        return p.jump(next, p.cons(r, try p.parseEscapedString('|')));
+        return p.jump(next, p.cons(r, try p.parseString('|')));
     }
     p.unread(c);
     switch (c) {
@@ -752,18 +779,9 @@ fn isBareChar(c: u8) bool {
     };
 }
 
-fn isBareEsc(c: u8) bool {
-    return switch (c) {
-        33...126 => true,
-        else => false,
-    };
-}
-
 fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
-    var uc: T = undefined;
-
     const c1 = try p.readNoEof(emsg);
-    uc = try p.parseHexDigit(c1, emsg);
+    var uc: T = try p.parseHexDigit(c1, emsg);
 
     while (try p.read()) |c| {
         if (!std.ascii.isHex(c)) {
@@ -777,14 +795,6 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T {
     return uc;
 }
 
-fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 {
-    const h1 = try p.readNoEof(emsg);
-    const h2 = try p.readNoEof(emsg);
-    const hi = try p.parseHexDigit(h1, emsg);
-    const lo = try p.parseHexDigit(h2, emsg);
-    return hi << 4 | lo;
-}
-
 fn parseHexDigit(p: *Parser, c: u8, comptime emsg: []const u8) !u8 {
     return switch (c) {
         '0'...'9' => c - '0',