From e77c34f654a47cb90857f1ac4d6957e008858d6a Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Wed, 3 Jun 2026 20:56:00 +0200 Subject: At-quoted strings carry the sentinel. --- doc/c1/1-parse.md | 29 ++++++++++++++++------------- src/zisp/io/Parser.zig | 9 +++++---- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/doc/c1/1-parse.md b/doc/c1/1-parse.md index babf160..d4c4c2e 100644 --- a/doc/c1/1-parse.md +++ b/doc/c1/1-parse.md @@ -176,23 +176,26 @@ is only a *datum* if it adheres to additional constraints as explained below. Strings can appear *bare* or be quoted in various ways. A quoted string is in fact parsed into a pair value with a rune in the first position to identify the -quotation variant that was parsed, and the string value in the second position. - - +-----------+----------------------+ - | Syntax | Parse output | - +-----------+----------------------+ - | |bytes| | (#PQSTR & <string>) | - +-----------+----------------------+ - | "bytes" | (#DQSTR & <string>) | - +-----------+----------------------+ - | @_bytes_ | (#ATSTR & <string>) | - +-----------+----------------------+ +quotation variant that was parsed, and the string value in the second position; +or, in case of at-quoted strings, a special construct we will look at later. + + +-----------+-----------------------------+ + | Syntax | Parse output | + +-----------+-----------------------------+ + | |bytes| | (#PQSTR & <string>) | + +-----------+-----------------------------+ + | "bytes" | (#DQSTR & <string>) | + +-----------+-----------------------------+ + | @_bytes_ | (#ATSTR <byte> & <string>) | + +-----------+-----------------------------+ The visual token `<string>` denotes the actual string, as a Zisp value, in the -second position of the pair. +second position of the pair. The visual token `<byte>` stands for an integer +Zisp value between 0 and 255. These external representations of strings will be explained in more detail -further below, including backslash escape sequences allowed within. 
+further below, including backslash escape sequences allowed within, and how +exactly at-quoted strings work. Strings have a fixed length, counted in bytes. Each byte can have any value, including zero (ASCII NUL). The parser reads bytes, not Unicode characters; a diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index e29868a..a56a61b 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -481,13 +481,14 @@ fn getString(p: *Parser, comptime close: u8) !Value { } fn getAtString(p: *Parser) !Value { - const sentinel = try p.readNoEof("at-string"); + const stop = try p.readNoEof("at-string"); while (try p.readNoEof2("at-string")) |c| { - if (c == sentinel) break; + if (c == stop) break; try p.addChar(c); } - const s = try p.getCharsAsString(); - return p.cons(ATSTR, s); + const str = try p.getCharsAsString(); + const byte = value.fixnum.pack(stop); + return p.cons(ATSTR, p.cons(byte, str)); } fn skipStringLfEscape(p: *Parser) !u8 { -- cgit v1.2.3