From cf934006c650d3d008a4408bedbd95597f906e43 Mon Sep 17 00:00:00 2001 From: Taylan Kammer Date: Mon, 31 Mar 2025 23:46:24 +0200 Subject: parser cleanup --- spec/parser.bnf | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ spec/parser.ebnf | 75 -------------------------------------------------- src/zisp/io/Parser.zig | 68 ++++++++++++++++++++++----------------------- 3 files changed, 109 insertions(+), 109 deletions(-) create mode 100644 spec/parser.bnf delete mode 100644 spec/parser.ebnf diff --git a/spec/parser.bnf b/spec/parser.bnf new file mode 100644 index 0000000..caa24f3 --- /dev/null +++ b/spec/parser.bnf @@ -0,0 +1,75 @@ +unit : blank* ( datum blank? | EOF ) ; + + +blank : 9...13 | comment ; + +datum : one_datum ( join_char? one_datum )* ; + +join_char : '.' | ':' ; + + +comment : ';' ( skip_unit | skip_line ) ; + +skip_unit : '~' unit ; + +skip_line : ( ~LF )* LF? ; + + +one_datum : bare_string | clad_datum ; + +bare_string : ( '.' | '+' | '-' | DIGIT ) ( bare_str_elt | '.' )* + | bare_str_elt+ + ; + +clad_datum : '\' bare_esc_str + | '|' pipe_str_elt* '|' + | '"' quot_str_elt* '"' + | '#' hash_expr + | '(' list ')' + | '[' list ']' + | '{' list '}' + | quote_expr + ; + + +bare_str_elt : bare_char | '\' bare_esc ; + + +bare_esc_str : bare_esc bare_str_elt* ; + +pipe_str_elt : ~( '|' | '\' ) | '\' pipe_esc ; + +quot_str_elt : ~( '"' | '\' ) | '\' quot_esc ; + +hash_expr : rune clad_datum? + | '%' label ( '%' | '=' datum ) + | clad_datum + ; + +list : unit* ( '.' unit )? blank* ; + +quote_expr : ( "'" | "`" | "," ) datum ; + + +bare_char : ALPHA | DIGIT | bare_punct ; + +bare_punct : '!' | '$' | '%' | '&' | '*' | '+' | '-' | '/' + | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~' + ; + +bare_esc : 33...126 ; + + +pipe_esc : string_esc | '|' ; + +quot_esc : string_esc | '"' ; + +string_esc : '\' | 'a' | 'b' | 'e' | 'f' | 'n' | 'r' | 't' | 'v' + | 'x' HEXDIG{2} + | 'u' '{' HEXDIG+ '}' + ; + + +rune : ALPHA ( ALPHA | DIGIT ){0,5} ; + +label : HEXDIG{1,12} ; diff --git a/spec/parser.ebnf b/spec/parser.ebnf deleted file mode 100644 index caa24f3..0000000 --- a/spec/parser.ebnf +++ /dev/null @@ -1,75 +0,0 @@ -unit : blank* ( datum blank? | EOF ) ; - - -blank : 9...13 | comment ; - -datum : one_datum ( join_char? one_datum )* ; - -join_char : '.' | ':' ; - - -comment : ';' ( skip_unit | skip_line ) ; - -skip_unit : '~' unit ; - -skip_line : ( ~LF )* LF? ; - - -one_datum : bare_string | clad_datum ; - -bare_string : ( '.' | '+' | '-' | DIGIT ) ( bare_str_elt | '.' )* - | bare_str_elt+ - ; - -clad_datum : '\' bare_esc_str - | '|' pipe_str_elt* '|' - | '"' quot_str_elt* '"' - | '#' hash_expr - | '(' list ')' - | '[' list ']' - | '{' list '}' - | quote_expr - ; - - -bare_str_elt : bare_char | '\' bare_esc ; - - -bare_esc_str : bare_esc bare_str_elt* ; - -pipe_str_elt : ~( '|' | '\' ) | '\' pipe_esc ; - -quot_str_elt : ~( '"' | '\' ) | '\' quot_esc ; - -hash_expr : rune clad_datum? - | '%' label ( '%' | '=' datum ) - | clad_datum - ; - -list : unit* ( '.' unit )? blank* ; - -quote_expr : ( "'" | "`" | "," ) datum ; - - -bare_char : ALPHA | DIGIT | bare_punct ; - -bare_punct : '!' | '$' | '%' | '&' | '*' | '+' | '-' | '/' - | '<' | '=' | '>' | '?' | '@' | '^' | '_' | '~' - ; - -bare_esc : 33...126 ; - - -pipe_esc : string_esc | '|' ; - -quot_esc : string_esc | '"' ; - -string_esc : '\' | 'a' | 'b' | 'e' | 'f' | 'n' | 'r' | 't' | 'v' - | 'x' HEXDIG{2} - | 'u' '{' HEXDIG+ '}' - ; - - -rune : ALPHA ( ALPHA | DIGIT ){0,5} ; - -label : HEXDIG{1,12} ; diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig index df9e238..7d14808 100644 --- a/src/zisp/io/Parser.zig +++ b/src/zisp/io/Parser.zig @@ -344,7 +344,7 @@ fn retval(p: *Parser, val: Value) void { fn parseUnit(p: *Parser) !void { var c1 = p.getUnread() orelse try p.read(); while (c1) |c| : (c1 = try p.read()) { - switch (try checkBlanks(p, c)) { + switch (try p.checkBlanks(c)) { .yes => {}, .skip_unit => try p.push(.parseUnit), .no => { @@ -358,7 +358,7 @@ fn parseUnit(p: *Parser) !void { fn endUnit(p: *Parser) !void { const c = p.getUnread() orelse return p.ret(); - switch (try checkBlanks(p, c)) { + switch (try p.checkBlanks(c)) { .yes => {}, .skip_unit => return skipUnitAndReturn(p), .no => p.unread(c), @@ -376,7 +376,7 @@ fn returnContext(p: *Parser) !void { } fn parseDatum(p: *Parser) !void { - return parseOneDatum(p, p.getUnread().?, .endFirstDatum); + return p.parseOneDatum(p.getUnread().?, .endFirstDatum); } fn endFirstDatum(p: *Parser) !void { @@ -424,9 +424,9 @@ fn endJoinDatum(p: *Parser) !void { fn parseOneDatum(p: *Parser, c: u8, next: Fn) !void { if (isBareChar(c) or c == '.') { - return p.jump(next, try parseBareString(p, c)); + return p.jump(next, try p.parseBareString(c)); } - return parseCladDatum(p, c, next); + return p.parseCladDatum(c, next); } fn parseBareString(p: *Parser, c: u8) !Value { @@ -435,12 +435,12 @@ fn parseBareString(p: *Parser, c: u8) !Value { else => false, }; try p.addChar(c); - return parseBareStringRest(p, allow_dots); + return p.parseBareStringRest(allow_dots); } fn parseBareEscString(p: *Parser) !Value { try p.addChar(try parseBareEsc(p)); - return parseBareStringRest(p, false); + return p.parseBareStringRest(false); } fn parseBareStringRest(p: *Parser, allow_dots: bool) !Value { @@ -471,15 +471,15 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void { return p.jump(next, try parseBareEscString(p)); } if (c == '"') { - return p.jump(next, try parseQuotedString(p, '"')); + return p.jump(next, try p.parseQuotedString('"')); } if (c == '|') { - return p.jump(next, try parseQuotedString(p, '|')); + return p.jump(next, try p.parseQuotedString('|')); } return switch (c) { - '#' => parseHashExpression(p, next), - '(', '[', '{' => parseList(p, c, next), - '\'', '`', ',' => parseQuoteExpr(p, c, next), + '#' => p.parseHashExpression(next), + '(', '[', '{' => p.parseList(c, next), + '\'', '`', ',' => p.parseQuoteExpr(c, next), else => p.abort(next, c), }; } @@ -492,7 +492,7 @@ fn parseQuotedString(p: *Parser, close: u8) !Value { if (c != '\\') { try p.addChar(c); } else { - try parseQuotedEsc(p, close); + try p.parseQuotedEsc(close); } } return error.UnclosedString; @@ -517,7 +517,7 @@ fn parseQuotedEsc(p: *Parser, close: u8) !void { 'f' => 12, 'r' => 13, 'e' => 27, - 'x' => try parseHexByte(p, "hex escape"), + 'x' => try p.parseHexByte("hex escape"), else => return p.err(.InvalidCharacter, "quoted escape"), }); } @@ -539,7 +539,7 @@ fn parseUniHex(p: *Parser) !void { return p.err(.InvalidCharacter, msg); } - const uc = try parseHex(p, u21, msg); + const uc = try p.parseHex(u21, msg); const c = p.getUnread() orelse return p.err(.UnexpectedEof, msg); if (c != '}') { return p.err(.InvalidCharacter, msg); @@ -552,23 +552,23 @@ fn parseUniHex(p: *Parser) !void { fn parseHashExpression(p: *Parser, next: Fn) !void { const c = try p.readNoEof("hash expression"); - if (try checkBlanks(p, c) != .no) { + if (try p.checkBlanks(c) != .no) { return p.err(.InvalidCharacter, "hash expression"); } if (std.ascii.isAlphabetic(c)) { - const r = try parseRune(p, c); - return parseRuneEnd(p, r, next); + const r = try p.parseRune(c); + return p.parseRuneEnd(r, next); } if (c == '%') { const l = try parseLabel(p); - return parseLabelEnd(p, l, next); + return p.parseLabelEnd(l, next); } p.unread(c); return p.subr(.parseHashDatum, next); } fn parseHashDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endHashDatum); + return p.parseCladDatum(p.getUnread().?, .endHashDatum); } fn endHashDatum(p: *Parser) !void { @@ -594,13 +594,13 @@ fn parseRune(p: *Parser, c1: u8) !Value { fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { const c = p.getUnread() orelse return p.jump(next, r); if (c == '\\') { - return p.jump(next, p.cons(r, try parseBareString(p, c))); + return p.jump(next, p.cons(r, try p.parseBareString(c))); } if (c == '"') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '"'))); + return p.jump(next, p.cons(r, try p.parseQuotedString('"'))); } if (c == '|') { - return p.jump(next, p.cons(r, try parseQuotedString(p, '|'))); + return p.jump(next, p.cons(r, try p.parseQuotedString('|'))); } p.unread(c); switch (c) { @@ -615,7 +615,7 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void { } fn parseRuneDatum(p: *Parser) !void { - return parseCladDatum(p, p.getUnread().?, .endRuneDatum); + return p.parseCladDatum(p.getUnread().?, .endRuneDatum); } fn endRuneDatum(p: *Parser) !void { @@ -626,7 +626,7 @@ fn endRuneDatum(p: *Parser) !void { } fn parseLabel(p: *Parser) !Value { - const label = try parseHex(p, u48, "datum label"); + const label = try p.parseHex(u48, "datum label"); return value.fixnum.pack(label); } @@ -667,14 +667,14 @@ fn parseList(p: *Parser, open: u8, next: Fn) !void { if (c == close) { return p.jump(next, head); } - switch (try checkBlanks(p, c)) { + switch (try p.checkBlanks(c)) { .yes => {}, .skip_unit => { - try listParserSetup(p, head, close, next); + try p.listParserSetup(head, close, next); return p.subr(.parseUnit, .parseUnit); }, .no => { - try listParserSetup(p, head, close, next); + try p.listParserSetup(head, close, next); p.unread(c); return p.jump(.parseDatum, null); }, @@ -712,7 +712,7 @@ fn continueList(p: *Parser) !void { if (c == close) { return endList(p); } - switch (try checkBlanks(p, c)) { + switch (try p.checkBlanks(c)) { .yes => {}, .skip_unit => { try p.pushContext(.continueList); @@ -747,7 +747,7 @@ fn closeImproperList(p: *Parser) !void { if (c == close) { return p.retval(result); } - switch (try checkBlanks(p, c)) { + switch (try p.checkBlanks(c)) { .yes => {}, .skip_unit => return p.subr(.parseUnit, .closeImproperList), .no => return p.err(.InvalidCharacter, "after list tail"), @@ -814,7 +814,7 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { var uc: T = undefined; const c1 = try p.readNoEof(emsg); - uc = try parseHexDigit(p, c1, emsg); + uc = try p.parseHexDigit(c1, emsg); while (try p.read()) |c| { if (!std.ascii.isHex(c)) { @@ -823,7 +823,7 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { } const shl = std.math.shlExact; uc = shl(T, uc, 4) catch return p.err(.OutOfRange, emsg); - uc |= try parseHexDigit(p, c, emsg); + uc |= try p.parseHexDigit(c, emsg); } return uc; } @@ -831,8 +831,8 @@ fn parseHex(p: *Parser, T: type, comptime emsg: []const u8) !T { fn parseHexByte(p: *Parser, comptime emsg: []const u8) !u8 { const h1 = try p.readNoEof(emsg); const h2 = try p.readNoEof(emsg); - const hi = try parseHexDigit(p, h1, emsg); - const lo = try parseHexDigit(p, h2, emsg); + const hi = try p.parseHexDigit(h1, emsg); + const lo = try p.parseHexDigit(h2, emsg); return hi << 4 | lo; } -- cgit v1.2.3