Doc fix & code cleanup.

author: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-02 17:26:37 +0100
committer: Taylan Kammer <taylan.kammer@gmail.com> 2025-03-02 17:26:37 +0100
commit: 56003f9f6f61070b94af19b2a957c84b941c7a0f (patch)
tree: 8007c7923a809dc3786caf467bf831e0006a7ba7
parent: b6066fc1474c5ef96b7137242fb2e7665df29fcc (diff)
2 files changed, 62 insertions, 75 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig
index 1a4789d..acaf002 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -303,7 +303,11 @@ test "parse bench" {
     std.mem.doNotOptimizeAway(timer.lap());
     for (0..1000) |i| {
         _ = i;
-        std.mem.doNotOptimizeAway(io.parser.parse("(a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z) d e f) d e f) d e f) d e f) d e f) d e f) d e f) d e f) d e f) d e f) d e f) 1 2 3))"));
+        std.mem.doNotOptimizeAway(io.parser.parse(
+            \\(a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c
+            \\(x y z (a b c (x y z (a b c (x y z) d e f) d e f) d e f) d e f)
+            \\d e f) d e f) d e f) d e f) d e f) d e f) d e f) 1 2 3))
+        ));
     }
     const ns: f64 = @floatFromInt(timer.lap());
     const secs = ns / 1_000_000_000;
diff --git a/src/libzisp/io/parser.zig b/src/libzisp/io/parser.zig
index f35264d..5b103eb 100644
--- a/src/libzisp/io/parser.zig
+++ b/src/libzisp/io/parser.zig
@@ -149,42 +149,28 @@
 //
 // === Implementation details ===
 //
-// Instead of using recursion directly, the parser is written in something akin
-// to a manual continuation-passing style, which ensures that parsing depth is
-// not limited by the stack size.  This is also called "trampolining."
+// Instead of using recursion directly, the parser is written in a "trampoline"
+// style, which ensures that parsing depth is not limited by the stack size.
 //
-// All state/context is passed around via a struct pointer.  The parser has a
-// main loop which calls a function, passing it the state, and also expects a
-// state pointer in return.
+// All state and context is passed around via a struct pointer.  The parser has
+// a main loop, which calls a function as dictated by state.context.next, and
+// the function may update the state to have another function called next.
 //
-// The state has a .next field, that indicates which function the main loop
-// should call next.  It also has a .parent field, potentially linking to a
-// previous state.  These two fields are used as follows:
+// If a function wants to call the parser recursively, it pushes some of the
+// current context onto a stack, including what function the recursive parser
+// should "return" to, and then updates the state to instruct the main loop to
+// call one of the starting functions.
 //
-// If a function wants to call the parser recursively, it sets the .next field
-// of the *current* state to where the recursive call should return to.  Then,
-// it creates a new state, sets the .next field of that one to where it should
-// start (either .start_parse or .start_datum), sets .parent to the old state,
-// and returns the new one.
-//
-// So, the main loop will now do what the newly returned state is instructing.
-//
-// If a function wants to make the parser return from this recursive parsing
-// routine, it sets the .retval field of the current state, and sets .next to
-// the special .perform_return instruction.  This makes the main loop copy the
-// value in .retval into the .retval field of the parent state, and reinstates
-// it, continuing with whatever was put in its .next field, which should be a
-// function that will consume the return value.
-//
-// If .parent is null, the main loop returns .retval as the final result.
+// If a function wants to make the parser return, either from a recursive parse
+// or from the main loop, it sets the .retval field, and tries to pop the saved
+// context.  If the context stack was empty, the main loop returns.
 //
 // Some further notes:
 //
-// While it's possible to just set .next and return the current state, to make
-// the main loop call another function next (possibly even setting .retval to
-// pass a value to it), this is completely unnecessary.  A few non-recursive
-// function calls won't blow the stack.  It's only recursive parsing that we
-// need the above strategy for.
+// While it's possible to just set .next and return, to make the main loop call
+// another function next (possibly even setting .retval to pass a value to it),
+// this is completely unnecessary.  A few non-recursive calls won't blow the
+// stack.  It's only recursive parsing that we need the above strategy for.
 //
 // The difference between .start_parse and .start_datum is that the former will
 // allow whitespace and comments at the beginning, whereas the latter expects a
@@ -195,24 +181,19 @@
 // out of the way.  However, in some cases, we must use .start_parse instead.
 // This is because of datum comments.  When one appears, we start a recursive
 // parser, but instead of making it return to a function that will consume the
-// result, we make it return to the original state's starting point.  This way,
-// the parser "retries" what it was originally doing, with the comment out of
-// the way.  If we always used .start_datum for recursive parsing, this would
-// never allow whitespace *after* a datum comment, because we would be back at
-// .start_datum as soon as we reach the end of the datum comment.  Whitespace
-// after a datum comment is sometimes allowed and sometimes not, so there's no
-// generic solution that's always correct:
-//
-//   (foo #;bar baz)     ;; whitespace after #;bar allowed
+// result, we make it return to the original starting point, so the result is
+// ignored and the parser retries what it was originally doing.  If we always
+// used .start_datum, this would never allow whitespace after a datum comment,
+// since we would be back at .start_datum after the comment is out of the way:
 //
-//   #'#;(+ 1 2)(+ 3 4)  ;; whitespace after #;(+ 1 2) not allowed
+//   (foo #;bar baz)  ;must use .start_parse at the start of each element
 //
 // When it comes to pairs and lists, we basically treat everything as a list,
 // and a pair is just seen as the shortest possible improper list.  This saves
 // memory: If we implemented list parsing as pair parsing, we would be calling
 // the parser recursively, deeper and deeper, for every list element.  Though
 // we're not limited by stack space thanks to the strategy described above, it
-// would still waste memory by creating tons of new state objects.
+// would still waste memory and the time it takes to allocate memory.
 //
 //
 // === Notation used in comments ===
@@ -385,12 +366,12 @@ fn startParse(s: *State) void {
     if (s.eof()) {
         return s.returnDatum(value.eof.eof);
     }
-    return switch (s.peek()) {
+    switch (s.peek()) {
         // whitespace already consumed
         0...32, 127...255 => err(s, "invalid character"),
         ')', ']', '}' => err(s, "unexpected closing bracket"),
         else => startDatum(s),
-    };
+    }
 }
 
 // This is called when we *immediately* expect a datum and nothing else; for
@@ -402,7 +383,7 @@ fn startDatum(s: *State) void {
     if (s.isWhitespace()) {
         return err(s, "expected datum, got whitespace");
     }
-    return switch (s.peek()) {
+    switch (s.peek()) {
         // whitespace checked above
         0...32, 127...255 => err(s, "invalid character"),
 
@@ -421,7 +402,7 @@ fn startDatum(s: *State) void {
         '.' => err(s, "misplaced period"),
 
         else => startBareString(s),
-    };
+    }
 }
 
 fn endDatum(s: *State, d: Value) void {
@@ -448,7 +429,7 @@ fn endDatum(s: *State, d: Value) void {
     }
     s.context.val = d;
     s.context.char = c;
-    return s.recurParse(.start_datum, .end_join_datum);
+    s.recurParse(.start_datum, .end_join_datum);
 }
 
 fn checkTrailingDatumComment(s: *State) bool {
@@ -478,7 +459,7 @@ fn endJoinedDatum(s: *State) void {
         else => "JOIN",
     });
     const joined = value.pair.cons(s.context.val, s.retval);
-    return endDatum(s, value.pair.cons(rune, joined));
+    endDatum(s, value.pair.cons(rune, joined));
 }
 
 fn handleHash(s: *State) void {
@@ -503,21 +484,21 @@ fn handleHash(s: *State) void {
     }
 
     switch (s.peek()) {
-        'a'...'z', 'A'...'Z' => return handleRune(s),
-        '0'...'9' => return handleDatumLabel(s),
+        'a'...'z', 'A'...'Z' => handleRune(s),
+        '0'...'9' => handleDatumLabel(s),
         ';' => {
             s.skip();
             // Don't change next in this case.  Just let the parser redo what it
             // was doing as soon as the commented-out datum has been read.
-            return s.recurParse(.start_datum, s.context.next);
+            s.recurParse(.start_datum, s.context.next);
         },
-        else => return s.recurParse(.start_datum, .end_hash_datum),
+        else => s.recurParse(.start_datum, .end_hash_datum),
     }
 }
 
 fn handleRune(s: *State) void {
     const r = readRune(s) orelse return err(s, "rune too long");
-    return endDatum(s, r);
+    endDatum(s, r);
 }
 
 fn readRune(s: *State) ?Value {
@@ -551,7 +532,7 @@ fn handleDatumLabel(s: *State) void {
     }
 
     s.context.val = n;
-    return s.recurParse(.start_datum, .end_label_datum);
+    s.recurParse(.start_datum, .end_label_datum);
 }
 
 fn readDatumLabel(s: *State) ?Value {
@@ -561,12 +542,12 @@ fn readDatumLabel(s: *State) ?Value {
 fn endLabelDatum(s: *State) void {
     const rune = value.rune.pack("LABEL");
     const payload = value.pair.cons(s.context.val, s.retval);
-    return endDatum(s, value.pair.cons(rune, payload));
+    endDatum(s, value.pair.cons(rune, payload));
 }
 
 fn endHashDatum(s: *State) void {
     const rune = value.rune.pack("HASH");
-    return endDatum(s, value.pair.cons(rune, s.retval));
+    endDatum(s, value.pair.cons(rune, s.retval));
 }
 
 fn startQuotedString(s: *State) void {
@@ -574,7 +555,7 @@ fn startQuotedString(s: *State) void {
     s.skip();
 
     const str = readQuotedString(s) catch return err(s, "unclosed string");
-    return endDatum(s, str);
+    endDatum(s, str);
 }
 
 // RQS = Read Quoted String
@@ -608,21 +589,22 @@ fn readQuotedSstr(s: *State) !?Value {
     return error.Unclosed;
 }
 
-fn readQuotedLongString(s: *State) Value {
-    return err(s, "NOT YET IMPLEMENTED");
+fn readQuotedLongString(s: *State) !Value {
+    return err(s, "TODO: NOT YET IMPLEMENTED");
 }
 
 fn startBareString(s: *State) void {
     // We're at |foo so start reading directly.
-    return readBareSstr(s) orelse readBareLongString(s);
+    const str = readBareSstr(s) orelse readBareLongString(s);
+    endDatum(s, str);
 }
 
-fn readBareSstr(s: *State) ?void {
-    const sp = s.pos;
+fn readBareSstr(s: *State) ?Value {
+    const pos = s.pos;
     if (readShortString(s, isSstrChar, value.sstr.pack)) |sstr| {
-        return endDatum(s, sstr);
+        return sstr;
     } else {
-        s.pos = sp;
+        s.pos = pos;
         return null;
     }
 }
@@ -637,8 +619,8 @@ fn isSstrChar(c: u8) bool {
     };
 }
 
-fn readBareLongString(s: *State) void {
-    return err(s, "NOT YET IMPLEMENTED");
+fn readBareLongString(s: *State) Value {
+    return err(s, "TODO: NOT YET IMPLEMENTED");
 }
 
 fn startQuote(s: *State) void {
@@ -649,11 +631,11 @@ fn startQuote(s: *State) void {
         ',' => "COMMA",
         else => unreachable,
     });
-    return s.recurParse(.start_datum, .end_quote_datum);
+    s.recurParse(.start_datum, .end_quote_datum);
 }
 
 fn endQuoteDatum(s: *State) void {
-    return endDatum(s, value.pair.cons(s.context.val, s.retval));
+    endDatum(s, value.pair.cons(s.context.val, s.retval));
 }
 
 // List processing is, unsurprisingly, the most complicated, and it's made even
@@ -673,10 +655,11 @@ fn startList(s: *State) void {
 
     s.context.val = value.nil.nil;
     s.context.char = open;
-    return if (isEndOfList(s))
-        endList(s)
-    else
+    if (isEndOfList(s)) {
+        endList(s);
+    } else {
         s.recurParse(.start_parse, .continue_list);
+    }
 }
 
 fn isEndOfList(s: *State) bool {
@@ -702,7 +685,7 @@ fn endList(s: *State) void {
         return endDatum(s, value.pair.cons(rune, s.context.val));
     }
 
-    return err(s, "wrong closing bracket for list");
+    err(s, "wrong closing bracket for list");
 }
 
 fn continueList(s: *State) void {
@@ -728,12 +711,12 @@ fn continueList(s: *State) void {
         return s.recurParse(.start_parse, .finish_improper_list);
     }
 
-    return s.recurParse(.start_parse, .continue_list);
+    s.recurParse(.start_parse, .continue_list);
 }
 
 fn finishImproperList(s: *State) void {
     s.context.val = lib.list.reverseWithTail(s.context.val, s.retval);
-    return endImproperList(s);
+    endImproperList(s);
 }
 
 // Handling the end of an improper list is a bit awkward, because there may be
@@ -759,7 +742,7 @@ fn endImproperList(s: *State) void {
         }
     }
 
-    return err(s, "malformed list / extra datum at end of improper list");
+    err(s, "malformed list / extra datum at end of improper list");
 }
 
 fn err(s: *State, msg: []const u8) noreturn {
author	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-02 17:26:37 +0100
committer	Taylan Kammer <taylan.kammer@gmail.com>	2025-03-02 17:26:37 +0100
commit	56003f9f6f61070b94af19b2a957c84b941c7a0f (patch)
tree	8007c7923a809dc3786caf467bf831e0006a7ba7
parent	b6066fc1474c5ef96b7137242fb2e7665df29fcc (diff)