An update of sorts.

author: Taylan Kammer <taylan.kammer@gmail.com> 2026-05-23 22:22:57 +0200
committer: Taylan Kammer <taylan.kammer@gmail.com> 2026-05-23 22:22:57 +0200
commit: 378f8598a5a57b731948241e41f584f5172dc2a2 (patch)
tree: e9352110efe5b204a5abe7e00693be2004aab4e5
parent: f1f134d072e375335be5c1203095115fef1db253 (diff)
12 files changed, 504 insertions, 38 deletions
diff --git a/docs/c1/1-parse.md b/docs/c1/1-parse.md
index 73b8d8a..6484cab 100644
--- a/docs/c1/1-parse.md
+++ b/docs/c1/1-parse.md
@@ -12,48 +12,52 @@ which is necessary to strategically construct more complex code and data:
     +--------+-----------------+--------+----------+------+
 
 The parser can also output non-negative integers, but this is only used for
-datum labels; number literals are handled by the *decoder* (see next).
+datum labels; number literals are handled by the *decoder* instead.
 
-The parser recognizes various "syntax sugar" and transforms it into uses of the
-above data types.  The most ubiquitous example is of course the list:
 
-    (datum1 datum2 ...)  ->  (datum1 & (datum2 & (... & ())))
+## Decoder
 
-The following table summarizes the other supported transformations:
+A separate process called *decoding* can transform such data into more complex
+types.  For example, `(#HASH x y z)` could be decoded into an array, so the
+expression `#(x y z)` could work like in Scheme; or `(#SQUARE x y z)` could be
+decoded into a function call expression that will, at run-time, allocate and
+initialize a dynamic array with three elements, so the expression `[x y z]`
+would work like in JavaScript.
 
-    "xyz"   -> (#QUOTE & |xyz|)       #datum       -> (#HASH & datum)
+Decoding also resolves datum labels, goes over strings to find ones that are
+actually a number literal, and takes care of a number of other transformations.
+This offloads complexity, allowing the parser to remain extremely simple.  See
+the dedicated documentation of the decoder for more.
 
-    [...]   -> (#SQUARE ...)          #rune(...)   -> (#rune ...)
 
-    {...}   -> (#BRACE ...)           dat1dat2     -> (#JOIN dat1 & dat2)
+## Syntax sugar
 
-    'datum  -> (#QUOTE & datum)       dat1.dat2    -> (#DOT dat1 & dat2)
+The parser recognizes various "syntax sugar" and transforms it into uses of the
+above listed minimal data types.  The most ubiquitous example is the list:
 
-    `datum  -> (#GRAVE & datum)       dat1:dat2    -> (#COLON dat1 & dat2)
+    (datum1 datum2 ...)  ->  (datum1 & (datum2 & (... & ())))
 
-    ,datum  -> (#COMMA & datum)       #%hex%       -> (#LABEL & hex)
+The following table summarizes the other transformations available:
 
-                                      #%hex=datum  -> (#LABEL hex & datum)
+    "xyz"   -> (#QUOTE & |xyz|)       #datum       -> (#HASH & datum)
 
-A separate process called *decoding* can transform such data into more complex
-types.  For example, `(#HASH x y z)` could be decoded into a vector, so the
-expression `#(x y z)` works just like in Scheme.
+    ~_xyz_  -> (#TILDE & |xyz|)       #rune(...)   -> (#rune ...)
 
-Decoding also resolves datum labels, goes over strings to find ones that are
-actually a number literal, and takes care of a number of other transformations.
-This offloads complexity, allowing the parser to remain extremely simple.  See
-the dedicated documentation of the decoder for more.
+    [...]   -> (#SQUARE ...)          dat1dat2     -> (#JOIN dat1 & dat2)
+                                 
+    {...}   -> (#BRACE ...)           dat1.dat2    -> (#DOT dat1 & dat2)
+                                 
+    'datum  -> (#QUOTE & datum)       dat1:dat2    -> (#COLON dat1 & dat2)
+                                 
+    `datum  -> (#GRAVE & datum)       #%hex=datum  -> (#LABEL hex & datum)
+                                 
+    ,datum  -> (#COMMA & datum)       #%hex%       -> (#LABEL & hex)
 
-Further notes about the syntax sugar table and examples above:
+Notes about the table and examples:
 
 * The terms datum, dat1, and dat2 each refer to an arbitrary datum; ellipsis
   means zero or more data; hex is a hexadecimal number of up to 12 digits.
 
-* The `#datum` form only applies when the datum following the hash sign is a
-  list, quoted string, quote expression, another expression starting with the
-  hash sign, or a pipe-quoted string (see next).  A bare string can follow the
-  hash sign by separating the two with a backslash: `#\string`
-
 * Strings can be quoted with pipes, like symbols in Scheme.  This is the "real"
   string literal syntax, whereas using double quotes is syntax sugar for a
   quoted string literal.
@@ -62,6 +66,16 @@ Further notes about the syntax sugar table and examples above:
 
       "foo bar baz"  -> (#QUOTE & |foo bar baz|)
 
+* See the next section for an explanation of the tilde syntax, which implements
+  "raw" string literals.
+
+* The `#datum` form only applies when the datum following the hash sign is
+  anything other than a bare string (unquoted, without pipe symbol) since
+  otherwise this would be ambiguous with a rune literal.  A bare string can
+  nevertheless follow the hash sign by separating the two with a backslash:
+
+      #\string  ->  (#HASH & string)
+
 * Though not represented in the table due to notational difficulty, the form
   `#rune(...)` doesn't require a list in the second position; any datum that
   works with the `#datum` syntax also works with `#rune<DATUM>`.
@@ -81,7 +95,7 @@ Further notes about the syntax sugar table and examples above:
 
 * Syntax sugar can combine arbitrarily.  Some examples follow.  Any of these may
   or may not actually have a meaning in code; many could simply end up producing
-  a syntax error at the macro-expand stage.
+  an error during decoding, or later interpretation of code.
 
       #{...}            -> (#HASH #BRACE ...)
 
@@ -111,7 +125,106 @@ Further notes about the syntax sugar table and examples above:
 * Runes are case-sensitive, and the parser always emits runes using upper-case
   letters when expressing syntax sugar.  Uppercase rune names are reserved for
   Zisp's internal use and standard library; users can use lowercase runes with
-  custom meaning without worrying about clashes.
+  custom meaning without worrying about clashes, with the exception of a small
+  number of lowercase runes such as `#true` and `#false` that are part of the
+  default decoder settings.
+
+
+## Tilde strings
+
+There is a special type of syntax sugar for "raw" strings, meaning that no
+backslash escapes nor any other kind of escape sequence are recognized.
+
+This raw string syntax begins with a tilde, followed by any byte.  That byte
+becomes the termination marker, and the string cannot represent a literal
+occurrence of it, since there are no escape sequences.
+
+    ~%foo \ bar%  ->  (#TILDE & |foo \\ bar|)
+
+This can be useful, for instance, when representing regular expressions as
+quoted string literals in code:
+
+    ~/^foo\\(bar|baz)\.\[".*"\]$/     ;; matches e.g. foo\bar.["blah"]
+
+Were it not for this syntax, this regular expression would need to be
+represented by the following quoted string literal in Zisp code:
+
+    "^foo\\\\(bar|baz)\\.\\[\".*\"\\]$"
+
+Alternatively, imagine searching for certain MS Windows file paths:
+
+    ~_C:\\\\User\\foo_                ;; matches C:\\User\foo
+
+That's already ugly.  Without raw strings, it would need to look like this:
+
+    "C:\\\\\\\\User\\\\foo"
+
+Typically, the rune `#TILDE` would be treated as a synonym to `#QUOTE` by the
+decoder, though creative programmers could repurpose it.
+
+
+## Newlines in strings
+
+Normally, a newline in a string has no special meaning and simply becomes part
+of the string.  However, newlines can be backslash-escaped, which simply erases
+them; the escaped newline can also be preceded or followed by any number of tab
+and space characters, which are all stripped as well.  (Note: It's not blanks
+preceding the backslash that are stripped, but blanks following the backslash
+and preceding the newline; i.e., blanks at the end of the line.)
+
+Following are some examples of how multi-line strings can appear in source code
+with different intentions and meanings:
+
+    (define paragraph "This paragraph has been visually split into multiple \
+                       lines, but the newlines are escaped, so it's one line.")
+
+    (define json-object '|   ;; use '|| so we needn't escape "key" etc.
+      {
+        "key": "value"
+      }
+    |)
+
+The second example is actually slightly problematic.  It begins with a newline,
+which may be undesirable, but escaping that newline would cause the first line
+to have no indentation, thus the opening `{` would not line up with the closing
+`}` when this string is printed out.  Further, if the entire block of code is
+indented, then the string contents may be more indented than intended.  (No pun
+or rhyme intended.)  Consider:
+
+    (let ((foo one))
+      (let ((bar two))
+        (let ((json-object '|
+                 {
+                   "key": "value"
+                 }
+               |))
+          (do-whatever))))
+
+The string bound to `json-object` has way more indentation than the programmer
+intended.  Should the parser attempt to solve this issue?
+
+Thankfully, we have the decoder.  The implementation of `#QUOTE` can simply
+implement a post-processing algorithm such as the one used for Java 15's text
+blocks feature: [JEP 378: Text Blocks](https://openjdk.org/jeps/378)
+
+The only feature Zisp cannot offer here is a way to fence off multi-line strings
+with a longer token such as `"""` as seen in Python or Java, or an arbitrary
+word as seen in Bourne shell and PHP "here doc" syntax.  For simplicity, the
+Zisp parser omits such features.
+
+That said, if a programmer truly wanted to have arbitrary text blocks in code,
+without needing to escape anything in them, it's possible to abuse the tilde
+string syntax by using it with an ASCII control character which is displayed
+visibly by a text editor.  In the following, the characters `^\` are meant to
+represent a literal ASCII File Separator character in the source code:
+
+    (define json-object ~^\
+      {
+        "key": "value"
+      }
+      ^\)
+
+Hey, it works fine in Emacs, so why not??  (`C-q C-\` to insert the `^\`.)
 
 <!--
 ;; Local Variables:
diff --git a/docs/c1/grammar/index.md b/docs/c1/grammar/index.md
index 5bedbfc..d70021a 100644
--- a/docs/c1/grammar/index.md
+++ b/docs/c1/grammar/index.md
@@ -59,6 +59,9 @@ The following limits are not represented in the grammar:
   terribly confusing for a human reader.  Consider: `#foobarbaz`.
   This would parse as a `Datum` joining `#foobar` and `baz`.
 
+  (The ABNF does not suffer from this issue, since it explicitly
+   enumerates the join possibilities anyway.)
+
 * A `Label` is the hexadecimal representation of a 48-bit integer,
   meaning it allows for a maximum of 12 hexadecimal digits.  Longer
   values are grammatical, but signal an out-of-range error, so as to
@@ -67,6 +70,9 @@ The following limits are not represented in the grammar:
   signal an invalid character error at the letter `d` if the grammar
   limited a `Label` to 12 hexadecimal digits.
 
+  (As above, the ABNF doesn't care about this.  You probably don't
+   want to use the ABNF to generate a parser anyway.)
+
 
 ## Stream-parsing strategy
 
diff --git a/docs/index.md b/docs/index.md
index 471d518..6aa03ca 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,13 +17,12 @@ code base.
 1. [Chapter 1: Genesis](c1/)
 
    This chapter goes through the processes involved in reading source
-   files and ultimately producing binaries from them.
+   code, running it, and optionally compiling it.
 
    1. [Parse](c1/1-parse.html)
    2. [Decode](c1/2-decode.html)
-   3. [Expand](c1/3-expand.html)
-   4. [Execute](c1/4-execute.html)
-   5. [Compile](c1/5-compile.html)
+   3. [Execute](c1/3-execute.html)
+   4. [Compile](c1/4-compile.html)
 
 2. [Chapter 2: Types](c2/)
 
diff --git a/html/gen.sh b/html/gen.sh
index ad362c7..cedc4b2 100755
--- a/html/gen.sh
+++ b/html/gen.sh
@@ -18,7 +18,7 @@ md2ht() {
     fi
     echo "$src -> $dst"
     {
-        title=$(sed 's/# //; q' "$src")
+        title=$(sed 's/# //; s/&/\\&/; q' "$src")
         sed "s/__TITLE__/$title/" prelude.html
         echo "<body>"
         markdown2 "$src" -x fenced-code-blocks,highlightjs-lang,tables
diff --git a/notes/250219-reader.md b/notes/250219-reader.md
index 4454783..e930e21 100644
--- a/notes/250219-reader.md
+++ b/notes/250219-reader.md
@@ -526,6 +526,5 @@ Just to summarize what I actually ended up implementing in the end:
 
 See here for full documentation of Zisp expressions as implemented:
 
-- [Informal docs](https://git.tkammer.de/zisp/tree/docs/parser.md)
-- [Formal spec](https://git.tkammer.de/zisp/tree/spec/syntax.md)
-- [ABNF](https://git.tkammer.de/zisp/tree/spec/syntax.abnf)
+- [Informal docs](https://tkammer.de/zisp/docs/c1/1-parse.html)
+- [Formal spec](https://tkammer.de/zisp/docs/c1/grammar/index.html)
diff --git a/notes/250920-libgccjit.md b/notes/250920-libgccjit.md
index 8ecf4f2..882b69a 100644
--- a/notes/250920-libgccjit.md
+++ b/notes/250920-libgccjit.md
@@ -1,5 +1,7 @@
 # Using libgccjit?
 
+_2025 September_
+
 The installed size of libgccjit varies from 18 MB (armhf) to 53 MB
 (riscv64), with 37 MB for amd64.
 
diff --git a/notes/260107-decoder.md b/notes/260107-decoder.md
index a1118b7..5fa3677 100644
--- a/notes/260107-decoder.md
+++ b/notes/260107-decoder.md
@@ -191,7 +191,7 @@ However, this causes such custom object literals to not stand out:
 ```
 
 For this reason, it would be a convention that decoder rules are used
-to implement new object literal syntax, and macros used for then you
+to implement new object literal syntax, and macros used for when you
 want to output code, with hygienic bindings.
 
 ```scheme
diff --git a/notes/260109-uninterned.md b/notes/260109-uninterned.md
index 5899cba..5d914b1 100644
--- a/notes/260109-uninterned.md
+++ b/notes/260109-uninterned.md
@@ -1,5 +1,7 @@
 # What strings NOT to intern?
 
+_2026 January_
+
 Although it's become fairly normal these days to define grammars in
 terms of Unicode, there's a certain simplicity to just defining them
 in terms of bytes instead.
diff --git a/notes/260516-joke-rant.md b/notes/260516-joke-rant.md
new file mode 100644
index 0000000..a1c6f64
--- /dev/null
+++ b/notes/260516-joke-rant.md
@@ -0,0 +1,69 @@
+# Compilers are FAKE
+
+_2026 May NOT_
+
+Languages typically either
+
+- follow the "interpreted language" mentality and at best add things
+  like bytecode cache files or JIT as behind-the-scenes optimizations,
+  or
+
+- follow the "compiled language" mentality where code written in the
+  language is a representation of the intended behaviour of an
+  executable that will eventually be produced.
+
+In the latter model, compile-time logic is often limited to primitive
+pre-processors, declarative DSLs or otherwise limited subsystems (C++
+templates, Zig comptime, etc.) and often even offloaded to an entirely
+separate "build system" language.  In the former model, compilation is
+swept under the rug like it's shameful dirt.
+
+Why?
+
+How about designing a language that embraces the reality of what it
+means to "compile code": Run an executable with source code as input.
+Sound familiar?  IT'S WHAT AN INTERPRETER DOES!  So, in my preferred
+language:
+
+- The "interpreter" includes APIs for explicitly compiling any of the
+  currently defined functions, modules, etc. straight from their in
+  memory representation (be it AST or bytecode) into native code or
+  assembly snippets, with the ability to then "serialize" or assemble
+  and link these to create object files or executables.
+  
+- The "compiler" is just a thin wrapper around these APIs, loading
+  some source files that define functions and globals etc. (all of
+  which is first interpreted) then compiling them and producing
+  executables.
+  
+- The "top level" of any source file is naturally executed at compile
+  time.  You get meta programming and a build system language "for
+  free" since these kinds of features can simply be libraries.
+
+Some lisp/scheme implementations already contain an interpreter and
+compiler in one, exposing parts of the compiler through APIs, and
+allow full compile time code execution via macros.  However, in my
+experience, they still tend to embrace the traditional interpreted
+vs. compiled mentality, which sometimes just makes things awkward.
+For example, GNU Guile insists on automatically compiling everything
+into a bytecode cache on the filesystem, unless you pass a CLI flag
+disabling this behavior.  And if you write a Guile Scheme codebase,
+you still typically use Autotools and Make to call `guild` to compile
+the Scheme files.  Just why?  You have the full Guile runtime at your
+disposal, at compile time...  I propose a change in mentality.
+
+## Dial the schizophrenia to 11
+
+Compilers are all FAKE.  They just pretend to be compilers.  All a
+compiler is, is an interpreter for a language that pretends to be a
+"compiled language" when in reality it's just an interpreted DSL for
+declaring the desired behavior of an executable, which is produced
+when the codebase written in this DSL is interpreted ("compiled") by
+the interpreter ("compiler").
+
+They have played us for absolute fools.
+
+C is a declarative, interpreted DSL to describe executables.
+
+GCC is an interpreter that runs C, a declarative DSL describing the
+executable that GCC should shit out.
diff --git a/notes/260522-interpreter.md b/notes/260522-interpreter.md
new file mode 100644
index 0000000..cf87180
--- /dev/null
+++ b/notes/260522-interpreter.md
@@ -0,0 +1,261 @@
+# The interpreter and the compiler
+
+_2026 May_
+
+Last December, I wrote the following in the context of how one might
+[bootstrap](250329-boot.html) Zisp even if it uses a self-hosting
+compiler:
+
+* There will be a Zisp interpreter written in Zig, which is fairly
+  simple and naive in its implementation and, for example, ignores
+  static type declarations.  It should support the full Zisp language
+  including hygienic macros, but be as easy as possible to maintain.
+
+* The Zisp compiler will be written in Zisp.  The interpreter can run
+  the compiler (since it can run any Zisp program) and will be used to
+  compile the compiler.
+
+After some pondering on a variety of topics, I've decided to stick
+with this, just with one significant added insight:
+
+The interpreter will not be some bootstrapping hack and then put in
+the dustbin until someone needs to bootstrap from scratch again.
+Rather, the interpreter will be a first-class citizen of the Zisp
+implementation.
+
+This is because a simple interpreter without any compilation overhead
+is useful for an entire class of applications: Small to medium size
+scripts that you simply plop into `~/bin` with a shebang line at the
+top, or other similarly small programs that are simply distributed as
+monolithic source files, or at most a small collection of files.
+
+The interpreter may be slow, but these would be the kinds of programs
+one might otherwise write in GNU Bash or the like (which is also quite
+slow) except GNU Bash doesn't even have proper data structures, so it
+becomes a terrible choice very quickly.  The next consideration after
+Bash would typically be a language like Python, and although even the
+CPython interpreter might beat the naive Zisp interpreter (because the
+former at least uses bytecode and had a ton of engineering poured into
+it) this shouldn't really matter, since the kind of tiny application
+we're talking about typically wouldn't involve heavy computation.
+
+(Besides, a Zisp script could choose to compile parts of itself; more
+on this later.)
+
+Another example is build scripts.  One of the first ideas I had when
+pondering on Zisp's design is how [compilation](250210-compile.html)
+should automatically evaluate the top-level of a program, simply
+because this feels most natural to me.  Furthermore, I've pondered
+about how it should be possible to [serialize](250210-serialize.html)
+everything in the language, so compiling a program would be a matter
+of calling something like `(write main)` after the main function is
+defined.  Both of these fit naturally with the idea that a build
+script for a Zisp program would essentially just be a Zisp script
+which imports all the files in the codebase, compiles everything, and
+writes out the result.  Such a build script would be interpreted, with
+the compiler being a shared library it loads.
+
+The compiler itself would typically still be shipped in compiled form,
+as well as the rest of the standard library, though it's conceivable
+that there might be benefits to having stdlib sources available; the
+compiler may be able to do better whole-program analysis, achieving
+better results than what you might get from LTO.
+
+## The programmer is in control of compilation
+
+Shipping an interpreter, with a compiler as a library, being able to
+compile things on-the-fly as instructed by the interpreted source
+itself, enables some novel strategies in development and deployment.
+
+### Manual JIT
+
+First, imagine you started developing a program as a fairly small
+script but at some point begin to realize that it does, after all,
+involve some heavy computations that could benefit from improved
+performance.
+
+Maybe it takes 10-20 minutes to run, with the majority of that time
+spent on one or two functions sifting through massive amounts of data
+and doing some heavy computation, involving some tight loops.  Well,
+your interpreter includes a compiler, so what about you simply just
+call the compiler on those functions right after defining them?
+
+Note that we're not talking about compiling *files* but simply some
+functions that are sitting in memory as AST and would otherwise be
+interpreted naively and slowly.
+
+It's said that the speed difference between a naive AST interpreter
+and compiled native code can be as high as 5-20x, so your script
+running in 20 minutes could be reduced down to 1-4 minutes; a
+little extra computation is added up-front to compile a function or
+two, then they run blazing fast.
+
+### Native targeting, and user data/code specialization
+
+The fact that you have a compiler in your runtime, and that it has a
+well-designed easy to use API, opens the door to a somewhat unusual
+software deployment strategy:
+
+Despite the fact that your application is rather sophisticated and
+needs to run at peak performance, you distribute it as source code,
+with a "boot" process that compiles all the sources every time
+it's started up on the end user's machine.  (Well, the compilation
+result could be cached into files on disk too, but that's a detail.)
+
+This has two advantages.  For one, the code is always compiled for the
+exact native architecture, not just an ISA family.  This can improve
+performance a little, sometimes.
+
+Secondly, and more interestingly, data *and even code* read from a
+configuration file can be compiled straight into the native code
+that's being generated.
+
+If you know Nginx's configuration format, you may know that it has
+some limitations that appear a bit strange, typically because the
+directives need to be "compiled" into something efficient if they
+declare some logic that has to be executed on every single request.
+Since Nginx doesn't want to implement a sophisticated compiled DSL
+like Varnish, it ends up being somewhat limited.  Varnish does make
+that jump and implements a whole DSL for per-request decisions, which
+is transpiled to C, compiled into a dynamic lib and loaded.
+
+Imagine Nginx was written in Zisp, and distributed in source format.
+You could have arbitrary code in your configuration, for per-request
+decisions, which would be compiled into native code and potentially
+inlined straight into Nginx's request handler.  Imagine Varnish was
+written in Zisp.  It wouldn't need to invent a whole new language!
+
+(I just realized Varnish has been renamed to Vinyl Cache, but I
+suspect most people still know it as Varnish, like me just now.)
+
+Just as an aside, I think this "compile at startup and cache it"
+strategy is used by Elixir.  Or maybe I just got that impression
+because I've installed Pleroma (an Elixir application) from Git.
+Either way, I doubt my idea is entirely new; this is definitely a
+strategy that can already be used by any application written in a
+language with a compiler built into the runtime, like many Lisp or
+Scheme implementations.
+
+## Why not automatic JIT?
+
+Although more "proper" JITs have some advantages, like being able to
+specialize on arbitrary run-time data (not just config files or other
+such "boot-time" data), they typically produce significantly worse
+code than a "full AOT compiler in a JIT-shaped trench coat" because
+the AOT compiler simply spends a *lot* more time on analysis upfront.
+Don't cite me on this, but it appears to be the current consensus.
+
+Traditional JIT, as opposed to what LLVM and GCC offer (i.e., AOT in a
+JIT shaped trench coat), needs to be low latency, since it's done on
+the fly, transparently, and concurrently.  Imagine your browser ran
+GCC or LLVM for every JS file it received.  That would be ridiculous.
+Note that JS is special in that it's basically the only programming
+language where arbitrary new code is loaded *all the time* during the
+normal course of operations.  Other languages just don't need this.
+It's just JS where high upfront latency is unacceptable.
+
+Why do Java, Lua, and a bunch of other dynamic languages use JIT?
+Partly, it may be cultural: Native AOT compilation feels yucky,
+invoking associations such as long compile times multiplied by the
+number of target architectures, needing to ship binary blobs, and the
+primitive C ABI.  Java can have its own rich ABI, and languages like
+Lua don't have an ABI at all because everything is source code.  If
+programmers can simply ship source files, or at worst cross-platform
+byte code like for the JVM, and then the JIT magically makes things
+faster, there's less headache I guess.  (There is AOT for Java, but
+it's a niche.)
+
+Another reason, probably, is that many high-level languages are very
+dynamic and lack a serious static type system that would be needed to
+generate peak performance AOT compiled code.
+
+Zisp is all about breaking norms, and giving the programmer maximum
+freedom.  The interpreter might one day incorporate some lightweight
+JIT, but my aim is to ensure that a Zisp programmer always has the
+ability to generate peak-performance native compiled binaries, through
+a combination of features such as: An optional but serious static type
+system, the ability to completely take control over memory management
+rather than relying on GC, and integrating with a high-end AOT native
+compiler like GCC.
+
+Tall claims, I know.  Stop looking at me like that.  Yes I know, all I
+have so far is a fucking s-expression parser, a NaN packing strategy
+for dynamic typing, and dreams.  But if I keep dreaming and planning,
+I'm sure the implementation will spontaneously pop into existence any
+day now.
+
+## Summary of planned implementation architecture
+
+Just to recap, here's the plan so far:
+
+1. A code base in a low level language (probably Zig but not married
+   to it) implements the Zisp core, meaning interpreter, basic data
+   types, and a slim standard library.  Comparable to R7RS-small in
+   complexity, give or take.  The interpreter accepts but ignores
+   advanced code constructs intended to help the compiler, such as
+   declarations and directives related to static typing and explicit
+   object lifetime management.  (Simple bindings to libgccjit are
+   exposed; libgccjit.so is an optional run-time dependency.)  This
+   yields libzisp.so and the zisp executable, which are like liblua
+   and the lua executable.  You *can* use just this if you need a
+   minimal Zisp interpreter with a barebones stdlib; OS package
+   repositories could deploy these in a "zisp-core" package.
+
+2. Richer standard library routines are written in Zisp, but the
+   sources are meant to stay in the source code repo; wait for it.
+
+3. An advanced compiler, which actually understands the constructs
+   mentioned in point 1, is written in Zisp.  The compiler infers
+   static types where possible, and applies strategies to decrease GC
+   pressure, such as escape analysis, even if compiled code offers no
+   helpful declarations at all.  But with full static typing and
+   manual memory management, Zisp can practically be used as if it's
+   yet another low-level language front-end for GCC; it's up to the
+   programmer how much effort they want to put into improving the
+   performance of their code.  The compiler implementation may use
+   parts of the richer standard library mentioned above, which is not
+   yet compiled, mind you.
+
+4. The interpreter runs the compiler to compile the compiler; this
+   yields libzispcomp.so which Zisp can load dynamically so when
+   deploying Zisp you don't need to compile the compiler on every
+   end-user machine.  (Zisp can load any .so dynamically really.)
+   Standard library routines written in Zisp are imported directly
+   from within the source code repo at this point, and are merely
+   interpreted, since the compiler itself wasn't ready yet.
+   (Actually, you could run the compiler with the interpreter to
+   compile the stdlib first, then use the compiled stdlib while
+   compiling the compiler.  But this would probably be slower.)
+
+5. The richer standard library routines are finally compiled, giving
+   us libzisputil.so, which contains goodies that interpreted Zisp
+   code can also load and use, so Zisp scripts aren't limited to the
+   barebones stdlib anymore.
+
+In OS package repositories, you'd have zisp-core which only contains
+libzisp.so and the zisp executable, and then you'd have the standard
+zisp package which also pulls in libzispcomp and libzisputil as two
+additional packages.
+
+Actually, libzispcomp itself would probably depend on libzisputil
+anyway, but if you're an absolute nerd you *could* manually install
+only zisp-core and libzisputil, giving you an interpreter and rich
+standard library, without a compiler.  This would allow you to omit
+libgccjit as well, which could be useful if you want to use the Zisp
+interpreter for simple scripts on some minimal systems.
+
+## Closing up
+
+Funny, I had totally forgotten about this note:
+
+- [Using libgccjit?](250920-libgccjit.html)
+
+Yes, I will most definitely be using libgccjit.  If Zisp is to be a
+true [full-stack language](260102-full-stack.html) then it must be
+able to produce code rivaling C in efficiency, and that requires
+either GCC or LLVM.
+
+Some of the other considerations in the above linked note, like the
+"ZispScript" idea, are obsolete.  Unless I've totally goofed up and
+planned some illogical nonsense above, I'll be going with what I've
+written here, not in the previous note.
diff --git a/notes/index.md b/notes/index.md
index 197261f..afc627d 100644
--- a/notes/index.md
+++ b/notes/index.md
@@ -27,3 +27,5 @@
 * [Simplifying S-Expression Grammar](260106-simpler-grammar.html)
 * [Decoder](260107-decoder.html)
 * [What not to intern?](260109-uninterned.html)
+* [JOKE RANT](260516-joke-rant.html)
+* [Interpreter](260522-interpreter.html)
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index aeca539..8d16b93 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -438,6 +438,7 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
     return switch (c) {
         '|' => p.jump(next, try p.getString('|')),
         '"' => p.jump(next, try p.getString('"')),
+        '~' => p.jump(next, try p.getTildeString()),
         '#' => p.parseHashExpr(next),
         '(', '[', '{' => p.parseList(c, next),
         '\'', '`', ',' => p.parseQuoteExpr(c, next),
@@ -472,6 +473,18 @@ fn getString(p: *Parser, comptime close: u8) !Value {
     return p.err(.UnclosedString, .{close} ++ " string");
 }
 
+fn getTildeString(p: *Parser) !Value { // Parses the rest of a `~` raw string; result is TILDE consed with the string.
+    const sentinel = try p.readNoEof("tilde"); // The next byte read becomes the terminator (presumably errors at EOF -- confirm in readNoEof).
+    while (try p.read()) |c| {
+        if (c == sentinel) { // No escape handling here: the first recurrence of the terminator ends the string.
+            const s = p.getCharsAsString();
+            return p.cons(TILDE, s);
+        }
+        try p.addChar(c); // Any other byte is accumulated as-is.
+    }
+    return p.err(.UnclosedString, "tilde string"); // Reached only when read() returns null before the terminator is seen.
+}
+
 fn skipStringLfEscape(p: *Parser) !u8 {
     const msg = "string linefeed escape";
     while (try p.read()) |c| switch (c) {
@@ -735,8 +748,8 @@ fn checkBlank(p: *Parser, c: u8) !enum { yes, skip_unit, no } {
 fn isBareChar(c: u8) bool {
     return switch (c) {
         // zig fmt: off
-        'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '*' , '+' ,
-        '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , '~' , => true,
+        'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '*' ,
+        '+' , '-' , '/' , '<' , '=' , '>' , '?' , '@' , '^' , '_' , => true,
         // zig fmt: on
         else => false,
     };
author	Taylan Kammer <taylan.kammer@gmail.com>	2026-05-23 22:22:57 +0200
committer	Taylan Kammer <taylan.kammer@gmail.com>	2026-05-23 22:22:57 +0200
commit	378f8598a5a57b731948241e41f584f5172dc2a2 (patch)
tree	e9352110efe5b204a5abe7e00693be2004aab4e5
parent	f1f134d072e375335be5c1203095115fef1db253 (diff)