summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/0/0-value.md199
-rw-r--r--doc/0/1-parse.md (renamed from doc/c1/1-parse.md)219
-rw-r--r--doc/0/2-decode.md (renamed from doc/c1/2-decode.md)2
-rw-r--r--doc/0/grammar/abnf.txt (renamed from doc/c1/grammar/abnf.txt)6
-rw-r--r--doc/0/grammar/index.md (renamed from doc/c1/grammar/index.md)0
-rw-r--r--doc/0/grammar/peg.txt (renamed from doc/c1/grammar/peg.txt)10
-rw-r--r--doc/0/grammar/zbnf.txt (renamed from doc/c1/grammar/zbnf.txt)10
-rw-r--r--doc/0/index.md (renamed from doc/c1/index.md)24
-rw-r--r--doc/index.md42
-rw-r--r--src/zisp/gc.zig14
-rw-r--r--src/zisp/gc/IstrSet.zig8
-rw-r--r--src/zisp/gc/ListPool.zig69
-rw-r--r--src/zisp/gc/PairPool.zig34
-rw-r--r--src/zisp/io/Decoder.zig1
-rw-r--r--src/zisp/io/Encoder.zig1
-rw-r--r--src/zisp/io/Parser.zig195
-rw-r--r--src/zisp/io/Printer.zig255
-rw-r--r--src/zisp/value.zig521
-rw-r--r--src/zisp/value/array.zig8
-rw-r--r--src/zisp/value/istr.zig34
-rw-r--r--src/zisp/value/list.zig64
-rw-r--r--src/zisp/value/ptr.zig68
-rw-r--r--src/zisp/value/srat.zig1
-rw-r--r--src/zisp/value/string.zig13
-rwxr-xr-xupdate-html.sh10
25 files changed, 978 insertions, 830 deletions
diff --git a/doc/0/0-value.md b/doc/0/0-value.md
new file mode 100644
index 0000000..4bb7e0c
--- /dev/null
+++ b/doc/0/0-value.md
@@ -0,0 +1,199 @@
+# NaN-packed Value representation
+
+The format of a binary64 floating-point number, in big-endian notation:
+
+ { sign: 1 bit, exponent: 11 bits, fraction: 52 bits }
+
+When the 11 exponent bits are all set, it's either a NaN or an Infinity.
+
+For value packing, the remaining 53 bits are available, giving us `2^53` values
+minus the following four bit patterns:
+
+ *** FORBIDDEN BIT-PATTERNS ***
+
+ 1. Negative cqNaN :: { sign = 1, exponent = MAX, fraction = 10000... }
+
+ 2. Negative Infinity :: { sign = 1, exponent = MAX, fraction = 00000... }
+
+ 3. Positive cqNaN :: { sign = 0, exponent = MAX, fraction = 10000... }
+
+ 4. Positive Infinity :: { sign = 0, exponent = MAX, fraction = 00000... }
+
+The abbreviation "cqNaN" stands for canonical quiet NaN.
+
+The MSb of the fraction is also called the `is_quiet` flag, because it marks a
+NaN as being "quiet" rather than signaling. The rest of the fraction being all
+zero makes it the *canonical* quiet NaN for the given sign value.
+
+The positive and negative cqNaN are the *only* NaN values that can actually be
+returned by FP operations. This is convenient, because it means we can simply
+use them to represent themselves in Zisp.
+
+Infinity values may also be returned by FP operations, and we want them to also
+exit in Zisp, so they also represent themselves.
+
+Beyond those four bit patterns, all values with a maximum exponent (all bits
+set) are fair game for representing other values, so `2^53 - 4` possibilities.
+
+We split those `2^53 - 4` available values into four groups, each allowing for
+`2^51 - 1` different values to be encoded. (51-bit values excluding zero.)
+
+ sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1
+
+ sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2
+
+ sign = 0, quiet = 1 :: Pointers and various immediates
+
+ sign = 0, quiet = 0 :: Internal use by interpreter
+
+
+## Fixnums
+
+Negative fixnums actually represent themselves, without needing to go through
+any transformation. Only the smallest 52-bit signed negative, `-2^51`, cannot
+be represented, as it would step on Forbidden Value #1, Negative cqNaN.
+
+Positive fixnums go through a bitsiwe NOT (which can be implemented as an XOR
+mask combining it with removal of NaN-related high bits) to avoid the all-zero
+payload value, which would step on Forbidden Value #2, Negative Infinity.
+
+
+## Pointers and immediates
+
+This region of 51-bit values is divided as follows, based on the three highest
+bits, providing a payload value of 48 bits for each.
+
+ 000 :: Pointer to heap object (type-tagged)
+
+ 001 :: Pointer to list values (length-tagged)
+
+ 010 :: Pointer to istr object
+
+ 011 :: Immediate short string
+
+ 100 :: Immediate small rational (sign bit 0)
+
+ 101 :: Immediate small rational (sign bit 1)
+
+ 110 :: Undefined
+
+ 111 :: Immediate types further subdivided as follows:
+
+ 0....... 0....... 0....... (etc.) :: Rune
+
+ 1....... :: 128 40-bit types
+
+ 0....... 1....... :: 16384 32-bit types
+
+ 0....... 0....... 1....... :: 2097152 24-bit types
+
+ (etc.)
+
+Forbidden Value #3, Positive cqNaN, is avoided by not using 0 as a valid type
+tag value for heap pointers.
+
+### Type-tagged heap pointers
+
+Regular heap objects are allocated with 16-byte alignment, meaning the lowest
+four bits are naturally zero. We exploit this by shifting down the address by
+four bits, making room for more tag bits immediately following the 16 high bits
+that mark the value as a heap pointer.
+
+Thus, a value can be checked against a specific heap object type by comparing
+the 20 high bits to a combined constant value: the 16 highest bits indicating
+that it's a heap pointer, and the 4 bits after that denoting the heap type.
+
+### Length-tagged list pointers
+
+Lists are arrays of Value objects, allocated without padding (8-byte alignment)
+for efficient source code representation and traversal. Since the lowest three
+bits are naturally zero, we use them as a 3-bit length information tag.
+
+A length tag value of 1 to 7 means there are exactly that many Value objects
+starting at the address, while 0 means there is an array of at least eight,
+terminated with a special 64-bit sentinel bit-pattern that is not otherwise
+valid as a Zisp value.
+
+### Interned string pointers
+
+Interned string (istr) objects may be unaligned, so the low bits of the pointer
+are not used for any special purpose. Having a separate category for this type
+of pointer also streamlines the interpreter implementation
+
+### Short strings
+
+This 48-bit range is used for strings of zero to six bytes. These are NUL
+terminated unless exactly six bytes, meaning that a literal NUL byte cannot
+appear in them, but otherwise they allow arbitrary byte values.
+
+### Small rationals
+
+We use a 49-bit space for small rational numbers, with a signed 25-bit two's
+complement integer numerator, and 24-bit unsigned integer denominator.
+
+### Runes and other small values
+
+Runes are symbols of up to 6 ASCII characters in length, used to implement
+extensible reader syntax. (See Zisp decoder.) They cannot contain the NUL
+byte, as they are NUL-terminated unless exactly six ASCII bytes in length.
+
+NOTE: The order in which the characters of the rune are encoded depends on
+endianness. On little-endian systems (i.e. most modern architectures) the
+characters will be in "reverse" order, with the first character in lowest
+position, so the terminating NUL has to be searched from low to high.
+
+The fact that runes are limited to ASCII bytes, whose MSb is unset, opens up
+some space for other small values to co-inhabit the same 48-bit value range.
+
+We divide this space into increasingly many potential types, with smaller and
+smaller payloads, where the highest byte with a non-zero MSb determines which
+size category we're in: If the highest byte has its MSb set, then the other
+seven bits are a type tag, and each type has a 40-bit payload; if the second
+highest byte has its MSb set, then the 14 non-MSb bits of the two high bytes
+define the type, and each has a 32-bit payload; and so on.
+
+Unicode code points need 21 bits, so we use a 24-bit type for the Character
+type. Miscellaneous values like True, False, EOF, etc. are placed in an 8-bit
+type, since there will never be that many of them.
+
+A virtually unlimited number of user-defined enum types can fit into the types
+with small payload values here: There is room for over 268 Million 16-bit types
+(28-bit type tag) and over 34 Billion 8-bit types (35-bit type tag).
+
+
+## Internal use values
+
+The final 51-bit range is used for various internal purposes by the Zisp
+interpreter, mostly related to transparent code optimization.
+
+ 000 :: Pointer to heap object as constant
+
+ 001 :: Pointer to list values as constant
+
+ 010 :: Pointer to istr object as constant
+
+ 011 :: Immediate short string as constant
+
+ 100 :: Local variable reference by index
+
+ 101 :: Pointer to constant function-call expression
+
+ 110 :: Pointer to variable function-call expression
+
+ 111 :: Pointer to special-form or macro-call expression
+
+Forbidden Value #4, Positive Infinity, is avoided thanks to the fact that heap
+pointers always have a non-zero heap-type tag. (See further above.)
+
+The first four categories simply mirror those of the previous 51-bit range, but
+mark the values as being constants rather than code to evaluate.
+
+The remaining four categories are somewhat similar to VM instructions.
+
+
+
+<!--
+;; Local Variables:
+;; fill-column: 80
+;; End:
+-->
diff --git a/doc/c1/1-parse.md b/doc/0/1-parse.md
index d4c4c2e..101a3b6 100644
--- a/doc/c1/1-parse.md
+++ b/doc/0/1-parse.md
@@ -5,31 +5,16 @@
Zisp s-expressions represent an extremely minimal set of data types; only that
which is necessary to strategically construct more complex values:
- +-------+---------+--------+----------+------+
- | TYPE | String | Rune | Pair | Nil |
- +-------+---------+--------+----------+------+
- | E.G. | foobar | #name | (X & Y) | () |
- +-------+---------+--------+----------+------+
+ +---------+--------+----------+------+
+ | String | Rune | List | Nil |
+ +---------+--------+----------+------+
+ | foobar | #name | (X ...) | () |
+ +---------+--------+----------+------+
The parser recognizes various *syntax sugar* which abbreviates verbose syntax,
-and may result in special data structures (typically, a pair with a rune in its
-first, and payload in its second position) which another Zisp component called
-the *decoder* can transform into a rich set of value types.
-
-The most ubiquitous syntax sugar is the list, which abbreviates a sequence of
-tail-linked pairs, terminated with a special nil value represented as `()`:
-
- (x) -> (x & ())
-
- (x y) -> (x & (y & ()))
-
- (x y z) -> (x & (y & (z & ())))
-
-The following are so-called *improper lists*, ending in a non-nil value:
-
- (x y & z) -> (x & (y & z))
-
- (x y z & t) -> (x & (y & (z & t)))
+and may result in special data structures (typically, a list with a rune in its
+first position) which another Zisp component called the *decoder* can transform
+into a rich set of value types.
More details about syntax sugar, and the decoder, are explained later.
@@ -121,8 +106,8 @@ to have an external representation. After parsing, it needs to be *decoded* to
actually become the expected value.
One may speak of an *external representation of a value* where the value is not
-itself a datum, but can be encoded as one. The more strictly correct term for
-this is: "The external representation of a datum that encodes the value."
+itself a datum, but has an encoding as one. The more strictly correct term for
+this is: "The external representation of the datum encoding the value."
### Syntax sugar
@@ -137,7 +122,7 @@ their memory address.
An example of syntax sugar that is not a mere abbreviation is a quoted string
which contains bytes that could not appear in a *bare* string:
- "foo bar" -> (#DQUOTE & <STRING>)
+ "foo bar" -> (#DQUOTE <STRING>)
In this example, the visual token `<STRING>` represents the actual string value
in program memory, which has no direct external representation in bytes because
@@ -175,23 +160,23 @@ is only a *datum* if it adheres to additional constraints as explained below.
### String
Strings can appear *bare* or be quoted in various ways. A quoted string is in
-fact parsed into a pair value with a rune in the first position to identify the
+fact parsed into a list value with a rune in the first position to identify the
quotation variant that was parsed, and the string value in the second position;
or, in case of at-quoted strings, a special construct we will look at later.
- +-----------+-----------------------------+
- | Syntax | Parse output |
- +-----------+-----------------------------+
- | |bytes| | (#PQSTR & <STRING>) |
- +-----------+-----------------------------+
- | "bytes" | (#DQSTR & <STRING>) |
- +-----------+-----------------------------+
- | @_bytes_ | (#ATSTR <BYTE> & <STRING>) |
- +-----------+-----------------------------+
+ +-----------+-------------------------------+
+ | Syntax | Parse output |
+ +-----------+-------------------------------+
+ | |bytes| | (#PQSTR <STRING>) |
+ +-----------+-------------------------------+
+ | "bytes" | (#DQSTR <STRING>) |
+ +-----------+-------------------------------+
+ | @_bytes_ | (#ATSTR <SENTINEL> <STRING>) |
+ +-----------+-------------------------------+
The visual token `<STRING>` denotes the actual string, as a Zisp value, in the
-second position of the pair. The visual token `<BYTE>` stands for an integer
-Zisp value between 0 and 255.
+second position of the list. The visual token `<SENTINEL>` stands for a Zisp
+integer value between 0 and 254.
These external representations of strings will be explained in more detail
further below, including backslash escape sequences allowed within, and how
@@ -206,8 +191,10 @@ occurrence of the same string -- equal in length and containing the same byte
values -- ends up being represented by the same bit-pattern; either a memory
address, or an immediate representation within a CPU word for short strings.
The quotation method is inconsequential to this process; for example, while
-`|foobar|` and `"foobar"` will parse into different pair values, the actual
-string they hold will be the same one in program memory.
+`|foo bar|` and `"foo bar"` will parse into different list values, the actual
+string they hold a reference to will be the same one in program memory. This
+behavior is however configurable and can be disabled entirely for cases where
+large numbers of arbitrary binary strings are being parsed.
Strings of length greater than 255 bytes are stored separately in memory, even
if they are equal in length and content.
@@ -232,30 +219,32 @@ default decoder settings and documented explicitly as such.
Runes are always stored directly in a CPU word; never by memory address.
-### Pair
+### List
-A pair is a tuple of two values: the first value and the second value. In Lisp
-tradition, these are also called the `car` and `cdr` of the pair, respectively.
+A list is a contiguous array of one or more values in memory, whose length may
+be encoded directly within the pointer to the head of the array, or else the
+array is terminated with a special sentinel bit-pattern that is not otherwise
+valid as a Zisp value.
-The parser allocates a unique two-word cell in program memory for every pair,
-and represents that pair through the memory address of the cell.
+The parser allocates a unique array in program memory for every list, and the
+list as a value is then represented by the memory address of that array, with
+either an exact length tag or a tag indicating that it's sentinel-terminated.
-Pairs are valid data if one of the following holds true:
+Lists are valid data if one of the following holds true:
-* The pair encodes a quoted string, datum label, or shebang line.
+* The list encodes a quoted string, datum label, or shebang line.
-* Both the first and second value in the pair is a valid datum.
+* All values in the list are a valid datum.
-Further, a structure of nested pair values may not contain cyclic references
+Further, a structure of nested list values may not contain cyclic references
back up in the structure (which would make the above definition diverge into
-infinity). Such cycles must be broken up with datum labels, or else the pair
+infinity). Such cycles must be broken up with datum labels, or else the list
cannot be considered a datum, since it cannot be printed or parsed.
### Nil
-The Zisp nil value is a singleton and a datum. There is exactly one nil value
-and it is used to terminate a chain of pairs representing a list of values; it
-has the external representation `()`.
+The Zisp nil value is a singleton and a datum. There is exactly one nil value,
+used in lieu of a list of zero length; it has the external representation `()`.
## Quoted strings
@@ -266,26 +255,25 @@ This section goes into the details of each variant.
### Pipe-quoted
Strings can be quoted with pipes, like symbols in R7RS Scheme, which triggers
-the parser to generate a pair with the structure:
+the parser to generate a list with the structure:
- (#PQSTR & <STRING>) ;; <STRING> is visual aid, not syntax
+ (#PQSTR <STRING>) ;; <STRING> is visual aid, not syntax
The decoder, using default settings, would emit this string verbatim as a value.
Then, during code evaluation, this would be seen as an identifier. In this way,
pipe-quoted strings are equivalent to bare strings in functionality.
It is important to understand that the decoder sits between the parser and the
-[evaluator](3-execute.html), and in opposition to Lisp and Scheme tradition, it
-is common for the evaluator to receive values that are not valid as a datum; in
-this case, a string unto itself that may not be a valid datum, due to not being
-possible to be represented as a bare string. Yet, it is valid as an identifier
-for the purposes of the evaluator, since it is a string *value* like any other.
+[evaluator](3-eval.html), and in opposition to Lisp and Scheme tradition, it is
+common for the evaluator to receive values that are not valid as a datum; here,
+a string unto itself that may not be a valid datum. Yet, it is valid as an
+identifier for the purposes of the evaluator.
### Double-quoted
Strings wrapped in the double-quote symbol parse into:
- (#DQSTR & <STRING>) ;; <STRING> is visual aid, not syntax
+ (#DQSTR <STRING>) ;; <STRING> is visual aid, not syntax
Under default settings, the decoder would transform this into a value which,
when evaluated as code, simply yields the contained string as a value.
@@ -297,14 +285,15 @@ escapes nor any other kind of escape sequence are recognized within them.
The syntax begins with an at sign, followed by any byte. That byte becomes a
termination marker, and the string cannot contain an occurrence of it, since
-there are no escape sequences.
+there are no escape sequences. The byte value 255 has a special meaning; see
+further below.
- @"foo \ bar" -> (#ATSTR <BYTE> & <STRING>)
+ @"foo \ bar" -> (#ATSTR <SENTINEL> <STRING>)
-In the above, the visual tokens `<BYTE>` and `<STRING>` represent an integer
-value and a string value, respectively. In this example, the integer value
-would be 34; the ASCII value for the double-quote sign. The string value
-contains a literal backslash, since there is no backslash escape parsing.
+The visual tokens `<SENTINEL>` and `<STRING>` represent an integer and string
+value, respectively. Here, the integer would be 34, which is the ASCII value
+for a double-quote sign. The string contains a literal backslash, since there
+is no backslash escape parsing.
This style of quoting can be useful, for instance, when representing regular
expressions as strings in code:
@@ -325,6 +314,19 @@ valid ASCII byte; it can be absolutely any byte value, even NUL. This can be
useful to easily encode binary data which is known to not contain a specific
byte; an example would be C strings which cannot contain NUL.
+If however the byte value is 255, then it does not stand for a sentinel, but
+rather indicates that 6 more bytes follow, interpreted as a big-endian 48-bit
+integer, which is the count of bytes making up the contents of the string.
+
+Example sequence of bytes, represented as a mixture of ASCII and raw integers:
+
+ '@' 255 0 0 0 0 2 100 <612 bytes> -> (#ATSTR <STRING>)
+
+One may ask why the length is not included in the list. This is unnecessary,
+since strings in Zisp already carry length information in their own metadata
+structure.
+
+
### Backslash escapes
In pipe-quoted and double-quoted strings, the following ASCII characters may
@@ -360,7 +362,7 @@ follow a backslash to insert a certain character.
In words:
-* A backslash followed by a backslash, pipe, or double-quote character is
+* A backslash, followed by a backslash, pipe, or double-quote character, is
substituted with a literal occurrence of that character.
* The characters 0, a, b, t, n, v, f, r, and e have the same meanings as in the
@@ -481,15 +483,15 @@ need to use it.
The following table summarizes commonly useful syntax abbreviations:
- [...] -> (#SQUARE ...) #datum -> (#HASH & datum)
+ [...] -> (#SQUARE ...) #datum -> (#HASH datum)
- {...} -> (#BRACE ...) #rune(...) -> (#rune ...)
+ {...} -> (#BRACE ...) #rune(...) -> (#rune ...)
- 'datum -> (#QUOTE & datum) dat1dat2 -> (#JOIN dat1 & dat2)
+ 'datum -> (#QUOTE datum) dat1dat2 -> (#JOIN dat1 dat2)
- `datum -> (#GRAVE & datum) dat1.dat2 -> (#DOT dat1 & dat2)
+ `datum -> (#GRAVE datum) dat1.dat2 -> (#DOT dat1 dat2)
- ,datum -> (#COMMA & datum) dat1:dat2 -> (#COLON dat1 & dat2)
+ ,datum -> (#COMMA datum) dat1:dat2 -> (#COLON dat1 dat2)
Notes:
@@ -501,53 +503,38 @@ Notes:
with a rune literal. A bare string can nevertheless follow the hash sign by
separating the two with a backslash:
- #\string -> (#HASH & string)
+ #\string -> (#HASH string)
* Though not represented in the table due to notational difficulty, the form
`#rune(...)` doesn't require a list in the second position; any datum that
works with the `#datum` syntax also works with `#rune<DATUM>`.
- #rune1#rune2 -> (#rune1 & #rune2)
+ #rune1#rune2 -> (#rune1 #rune2)
- #rune\string -> (rune & string)
+ #rune\string -> (#rune string)
- #rune'string -> (#rune #QUOTE & string)
+ #rune'string -> (#rune (#QUOTE string))
- #rune"string" -> (#rune #DQSTR & |string|)
+ #rune"string" -> (#rune (#DQSTR |string|))
As a counter-example, following a rune immediately with a bare string isn't
possible without the delimiting backslash, since that would be ambiguous:
- #abcdefgh ;Could be (#abcdef & gh) or (#abcde & fgh) or ...
+ #abcdefgh ;Could be (#abcdef gh) or (#abcde fgh) or ...
* Syntax sugar can combine arbitrarily. Some examples follow. Any of these may
- or may not actually have a meaning in code; many could simply end up producing
+ or may not actually have a meaning in code; some might simply end up producing
an error during decoding, or later evaluation of code.
- #{...} -> (#HASH #BRACE ...)
-
- #'foo -> (#HASH #QUOTE & foo)
-
- ##'[...] -> (#HASH #HASH #QUOTE #SQUARE ...)
-
- {x y}[i j] -> (#JOIN (#BRACE x y) #SQUARE i j)
-
- foo.bar.baz{x y} -> (#JOIN (#DOT (#DOT foo & bar) & baz) #BRACE x y)
-
-* While in Lisp and Scheme `'foo` parses as `(quote foo)`, in Zisp it parses as
- `(#QUOTE & foo)`; a single pair with the quoted datum in the second position.
-
- The same principle is used when parsing other sugar; some examples follow:
-
- Incorrect Correct
+ #{...} -> (#HASH (#BRACE ...))
- #(x y z) -> (#HASH (x y z)) #(x y z) -> (#HASH x y z)
+ #'foo -> (#HASH (#QUOTE foo))
- [x y z] -> (#SQUARE (x y z)) [x y z] -> (#SQUARE x y z)
+ ##'[...] -> (#HASH (#HASH (#QUOTE (#SQUARE ...))))
- #{x} -> (#HASH (#BRACE (x))) #{x} -> (#HASH #BRACE x)
+ {x y}[i j] -> (#JOIN (#BRACE x y) (#SQUARE i j))
- foo(x y) -> (#JOIN foo (x y)) foo(x y) -> (#JOIN foo x y)
+ foo.bar.baz{x y} -> (#JOIN (#DOT (#DOT foo bar) baz) (#BRACE x y))
* Those used to thinking in Lisp and Scheme may think that `(#QUOTE ...)` halts
further decoding of enclosed data. This is not so, since quoting is related
@@ -562,13 +549,13 @@ labels in the data encoding of the value.
A datum label either wraps another datum to assign a number to it, or contains
just a reference to a previous assignment.
- +------------------+------------------------------+
- | Syntax | Internal datum structure |
- +------------------+------------------------------+
- | #%<HEX>=<DATUM> | (#LABEL <NUMBER> & <DATUM>) |
- +------------------+------------------------------+
- | #%<HEX>% | (#LABEL & <NUMBER>) |
- +------------------+------------------------------+
+ +------------------+----------------------------+
+ | Syntax | Internal datum structure |
+ +------------------+----------------------------+
+ | #%<HEX>=<DATUM> | (#LABEL <NUMBER> <DATUM>) |
+ +------------------+----------------------------+
+ | #%<HEX>% | (#LABEL <NUMBER>) |
+ +------------------+----------------------------+
In this visual, the token `<HEX>` stands for a hexadecimal digit sequence, the
token `<DATUM>` stands for any other datum, and `<NUMBER>` is a stand-in for a
@@ -576,13 +563,13 @@ number value; that which is represented by `<HEX>`.
For clarity, concrete examples follow:
- +-------------------+-------------------------------+
- | Byte sequence | Parse result |
- +-------------------+-------------------------------+
- | #%1234abcd=(foo) | (#LABEL <0x1234abcd> & (foo)) |
- +-------------------+-------------------------------+
- | #%1234abcd% | (#LABEL & <0x1234abcd>) |
- +-------------------+-------------------------------+
+ +-------------------+------------------------------+
+ | Byte sequence | Parse result |
+ +-------------------+------------------------------+
+ | #%1234abcd=(foo) | (#LABEL <0x1234abcd> (foo)) |
+ +-------------------+------------------------------+
+ | #%1234abcd% | (#LABEL <0x1234abcd>) |
+ +-------------------+------------------------------+
Here, the visual token `<0x1234abcd>` stands for a Zisp value of a numeric type
with an integer value. Note that the decoder may not accept a bare string here,
@@ -593,9 +580,9 @@ meaning this syntax sugar is not merely an abbreviation.
Finally, the parser recognizes the Unix *shebang* syntax and outputs a datum to
hold the string values found within:
- #!interpreter -> (#SHBANG & interpreter)
+ #!interpreter -> (#SHBANG interpreter)
- #!interpreter argline -> (#SHBANG interpreter & argline)
+ #!interpreter argline -> (#SHBANG interpreter argline)
When executing a script file, Zisp simply stores this into a global value that
may be inspected if desired.
diff --git a/doc/c1/2-decode.md b/doc/0/2-decode.md
index 379c74b..1a45824 100644
--- a/doc/c1/2-decode.md
+++ b/doc/0/2-decode.md
@@ -39,6 +39,6 @@ complex data records with non-standard data types, and so on.
<!--
;; Local Variables:
-;; fill-column: 77
+;; fill-column: 80
;; End:
-->
diff --git a/doc/c1/grammar/abnf.txt b/doc/0/grammar/abnf.txt
index aa67646..5ab3c89 100644
--- a/doc/c1/grammar/abnf.txt
+++ b/doc/0/grammar/abnf.txt
@@ -90,7 +90,7 @@ JoinExpr = Datum RJoinDatum
/ NoEndDot "." Datum
-BareChar = "!" / "$" / "%" / "*" / "/" / "<" / "=" / ">"
+BareChar = "!" / "$" / "%" / "&" / "*" / "/" / "<" / "=" / ">"
/ "?" / "^" / "_" / "~" / ALPHA
Numeric = "+" / "-" / DIGIT
@@ -108,9 +108,7 @@ StringEsc = "\" / "|" / DQUOTE / *( HTAB / SP ) LF *( HTAB / SP )
/ %s"u" ["0"] 1*5HEXDIG ";"
/ %s"u" "1" "0" 4HEXDIG ";"
-List = [ Unit *( Blank Unit ) ] *Blank [Tail] [SkipUnit]
-
-Tail = "&" Unit *Blank
+List = [ Unit *( Blank Unit ) ] *Blank [SkipUnit]
RuneName = ALPHA *5( ALPHA / DIGIT )
diff --git a/doc/c1/grammar/index.md b/doc/0/grammar/index.md
index e3716ea..e3716ea 100644
--- a/doc/c1/grammar/index.md
+++ b/doc/0/grammar/index.md
diff --git a/doc/c1/grammar/peg.txt b/doc/0/grammar/peg.txt
index 7b28a99..1541da6 100644
--- a/doc/c1/grammar/peg.txt
+++ b/doc/0/grammar/peg.txt
@@ -29,7 +29,7 @@ BareString <- SpecBareChar ( BareChar / JoinChar )*
SpecBareChar <- '+' / '-' / JoinChar / DIGIT
BareChar <- ALPHA / DIGIT
- / '!' / '$' / '%' / '*' / '+' / '-' / '/'
+ / '!' / '$' / '%' / '&' / '*' / '+' / '-' / '/'
/ '<' / '=' / '>' / '?' / '^' / '_' / '~'
@@ -65,11 +65,9 @@ Label <- HEXDIG+
Rune <- ALPHA ( ALPHA / DIGIT )*
-ParenList <- '(' ListBody ')'
-SquareList <- '[' ListBody ']'
-BraceList <- '{' ListBody '}'
-
-ListBody <- Unit* ( Blank* '&' Unit )? Blank*
+ParenList <- '(' Unit* ')'
+SquareList <- '[' Unit* ']'
+BraceList <- '{' Unit* '}'
DIGIT <- [0-9]
diff --git a/doc/c1/grammar/zbnf.txt b/doc/0/grammar/zbnf.txt
index 923ac83..c04b813 100644
--- a/doc/c1/grammar/zbnf.txt
+++ b/doc/0/grammar/zbnf.txt
@@ -29,7 +29,7 @@ BareString : SpecBareChar ( BareChar | JoinChar )*
SpecBareChar : '+' | '-' | JoinChar | DIGIT
BareChar : ALPHA | DIGIT
- | '!' | '$' | '%' | '*' | '+' | '-' | '/'
+ | '!' | '$' | '%' | '&' | '*' | '+' | '-' | '/'
| '<' | '=' | '>' | '?' | '^' | '_' | '~'
@@ -65,11 +65,9 @@ Label : HEXDIG+
Rune : ALPHA ( ALPHA | DIGIT )*
-ParenList : '(' ListBody ')'
-SquareList : '[' ListBody ']'
-BraceList : '{' ListBody '}'
-
-ListBody : Unit* [ Blank* '&' Unit ] Blank*
+ParenList : '(' Unit* ')'
+SquareList : '[' Unit* ']'
+BraceList : '{' Unit* '}'
;; Local Variables:
diff --git a/doc/c1/index.md b/doc/0/index.md
index 6cec369..f0da216 100644
--- a/doc/c1/index.md
+++ b/doc/0/index.md
@@ -1,7 +1,13 @@
-# Chapter 1: Genesis
+# Chapter 0: Genesis
-This chapter goes through the processes involved in reading source
-code, running it, and optionally compiling it.
+This chapter explains the core value representation of Zisp, and goes
+through the processes of parsing, decoding, running, and optionally
+compiling code.
+
+0. [Value](0-value.html)
+
+ Zisp uses a uniform 64-bit representation for all values, densely
+ packed into signaling or non-canonical NaN values.
1. [Parse](1-parse.html) (see also [grammar](grammar/))
@@ -12,15 +18,13 @@ code, running it, and optionally compiling it.
The decoder runs configurable and extensible pre-processing steps
over data received from the parser, enriching it with more complex
- data types, and handling primitive source code transforms. It's
- comparable to the C pre-processor or Lisp's `DEFMACRO` mechanism,
- with a few more responsibilities, such as number literal parsing.
+ value types, and handling primitive source code transforms.
-3. [Execute](3-execute.html)
+3. [Eval](3-eval.html)
- Code is executed (or interpreted, or evaluated) in an environment,
- also called a module, which may be mutated, and linked with other
- modules. Execution is immediate, without any pre-compilation.
+ Code is evaluated within a mutable module context, on which it can
+ have side effects such as creating new definitions or establishing
+ links to other modules.
4. [Compile](4-compile.html)
diff --git a/doc/index.md b/doc/index.md
index beaa78c..51b92fa 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -2,31 +2,35 @@
This document explains the Zisp language and its implementation.
-Zisp intentionally blurs the line between developers and users of the
-language. After all, Zisp is software, and its users are software
-developers; the easiest way to explain *why* Zisp does certain things
-is often to explain *how* it does them.
+Zisp intentionally blurs the line between developers and users of the language.
+After all, Zisp is software, and its users are software developers; the easiest
+way to explain *why* Zisp does certain things is often to explain *how* it does
+them.
-That doesn't mean this manual will walk you through the source code
-line by line. Instead, consider it a documentation of the code base
-at large, doubling as a reference to the language implemented by the
-code base.
+That doesn't mean this manual will walk you through the source code line by
+line. Instead, consider this a documentation of the implementation at large,
+doubling as a language reference.
## Table of Contents
-1. [Chapter 1: Genesis](c1/)
+0. [Chapter 0: Genesis](./0/)
- 1. [Parse](c1/1-parse.html)
- 2. [Decode](c1/2-decode.html)
- 3. [Execute](c1/3-execute.html)
- 4. [Compile](c1/4-compile.html)
+ 0. [Value](./0/0-value.html)
+ 1. [Parse](./0/1-parse.html)
+ 2. [Decode](./0/2-decode.html)
+ 3. [Execute](./0/3-execute.html)
+ 4. [Compile](./0/4-compile.html)
-2. [Chapter 2: Types](c2/)
-
- This chapter deals with the standard data types, and the methods
- Zisp offers for defining new types.
+1. [Chapter 1: Taxonomy](./1/)
+ 0. ...
1. ...
- 2. ...
-3. [Chapter 3: ...](c3/)
+2. [Chapter 2: ...](./2/)
+
+
+<!--
+;; Local Variables:
+;; fill-column: 80
+;; End:
+-->
diff --git a/src/zisp/gc.zig b/src/zisp/gc.zig
index 9267087..b2fcc46 100644
--- a/src/zisp/gc.zig
+++ b/src/zisp/gc.zig
@@ -5,11 +5,11 @@ const Alloc = std.mem.Allocator;
const value = @import("value.zig");
-pub const PairPool = @import("gc/PairPool.zig");
+pub const ListPool = @import("gc/ListPool.zig");
pub const IstrSet = @import("gc/IstrSet.zig");
var main_alloc: Alloc = undefined;
-var main_pair_pool: PairPool = undefined;
+var main_list_pool: ListPool = undefined;
var main_istr_set: IstrSet = undefined;
var init_done = false;
@@ -20,24 +20,24 @@ pub fn init() !void {
const alloc = std.heap.smp_allocator;
initCustom(
alloc,
- try PairPool.init(alloc),
+ try ListPool.init(alloc),
try IstrSet.init(alloc),
);
}
pub fn initCustom(
alloc: Alloc,
- pair_pool: PairPool,
+ list_pool: ListPool,
istr_set: IstrSet,
) void {
main_alloc = alloc;
- main_pair_pool = pair_pool;
+ main_list_pool = list_pool;
main_istr_set = istr_set;
}
-pub fn mainPairPool() *PairPool {
+pub fn mainListPool() *ListPool {
init() catch @panic("OOM"); // TODO this is only here for the test suite
- return &main_pair_pool;
+ return &main_list_pool;
}
pub fn mainIstrSet() *IstrSet {
diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig
index bc345f2..bf0db47 100644
--- a/src/zisp/gc/IstrSet.zig
+++ b/src/zisp/gc/IstrSet.zig
@@ -74,7 +74,7 @@ pub fn getOrNew(self: *Set, s: []const u8) !IstrPtr {
/// Get the canonical istr with the same contents, or store this one as it.
pub fn getOrPut(self: *Set, istr: IstrPtr) !IstrPtr {
- return self.getOrPutOrNew(istr.str(), istr);
+ return self.getOrPutOrNew(istr.bytes(), istr);
}
fn getOrPutOrNew(self: *Set, s: []const u8, ptr: ?IstrPtr) !IstrPtr {
@@ -100,7 +100,7 @@ fn getOrPutOrNew(self: *Set, s: []const u8, ptr: ?IstrPtr) !IstrPtr {
}
if (bucket.fp == fp) {
const istr = bucket.istrPtr();
- if (strEq(s, istr.str())) return istr;
+ if (strEq(s, istr.bytes())) return istr;
}
}
}
@@ -114,7 +114,7 @@ fn resize(self: *Set) !void {
const idx_mask = new_buckets.len - 1;
for (old_buckets) |old| {
if (old.empty()) continue;
- const str = old.istrPtr().str();
+ const str = old.istrPtr().bytes();
var idx = strHash(str) & idx_mask;
while (!new_buckets[idx].empty()) idx = (idx + 1) & idx_mask;
new_buckets[idx] = old;
@@ -147,7 +147,7 @@ pub fn addStdlib(self: *Set, s: []const u8) !IstrPtr {
}
const istr = try self.newIstr(s);
- gop.key_ptr.* = istr.str();
+ gop.key_ptr.* = istr.bytes();
gop.value_ptr.* = istr;
return istr;
}
diff --git a/src/zisp/gc/ListPool.zig b/src/zisp/gc/ListPool.zig
new file mode 100644
index 0000000..6c34074
--- /dev/null
+++ b/src/zisp/gc/ListPool.zig
@@ -0,0 +1,69 @@
+//! List array pool
+//!
+//! Strategy: To ensure list arrays are allocated sequentially without any
+//! padding, we allocate memory in large blocks, and whenever a list array with
+//! a certain length is to be allocated, we check if the current block still has
+//! enough room; otherwise we allocate the next block.
+//!
+//! This is only used for lists up to 7 elements. Starting at 8, they can't
+//! share a cache line with other list elements anyway.
+
+const std = @import("std");
+
+const Alloc = std.mem.Allocator;
+const ArrayList = std.ArrayListUnmanaged;
+
+const value = @import("../value.zig");
+
+const Value = value.Value;
+
+const Self = @This();
+
+// How many Value elements fit in a block. A good value should be 512, since
+// each Value is 8 bytes: 512 * 8 = 4 KiB
+const block_value_cap = 512;
+
+// Initial capacity of ArrayList of pointers to filled blocks. Also 512, since
+// pointers are 8 bytes as well.
+const def_full_blocks_cap = 512;
+
+const Block = [block_value_cap]Value;
+
+alloc: Alloc,
+
+current_block: *Block,
+current_index: usize,
+
+full_blocks: ArrayList(*Block),
+
+pub fn init(alloc: Alloc) !Self {
+ return .{
+ .alloc = alloc,
+ .current_block = try alloc.create(Block),
+ .current_index = 0,
+ .full_blocks = try .initCapacity(alloc, def_full_blocks_cap),
+ };
+}
+
+pub fn deinit(self: *Self) void {
+ self.alloc.destroy(self.current_block);
+ for (self.full_blocks) |block| self.alloc.destroy(block);
+ self.full_blocks.deinit(self.alloc);
+}
+
+fn newBlock(self: *Self) !void {
+ try self.full_blocks.append(self.alloc, self.current_block);
+ self.current_block = try self.alloc.create(Block);
+ self.current_index = 0;
+}
+
+pub fn create(self: *Self, len: u3) ![*]Value {
+ std.debug.assert(len != 0);
+
+ if (len > block_value_cap - self.current_index) {
+ try self.newBlock();
+ }
+
+ defer self.current_index += len;
+ return @ptrCast(&self.current_block[self.current_index]);
+}
diff --git a/src/zisp/gc/PairPool.zig b/src/zisp/gc/PairPool.zig
deleted file mode 100644
index 4a77acc..0000000
--- a/src/zisp/gc/PairPool.zig
+++ /dev/null
@@ -1,34 +0,0 @@
-const std = @import("std");
-
-const Alloc = std.mem.Allocator;
-const AlignedPool = std.heap.memory_pool.Aligned;
-
-const value = @import("../value.zig");
-
-const Value = value.Value;
-const PairPtr = value.pair.PairPtr;
-
-const PairPool = @This();
-
-alloc: Alloc,
-pool: AlignedPool(value.pair.Pair, @enumFromInt(@alignOf(value.Zptr))),
-
-const default_init_cap = 1024;
-
-pub fn init(alloc: Alloc) !PairPool {
- return initCustom(alloc, default_init_cap);
-}
-
-pub fn initCustom(alloc: Alloc, init_cap: usize) !PairPool {
- return .{
- .alloc = alloc,
- .pool = try .initCapacity(alloc, init_cap),
- };
-}
-
-pub fn cons(self: *PairPool, car: Value, cdr: Value) !PairPtr {
- var p = try self.pool.create(self.alloc);
- p.car = car;
- p.cdr = cdr;
- return p;
-}
diff --git a/src/zisp/io/Decoder.zig b/src/zisp/io/Decoder.zig
new file mode 100644
index 0000000..95a0b68
--- /dev/null
+++ b/src/zisp/io/Decoder.zig
@@ -0,0 +1 @@
+const std = @import("std");
diff --git a/src/zisp/io/Encoder.zig b/src/zisp/io/Encoder.zig
new file mode 100644
index 0000000..eb27e20
--- /dev/null
+++ b/src/zisp/io/Encoder.zig
@@ -0,0 +1 @@
+// wip
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index d4a0a68..f768468 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -1,7 +1,7 @@
//!
//! === Syntax ===
//!
-//! See doc/c1/1-parse.md to understand the implemented syntax.
+//! See /doc/0/1-parse.md to understand the implemented syntax.
//!
//!
//! === Trampolining strategy ===
@@ -43,8 +43,8 @@ const gc = @import("../gc.zig");
const lib = @import("../lib.zig");
const value = @import("../value.zig");
+const ListPool = gc.ListPool;
const IstrSet = gc.IstrSet;
-const PairPool = gc.PairPool;
const Value = value.Value;
const Parser = @This();
@@ -81,21 +81,22 @@ pub const Error = enum {
};
pub const Context = struct {
- // What to do next.
+ /// What to do next.
next: ?Fn = undefined,
- // For storing a context value, like datum to join in join syntax.
+ /// For storing a context value, like datum to join in join syntax.
val: Value = undefined,
- // For storing a context char, like list opening bracket.
+ /// For storing a context char, like list opening bracket.
char: u8 = undefined,
- // Count of list elements on current parse level.
- list_len: usize = undefined,
+ /// Start index of list elements on current parse level, within the global
+ /// list element accumulation array.
+ list_start: usize = undefined,
};
-alloc: Alloc,
+list_pool: ?*ListPool,
istr_set: ?*IstrSet,
-pair_pool: *PairPool,
+alloc: Alloc,
-input: *Reader = undefined,
+reader: *Reader = undefined,
context: Context = .{},
ctx_stack: List(Context) = undefined,
@@ -107,23 +108,23 @@ unread_char: ?u8 = null,
err_msg: []const u8 = undefined,
pub fn init(alloc: Alloc) !Parser {
+ const list_pool = gc.mainListPool();
const istr_set = gc.mainIstrSet();
- const pair_pool = gc.mainPairPool();
- return initCustom(alloc, 32, 2048, 32, istr_set, pair_pool);
+ return initCustom(list_pool, istr_set, alloc, 32, 2048, 32);
}
pub fn initCustom(
+ list_pool: ?*ListPool,
+ istr_set: ?*IstrSet,
alloc: Alloc,
init_ctx_stack_cap: usize,
init_str_chars_cap: usize,
init_list_elts_cap: usize,
- istr_set: ?*IstrSet,
- pair_pool: *PairPool,
) !Parser {
var p: Parser = .{
- .alloc = alloc,
+ .list_pool = list_pool,
.istr_set = istr_set,
- .pair_pool = pair_pool,
+ .alloc = alloc,
};
p.ctx_stack = try .initCapacity(alloc, init_ctx_stack_cap);
p.str_chars = try .initCapacity(alloc, init_str_chars_cap);
@@ -148,7 +149,7 @@ fn read(p: *Parser) !?u8 {
.{p.unread_char.?},
);
}
- const c = p.input.takeByte() catch |e| switch (e) {
+ const c = p.reader.takeByte() catch |e| switch (e) {
error.EndOfStream => return null,
else => return p.err(.ReadError, "???"),
};
@@ -158,6 +159,13 @@ fn read(p: *Parser) !?u8 {
return c;
}
+fn readIntoSlice(p: *Parser, slice: []u8) !void {
+ p.reader.readSliceAll(slice) catch |e| return switch (e) {
+ error.EndOfStream => p.err(.UnexpectedEof, "reading into slice"),
+ else => p.err(.ReadError, "???"),
+ };
+}
+
fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
return try p.read() orelse p.err(.UnexpectedEof, emsg);
}
@@ -222,25 +230,22 @@ fn getCharsAsRune(p: *Parser) Value {
}
//
-// Pair consing & list creation
+// List creation
//
-fn cons(p: *Parser, car: Value, cdr: Value) !Value {
- return value.pair.consInPool(p.pair_pool, car, cdr);
-}
-
fn addListElt(p: *Parser, elt: Value) !void {
try p.list_elts.append(p.alloc, elt);
- p.context.list_len += 1;
}
-fn getList(p: *Parser, tail: Value) !Value {
- var list = tail;
- for (0..p.context.list_len) |_| {
- const elt = p.list_elts.pop() orelse unreachable;
- list = try p.cons(elt, list);
- }
- return list;
+fn getList(p: *Parser) !Value {
+ if (p.list_elts.items.len == p.context.list_start) return value.nil;
+ defer p.list_elts.items.len = p.context.list_start;
+ const vals = p.list_elts.items[p.context.list_start..];
+ return value.list.new(p.alloc, p.list_pool, vals);
+}
+
+fn makeList(p: *Parser, vals: []const Value) !Value {
+ return value.list.new(p.alloc, p.list_pool, vals);
}
//
@@ -259,8 +264,6 @@ const Fn = enum {
endRuneDatum,
endLabelDatum,
continueList,
- endImproperList,
- closeImproperList,
endQuoteExpr,
};
@@ -277,14 +280,12 @@ inline fn call(p: *Parser, f: Fn) !void {
.endRuneDatum => p.endRuneDatum(),
.endLabelDatum => p.endLabelDatum(),
.continueList => p.continueList(),
- .endImproperList => p.endImproperList(),
- .closeImproperList => p.closeImproperList(),
.endQuoteExpr => p.endQuoteExpr(),
};
}
-pub fn run(p: *Parser, input: *Reader) !Value {
- p.input = input;
+pub fn run(p: *Parser, reader: *Reader) !Value {
+ p.reader = reader;
p.context.next = .parseUnit;
while (p.context.next) |next| {
if (detailed_debug) p.printStack();
@@ -336,7 +337,7 @@ fn pushContext(p: *Parser, next: Fn) !void {
.next = next,
.val = p.context.val,
.char = p.context.char,
- .list_len = p.context.list_len,
+ .list_start = p.context.list_start,
});
}
@@ -455,7 +456,7 @@ fn endJoinDatum(p: *Parser) !void {
':' => COLON,
else => unreachable,
};
- const joined = try p.cons(rune, try p.cons(prev, p.result));
+ const joined = try p.makeList(&.{ rune, prev, p.result });
return p.jump(.parseJoin, joined);
}
@@ -511,21 +512,44 @@ fn getString(p: *Parser, comptime close: u8) !Value {
};
const s = try p.getCharsAsString();
return switch (close) {
- '|' => try p.cons(PQSTR, s),
- '"' => try p.cons(DQSTR, s),
+ '|' => try p.makeList(&.{ PQSTR, s }),
+ '"' => try p.makeList(&.{ DQSTR, s }),
else => unreachable,
};
}
fn getAtString(p: *Parser) !Value {
const stop = try p.readNoEof("at-string");
+ return if (stop == 255) p.getAtLenStr() else p.getAtSentinelStr(stop);
+}
+
+fn getAtLenStr(p: *Parser) !Value {
+ var len: u48 = 0;
+ inline for (0..6) |_| {
+ len <<= 8;
+ len += try p.readNoEof("at-length-string");
+ }
+ const AH = value.array.ArrayHeader;
+ const aln: std.mem.Alignment = @enumFromInt(@alignOf(AH));
+ const mem = try p.alloc.alignedAlloc(u8, aln, @sizeOf(AH) + len);
+ const arr: value.array.ArrayPtr = @ptrCast(mem);
+ arr.* = .{
+ .len_or_ptr = len,
+ .type = .str,
+ .info = .{ .str = .{} },
+ };
+ try p.readIntoSlice(arr.bytes());
+ return p.makeList(&.{ ATSTR, value.ptr.pack(.array, arr) });
+}
+
+fn getAtSentinelStr(p: *Parser, stop: u8) !Value {
while (try p.readNoEofOpt("at-string")) |c| {
if (c == stop) break;
try p.addChar(c);
}
const str = try p.getCharsAsString();
const byte = value.fixnum.pack(stop);
- return try p.cons(ATSTR, try p.cons(byte, str));
+ return p.makeList(&.{ ATSTR, byte, str });
}
fn skipStringLfEscape(p: *Parser) !u8 {
@@ -591,8 +615,9 @@ fn parseHashExpr(p: *Parser, next: Fn) !void {
},
'\\' => {
const c1 = try p.readNoEof("hash-backslash");
- const bs = try p.getBareString(c1);
- return p.jump(next, try p.cons(HASH, bs));
+ const str = try p.getBareString(c1);
+ const val = try p.makeList(&.{ HASH, str });
+ return p.jump(next, val);
},
'!' => return p.parseHashBang(next),
'%' => return p.parseLabel(next),
@@ -611,7 +636,7 @@ fn endHashDatum(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "hash datum");
}
- return p.retval(try p.cons(HASH, p.result));
+ return p.retval(try p.makeList(&.{ HASH, p.result }));
}
fn getRune(p: *Parser, c1: u8) !Value {
@@ -635,11 +660,25 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
switch (c) {
'\\' => {
const c1 = try p.readNoEof("rune-backslash");
- return p.jump(next, try p.cons(r, try p.getBareString(c1)));
+ const str = try p.getBareString(c1);
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '"' => {
+ const str = try p.getString('"');
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '|' => {
+ const str = try p.getString('|');
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
+ },
+ '@' => {
+ const str = try p.getAtString();
+ const val = try p.makeList(&.{ r, str });
+ return p.jump(next, val);
},
- '"' => return p.jump(next, try p.cons(r, try p.getString('"'))),
- '|' => return p.jump(next, try p.cons(r, try p.getString('|'))),
- '@' => return p.jump(next, try p.cons(r, try p.getAtString())),
'#', '(', '[', '{', '\'', '`', ',' => {
p.unread(c);
try p.push(next);
@@ -654,31 +693,31 @@ fn parseRuneEnd(p: *Parser, r: Value, next: Fn) !void {
}
fn endRuneDatum(p: *Parser) !void {
- return p.retval(try p.cons(p.context.val, p.result));
+ return p.retval(try p.makeList(&.{ p.context.val, p.result }));
}
fn parseHashBang(p: *Parser, next: Fn) !void {
- const val = try p.getHashBangValue();
- return p.jump(next, try p.cons(SHBANG, val));
+ const interp, const arg_line = try p.getHashBangValue();
+ if (arg_line) |args| {
+ return p.jump(next, try p.makeList(&.{ SHBANG, interp, args }));
+ } else {
+ return p.jump(next, try p.makeList(&.{ SHBANG, interp }));
+ }
}
-fn getHashBangValue(p: *Parser) !Value {
+fn getHashBangValue(p: *Parser) !struct { Value, ?Value } {
while (try p.readNoEofOpt("hash-bang")) |c| switch (c) {
' ', '\t' => continue,
'\n' => return p.err(.InvalidCharacter, "hash-bang"),
else => {
try p.addChar(c);
while (try p.read()) |c2| switch (c2) {
- '\n' => return p.getCharsAsString(),
+ '\n' => return .{ try p.getCharsAsString(), null },
' ', '\t' => break,
else => try p.addChar(c2),
};
const interp = try p.getCharsAsString();
- if (try p.getHashBangArgLine()) |arg_line| {
- return try p.cons(interp, arg_line);
- } else {
- return interp;
- }
+ return .{ interp, try p.getHashBangArgLine() };
},
};
unreachable;
@@ -704,7 +743,7 @@ fn parseLabel(p: *Parser, next: Fn) !void {
const n = try p.parseHex(u48, "datum label");
const l = value.fixnum.pack(n);
switch (p.getUnread() orelse try p.readNoEof("datum label")) {
- '%' => return p.jump(next, try p.cons(LABEL, l)),
+ '%' => return p.jump(next, try p.makeList(&.{ LABEL, l })),
'=' => {
try p.push(next);
p.context.val = l;
@@ -718,7 +757,7 @@ fn endLabelDatum(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "label datum");
}
- return p.retval(try p.cons(LABEL, try p.cons(p.context.val, p.result)));
+ return p.retval(try p.makeList(&.{ LABEL, p.context.val, p.result }));
}
fn parseList(p: *Parser, open: u8, next: Fn) !void {
@@ -729,7 +768,7 @@ fn parseList(p: *Parser, open: u8, next: Fn) !void {
'{' => '}',
else => unreachable,
};
- p.context.list_len = 0;
+ p.context.list_start = p.list_elts.items.len;
switch (open) {
'(' => {},
'[' => try p.addListElt(SQUARE),
@@ -750,9 +789,6 @@ fn continueList(p: *Parser) !void {
if (c == close) {
return p.endList();
}
- if (c == '&') {
- return p.subr(.parseUnit, .endImproperList);
- }
return p.err(.InvalidCharacter, "list");
}
@@ -762,32 +798,7 @@ fn continueList(p: *Parser) !void {
}
fn endList(p: *Parser) !void {
- return p.retval(try p.getList(value.nil));
-}
-
-fn endImproperList(p: *Parser) !void {
- if (p.result.eq(value.none)) {
- return p.err(.InvalidCharacter, "list tail");
- }
- p.context.val = try p.getList(p.result);
- return p.closeImproperList();
-}
-
-fn closeImproperList(p: *Parser) !void {
- const result = p.context.val;
- const close = p.context.char;
- var c1 = p.getUnread() orelse try p.read();
- while (c1) |c| : (c1 = try p.read()) {
- if (c == close) {
- return p.retval(result);
- }
- switch (try p.checkBlank(c)) {
- .yes => {},
- .skip_unit => return p.subr(.parseUnit, .closeImproperList),
- .no => return p.err(.InvalidCharacter, "after list tail"),
- }
- }
- return p.err(.UnexpectedEof, "after list tail");
+ return p.retval(try p.getList());
}
fn parseQuoteExpr(p: *Parser, c1: u8, next: Fn) !void {
@@ -808,7 +819,7 @@ fn endQuoteExpr(p: *Parser) !void {
if (p.result.eq(value.none)) {
return p.err(.InvalidCharacter, "quote expression datum");
}
- return p.retval(try p.cons(p.context.val, p.result));
+ return p.retval(try p.makeList(&.{ p.context.val, p.result }));
}
// Helpers
@@ -836,7 +847,7 @@ pub fn isSpecialBareChar(c: u8) bool {
pub fn isBareChar(c: u8) bool {
return switch (c) {
// zig fmt: off
- 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '*' ,
+ 'a'...'z' , 'A'...'Z' , '0'...'9' , '!' , '$' , '%' , '&' , '*' ,
'+' , '-' , '/' , '<' , '=' , '>' , '?' , '^' , '_' , '~' , => true,
// zig fmt: on
else => false,
diff --git a/src/zisp/io/Printer.zig b/src/zisp/io/Printer.zig
index e6b3d5b..4b06005 100644
--- a/src/zisp/io/Printer.zig
+++ b/src/zisp/io/Printer.zig
@@ -7,7 +7,6 @@ const value = @import("../value.zig");
const Parser = io.Parser;
const Value = value.Value;
-const PairPtr = value.pair.PairPtr;
const IstrPtr = value.istr.IstrPtr;
const ArrayPtr = value.array.ArrayPtr;
@@ -19,17 +18,21 @@ pub fn init(writer: *Writer) Printer {
return .{ .writer = writer };
}
-fn write(p: *Printer, str: []const u8) !void {
- _ = try p.writer.write(str);
+fn write(p: *Printer, bytes: []const u8) !void {
+ _ = try p.writer.write(bytes);
+}
+
+fn writeByte(p: *Printer, c: u8) !void {
+ _ = try p.writer.writeByte(c);
}
pub fn print(p: *Printer, v: Value) anyerror!void {
if (v.isSstr()) return p.printBareStr(value.sstr.unpack(&v));
if (v.isRune()) return p.printRune(v);
if (v.isMisc()) return p.printMisc(v);
- if (value.istr.check(v)) |str| return p.printBareStr(str.str());
- if (value.array.check(.str, v)) |str| return p.printBareStr(str.str());
- if (value.pair.check(v)) |pair| return p.printPair(pair);
+ if (value.list.check(v)) return p.printList(v);
+ if (value.istr.check(v)) return p.printIstr(v);
+ if (value.array.check(.str, v)) |str| return p.printBareStr(str.bytes());
@panic("Unsupported type to print.");
}
@@ -38,15 +41,20 @@ pub fn printBareStr(p: *Printer, s: []const u8) !void {
const no_joins = Parser.isSpecialBareChar(s[0]);
for (s) |c| {
if (Parser.isBareChar(c)) {
- try p.writer.writeByte(c);
+ try p.writeByte(c);
} else if (no_joins and (c == '.' or c == ':')) {
- try p.writer.writeByte(c);
+ try p.writeByte(c);
} else {
@panic("String needs quoting.");
}
}
}
+pub fn printIstr(p: *Printer, v: Value) !void {
+ const istr = value.istr.unpack(v);
+ try p.printBareStr(istr.bytes());
+}
+
pub fn printRune(p: *Printer, v: Value) !void {
try p.write("#");
try p.write(value.rune.unpack(&v));
@@ -58,90 +66,201 @@ pub fn printMisc(p: *Printer, v: Value) !void {
value.t.bits => p.write("#t"),
value.nil.bits => p.write("()"),
value.eof.bits => p.write("#EOF"),
- value.none.bits => p.write("#NONE"),
- value.undef.bits => p.write("#UNDEF"),
else => @panic("Unsupported misc value to print."),
};
}
-pub fn printPair(p: *Printer, pair: PairPtr) !void {
- try switch (pair.car.bits) {
- Parser.PQSTR.bits => p.printQuotString(pair.cdr, '|'),
- Parser.DQSTR.bits => p.printQuotString(pair.cdr, '"'),
- Parser.ATSTR.bits => p.printAtString(pair.cdr),
- Parser.LABEL.bits => p.printLabel(pair.cdr),
- else => p.printList(pair),
+pub fn printList(p: *Printer, list: Value) !void {
+ const len = value.list.getLenTag(list);
+ const ptr = value.list.getValPtr(list);
+ try switch (len) {
+ 2 => switch (ptr[0].bits) {
+ Parser.SHBANG.bits => p.printShebang(ptr[1]),
+ Parser.LABEL.bits => p.printLabel(ptr[1]),
+ Parser.HASH.bits => p.printHashDatum(ptr[1]),
+ Parser.PQSTR.bits => p.printQuotStr(ptr[1], '|'),
+ Parser.DQSTR.bits => p.printQuotStr(ptr[1], '"'),
+ Parser.ATSTR.bits => p.printAtLenStr(ptr[1]),
+ Parser.QUOTE.bits => p.printQuoteWithChar('\'', ptr[1]),
+ Parser.GRAVE.bits => p.printQuoteWithChar('`', ptr[1]),
+ Parser.COMMA.bits => p.printQuoteWithChar(',', ptr[1]),
+ Parser.SQUARE.bits => p.printListDirect(len, ptr),
+ Parser.BRACE.bits => p.printListDirect(len, ptr),
+ else => if (ptr[0].isRune()) {
+ return p.printRuneDatum(ptr[0], ptr[1]);
+ } else {
+ return p.printListDirect(len, ptr);
+ },
+ },
+ 3 => switch (ptr[0].bits) {
+ Parser.DOT.bits => p.printJoinWithChar('.', ptr[1], ptr[2]),
+ Parser.COLON.bits => p.printJoinWithChar(':', ptr[1], ptr[2]),
+ Parser.JOIN.bits => p.printJoin(ptr[1], ptr[2]),
+ Parser.SHBANG.bits => p.printShebangWithArgs(ptr[1], ptr[2]),
+ Parser.LABEL.bits => p.printLabelDatum(ptr[1], ptr[2]),
+ Parser.ATSTR.bits => p.printAtStr(ptr[1], ptr[2]),
+ else => p.printListDirect(len, ptr),
+ },
+ else => p.printListDirect(len, ptr),
};
}
-fn printQuotString(p: *Printer, s: Value, comptime qchar: u8) !void {
- try p.writer.writeByte(qchar);
- const str = try value.string.getString(&s);
- for (str) |c| switch (c) {
+fn printJoinWithChar(p: *Printer, c: u8, d1: Value, d2: Value) !void {
+ try p.print(d1);
+ try p.writeByte(c);
+ try p.print(d2);
+}
+
+fn printJoin(p: *Printer, d1: Value, d2: Value) !void {
+ try p.print(d1);
+ try p.print(d2);
+}
+
+fn printShebang(p: *Printer, str: Value) !void {
+ try p.write("#!");
+ try p.write(value.string.getBytes(&str));
+ try p.write("\n");
+}
+
+fn printShebangWithArgs(p: *Printer, str: Value, str2: Value) !void {
+ try p.write("#!");
+ try p.write(value.string.getBytes(&str));
+ try p.write(" ");
+ try p.write(value.string.getBytes(&str2));
+ try p.write("\n");
+}
+
+fn printLabel(p: *Printer, num: Value) !void {
+ try p.printLabelNum(num);
+ try p.write("%");
+}
+
+fn printLabelDatum(p: *Printer, num: Value, dat: Value) !void {
+ try p.printLabelNum(num);
+ try p.write("=");
+ try p.print(dat);
+}
+
+fn printLabelNum(p: *Printer, num: Value) !void {
+ const x = value.fixnum.unpack(num);
+ std.debug.assert(x >= 0);
+ std.debug.assert(x <= std.math.maxInt(u48));
+
+ var buf: [12]u8 = undefined;
+ const end = std.fmt.printInt(&buf, x, 16, .lower, .{});
+
+ try p.write("#%");
+ try p.write(buf[0..end]);
+}
+
+fn printHashDatum(p: *Printer, d: Value) !void {
+ try p.write("#");
+ if (value.string.isString(d)) try p.write("\\");
+ try p.print(d);
+}
+
+fn printRuneDatum(p: *Printer, r: Value, d: Value) !void {
+ try p.printRune(r);
+ if (value.string.isString(d)) try p.write("\\");
+ try p.print(d);
+}
+
+fn printQuotStr(p: *Printer, str: Value, comptime qchar: u8) !void {
+ try p.writeByte(qchar);
+ for (value.string.getBytes(&str)) |c| switch (c) {
qchar => {
- try p.writer.writeByte('\\');
- try p.writer.writeByte(qchar);
+ try p.writeByte('\\');
+ try p.writeByte(qchar);
},
'\\' => {
- try p.writer.writeByte('\\');
- try p.writer.writeByte('\\');
+ try p.writeByte('\\');
+ try p.writeByte('\\');
},
else => {
- try p.writer.writeByte(c);
+ try p.writeByte(c);
},
};
- try p.writer.writeByte(qchar);
+ try p.writeByte(qchar);
}
-pub fn printAtString(p: *Printer, at_str_pair: Value) !void {
- const pair = value.pair.assert(at_str_pair);
- const byte = value.fixnum.unpack(pair.car);
- std.debug.assert(byte <= 255);
- const str = try value.string.getString(&pair.cdr);
+fn printAtLenStr(p: *Printer, str: Value) !void {
+ const bytes = value.string.getBytes(&str);
+
try p.write("@");
- try p.writer.writeByte(@intCast(byte));
- try p.write(str);
- try p.writer.writeByte(@intCast(byte));
-}
+ try p.writeByte(255);
-pub fn printLabel(p: *Printer, v: Value) !void {
- var num: Value = undefined;
- var dat: ?Value = undefined;
- if (value.pair.check(v)) |pair| {
- num = pair.car;
- dat = pair.cdr;
- } else {
- num = v;
- dat = null;
+ var buf: [6]u8 = undefined;
+ var len = bytes.len;
+ inline for (0..6) |i| {
+ buf[5 - i] = @truncate(len);
+ len >>= 8;
}
- const x = value.fixnum.unpack(num);
- std.debug.assert(x >= 0);
- std.debug.assert(x <= std.math.maxInt(u48));
- var buf: [12]u8 = undefined;
- const end = std.fmt.printInt(&buf, x, 16, .lower, .{});
+ try p.write(buf[0..6]);
+ try p.write(bytes);
+}
- try p.write("#%");
- try p.write(buf[0..end]);
- if (dat) |d| {
- try p.write("=");
- try p.print(d);
- } else {
- try p.write("%");
+fn printAtStr(p: *Printer, sentinel: Value, str: Value) !void {
+ const sentinel_num = value.fixnum.unpack(sentinel);
+ if (sentinel_num > 254) {
+ @panic("At-string sentinel must be <= 254.");
}
+
+ const byte: u8 = @intCast(sentinel_num);
+ const str_bytes = value.string.getBytes(&str);
+ for (str_bytes) |c| {
+ if (c == byte) @panic("String contains sentinel byte.");
+ }
+ try p.write("@");
+ try p.writeByte(byte);
+ try p.write(str_bytes);
+ try p.writeByte(byte);
+}
+
+fn printQuoteWithChar(p: *Printer, c: u8, d: Value) !void {
+ try p.writeByte(c);
+ try p.print(d);
}
-pub fn printList(p: *Printer, pair: PairPtr) !void {
- try p.write("(");
- try p.print(pair.car);
- var cdr = pair.cdr;
- while (value.pair.check(cdr)) |next| : (cdr = next.cdr) {
- try p.write(" ");
- try p.print(next.car);
+fn printListDirect(p: *Printer, len: u3, vals: [*]Value) !void {
+ var close = ")";
+ var i: usize = 0;
+
+ switch (vals[0].bits) {
+ Parser.SQUARE.bits => {
+ try p.write("[");
+ close = "]";
+ i = 1;
+ },
+ Parser.BRACE.bits => {
+ try p.write("{");
+ close = "}";
+ i = 1;
+ },
+ else => {
+ if (vals[0].isRune()) {
+ try p.printRune(vals[0]);
+ i = 1;
+ }
+ try p.write("(");
+ },
}
- if (!value.nil.eq(cdr)) {
- try p.write(" & ");
- try p.print(cdr);
+
+ if (len != 0) {
+ var first = true;
+ for (i..len) |j| {
+ if (!first) try p.write(" ");
+ first = false;
+ try p.print(vals[j]);
+ }
+ } else {
+ try p.print(vals[i]);
+ i += 1;
+ while (vals[i].bits != value.none.bits) : (i += 1) {
+ try p.write(" ");
+ try p.print(vals[i]);
+ }
}
- try p.write(")");
+
+ try p.write(close);
}
diff --git a/src/zisp/value.zig b/src/zisp/value.zig
index 9fd9384..4ff683a 100644
--- a/src/zisp/value.zig
+++ b/src/zisp/value.zig
@@ -1,170 +1,4 @@
-//!
-//! == NaN Packing Strategy ==
-//!
-//! Format of a double, in most to least significant field order:
-//!
-//! { sign: u1, exponent: u11, fraction: u52 }
-//!
-//! When the exponent bits are all set, it's either a NaN or an Infinity.
-//!
-//! For value packing, almost all remaining 53 bits are available, giving us
-//! about 2^53 values, except for the four following bit patterns:
-//!
-//! *** FORBIDDEN VALUES ***
-//!
-//! 1. Negative cqNaN = { sign = 1, exponent = max, fraction = 2^51 }
-//!
-//! 2. Negative Infinity = { sign = 1, exponent = max, fraction = 0 }
-//!
-//! 3. Positive cqNaN = { sign = 0, exponent = max, fraction = 2^51 }
-//!
-//! 4. Positive Infinity = { sign = 0, exponent = max, fraction = 0 }
-//!
-//! The abbreviation "cqNaN" stands for canonical quiet NaN.
-//!
-//! Note that 2^51 means the MSb of the 52 fraction bits being set, and the rest
-//! being zero. The fraction MSb is also called the is_quiet flag, because it
-//! demarcates quiet NaNs. The rest being zero makes it the canonical qNaN.
-//!
-//! The positive and negative cqNaN are the *only* NaN values that can actually
-//! be returned by FP operations, which is convenient because it means we can
-//! simply use them to represent themselves in Zisp.
-//!
-//! Infinity values may also be returned by FP operations, and we want them to
-//! exist in Zisp as doubles as well, so they also represent themselves.
-//!
-//! Beyond those four bit patterns, all values with a maximum exponent (all bits
-//! set) are fair game for representing other values, so 2^53 - 4 possibilities.
-//!
-//! We split those 2^53 - 4 available values into four groups, each allowing for
-//! 2^51 - 1 different values to be encoded. (51-bit values excluding zero.)
-//!
-//! sign = 1, quiet = 1 :: Negative Fixnum from -1 to -2^51+1
-//!
-//! sign = 1, quiet = 0 :: Positive Fixnum from 0 to 2^51-2
-//!
-//! sign = 0, quiet = 1 :: Pointers and various immediates
-//!
-//! sign = 0, quiet = 0 :: Internal use by interpreter
-//!
-//!
-//! === Fixnums ===
-//!
-//! Negative fixnums actually represent themselves without needing to go through
-//! any transformation. Only the smallest 52-bit signed negative, -2^51, cannot
-//! be represented, as it would step on Forbidden Value #1, Negative cqNaN.
-//!
-//! Positive fixnums go through a bitsiwe NOT (implemented as an XOR mask to
-//! combine it with removal of NaN-related high bits) to avoid the all-zero
-//! payload value, which would step on Forbidden Value #2, Negative Infinity.
-//!
-//!
-//! === Pointers and immediates ===
-//!
-//! This region of 51-bit values is divided as follows based on the three high
-//! bits, providing a payload value of 48 bits for each.
-//!
-//! 000 :: Regular pointer to Zisp heap (type tagged)
-//!
-//! 001 :: Weak pointer to Zisp heap (type tagged)
-//!
-//! 010 :: List segment array pointer (length tagged)
-//!
-//! 011 :: Interned string pointer (untagged)
-//!
-//! 100 :: Subdivided as follows:
-//!
-//! 0....... 0....... 0....... (etc.) :: Rune
-//!
-//! 1....... :: 128 40-bit types
-//!
-//! 0....... 1....... :: 16384 32-bit types
-//!
-//! 0....... 0....... 1....... :: 2097152 24-bit types
-//!
-//! (etc.)
-//!
-//! 101 :: Short immediate string
-//!
-//! 11s :: Small rational (signed)
-//!
-//! ==== Type-tagged pointers ====
-//!
-//! Zisp heap pointers use 48-bit addresses. This is sufficient, since the
-//! address space of user-land applications is effectively 48 bits on 64-bit
-//! systems. Further, Zisp heap objects are allocated at 16-byte boundaries,
-//! meaning the lowest 4 bits are always zero; this is used for type tagging,
-//! providing immediate information about the type of object pointed to.
-//!
-//! Forbidden Value #3, Positive cqNaN, is avoided thanks to the fact that a
-//! regular Zisp heap pointer can never be null.
-//!
-//! ==== Segmented lists ====
-//!
-//! List segment arrays are also allocated at 16-byte boundaries, but use their
-//! low 4 bits for segment length; see documentation of this API for details.
-//!
-//! ==== Interned strings ====
-//!
-//! Interned string pointers don't use any low tag bits. Unlike other heap
-//! objects, they can be allocated at any address.
-//!
-//! ==== Runes and misc. small values ====
-//!
-//! Runes are symbols up to 6 ASCII characters used to implement reader syntax.
-//! They are NUL-terminated if shorter than six characters, meaning they cannot
-//! contain the NUL byte in their value.
-//!
-//! NOTE: The order in which the characters of the rune are encoded depends on
-//! endianness. On little-endian systems (i.e. most modern architectures) the
-//! characters will be in "reverse" order, with the first character in lowest
-//! position, so the terminating NUL has to be searched from low to high.
-//!
-//! The fact that runes are limited to ASCII opens up a lot of space for other
-//! small values to co-inhabit the same 48-bit range. We subdivide this space
-//! into increasingly many potential types, with smaller and smaller payloads,
-//! where the highest byte with a non-zero MSb determines which size category
-//! we're in: If the highest byte has its MSb set, then its seven non-MSb bits
-//! define a type and each type has a 40-bit value range; if the second highest
-//! byte has its MSb set, then the 14 non-MSb bits of the two high bytes define
-//! the type and each has a 32-bit value range; and so on.
-//!
-//! Unicode code points need 21 bits, so we use a 24-bit type for Characters.
-//! Miscellaneous values like true, false, nil, eof, etc. are placed into an
-//! 8-bit type, since there will never be that many of them. A virtually
-//! unlimited number of user-defined enum types can fit here.
-//!
-//! ==== Short strings ====
-//!
-//! Another 48-bit space is used for strings of zero to six bytes. Like runes,
-//! these are NUL-terminated if shorter than six bytes, meaning that NUL cannot
-//! appear in them, but otherwise they allow arbitrary bytes.
-//!
-//! ==== Small rationals ====
-//!
-//! We use a 49-bit space for small exact rational numbers, with the numerator
-//! being a two's complement 25-bit signed integer, and denominator a 24-bit
-//! unsigned integer.
-//!
-//!
-//! === Internal use values ===
-//!
-//! The final 51-bit range is used for various internal purposes by the Zisp
-//! interpreter, mostly related to transparent code optimization:
-//!
-//! 000 :: Direct function pointer aka CALL instruction
-//!
-//! 001 :: Lexically scoped variable reference by index
-//!
-//! 010 :: Pointer to heap object as constant data
-//!
-//! 011 :: Short immediate string as constant data
-//!
-//! 1.. :: TBD
-//!
-//! Forbidden Value #4, Positive Infinity, is avoided thanks to the fact that
-//! direct function pointers cannot be null.
-//!
+//! See /doc/0/0-value.md to understand the NaN-packing strategy.
const builtin = @import("builtin");
const std = @import("std");
@@ -174,17 +8,18 @@ const gc = @import("gc.zig");
pub const double = @import("value/double.zig");
pub const fixnum = @import("value/fixnum.zig");
+pub const ptr = @import("value/ptr.zig");
+pub const list = @import("value/list.zig");
+pub const istr = @import("value/istr.zig");
+pub const sstr = @import("value/sstr.zig");
+pub const srat = @import("value/srat.zig");
pub const rune = @import("value/rune.zig");
pub const char = @import("value/char.zig");
pub const misc = @import("value/misc.zig");
-pub const sstr = @import("value/sstr.zig");
-pub const boole = @import("value/boole.zig");
-pub const ptr = @import("value/ptr.zig");
-pub const pair = @import("value/pair.zig");
-pub const istr = @import("value/istr.zig");
pub const array = @import("value/array.zig");
+pub const boole = @import("value/boole.zig");
pub const string = @import("value/string.zig");
const endian = builtin.target.cpu.arch.endian();
@@ -209,11 +44,11 @@ pub fn sstrLen(x: u64) u8 {
/// Transform back and forth between rune and sstr
pub fn runeXsstr(v: Value) Value {
- return @bitCast(v.bits ^ (1 << 48));
+ return @bitCast(v.bits ^ (1 << 50));
}
// Make sure false/true only differ in LSb.
-pub const MiscValue = enum(u8) { f, t, nil, eof, none, undef };
+pub const MiscValue = enum(u8) { f, t, nil, eof, none };
// zig fmt: off
pub const f = Value{ .misc = .{ .value = .f } };
@@ -221,56 +56,56 @@ pub const t = Value{ .misc = .{ .value = .t } };
pub const nil = Value{ .misc = .{ .value = .nil } };
pub const eof = Value{ .misc = .{ .value = .eof } };
pub const none = Value{ .misc = .{ .value = .none } };
-pub const undef = Value{ .misc = .{ .value = .undef } };
// zig fmt: on
-/// A plain (unpacked, untagged) pointer into the Zisp heap. May point to any
-/// kind of object.
+/// A plain (unpacked, untagged) pointer into the Zisp heap.
pub const Zptr = *align(16) anyopaque;
/// Values for the lowest 4 bits of a heap pointer, indicating the heap type.
-pub const PtrTag = enum(u4) {
- /// Pair aka cons cell aka *[2]Value
- pair,
- /// Interned string buffer (8-bit length, then contents)
- istr,
+pub const HeapType = enum(u4) {
+ /// Unused so the 48-bit payload of a NaN-packed pointer is never zero and
+ /// cannot be confused for an actual NaN even in case of a null-index.
+ _unused = 0,
+
/// Array of various types (see `Array` type for details)
array,
- /// Procedure
- proc,
- pub fn ptype(self: PtrTag) type {
+ pub fn PtrType(self: HeapType) type {
return switch (self) {
- .pair => pair.PairPtr,
- .istr => istr.IstrPtr,
.array => array.ArrayPtr,
else => @panic("not implemented"),
};
}
};
-pub const PtrTagUIntType = @typeInfo(PtrTag).@"enum".tag_type;
+/// Make a "pointer etc." high 16-bit pattern with a 3-bit sub-range tag.
+fn ptrEtc(comptime tag: u3) u16 {
+ return @as(u16, 0x7ff8) + tag;
+}
-// "Pointer etc." high bits (sign=0,exp=MAX,quiet=1) as a u13 field.
-const ptr_etc: u13 = max(u13) - 1;
+const hi16_tag_ptr = ptrEtc(0b000);
+const hi16_tag_list = ptrEtc(0b001);
+const hi16_tag_istr = ptrEtc(0b010);
+const hi16_tag_sstr = ptrEtc(0b011);
+const hi15_tag_srat = @as(u15, @intCast(ptrEtc(0b100) >> 1));
+const hi16_tag_rune = ptrEtc(0b111);
-/// Represents a Zisp value/object.
+/// Represents a Zisp value.
pub const Value = packed union {
- /// To get the value as a regular double.
- double: f64,
-
/// To get an agnostic value for direct comparison with == i.e. eq? as well
/// as manual bit-fiddling to test for and extract packed values.
bits: u64,
+ /// Regular double, including +nan, -nan, +inf, -inf.
+ double: f64,
+
// Some of the structs below are just for inspection, whereas others are to
- // initialize a new value of that category as well as read it that way.
+ // initialize a new value of that category, as well as to read it as such.
+ // Some of these are not used at all and are merely demonstrative.
- // Note: Zig packed struct fields are ordered from LSb to MSb, contrary to
- // most diagrams used to represent IEEE 754, including our own at the top.
+ // Note: Zig packed struct fields are ordered from LSb to MSb.
/// Inspection through the lens of the general IEEE 754 double layout.
- /// (Unused because we do manual bit-fiddling for optimal results.)
ieee: packed struct {
rest: u51,
quiet: bool,
@@ -278,110 +113,84 @@ pub const Value = packed union {
sign: bool,
},
- /// For initializing and reading fixnums.
+ /// 52-bit signed fixnum
fixnum: packed struct {
code: u51,
negative: bool,
- _: u11 = max(u11),
+ _exp: u11 = max(u11),
_is_fixnum: bool = true,
},
- /// For initializing and reading pointers.
+ /// Ordinary heap pointer
ptr: packed struct {
- tagged_value: u48,
- is_weak: bool = false,
- _1: bool = false,
- _2: bool = false,
- _: u13 = ptr_etc,
- },
-
- /// List segment array pointers
- lsa: packed struct {
- tagged_value: u48,
- _id: u3 = 0b010,
- _: u13 = ptr_etc,
+ index: u44,
+ htype: HeapType,
+ _hi16_tag: u16 = hi16_tag_ptr,
},
- /// Interned string pointer
- isp: packed struct {
- pointer: u48,
- _id: u3 = 0b011,
- _: u13 = ptr_etc,
+ /// List pointer
+ list: packed struct {
+ len_tagged_ptr: u48,
+ _hi16_tag: u16 = hi16_tag_list,
},
- /// For initializing and reading runes.
- rune: packed struct {
- // actually [6]u8 but packed struct cannot contain arrays
- name: u48,
- _id: u3 = 0b100,
- _: u13 = ptr_etc,
+ /// Istr pointer
+ istr: packed struct {
+ ptr: u48,
+ _hi16_tag: u16 = hi16_tag_istr,
},
- /// For initializing and reading short strings.
+ /// Short string
sstr: packed struct {
// actually [6]u8 but packed struct cannot contain arrays
bytes: u48,
- _id: u3 = 0b101,
- _: u13 = ptr_etc,
+ _hi16_tag: u16 = hi16_tag_sstr,
},
- /// For initializing and reading small rats (rational numbers).
+ /// Small rat (rational number)
srat: packed struct {
q: u24,
p: i25,
- _tag: u2 = 0b11,
- _: u13 = ptr_etc,
+ _hi16_tag: u15 = hi15_tag_srat,
+ },
+
+ /// Rune (6-byte ASCII string)
+ rune: packed struct {
+ // actually [6]u8 but packed struct cannot contain arrays
+ name: u48,
+ _hi16_tag: u16 = hi16_tag_rune,
},
// TODO: Use a general Small Value type registration mechanism.
- /// For initializing and reading characters.
char: packed struct {
value: u24,
_sv_tag: u24 = 0x000080,
- _id: u3 = 0b100,
- _: u13 = ptr_etc,
+ _hi16_tag: u16 = hi16_tag_rune,
},
// TODO: Use a general Small Value type registration mechanism.
- /// For initializing and reading miscellaneous values.
misc: packed struct {
value: MiscValue,
_sv_tag: u40 = 0x0000000080,
- _id: u3 = 0b100,
- _: u13 = ptr_etc,
+ _hi16_tag: u16 = hi16_tag_rune,
},
- // Disjoint masks where a specific bit or bit-group are set.
- // zig fmt: off
- const mask_sign: u64 = 1 << 63 ; // 1 sign bit
- const mask_exp: u64 = max(u11) << 52 ; // 11 exponent bits
- const mask_quiet: u64 = 1 << 51 ; // 1 quiet bit of fraction
- const mask_rest: u64 = max(u51) ; // 51 rest bits of fraction
- // zig fmt: on
-
- // Mask for pointer type tag bits
- const mask_ptr_tag: u48 = max(PtrTagUIntType);
-
/// Dumps the value for inspection.
pub fn dump(v: Value) void {
- const sign: u1 = @intCast((v.bits & mask_sign) >> 63);
- const exp: u11 = @intCast((v.bits & mask_exp) >> 52);
- const quiet: u1 = @intCast((v.bits & mask_quiet) >> 51);
- const rest: u51 = @intCast(v.bits & mask_rest);
std.debug.print(
\\value: 0x{x}
\\sign: {}, exp: {b}, quiet: {}
\\rest: 0x{x}
\\
- , .{ v.bits, sign, exp, quiet, rest });
+ , .{ v.bits, v.ieee.sign, v.ieee.exp, v.ieee.quiet, v.ieee.rest });
}
- /// To treat the 64-bit value as an 8-byte array.
+ /// To treat the 64-bit value as an 8-byte const array.
pub fn bufRO(v: *const Value) *const [8]u8 {
return @ptrCast(v);
}
- /// To treat the 64-bit value as an 8-byte array.
+ /// To treat the 64-bit value as an 8-byte mutable array.
pub fn bufRW(v: *Value) *[8]u8 {
return @ptrCast(v);
}
@@ -391,19 +200,11 @@ pub const Value = packed union {
return v1.bits == v2.bits;
}
- // It would be great if we could just write the most readable code here,
- // using the packed struct definitions above, but the optimizer isn't
- // sufficiently intelligent to turn that into optimal instructions, so
- // manual bit fiddling it is.
+ // It would be great if we could just write the most readable code in the
+ // following functions, using the packed struct definitions above, but the
+ // optimizer isn't smart enough, so manual bit fiddling it is.
- // The isDouble and isFixnum checks are not as efficient as we would like
- // them to be, each requiring two 10-byte MOV instructions to load 64-bit
- // values for masking and comparison. This seems difficult to improve on
- // without sacrificing large value ranges in our NaN-packing scheme; it's
- // probably best to leave it like this and hope that type checks will be
- // hoisted outside hot-spots by a smart compiler.
-
- /// Checks for a Zisp double, including: +nan.0, -nan.0, +inf.0, -inf.0
+ /// Checks for a double, including: +nan, -nan, +inf, -inf.
pub fn isDouble(v: Value) bool {
// Readable version:
//
@@ -411,18 +212,22 @@ pub const Value = packed union {
//
// Optimized:
//
- // 1. AND away the sign and quiet bits, since they may vary.
+ // 1. Shift the 12 highest bits all the way to the right, then mask out
+ // the sign bit. Allows efficiently checking if the exponent is all
+ // set by comparing to 0x7ff.
//
- // 2. If exp is all 1, and rest non-zero, it's a packed value, so check
- // if the value is less than that.
+ // 2. Shift out 13 high bits (sign, expt, quiet) so we can easily check
+ // if the rest bits are zero.
//
- return v.bits & ~(mask_sign | mask_quiet) <= mask_exp;
+ const expt = v.bits >> 52 & 0x7ff;
+ const rest = v.bits << 13;
+ return expt != 0x7ff or rest == 0;
}
// Imagine there's an isPacked() implemented as !isDouble() here, used to
// make the following functions more readable.
- /// Checks for a fixnum.
+ /// Checks for a fixnum integer.
pub fn isFixnum(v: Value) bool {
// Readable version:
//
@@ -430,137 +235,93 @@ pub const Value = packed union {
//
// Optimized:
//
- // 1. AND away the quiet bit, since it may vary.
+ // 1. Shift the 12 highest bits all the way to the right; these need to
+ // be all set, since the sign bit (and all exponent bits) being set
+ // marks a fixnum.
//
- // 2. Check if highest 12 all 1, rest not all 0, with a greater-than.
+ // 2. But also verify that the lowest 51 bits aren't zero, since that
+ // would be a cqNaN or Infinity.
//
- return v.bits & ~mask_quiet > mask_sign | mask_exp;
+ const expt = v.bits >> 52;
+ const rest = v.bits << 13;
+ return expt == 0xfff and rest != 0;
+ }
+
+ /// Checks if the value is any type-tagged pointer. You probably want to
+ /// use getPtr() or getPtrAny() instead, so you can combine the check with
+ /// the extraction of the pointer and type tag.
+ pub fn isPtrAny(v: Value) bool {
+ const hi: u16 = @intCast(v.bits >> 48);
+ const lo: u48 = @truncate(v.bits);
+ return hi == hi16_tag_ptr and lo != 0;
}
/// Checks for a pointer with a given type tag, returning null on failure
- /// and the pointer value otherwise. This function is for when a value is
- /// assumed to be of a heap type, such as when `car` is called on it, and
- /// the "high" pointer tags (49-51) that encode properties like weakness
- /// don't matter; we only care if it's some kind of valid heap pointer of
- /// the given type, so we only check the type tag in the low bits (0-2).
- pub fn getPtr(v: Value, comptime tag: PtrTag) ?tag.ptype() {
- // Readable version:
- //
- // if (v.isPacked() and !v.ieee.sign and v.ieee.quiet ...)
- //
- // Optimized:
- //
- // I'm not explaining that shit. Figure it out yourself.
- //
- // Jokes aside, the code should be self-explanatory, except for:
- //
- // Intermediate values should be given explicit types here, for optimal
- // code-gen. And it should be noted that the null pointer, with no
- // other high bits set (49-51), which would represent +cqNaN, also
- // results in a null return, so it can't be mistaken for a pointer.
- //
- // This function currently assumes that bits 50 and 51 being set, which
- // has no defined semantics right now, doesn't change the fact that the
- // pointer is a Zisp heap pointer. If we decide to use those bits for
- // something else, simply change the hi_bits check to include them, so
- // that for example bits 50 and 51 must be zero, or 51 must be zero
- // while 50 is still ignored, and so on, as appropriate.
- //
- const hi_bits: u14 = @intCast(v.bits >> 50);
- const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag);
- const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag);
- const is_ptr = hi_bits == 0b01111111111110;
- const is_tag = tag_bits == @intFromEnum(tag);
- return if (is_ptr and is_tag) @ptrFromInt(ptr_val) else null;
+ /// and the pointer value otherwise.
+ pub fn getPtr(v: Value, comptime ht: HeapType) ?ht.PtrType() {
+ // Check if the 20 highest bits equal the combination of the 16-bit
+ // pattern marking a pointer, followed by the 4-bit heap type tag.
+ const hi20_bits: u20 = @intCast(v.bits >> 44);
+ const ptr_bits: u20 = hi16_tag_ptr;
+ const tag_bits: u20 = @intFromEnum(ht);
+ if (hi20_bits != ptr_bits << 4 | tag_bits) return null;
+
+ // A zero payload value here would actually indicate that the value is
+ // not a pointer at all, but the Positive cqNaN value; this is totally
+ // fine, because @ptrFromInt() turns zero into null anyway.
+ return @ptrFromInt(v.bits << 20 >> 16);
}
/// Checks for a pointer and returns the value and tag separately, or null
- /// if this isn't a pointer at all. This could be useful for a dispatch
- /// table based on type.
- pub fn getPtrAny(v: Value) ?struct { Zptr, PtrTag } {
- // See last function, which is almost identical.
- const hi_bits: u14 = @intCast(v.bits >> 50);
- const ptr_val: u48 = @intCast(v.bits & ~mask_ptr_tag);
- const tag_bits: PtrTagUIntType = @intCast(v.bits & mask_ptr_tag);
- const is_ptr = hi_bits == 0b01111111111110;
- const zptr: Zptr = @ptrFromInt(ptr_val);
- const tag: PtrTag = @enumFromInt(tag_bits);
- return if (is_ptr) .{ zptr, tag } else null;
+ /// if this isn't a pointer at all. Could be useful for a dispatch table.
+ pub fn getPtrAny(v: Value) ?struct { Zptr, HeapType } {
+ const hi16_bits: u16 = @intCast(v.bits >> 48);
+ if (hi16_bits != hi16_tag_ptr) return null;
+
+ const ptr_val: u48 = @intCast(v.bits << 20 >> 16);
+ if (ptr_val == 0) return null;
+
+ const ht_val: u4 = @intCast(v.bits << 16 >> 60);
+ return .{ @ptrFromInt(ptr_val), @enumFromInt(ht_val) };
}
- /// Checks whether the value is a pointer with certain property bits, not
- /// caring about the type tag bits. NOTE: This function doesn't check for
- /// +cqNaN, which will be mis-identified as a pointer with zero props and
- /// address. Therefore, don't expose this function to Zisp! Parameter
- /// `props_mask` specifies props bits to mask away with a NAND, so it's
- /// possible to only check for bits you care for. For example, to check
- /// weakness, pass `0b110` for `props_mask` and `0b001` for `props`.
- pub fn isPtrProps(v: Value, props_mask: u3, props: u3) bool {
- const hi_bits: u16 = @intCast(v.bits >> 48 & ~@as(u16, props_mask));
- return hi_bits == 0b0111111111111000 | @as(u16, props);
+ /// Checks if the value is a list pointer.
+ pub fn isList(v: Value) bool {
+ return v.bits >> 48 == hi16_tag_list;
}
- /// Checks whether the value is any kind of pointer, making sure not to
- /// mis-identify +cqNaN as a null pointer with zero property bits, which
- /// makes this a bit slower than `isPtrProps()`.
- pub fn isPtrAny(v: Value) bool {
- const hi: u14 = @intCast(v.bits >> 50);
- const lo: u50 = @truncate(v.bits);
- return hi == 0b01111111111110 and lo != 0;
+ /// Checks if the value is an istr pointer.
+ pub fn isIstr(v: Value) bool {
+ return v.bits >> 48 == hi16_tag_istr;
+ }
+
+ /// Checks if the value is a short string.
+ pub fn isSstr(v: Value) bool {
+ return v.bits >> 48 == hi16_tag_sstr;
}
- // TODO: Needs update from here on, and test the above
+ /// Checks if the value is a small rat (rational number).
+ pub fn isSrat(v: Value) bool {
+ return v.bits >> 49 == hi15_tag_srat;
+ }
/// Checks for a rune.
pub fn isRune(v: Value) bool {
- //
- // 1. AND away non-MSb bits of the low 6 bytes, since they may vary.
- //
- // 2. Check if the rest is the exact pattern we expect, with sign = 0,
- // exp all 1, quiet = 0, 3-bit tag = 0, and MSb of remaining bytes 0,
- // i.e., only the exp bits are set.
- //
- // 3. Ensnure that the "first" (based on endianness) of the 6 lowest
- // bytes is not NUL, which would mean it's the illegal "empty string"
- // rune corresponding to +Infinity.
- //
- // Note that, while this is a rather complicated check for a type that
- // appears ubiquitously in parser output, we rarely use this type check,
- // instead using direct comparison to specific rune values, so it's not
- // important if this is a little inefficient.
- //
- const msb_mask = 0xffff808080808080;
- const nul_mask = switch (endian) {
- .big => 127 << 40,
- .little => 127,
- };
- return v.bits & msb_mask == mask_exp and v.bits & nul_mask != 0;
+ // This check isn't used much, since one typically checks for specific
+ // runes via direct equality, so the efficiency here is unimportant.
+ const hi = v.bits >> 48;
+ const lo = v.bits << 16;
+ const msb_mask = 0x7f7f7f7f7f7f0000;
+ return hi == 0x7fff and lo != 0 and lo & msb_mask == lo;
}
- // The rest are straightforward compared to the above, since the cqNaN and
- // Infinity special-cases are out of the way and we just need to compare N
- // high bits to a specific pattern.
-
- /// Checks for a 24-bit character value. This makes no guarantees related
- /// to Unicode, such as being a valid Code Point or Unicode Scalar Value.
+ // TODO: Use a general Small Value type registration mechanism.
pub fn isChar(v: Value) bool {
- // TODO: Use a general Small Value type registration mechanism.
- return v.bits >> 24 == 0x7ff0000080;
+ return v.bits >> 24 == 0x7fff000080;
}
- /// Checks for an 8-bit miscellaneous value.
+ // TODO: Use a general Small Value type registration mechanism.
pub fn isMisc(v: Value) bool {
- // TODO: Use a general Small Value type registration mechanism.
- return v.bits >> 8 == 0x7ff00000000080;
- }
-
- /// Checks if the value is a short string of either kind.
- pub fn isSstr(v: Value) bool {
- return v.bits >> 48 == 0x7ff1;
- }
-
- /// Checks if the value is a small rat (rational number).
- pub fn isSrat(v: Value) bool {
- return v.bits >> 49 == 0x7ff2 >> 1;
+ return v.bits >> 8 == 0x7fff0000000080;
}
};
diff --git a/src/zisp/value/array.zig b/src/zisp/value/array.zig
index ad04ce2..5d326a1 100644
--- a/src/zisp/value/array.zig
+++ b/src/zisp/value/array.zig
@@ -140,7 +140,7 @@ pub const ArrayHeader = packed struct(u64) {
}
}
- pub fn str(self: *Self) []const u8 {
+ pub fn bytes(self: *Self) []u8 {
if (self.is_slice) {
const buf = self.bufU8();
const start, const end = self.sliceInfo();
@@ -149,11 +149,15 @@ pub const ArrayHeader = packed struct(u64) {
var arr = self;
if (self.is_ptr) {
arr = self.arrPointer() orelse {
- @panic("Called str() on array with direct buffer pointer.");
+ @panic("Called bytes() on array with direct buffer pointer.");
};
}
return arr.bufContent()[0..arr.size()];
}
+
+ pub fn bytesRO(self: *Self) []const u8 {
+ return self.bytes();
+ }
};
const ArrayType = enum(u2) { int, flt, val, str };
diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig
index 35093ed..dfe4614 100644
--- a/src/zisp/value/istr.zig
+++ b/src/zisp/value/istr.zig
@@ -15,7 +15,7 @@ const Value = value.Value;
// Zig API
/// Pointer to an interned string. First byte is length.
-pub const IstrPtr = *align(@alignOf(value.Zptr)) IstrHead;
+pub const IstrPtr = *IstrHead;
pub const IstrHead = packed struct(u8) {
len: u8,
@@ -24,7 +24,7 @@ pub const IstrHead = packed struct(u8) {
return @ptrCast(self);
}
- pub fn str(self: *@This()) []const u8 {
+ pub fn bytes(self: *@This()) []const u8 {
const start = @sizeOf(IstrHead);
const len: usize = self.len;
return self.bufU8()[start .. start + len];
@@ -37,15 +37,15 @@ pub const IstrHead = packed struct(u8) {
}
};
-pub fn check(v: Value) ?IstrPtr {
- return v.getPtr(.istr) orelse null;
+pub fn check(v: Value) bool {
+ return v.isIstr();
}
-pub fn assert(v: Value) IstrPtr {
- return check(v) orelse {
+pub fn assert(v: Value) void {
+ if (!check(v)) {
v.dump();
@panic("not istr");
- };
+ }
}
pub fn isValidIstr(s: []const u8) bool {
@@ -68,7 +68,12 @@ pub fn new(alloc: Alloc, s: []const u8) !IstrPtr {
}
pub fn pack(istr: IstrPtr) Value {
- return value.ptr.pack(.istr, istr);
+ const ptr = @intFromPtr(istr);
+ return .{ .istr = .{ .ptr = @intCast(ptr) } };
+}
+
+pub fn unpack(v: Value) IstrPtr {
+ return @ptrFromInt(v.istr.ptr);
}
pub fn getOrNew(s: []const u8) !Value {
@@ -80,19 +85,22 @@ pub fn getOrNewInSet(set: *IstrSet, s: []const u8) !Value {
return pack(try set.getOrNew(s));
}
-pub fn getStr(v: Value) []const u8 {
- return assert(v).str();
+pub fn getBytes(v: Value) []const u8 {
+ assert(v);
+ const istr: IstrPtr = @ptrFromInt(v.istr.ptr);
+ return istr.bytes();
}
// Zisp API
pub fn pred(v: Value) Value {
- return value.boole.pack(check(v) != null);
+ return value.boole.pack(check(v));
}
pub fn getLen(v: Value) Value {
- const istr = assert(v);
- return value.fixnum.pack(@intCast(istr.len));
+ assert(v);
+ const istr: IstrPtr = @ptrFromInt(v.istr.ptr);
+ return value.fixnum.pack(istr.len);
}
// TODO: Zisp representation of IstrSet & ability to intern in given IstrSet
diff --git a/src/zisp/value/list.zig b/src/zisp/value/list.zig
new file mode 100644
index 0000000..d040a34
--- /dev/null
+++ b/src/zisp/value/list.zig
@@ -0,0 +1,64 @@
+//! List array
+
+const std = @import("std");
+
+const Alloc = std.mem.Allocator;
+
+const gc = @import("../gc.zig");
+const value = @import("../value.zig");
+
+const ListPool = gc.ListPool;
+const Value = value.Value;
+
+// Zig API
+
+pub fn check(v: Value) bool {
+ return v.isList();
+}
+
+pub fn assert(v: Value) void {
+ if (!check(v)) {
+ v.dump();
+ @panic("not istr");
+ }
+}
+
+pub fn new(alloc: Alloc, pool: ?*ListPool, vals: []const Value) !Value {
+ var len_tagged_ptr: u48 = undefined;
+ if (vals.len < 8 and pool != null) {
+ const ptr = try pool.?.create(@intCast(vals.len));
+ for (vals, 0..) |v, i| ptr[i] = v;
+ len_tagged_ptr = @intCast(@intFromPtr(ptr) | vals.len);
+ } else {
+ const ptr = try alloc.alloc(Value, vals.len + 1);
+ for (vals, 0..) |v, i| ptr[i] = v;
+ ptr[vals.len] = value.none;
+ len_tagged_ptr = @intCast(@intFromPtr(ptr.ptr));
+ }
+ return .{ .list = .{ .len_tagged_ptr = len_tagged_ptr } };
+}
+
+pub fn getLenTag(v: Value) u3 {
+ return @truncate(v.bits);
+}
+
+pub fn getValPtr(v: Value) [*]Value {
+ return @ptrFromInt(v.bits & 0x0000fffffffffff8);
+}
+
+// Zisp API
+
+pub fn pred(v: Value) Value {
+ return value.boole.pack(check(v));
+}
+
+pub fn getLen(v: Value) Value {
+ assert(v);
+ var len = getLenTag(v);
+ if (len == 0) {
+ const ptr = getValPtr(v);
+ len = 8;
+ while (ptr[len].bits != value.none.bits) len += 1;
+ }
+ return value.fixnum.pack(@intCast(len));
+}
diff --git a/src/zisp/value/ptr.zig b/src/zisp/value/ptr.zig
index ef12f79..3a6995b 100644
--- a/src/zisp/value/ptr.zig
+++ b/src/zisp/value/ptr.zig
@@ -3,7 +3,7 @@ const std = @import("std");
const gc = @import("../gc.zig");
const value = @import("../value.zig");
-const PtrTag = value.PtrTag;
+const HeapType = value.HeapType;
const Value = value.Value;
const Zptr = value.Zptr;
@@ -22,70 +22,12 @@ pub fn assert(v: Value) void {
}
}
-pub fn checkWeak(v: Value) bool {
- return v.isPtrProps(0b110, 0b001);
+pub fn pack(comptime ht: HeapType, ptr: ht.PtrType()) Value {
+ const index: u44 = @intCast(@intFromPtr(ptr) >> 4);
+ return .{ .ptr = .{ .index = index, .htype = ht } };
}
-pub fn assertWeak(v: Value) void {
- if (!checkWeak(v)) {
- v.dump();
- @panic("not weak pointer");
- }
-}
-
-pub fn _pack(comptime tag: PtrTag, ptr: tag.ptype(), is_weak: bool) Value {
- const ptr_val: usize = @intFromPtr(ptr);
- std.debug.assert(ptr_val < std.math.maxInt(u48));
- const tagged: u48 = @intCast(ptr_val | @intFromEnum(tag));
- return .{ .ptr = .{ .tagged_value = tagged, .is_weak = is_weak } };
-}
-
-pub fn pack(comptime tag: PtrTag, ptr: tag.ptype()) Value {
- return _pack(tag, ptr, false);
-}
-
-pub fn packWeak(comptime tag: PtrTag, ptr: tag.ptype()) Value {
- return _pack(tag, ptr, true);
-}
-
-pub fn setWeakNull(v: *Value) void {
- assertWeak(v.*);
- v.ptr.tagged_value = 0;
-}
-
-pub fn isWeakNull(v: Value) bool {
- assertWeak(v);
- return v.ptr.tagged_value == 0;
-}
-
-pub fn unpack(v: Value) struct { Zptr, PtrTag } {
+pub fn unpack(v: Value) struct { Zptr, HeapType } {
assert(v);
return v.getPtrAny() orelse unreachable;
}
-
-// Zisp API
-
-pub fn makeWeak(v: Value) Value {
- assert(v);
- var copy = v;
- copy.ptr.is_weak = true;
- return copy;
-}
-
-pub fn predWeak(v: Value) Value {
- return value.boole.pack(checkWeak(v));
-}
-
-pub fn predWeakNull(v: Value) Value {
- return value.boole.pack(isWeakNull(v));
-}
-
-pub fn getWeak(v: Value) Value {
- if (isWeakNull(v)) {
- return value.f;
- } else {
- var copy = v;
- copy.ptr.is_weak = false;
- return copy;
- }
-}
diff --git a/src/zisp/value/srat.zig b/src/zisp/value/srat.zig
new file mode 100644
index 0000000..65b3dba
--- /dev/null
+++ b/src/zisp/value/srat.zig
@@ -0,0 +1 @@
+// todo
diff --git a/src/zisp/value/string.zig b/src/zisp/value/string.zig
index 129e35b..1fe5e5e 100644
--- a/src/zisp/value/string.zig
+++ b/src/zisp/value/string.zig
@@ -4,10 +4,17 @@ const value = @import("../value.zig");
const Value = value.Value;
-pub fn getString(s: *const Value) ![]const u8 {
+pub fn isString(v: Value) bool {
+ if (value.sstr.check(v)) return true;
+ if (value.istr.check(v)) return true;
+ if (value.array.check(.str, v)) |_| return true;
+ return false;
+}
+
+pub fn getBytes(s: *const Value) []const u8 {
if (value.sstr.check(s.*)) return value.sstr.unpack(s);
- if (value.istr.check(s.*)) |istr_ptr| return istr_ptr.str();
- if (value.array.check(.str, s.*)) |arr_ptr| return arr_ptr.str();
+ if (value.istr.check(s.*)) return value.istr.unpack(s.*).bytes();
+ if (value.array.check(.str, s.*)) |arr_ptr| return arr_ptr.bytesRO();
s.dump();
@panic("This is not a string.");
}
diff --git a/update-html.sh b/update-html.sh
index 0c37ffd..e611006 100755
--- a/update-html.sh
+++ b/update-html.sh
@@ -20,7 +20,10 @@ md2ht() {
fi
echo "$src -> $dst"
title=$(sed 's/# //; s/&/\\&/; s_/_\\/_; q' "$src")
- if [ "${src##*/}" = index.md ]
+ if [ "$src" = html/index.md ] # It's the root
+ then
+ nav=''
+ elif [ "${src##*/}" = index.md ]
then
nav='<a href="..">^ Up</a>'
else
@@ -34,7 +37,10 @@ md2ht() {
{
sed "s/__TITLE__/$title/" html/prelude.html
echo "<body>"
- sed "1 a $nav" < "$src" | markdown2 -x "$ext"
+ if [ "$nav" ]
+ then
+ sed "1 a $nav" < "$src" | markdown2 -x "$ext"
+ fi
echo "</body>"
echo "</html>"
} > "$dst"