summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylan Kammer <taylan.kammer@gmail.com>2025-03-18 21:39:51 +0100
committerTaylan Kammer <taylan.kammer@gmail.com>2025-03-18 21:39:51 +0100
commitf1c256884b0d59683e8bd43160b048561191a809 (patch)
tree804f356fccb0e1a2b77f61e25bc81cbfc2452b03
parentc43c3c22e5d0f872168c5b687141c7b08a188c5d (diff)
Implement istr.
-rw-r--r--src/libzisp.zig78
-rw-r--r--src/libzisp/gc.zig30
-rw-r--r--src/libzisp/io/unparser.zig3
-rw-r--r--src/libzisp/value.zig3
-rw-r--r--src/libzisp/value/fixnum.zig12
-rw-r--r--src/libzisp/value/istr.zig55
-rw-r--r--src/libzisp/value/pair.zig1
-rw-r--r--src/libzisp/value/ptr.zig6
-rw-r--r--src/libzisp/value/seq.zig56
-rw-r--r--test-data/parser-test-1.scm197
-rw-r--r--test-data/parser-test-2.scm19
-rw-r--r--test-data/string.txt1
12 files changed, 428 insertions, 33 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig
index f67f568..3a217fd 100644
--- a/src/libzisp.zig
+++ b/src/libzisp.zig
@@ -47,7 +47,7 @@ test "ptr" {
const ptr = value.ptr;
const val: [*]Hval = @ptrFromInt(256);
- const tag = ptr.Tag.istr;
+ const tag = ptr.Tag.pair;
const p = ptr.pack(val, tag);
try std.testing.expect(ptr.check(p));
@@ -246,6 +246,31 @@ test "pair" {
try std.testing.expectEqual(4, value.fixnum.unpack(cdr2));
}
+// Checks that interning copies the string: the interned bytes and length
+// match the input, and stay unchanged when the input buffer is modified
+// afterwards.
+test "istr" {
+    const istr = value.istr;
+    const fx = value.fixnum;
+    const expectEqual = std.testing.expectEqual;
+    const expectEqualStrings = std.testing.expectEqualStrings;
+
+    // Case 1: intern a string literal.
+    const literal = "foo bar baz";
+    const from_literal = istr.intern(literal, false);
+    const literal_len: usize = @intCast(fx.unpack(istr.len(from_literal)));
+    try expectEqualStrings(literal, istr.getHeader(from_literal).bytes());
+    try expectEqual(literal.len, literal_len);
+
+    // Case 2: intern bytes read from a file into a mutable stack buffer.
+    const file = try std.fs.cwd().openFile("test-data/string.txt", .{});
+    defer file.close();
+    var read_buf: [4096]u8 = undefined;
+    const read_len = try file.readAll(&read_buf);
+    const contents: []u8 = read_buf[0..read_len];
+    const from_file = istr.intern(contents, false);
+    const from_file_len: usize = @intCast(fx.unpack(istr.len(from_file)));
+
+    // Keep a pristine snapshot, then clobber the first byte of the buffer
+    // that was handed to intern().
+    var snapshot_buf: [4096]u8 = undefined;
+    @memcpy(&snapshot_buf, &read_buf);
+    const snapshot = snapshot_buf[0..read_len];
+    contents[0] +%= 1;
+
+    // The interned string must still equal the snapshot, not the modified
+    // buffer.
+    try expectEqualStrings(snapshot, istr.getHeader(from_file).bytes());
+    try expectEqual(read_len, from_file_len);
+}
+
fn parseString(str: []const u8) Value {
var fbs = std.io.fixedBufferStream(str);
return io.parser.parse(fbs.reader().any());
@@ -303,42 +328,65 @@ test "parse4" {
try std.testing.expectEqualStrings("bar", f.slice());
}
-test "parse bench" {
+fn parseBench(path: []const u8) !void {
const iters = switch (@import("builtin").mode) {
.Debug, .ReleaseSmall => 1000,
.ReleaseSafe => 10_000,
.ReleaseFast => 100_000,
};
+
+ var buf: [8196]u8 = undefined;
+ const file = try std.fs.cwd().openFile(path, .{});
+ defer file.close();
+ const count = try file.readAll(&buf);
+
+ var fbs = std.io.fixedBufferStream(buf[0..count]);
+ const reader = fbs.reader().any();
+
var timer = try std.time.Timer.start();
- std.mem.doNotOptimizeAway(timer.lap());
for (0..iters) |i| {
_ = i;
- std.mem.doNotOptimizeAway(parseString(
- \\(a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c
- \\(x y z (a b c (x y z (a b c (x y z) d e f) d e f) d e f) d e f)
- \\d e f) d e f) d e f) d e f) d e f) d e f) d e f) 1 2 3))
- ));
+ var v: Value = undefined;
+ while (true) {
+ v = io.parser.parse(reader);
+ if (value.eof.check(v)) {
+ break;
+ }
+ }
+ try fbs.seekTo(0);
}
const ns: f64 = @floatFromInt(timer.lap());
const secs = ns / 1_000_000_000;
- std.debug.print("parse {} times: {d:.3}s\n", .{ iters, secs });
+ std.debug.print(
+ "parse {s} x {}: {d:.3}s\n",
+ .{ path, iters, secs },
+ );
}
-test "unparse" {
- const unparse = io.unparser.unparse;
+// Runs the parser benchmark over each bundled test file, in order.
+test "parse bench" {
+    const paths = [_][]const u8{
+        "test-data/parser-test-1.scm",
+        "test-data/parser-test-2.scm",
+    };
+    for (paths) |path| {
+        try parseBench(path);
+    }
+}
+test "unparse" {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
var out: std.ArrayList(u8) = .init(gpa.allocator());
const w = out.writer();
const v = parseString("#foo");
- try unparse(w, v);
+ try io.unparser.unparse(w, v);
try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice());
}
test "unparse2" {
- try io.unparser.unparse(
- std.io.getStdErr().writer(),
- parseString("#{foo bar['x]}"),
+ var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+ var out: std.ArrayList(u8) = .init(gpa.allocator());
+
+ const w = out.writer();
+ const v = parseString("#{foo bar['x]}");
+ try io.unparser.unparse(w, v);
+ try std.testing.expectEqualStrings(
+ "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))",
+ try out.toOwnedSlice(),
);
}
diff --git a/src/libzisp/gc.zig b/src/libzisp/gc.zig
index 92b4387..46ac091 100644
--- a/src/libzisp/gc.zig
+++ b/src/libzisp/gc.zig
@@ -5,18 +5,38 @@ const value = @import("value.zig");
const Value = value.Value;
const Hval = value.Hval;
-var _gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+var _gpa = std.heap.GeneralPurposeAllocator(.{}).init;
const gpa = _gpa.allocator();
-var cpool = std.heap.MemoryPool([2]Value).init(gpa);
+// Cons cells
+
+var cons_pool = std.heap.MemoryPool([2]Value).init(gpa);
pub fn cons(v1: Value, v2: Value) *[2]Value {
- const mem = cpool.create() catch @panic("OOM");
+ const mem = cons_pool.create() catch @panic("OOM");
mem[0] = v1;
mem[1] = v2;
return mem;
}
-pub fn alloc(count: usize) []Hval {
- return gpa.alloc(Hval, count) catch @panic("OOM");
+// Interned strings
+
+var istr_pool = std.hash_map.StringHashMap(void).init(gpa);
+
+/// Intern the byte string `str` prefixed by `header`, returning a pointer to
+/// the interned block: the 8 header bytes followed directly by the string
+/// bytes.
+///
+/// The header bytes are part of the lookup key, so the same text interned
+/// with two different headers yields two distinct entries. Interning an
+/// already-present header+string returns the pointer of the existing entry.
+///
+/// The returned pointer addresses the block itself, not a slot inside
+/// `istr_pool`; hash map slots move when the map grows, so a pointer into
+/// the map would dangle after later insertions.
+///
+/// Panics with "OOM" if allocation fails.
+pub fn intern(header: value.seq.Header, str: []const u8) [*]Hval {
+    comptime {
+        std.debug.assert(@sizeOf(value.seq.Header) == 8);
+    }
+    const size = str.len + 8;
+    // Allocate whole Hval cells rather than raw bytes so that the header at
+    // the start of the block has the alignment istrHeader() relies on.
+    const cell_count = (size + @sizeOf(Hval) - 1) / @sizeOf(Hval);
+    const cells = gpa.alloc(Hval, cell_count) catch @panic("OOM");
+    const copy = std.mem.sliceAsBytes(cells)[0..size];
+    const header_bytes: [8]u8 = @bitCast(header);
+    @memcpy(copy[0..8], &header_bytes);
+    @memcpy(copy[8..size], str);
+    const entry = istr_pool.getOrPut(copy) catch @panic("OOM");
+    if (entry.found_existing) {
+        // The pool keeps the key it already had; release the duplicate
+        // instead of leaking it.
+        gpa.free(cells);
+    }
+    // The key slice points at the start of the interned block.
+    return @ptrCast(@alignCast(@constCast(entry.key_ptr.*.ptr)));
+}
+
+/// Return the header stored at the start of an interned block.
+///
+/// `ptr` must be a pointer previously returned by `intern`.
+pub fn istrHeader(ptr: [*]Hval) *value.seq.Header {
+    return @alignCast(@ptrCast(ptr));
}
diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig
index c25e918..dd48364 100644
--- a/src/libzisp/io/unparser.zig
+++ b/src/libzisp/io/unparser.zig
@@ -34,8 +34,7 @@ fn unparseHeap(w: anytype, v: Value) !void {
const p, const t = value.ptr.unpack(v);
try switch (t) {
.pair => unparsePair(w, p),
- .istr => @panic("not implemented"),
- .proc => @panic("not implemented"),
+ else => @panic("not implemented"),
};
}
diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig
index 6c0c2e9..aefca14 100644
--- a/src/libzisp/value.zig
+++ b/src/libzisp/value.zig
@@ -149,6 +149,7 @@ pub const double = @import("value/double.zig");
pub const fixnum = @import("value/fixnum.zig");
pub const ptr = @import("value/ptr.zig");
+pub const seq = @import("value/seq.zig");
pub const rune = @import("value/rune.zig");
pub const sstr = @import("value/sstr.zig");
@@ -158,6 +159,7 @@ pub const nil = @import("value/nil.zig");
pub const eof = @import("value/eof.zig");
pub const pair = @import("value/pair.zig");
+pub const istr = @import("value/istr.zig");
// To fill up the u11 exponent part of a NaN.
const FILL = 0x7ff;
@@ -321,4 +323,5 @@ pub const Value = packed union {
/// A "heap value" that could be a Value or object header.
pub const Hval = packed union {
value: Value,
+ seq_header: seq.Header,
};
diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig
index c705880..80fb4ae 100644
--- a/src/libzisp/value/fixnum.zig
+++ b/src/libzisp/value/fixnum.zig
@@ -19,19 +19,15 @@ pub fn assert(v: Value) void {
}
// See detailed NaN packing docs for why the +/- 1.
-const fixnum_min = std.math.minInt(i52) + 1;
-const fixnum_max = std.math.maxInt(i52) - 1;
-
-pub fn isValidRange(int: i64) bool {
- return fixnum_min < int and int < fixnum_max;
-}
+pub const min = std.math.minInt(i52) + 1;
+pub const max = std.math.maxInt(i52) - 1;
fn assertValidRange(int: i64) void {
- if (int < fixnum_min) {
+ if (int < min) {
std.debug.print("int too small for fixnum: {}\n", .{int});
@panic("int too small for fixnum");
}
- if (int > fixnum_max) {
+ if (int > max) {
std.debug.print("int too large for fixnum: {}\n", .{int});
@panic("int too large for fixnum");
}
diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig
index 5937531..8056d98 100644
--- a/src/libzisp/value/istr.zig
+++ b/src/libzisp/value/istr.zig
@@ -1,3 +1,58 @@
const std = @import("std");
const value = @import("../value.zig");
+const gc = @import("../gc.zig");
+
+const ptr = @import("ptr.zig");
+const seq = @import("seq.zig");
+
+const Value = value.Value;
+
+// Zig API
+
+/// Return the result of `ptr.checkZispTag(v, .seq)`.
+///
+/// NOTE(review): this looks only at the `.seq` pointer tag; it does not
+/// inspect the sequence header, so it presumably also accepts sequences that
+/// are not interned strings -- confirm whether that is intended.
+pub fn check(v: Value) bool {
+    const has_seq_tag = ptr.checkZispTag(v, .seq);
+    return has_seq_tag;
+}
+
+/// Panic with "not istr" unless `check(v)` holds; the offending value is
+/// dumped first via `v.dump()`.
+pub fn assert(v: Value) void {
+    if (check(v)) return;
+    v.dump();
+    @panic("not istr");
+}
+
+/// Intern `str` as a UTF-8 string sequence and return it as a `.seq`-tagged
+/// pointer value.
+///
+/// `quoted` is stored as-is in the header's `quoted` flag; the `interned`
+/// flag is always set. Panics if `str.len` exceeds the fixnum range or does
+/// not fit the header's size field.
+pub fn intern(str: []const u8, quoted: bool) Value {
+    if (str.len > value.fixnum.max) {
+        @panic("String length out of fixnum range.");
+    }
+    // The header's size field (u48) is narrower than the fixnum range, so the
+    // check above alone does not make the narrowing safe; convert with an
+    // explicit range check instead of an unchecked cast.
+    const size = std.math.cast(u48, str.len) orelse
+        @panic("String length out of seq header range.");
+    const header: seq.Header = .{
+        .type = .string,
+        .info = .{ .string = .{
+            .enc = .utf8,
+            .quoted = quoted,
+            .interned = true,
+        } },
+        .size = size,
+    };
+    const bytes_ptr = gc.intern(header, str);
+    return ptr.pack(bytes_ptr, .seq);
+}
+
+/// Return the sequence header of the interned string `v`.
+///
+/// Panics (through `assert`) if `v` does not pass `check`.
+pub fn getHeader(v: Value) *seq.Header {
+    assert(v);
+    // Only the pointer half of the unpacked (pointer, tag) pair is needed.
+    const heap_ptr, _ = ptr.unpack(v);
+    return gc.istrHeader(heap_ptr);
+}
+
+// Zisp API
+
+/// Zisp-level predicate: the result of `check(v)` packed as a boolean value.
+pub fn pred(v: Value) Value {
+    const is_istr = check(v);
+    return value.boole.pack(is_istr);
+}
+
+/// Return the `size` field of the string's header as a fixnum value.
+///
+/// Panics if `v` is not accepted by `getHeader`, or if the size exceeds the
+/// fixnum range.
+pub fn len(v: Value) Value {
+    const size = getHeader(v).size;
+    if (size > value.fixnum.max) @panic("string length out of range");
+    return value.fixnum.pack(@intCast(size));
+}
diff --git a/src/libzisp/value/pair.zig b/src/libzisp/value/pair.zig
index 87e18e7..6ea1edf 100644
--- a/src/libzisp/value/pair.zig
+++ b/src/libzisp/value/pair.zig
@@ -1,4 +1,5 @@
const std = @import("std");
+
const value = @import("../value.zig");
const gc = @import("../gc.zig");
diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig
index 115cc2d..b07acc4 100644
--- a/src/libzisp/value/ptr.zig
+++ b/src/libzisp/value/ptr.zig
@@ -132,10 +132,10 @@ fn untagPtr(tagged: u48) struct { [*]Hval, Tag } {
}
pub const Tag = enum(u3) {
- /// *[2]Value
+ /// Pair aka cons cell aka *[2]Value
pair,
- /// Interned string (symbol)
- istr,
+ /// Sequence of various kinds (16-bit meta, 48-bit length, then data)
+ seq,
/// Procedure
proc,
};
diff --git a/src/libzisp/value/seq.zig b/src/libzisp/value/seq.zig
new file mode 100644
index 0000000..5382a7e
--- /dev/null
+++ b/src/libzisp/value/seq.zig
@@ -0,0 +1,56 @@
+const builtin = @import("builtin");
+const std = @import("std");
+
+const value = @import("../value.zig");
+const gc = @import("../gc.zig");
+
+const Value = value.Value;
+
+/// One-bit byte-order flag used inside sequence headers.
+const Endian = enum(u1) {
+    little,
+    big,
+
+    /// Byte order of the compilation target.
+    const native: Endian =
+        if (builtin.target.cpu.arch.endian() == .little) .little else .big;
+};
+
+/// 64-bit header placed in front of a sequence's data: a 2-bit type tag,
+/// 14 bits of type-specific info, and a 48-bit size.
+pub const Header = packed struct(u64) {
+    /// Selects which member of `info` is meaningful.
+    type: enum(u2) {
+        values,
+        string,
+        ints,
+        floats,
+    },
+    /// Type-specific flags; every variant occupies exactly 14 bits.
+    info: packed union {
+        values: packed struct(u14) {
+            weak: bool = false,
+            _: u13 = 0,
+        },
+        string: packed struct(u14) {
+            enc: enum(u4) { utf8, utf16, utf24, utf32 },
+            endian: Endian = .native,
+            quoted: bool,
+            interned: bool,
+            _: u7 = 0,
+        },
+        ints: packed struct(u14) {
+            signed: bool,
+            endian: Endian = .native,
+            size: u12,
+        },
+        floats: packed struct(u14) {
+            double: bool,
+            endian: Endian = .native,
+            _: u12 = 0,
+        },
+    },
+    /// Number of data bytes exposed by `bytes()`.
+    /// NOTE(review): whether non-string sequence types also count bytes here
+    /// (rather than elements) is not visible in this file -- confirm.
+    size: u48,
+
+    /// Return the `size` bytes that start immediately after this 8-byte
+    /// header in memory. The caller must ensure the header really is
+    /// followed by that much data.
+    pub fn bytes(self: *Header) []u8 {
+        const raw: [*]u8 = @ptrCast(self);
+        const start = 8;
+        const stop = start + self.size;
+        return raw[start..stop];
+    }
+};
diff --git a/test-data/parser-test-1.scm b/test-data/parser-test-1.scm
new file mode 100644
index 0000000..87c41b5
--- /dev/null
+++ b/test-data/parser-test-1.scm
@@ -0,0 +1,197 @@
+;;; bytestructures --- Structured access to bytevector contents.
+
+;; Copyright © 2015, 2016 Taylan Kammer <taylan.kammer@gmail.com>
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This is the base of the module, defining the data types and procedures that
+;; make up the bytestructures framework.
+
+
+;;; Code:
+
+;;; Descriptors
+
+(drt <bsd>
+ (%mkbsd size align unwrap getter setter meta)
+ bsd?
+ (size bsize)
+ (align balign)
+ (unwrap bunwrp)
+ (getter bgettr)
+ (setter bsettr)
+ (meta bmeta))
+
+(define mkbsd
+ (clmbda
+ ((size align unwrap getter setter)
+ (%mkbsd
+ size align unwrap getter setter #f))
+ ((size align unwrap getter setter meta)
+ (%mkbsd
+ size align unwrap getter setter meta))))
+
+(define bdsz
+ (clmbda
+ ((dscr) (bdsz dscr #f #f))
+ ((dscr bvec offset)
+ (let ((size (bsize dscr)))
+ (if (proc? size)
+ (size #f bvec offset)
+ size)))))
+
+(define (bdsz/s bvec offset dscr)
+ (let ((size (bsize dscr)))
+ (if (proc? size)
+ (size #t bvec offset)
+ size)))
+
+
+;;; Bstrs
+
+(drt <bstr>
+ (mkbstr bvec offset dscr)
+ bstr?
+ (bvec bsbvec)
+ (offset bsofst)
+ (dscr bsdscr))
+
+(define bstr
+ (clmbda ((dscr) (%bstr dscr #f #f))
+ ((dscr values) (%bstr dscr #t values))))
+
+(define (%bstr dscr init? values)
+ (let ((bvec (mkbvec
+ (bdsz dscr))))
+ (when init?
+ (bspst! bvec 0 dscr values))
+ (mkbstr bvec 0 dscr)))
+
+(define (bssize bstr)
+ (bdsz (bsdscr bstr)
+ (bsbvec bstr)
+ (bsofst bstr)))
+
+(dsr (bsunwp <bstr> <indx> ...)
+ (let ((bstr <bstr>))
+ (let ((bvec (bsbvec bstr))
+ (offset (bsofst bstr))
+ (dscr (bsdscr bstr)))
+ (bsunwp bvec offset dscr <indx> ...))))
+
+(defsyn bsnwp*
+ (synrul ()
+ ((_ <bvec> <ofst> <dscr>)
+ (values <bvec> <ofst> <dscr>))
+ ((_ <bvec> <ofst> <dscr> <indx> <idxs> ...)
+ (let ((bvec <bvec>)
+ (offset <ofst>)
+ (dscr <dscr>))
+ (let ((unwrap (bunwrp dscr)))
+ (when (not unwrap)
+ (error "cannot" dscr))
+ (letvls (((bvec* ofst* dscr*)
+ (unwrap #f bvec offset <indx>)))
+ (bsnwp*
+ bvec* ofst* dscr* <idxs> ...)))))))
+
+(defsyr (bsref <bstr> <indx> ...)
+ (letvls (((bvec offset dscr)
+ (bsunwp <bstr> <indx> ...)))
+ (bspref bvec offset dscr)))
+
+(defsyr (bsref*
+ <bvec> <ofst> <dscr> <indx> ...)
+ (letvls (((bvec offset dscr)
+ (bsnwp*
+ <bvec> <ofst> <dscr> <indx> ...)))
+ (bspref bvec offset dscr)))
+
+(define (bspref bvec offset dscr)
+ (let ((getter (bdgtr dscr)))
+ (if getter
+ (getter #f bvec offset)
+ (mkbstr bvec offset dscr))))
+
+(defsyr (bsst! <bstr> <indx> ... <valu>)
+ (letvls (((bvec offset dscr)
+ (bsunwp <bstr> <indx> ...)))
+ (bsps! bvec offset dscr <valu>)))
+
+(defsyr (bsst!*
+ <bvec> <ofst> <dscr> <indx> ... <valu>)
+ (letvls (((bvec offset dscr)
+ (bsnwp*
+ <bvec> <ofst> <dscr> <indx> ...)))
+ (bspst! bvec offset dscr <valu>)))
+
+(define (bspst! bvec offset dscr value)
+ (let ((setter (bdstr dscr)))
+ (if setter
+ (setter #f bvec offset value)
+ (if (bvec? value)
+ (bvecop bvec offset value 0
+ (bdsz
+ dscr bvec offset))
+ (error "cannot"
+ value dscr)))))
+
+(define (bsrf/d bstr . indxs)
+ (letvls (((bvec offset dscr)
+ (bsunwp bstr)))
+ (let loop ((bvec bvec)
+ (offset offset)
+ (dscr dscr)
+ (indxs indxs))
+ (if (null? indxs)
+ (bspref bvec offset dscr)
+ (letvls (((bvec* ofst* dscr*)
+ (bsnwp*
+ bvec offset dscr (car indxs))))
+ (loop bvec*
+ ofst*
+ dscr*
+ (cdr indxs)))))))
+
+(define (bst!/d bstr . args)
+ (letvls (((bvec offset dscr)
+ (bsunwp bstr)))
+ (let loop ((bvec bvec)
+ (offset offset)
+ (dscr dscr)
+ (args args))
+ (if (null? (cdr args))
+ (bset! bvec offset dscr (car args))
+ (letvls (((bvec* ofst* dscr*)
+ (bsnwp*
+ bvec offset dscr (car args))))
+ (loop bvec*
+ ofst*
+ dscr*
+ (cdr args)))))))
+
+(defsyn
+ bnwp/s
+ bref/s
+ bset/s
+ dba)
+
+(cexp
+ (guile (incfp "bstrs"))
+ (syncas (incld "base"))
+ (else))
+
+;;; base.scm ends here
diff --git a/test-data/parser-test-2.scm b/test-data/parser-test-2.scm
new file mode 100644
index 0000000..484c61e
--- /dev/null
+++ b/test-data/parser-test-2.scm
@@ -0,0 +1,19 @@
+(a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c
+ (x y z
+ (a b c)))))))))))))))))))
diff --git a/test-data/string.txt b/test-data/string.txt
new file mode 100644
index 0000000..31382be
--- /dev/null
+++ b/test-data/string.txt
@@ -0,0 +1 @@
+foo bar baz \ No newline at end of file