diff options
| author | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-18 21:39:51 +0100 |
|---|---|---|
| committer | Taylan Kammer <taylan.kammer@gmail.com> | 2025-03-18 21:39:51 +0100 |
| commit | f1c256884b0d59683e8bd43160b048561191a809 (patch) | |
| tree | 804f356fccb0e1a2b77f61e25bc81cbfc2452b03 | |
| parent | c43c3c22e5d0f872168c5b687141c7b08a188c5d (diff) | |
Implement istr.
| -rw-r--r-- | src/libzisp.zig | 78 | ||||
| -rw-r--r-- | src/libzisp/gc.zig | 30 | ||||
| -rw-r--r-- | src/libzisp/io/unparser.zig | 3 | ||||
| -rw-r--r-- | src/libzisp/value.zig | 3 | ||||
| -rw-r--r-- | src/libzisp/value/fixnum.zig | 12 | ||||
| -rw-r--r-- | src/libzisp/value/istr.zig | 55 | ||||
| -rw-r--r-- | src/libzisp/value/pair.zig | 1 | ||||
| -rw-r--r-- | src/libzisp/value/ptr.zig | 6 | ||||
| -rw-r--r-- | src/libzisp/value/seq.zig | 56 | ||||
| -rw-r--r-- | test-data/parser-test-1.scm | 197 | ||||
| -rw-r--r-- | test-data/parser-test-2.scm | 19 | ||||
| -rw-r--r-- | test-data/string.txt | 1 |
12 files changed, 428 insertions, 33 deletions
diff --git a/src/libzisp.zig b/src/libzisp.zig index f67f568..3a217fd 100644 --- a/src/libzisp.zig +++ b/src/libzisp.zig @@ -47,7 +47,7 @@ test "ptr" { const ptr = value.ptr; const val: [*]Hval = @ptrFromInt(256); - const tag = ptr.Tag.istr; + const tag = ptr.Tag.pair; const p = ptr.pack(val, tag); try std.testing.expect(ptr.check(p)); @@ -246,6 +246,31 @@ test "pair" { try std.testing.expectEqual(4, value.fixnum.unpack(cdr2)); } +test "istr" { + const istr = value.istr; + const fx = value.fixnum; + + const s1 = "foo bar baz"; + const v1 = istr.intern(s1, false); + const v1_len: usize = @intCast(fx.unpack(istr.len(v1))); + try std.testing.expectEqualStrings(s1, istr.getHeader(v1).bytes()); + try std.testing.expectEqual(s1.len, v1_len); + + const file = try std.fs.cwd().openFile("test-data/string.txt", .{}); + defer file.close(); + var s2_buf: [4096]u8 = undefined; + const s2_len = try file.readAll(&s2_buf); + var s2: []u8 = s2_buf[0..s2_len]; + const v2 = istr.intern(s2, false); + const v2_len: usize = @intCast(fx.unpack(istr.len(v2))); + var s2_orig_buf: [4096]u8 = undefined; + @memcpy(&s2_orig_buf, &s2_buf); + const s2_orig = s2_orig_buf[0..s2_len]; + s2[0] = s2[0] +% 1; + try std.testing.expectEqualStrings(s2_orig, istr.getHeader(v2).bytes()); + try std.testing.expectEqual(s2_len, v2_len); +} + fn parseString(str: []const u8) Value { var fbs = std.io.fixedBufferStream(str); return io.parser.parse(fbs.reader().any()); @@ -303,42 +328,65 @@ test "parse4" { try std.testing.expectEqualStrings("bar", f.slice()); } -test "parse bench" { +fn parseBench(path: []const u8) !void { const iters = switch (@import("builtin").mode) { .Debug, .ReleaseSmall => 1000, .ReleaseSafe => 10_000, .ReleaseFast => 100_000, }; + + var buf: [8196]u8 = undefined; + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + const count = try file.readAll(&buf); + + var fbs = std.io.fixedBufferStream(buf[0..count]); + const reader = fbs.reader().any(); + var timer = try std.time.Timer.start(); - std.mem.doNotOptimizeAway(timer.lap()); for (0..iters) |i| { _ = i; - std.mem.doNotOptimizeAway(parseString( - \\(a b c (x y z (a b c (x y z (a b c (x y z (a b c (x y z (a b c - \\(x y z (a b c (x y z (a b c (x y z) d e f) d e f) d e f) d e f) - \\d e f) d e f) d e f) d e f) d e f) d e f) d e f) 1 2 3)) - )); + var v: Value = undefined; + while (true) { + v = io.parser.parse(reader); + if (value.eof.check(v)) { + break; + } + } + try fbs.seekTo(0); } const ns: f64 = @floatFromInt(timer.lap()); const secs = ns / 1_000_000_000; - std.debug.print("parse {} times: {d:.3}s\n", .{ iters, secs }); + std.debug.print( + "parse {s} x {}: {d:.3}s\n", + .{ path, iters, secs }, + ); } -test "unparse" { - const unparse = io.unparser.unparse; +test "parse bench" { + try parseBench("test-data/parser-test-1.scm"); + try parseBench("test-data/parser-test-2.scm"); +} +test "unparse" { var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; var out: std.ArrayList(u8) = .init(gpa.allocator()); const w = out.writer(); const v = parseString("#foo"); - try unparse(w, v); + try io.unparser.unparse(w, v); try std.testing.expectEqualStrings("#foo", try out.toOwnedSlice()); } test "unparse2" { - try io.unparser.unparse( - std.io.getStdErr().writer(), - parseString("#{foo bar['x]}"), + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; + var out: std.ArrayList(u8) = .init(gpa.allocator()); + + const w = out.writer(); + const v = parseString("#{foo bar['x]}"); + try io.unparser.unparse(w, v); + try std.testing.expectEqualStrings( + "(#HASH #BRACE foo (#JOIN bar #SQUARE (#QUOTE . x)))", + try out.toOwnedSlice(), ); } diff --git a/src/libzisp/gc.zig b/src/libzisp/gc.zig index 92b4387..46ac091 100644 --- a/src/libzisp/gc.zig +++ b/src/libzisp/gc.zig @@ -5,18 +5,38 @@ const value = @import("value.zig"); const Value = value.Value; const Hval = value.Hval; -var _gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; +var _gpa = std.heap.GeneralPurposeAllocator(.{}).init; const gpa = _gpa.allocator(); -var cpool = std.heap.MemoryPool([2]Value).init(gpa); +// Cons cells + +var cons_pool = std.heap.MemoryPool([2]Value).init(gpa); pub fn cons(v1: Value, v2: Value) *[2]Value { - const mem = cpool.create() catch @panic("OOM"); + const mem = cons_pool.create() catch @panic("OOM"); mem[0] = v1; mem[1] = v2; return mem; } -pub fn alloc(count: usize) []Hval { - return gpa.alloc(Hval, count) catch @panic("OOM"); +// Interned strings + +var istr_pool = std.hash_map.StringHashMap(void).init(gpa); + +pub fn intern(header: value.seq.Header, str: []const u8) [*]Hval { + comptime { + std.debug.assert(@sizeOf(value.seq.Header) == 8); + } + const size = str.len + 8; + const copy = gpa.alloc(u8, size) catch @panic("OOM"); + const header_bytes: [8]u8 = @bitCast(header); + @memcpy(copy[0..8], &header_bytes); + @memcpy(copy[8..size], str); + const entry = istr_pool.getOrPutValue(copy, {}) catch @panic("OOM"); + return @ptrCast(entry.key_ptr); +} + +pub fn istrHeader(ptr: [*]Hval) *value.seq.Header { + const entry_key: *[]u8 = @ptrCast(ptr); + return @alignCast(@ptrCast(entry_key.ptr)); } diff --git a/src/libzisp/io/unparser.zig b/src/libzisp/io/unparser.zig index c25e918..dd48364 100644 --- a/src/libzisp/io/unparser.zig +++ b/src/libzisp/io/unparser.zig @@ -34,8 +34,7 @@ fn unparseHeap(w: anytype, v: Value) !void { const p, const t = value.ptr.unpack(v); try switch (t) { .pair => unparsePair(w, p), - .istr => @panic("not implemented"), - .proc => @panic("not implemented"), + else => @panic("not implemented"), }; } diff --git a/src/libzisp/value.zig b/src/libzisp/value.zig index 6c0c2e9..aefca14 100644 --- a/src/libzisp/value.zig +++ b/src/libzisp/value.zig @@ -149,6 +149,7 @@ pub const double = @import("value/double.zig"); pub const fixnum = @import("value/fixnum.zig"); pub const ptr = @import("value/ptr.zig"); +pub const seq = @import("value/seq.zig"); pub const rune = @import("value/rune.zig"); pub const sstr = @import("value/sstr.zig"); @@ -158,6 +159,7 @@ pub const nil = @import("value/nil.zig"); pub const eof = @import("value/eof.zig"); pub const pair = @import("value/pair.zig"); +pub const istr = @import("value/istr.zig"); // To fill up the u11 exponent part of a NaN. const FILL = 0x7ff; @@ -321,4 +323,5 @@ pub const Value = packed union { /// A "heap value" that could be a Value or object header. pub const Hval = packed union { value: Value, + seq_header: seq.Header, }; diff --git a/src/libzisp/value/fixnum.zig b/src/libzisp/value/fixnum.zig index c705880..80fb4ae 100644 --- a/src/libzisp/value/fixnum.zig +++ b/src/libzisp/value/fixnum.zig @@ -19,19 +19,15 @@ pub fn assert(v: Value) void { } // See detailed NaN packing docs for why the +/- 1. -const fixnum_min = std.math.minInt(i52) + 1; -const fixnum_max = std.math.maxInt(i52) - 1; - -pub fn isValidRange(int: i64) bool { - return fixnum_min < int and int < fixnum_max; -} +pub const min = std.math.minInt(i52) + 1; +pub const max = std.math.maxInt(i52) - 1; fn assertValidRange(int: i64) void { - if (int < fixnum_min) { + if (int < min) { std.debug.print("int too small for fixnum: {}\n", .{int}); @panic("int too small for fixnum"); } - if (int > fixnum_max) { + if (int > max) { std.debug.print("int too large for fixnum: {}\n", .{int}); @panic("int too large for fixnum"); } diff --git a/src/libzisp/value/istr.zig b/src/libzisp/value/istr.zig index 5937531..8056d98 100644 --- a/src/libzisp/value/istr.zig +++ b/src/libzisp/value/istr.zig @@ -1,3 +1,58 @@ const std = @import("std"); const value = @import("../value.zig"); +const gc = @import("../gc.zig"); + +const ptr = @import("ptr.zig"); +const seq = @import("seq.zig"); + +const Value = value.Value; + +// Zig API + +pub fn check(v: Value) bool { + return ptr.checkZispTag(v, .seq); +} + +pub fn assert(v: Value) void { + if (!check(v)) { + v.dump(); + @panic("not istr"); + } +} + +pub fn intern(str: []const u8, quoted: bool) Value { + if (str.len > value.fixnum.max) { + @panic("String length out of fixnum range."); + } + const header: seq.Header = .{ + .type = .string, + .info = .{ .string = .{ + .enc = .utf8, + .quoted = quoted, + .interned = true, + } }, + .size = @intCast(str.len), + }; + const bytes_ptr = gc.intern(header, str); + return ptr.pack(bytes_ptr, .seq); +} + +pub fn getHeader(v: Value) *seq.Header { + assert(v); + return gc.istrHeader(ptr.unpack(v).@"0"); +} + +// Zisp API + +pub fn pred(v: Value) Value { + return value.boole.pack(check(v)); +} + +pub fn len(v: Value) Value { + const l = getHeader(v).size; + if (l > value.fixnum.max) { + @panic("string length out of range"); + } + return value.fixnum.pack(@intCast(l)); +} diff --git a/src/libzisp/value/pair.zig b/src/libzisp/value/pair.zig index 87e18e7..6ea1edf 100644 --- a/src/libzisp/value/pair.zig +++ b/src/libzisp/value/pair.zig @@ -1,4 +1,5 @@ const std = @import("std"); + const value = @import("../value.zig"); const gc = @import("../gc.zig"); diff --git a/src/libzisp/value/ptr.zig b/src/libzisp/value/ptr.zig index 115cc2d..b07acc4 100644 --- a/src/libzisp/value/ptr.zig +++ b/src/libzisp/value/ptr.zig @@ -132,10 +132,10 @@ fn untagPtr(tagged: u48) struct { [*]Hval, Tag } { } pub const Tag = enum(u3) { - /// *[2]Value + /// Pair aka cons cell aka *[2]Value pair, - /// Interned string (symbol) - istr, + /// Sequence of various kinds (16-bit meta, 48-bit length, then data) + seq, /// Procedure proc, }; diff --git a/src/libzisp/value/seq.zig b/src/libzisp/value/seq.zig new file mode 100644 index 0000000..5382a7e --- /dev/null +++ b/src/libzisp/value/seq.zig @@ -0,0 +1,56 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +const value = @import("../value.zig"); +const gc = @import("../gc.zig"); + +const Value = value.Value; + +const Endian = enum(u1) { + little, + big, + + const native: Endian = switch (builtin.target.cpu.arch.endian()) { + .little => .little, + .big => .big, + }; +}; + +pub const Header = packed struct(u64) { + type: enum(u2) { + values, + string, + ints, + floats, + }, + info: packed union { + values: packed struct(u14) { + weak: bool = false, + _: u13 = 0, + }, + string: packed struct(u14) { + enc: enum(u4) { utf8, utf16, utf24, utf32 }, + endian: Endian = .native, + quoted: bool, + interned: bool, + _: u7 = 0, + }, + ints: packed struct(u14) { + signed: bool, + endian: Endian = .native, + size: u12, + }, + floats: packed struct(u14) { + double: bool, + endian: Endian = .native, + _: u12 = 0, + }, + }, + size: u48, + + pub fn bytes(self: *Header) []u8 { + const ptr: [*]u8 = @ptrCast(self); + const end = 8 + self.size; + return ptr[8..end]; + } +}; diff --git a/test-data/parser-test-1.scm b/test-data/parser-test-1.scm new file mode 100644 index 0000000..87c41b5 --- /dev/null +++ b/test-data/parser-test-1.scm @@ -0,0 +1,197 @@ +;;; bytestructures --- Structured access to bytevector contents. + +;; Copyright © 2015, 2016 Taylan Kammer <taylan.kammer@gmail.com> + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This is the base of the module, defining the data types and procedures that +;; make up the bytestructures framework. + + +;;; Code: + +;;; Descriptors + +(drt <bsd> + (%mkbsd size align unwrap getter setter meta) + bsd? + (size bsize) + (align balign) + (unwrap bunwrp) + (getter bgettr) + (setter bsettr) + (meta bmeta)) + +(define mkbsd + (clmbda + ((size align unwrap getter setter) + (%mkbsd + size align unwrap getter setter #f)) + ((size align unwrap getter setter meta) + (%mkbsd + size align unwrap getter setter meta)))) + +(define bdsz + (clmbda + ((dscr) (bdsz dscr #f #f)) + ((dscr bvec offset) + (let ((size (bsize dscr))) + (if (proc? size) + (size #f bvec offset) + size))))) + +(define (bdsz/s bvec offset dscr) + (let ((size (bsize dscr))) + (if (proc? size) + (size #t bvec offset) + size))) + + +;;; Bstrs + +(drt <bstr> + (mkbstr bvec offset dscr) + bstr? + (bvec bsbvec) + (offset bsofst) + (dscr bsdscr)) + +(define bstr + (clmbda ((dscr) (%bstr dscr #f #f)) + ((dscr values) (%bstr dscr #t values)))) + +(define (%bstr dscr init? values) + (let ((bvec (mkbvec + (bdsz dscr)))) + (when init? + (bspst! bvec 0 dscr values)) + (mkbstr bvec 0 dscr))) + +(define (bssize bstr) + (bdsz (bsdscr bstr) + (bsbvec bstr) + (bsofst bstr))) + +(dsr (bsunwp <bstr> <indx> ...) + (let ((bstr <bstr>)) + (let ((bvec (bsbvec bstr)) + (offset (bsofst bstr)) + (dscr (bsdscr bstr))) + (bsunwp bvec offset dscr <indx> ...)))) + +(defsyn bsnwp* + (synrul () + ((_ <bvec> <ofst> <dscr>) + (values <bvec> <ofst> <dscr>)) + ((_ <bvec> <ofst> <dscr> <indx> <idxs> ...) + (let ((bvec <bvec>) + (offset <ofst>) + (dscr <dscr>)) + (let ((unwrap (bunwrp dscr))) + (when (not unwrap) + (error "cannot" dscr)) + (letvls (((bvec* ofst* dscr*) + (unwrap #f bvec offset <indx>))) + (bsnwp* + bvec* ofst* dscr* <idxs> ...))))))) + +(defsyr (bsref <bstr> <indx> ...) + (letvls (((bvec offset dscr) + (bsunwp <bstr> <indx> ...))) + (bspref bvec offset dscr))) + +(defsyr (bsref* + <bvec> <ofst> <dscr> <indx> ...) + (letvls (((bvec offset dscr) + (bsnwp* + <bvec> <ofst> <dscr> <indx> ...))) + (bspref bvec offset dscr))) + +(define (bspref bvec offset dscr) + (let ((getter (bdgtr dscr))) + (if getter + (getter #f bvec offset) + (mkbstr bvec offset dscr)))) + +(defsyr (bsst! <bstr> <indx> ... <valu>) + (letvls (((bvec offset dscr) + (bsunwp <bstr> <indx> ...))) + (bsps! bvec offset dscr <valu>))) + +(defsyr (bsst!* + <bvec> <ofst> <dscr> <indx> ... <valu>) + (letvls (((bvec offset dscr) + (bsnwp* + <bvec> <ofst> <dscr> <indx> ...))) + (bspst! bvec offset dscr <valu>))) + +(define (bspst! bvec offset dscr value) + (let ((setter (bdstr dscr))) + (if setter + (setter #f bvec offset value) + (if (bvec? value) + (bvecop bvec offset value 0 + (bdsz + dscr bvec offset)) + (error "cannot" + value dscr))))) + +(define (bsrf/d bstr . indxs) + (letvls (((bvec offset dscr) + (bsunwp bstr))) + (let loop ((bvec bvec) + (offset offset) + (dscr dscr) + (indxs indxs)) + (if (null? indxs) + (bspref bvec offset dscr) + (letvls (((bvec* ofst* dscr*) + (bsnwp* + bvec offset dscr (car indxs)))) + (loop bvec* + ofst* + dscr* + (cdr indxs))))))) + +(define (bst!/d bstr . args) + (letvls (((bvec offset dscr) + (bsunwp bstr))) + (let loop ((bvec bvec) + (offset offset) + (dscr dscr) + (args args)) + (if (null? (cdr args)) + (bset! bvec offset dscr (car args)) + (letvls (((bvec* ofst* dscr*) + (bsnwp* + bvec offset dscr (car args)))) + (loop bvec* + ofst* + dscr* + (cdr args))))))) + +(defsyn + bnwp/s + bref/s + bset/s + dba) + +(cexp + (guile (incfp "bstrs")) + (syncas (incld "base")) + (else)) + +;;; base.scm ends here diff --git a/test-data/parser-test-2.scm b/test-data/parser-test-2.scm new file mode 100644 index 0000000..484c61e --- /dev/null +++ b/test-data/parser-test-2.scm @@ -0,0 +1,19 @@ +(a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c + (x y z + (a b c))))))))))))))))))) diff --git a/test-data/string.txt b/test-data/string.txt new file mode 100644 index 0000000..31382be --- /dev/null +++ b/test-data/string.txt @@ -0,0 +1 @@ +foo bar baz
\ No newline at end of file |
