summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/main.zig2
-rw-r--r--src/zisp/gc.zig55
-rw-r--r--src/zisp/gc/IstrSet.zig109
-rw-r--r--src/zisp/io/Parser.zig35
-rw-r--r--src/zisp/io/print.zig30
-rw-r--r--src/zisp/value/array.zig14
-rw-r--r--src/zisp/value/istr.zig23
7 files changed, 143 insertions, 125 deletions
diff --git a/src/main.zig b/src/main.zig
index f7e752e..dc1187d 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -14,6 +14,8 @@ pub fn main() !u8 {
var stdout_writer = std.Io.File.stdout().writer(io, &stdout_buffer);
const writer = &stdout_writer.interface;
+ zisp.gc.init();
+
var p = try zisp.io.Parser.init(alloc, io);
defer p.deinit();
while (true) {
diff --git a/src/zisp/gc.zig b/src/zisp/gc.zig
index 4fc2c90..1be4304 100644
--- a/src/zisp/gc.zig
+++ b/src/zisp/gc.zig
@@ -3,25 +3,24 @@ const std = @import("std");
const value = @import("value.zig");
+const IstrSet = @import("gc/IstrSet.zig");
+
+const istr = value.istr;
const ptr = value.ptr;
-const seq = value.seq;
const Value = value.Value;
const Zptr = value.Zptr;
+const Istr = istr.Istr;
+
pub const alloc = std.heap.smp_allocator;
// Cons cells
const ConsPool = std.heap.MemoryPool([2]Value);
var cons_pool: ConsPool = undefined;
-var need_init = true;
pub fn cons(v1: Value, v2: Value) *[2]Value {
- if (need_init) {
- cons_pool = ConsPool.initCapacity(alloc, 64) catch @panic("OOM");
- need_init = false;
- }
const mem = cons_pool.create(alloc) catch @panic("OOM");
mem[0] = v1;
mem[1] = v2;
@@ -30,42 +29,16 @@ pub fn cons(v1: Value, v2: Value) *[2]Value {
// Interned strings
-const istr_ctx = std.hash_map.StringContext{};
-var istr_pool = std.hash_map.StringHashMap(*seq.Header).init(alloc);
-
-pub fn internString(str: []const u8) Value {
- const gop = istr_pool.getOrPutAdapted(
- str,
- istr_ctx,
- ) catch @panic("OOM");
- if (gop.found_existing) {
- const p: *seq.Header = gop.value_ptr.*;
- return ptr.pack(p, .seq);
- }
-
- std.debug.assert(str.len <= std.math.maxInt(u48));
+var istr_set: IstrSet = undefined;
- const header: seq.Header = .{
- .type = .string,
- .info = .{ .string = .{
- .enc = .utf8,
- .interned = true,
- } },
- .size = @intCast(str.len),
- };
-
- const h_align: std.mem.Alignment = @enumFromInt(@alignOf(seq.Header));
- const h_size = @sizeOf(seq.Header);
- const size = str.len + h_size;
-
- const mem = alloc.alignedAlloc(u8, h_align, size) catch @panic("OOM");
-
- const h_bytes: [h_size]u8 = @bitCast(header);
- @memcpy(mem[0..h_size], &h_bytes);
- @memcpy(mem[h_size..size], str);
+pub fn internString(s: []const u8) !Value {
+ const p = istr_set.add(alloc, s) catch @panic("OOM");
+ return ptr.pack(p, .istr);
+}
- gop.key_ptr.* = alloc.dupe(u8, str) catch @panic("OOM");
- gop.value_ptr.* = @ptrCast(mem.ptr);
+// Init
- return ptr.pack(mem.ptr, .seq);
+pub fn init() void {
+ cons_pool = ConsPool.initCapacity(alloc, 64) catch @panic("OOM");
+ istr_set = IstrSet.init(alloc) catch @panic("OOM");
}
diff --git a/src/zisp/gc/IstrSet.zig b/src/zisp/gc/IstrSet.zig
index 31bc850..465b0e3 100644
--- a/src/zisp/gc/IstrSet.zig
+++ b/src/zisp/gc/IstrSet.zig
@@ -1,32 +1,39 @@
-///
-/// Efficient hash set for short(ish) strings.
-///
-/// Layout: []Bucket
-///
-/// * Bucket: [2]Sector
-///
-/// * Sector: [32]u8 or [4]u64
-///
-/// The parser interns strings up to 255 bytes in length, using the `Istr` heap
-/// representation, which has an 8-byte metadata prefix. Therefore, strings of
-/// up to 24 bytes can fit into a 32-byte sector as an immediate Istr, so the
-/// pointer to the sector becomes the Istr pointer itself.
-///
-/// The 8-byte header of an Istr is a u64 hash, but its lowest 8 bits are also
-/// the length of the string: u64hash = (real_u64hash << 1) | u8length
-///
-/// Note that the empty string is never interned since strings up to 6 bytes in
-/// length are packed directly into a NaN value. Thus, checking if a sector is
-/// empty is a matter of checking its first 8 bytes as a u64 value, which would
-/// hold the Istr header, which cannot be zero, because the lowest 8 bits are
-/// the length of the string.
-///
-/// If the length value is greater than 24, meaning the Istr can't actually fit
-/// here, that means the next 8 bytes of the sector are actually a pointer to
-/// the Istr elsewhere on the heap, and another 16 bytes are unused.
-///
+//!
+//! Efficient hash set for short(ish) strings.
+//!
+//! Layout: []Bucket
+//!
+//! * Bucket: [2]Sector
+//!
+//! * Sector: [32]u8 or [4]u64
+//!
+//! The parser interns strings up to 255 bytes in length, using the `Istr` heap
+//! representation, which has an 8-byte metadata prefix. Therefore, strings of
+//! up to 24 bytes can fit into a 32-byte sector as an immediate Istr, so the
+//! pointer to the sector becomes the Istr pointer itself.
+//!
+//! The 8-byte header of an Istr is a u64 hash, but its lowest 8 bits are also
+//! the length of the string: u64hash = (real_u64hash << 1) | u8length
+//!
+//! Note that the empty string is never interned since strings up to 6 bytes in
+//! length are packed directly into a NaN value. Thus, checking if a sector is
+//! empty is a matter of checking its first 8 bytes as a u64 value, which would
+//! hold the Istr header, which cannot be zero, because the lowest 8 bits are
+//! the length of the string.
+//!
+//! If the length value is greater than 24, meaning the Istr can't actually fit
+//! here, that means the next 8 bytes of the sector are actually a pointer to
+//! the Istr elsewhere on the heap, and another 16 bytes are unused.
+//!
+
const std = @import("std");
+const value = @import("../value.zig");
+
+const Alloc = std.mem.Allocator;
+
+const Istr = value.istr.Istr;
+
const Set = @This();
const Bucket = [2][4]u64;
@@ -48,13 +55,13 @@ const Sector = *align(8) packed union {
if (self.meta.len <= 24) {
return @ptrCast(self);
} else {
- return @bitCast(self.bufU64()[1]);
+ return @ptrFromInt(self.bufU64()[1]);
}
}
pub fn match(self: *@This(), hash: u64, s: []const u8) ?Istr {
if (self.hash != hash) {
- return false;
+ return null;
}
const istr = self.getIstr();
if (std.hash_map.eqlString(s, istr.str())) {
@@ -66,18 +73,18 @@ const Sector = *align(8) packed union {
pub fn insert(
self: *@This(),
- alloc: *Allocator,
+ alloc: Alloc,
hash: u64,
s: []const u8,
- ) Istr {
+ ) !Istr {
self.hash = hash;
if (s.len <= 24) {
- const istr = @ptrCast(self);
+ const istr: Istr = @ptrCast(self);
istr.putStr(s);
return istr;
} else {
- const istr = Istr.new(alloc, hash, s);
- self.bufU64()[1] = @bitCast(istr);
+ const istr = try value.istr.new(alloc, hash, s);
+ self.bufU64()[1] = @intFromPtr(istr);
return istr;
}
}
@@ -87,53 +94,55 @@ buckets: []Bucket,
const default_bcount = 512;
-pub fn init(alloc: *Allocator) !Set {
+pub fn init(alloc: Alloc) !Set {
return initCustom(alloc, default_bcount);
}
-pub fn initCustom(alloc: *Allocator, bcount: usize) !Set {
+pub fn initCustom(alloc: Alloc, bcount: usize) !Set {
return Set{
.buckets = try alloc.alloc(Bucket, bcount),
};
}
-pub fn deinit(self: *Set, alloc: *Allocator) void {
+pub fn deinit(self: *Set, alloc: Alloc) void {
alloc.free(self.buckets);
}
-pub fn add(self: *Set, s: []const u8) Istr {
+pub fn sector(set: *Set, b_idx: usize, s_idx: usize) Sector {
+ return @ptrCast(&set.buckets[b_idx][s_idx]);
+}
+
+pub fn add(self: *Set, alloc: Alloc, s: []const u8) !Istr {
std.debug.assert(s.len < 256);
const idx_mask = self.buckets.len - 1;
- const len = s.len;
-
const h0 = std.hash_map.hashString(s);
- const hash = (h0 << 1) | len;
+ const hash = (h0 << 1) | s.len;
// We resize if we had to walk through 50% of the buckets.
- const probe_limit = self.buckets.len / 2;
+ const probe_limit = self.buckets.len >> 1;
var probes: usize = 0;
var idx = @as(usize, @truncate(hash & idx_mask));
- var s_idx: usize = undefined;
- while (true) : (idx = (idx + 1) & idx_mask) {
+ const s_idx = b: while (true) : (idx = (idx + 1) & idx_mask) {
inline for (0..1) |i| {
- const sec: Sector = @ptrCast(&self.buckets[idx][i]);
- if (sec.hash == 0) { s_idx = i; break; }
- if (sec.match(s)) |istr| return istr;
+ const sec = sector(self, idx, i);
+ if (sec.hash == 0) break :b i;
+ if (sec.match(hash, s)) |istr| return istr;
}
probes += 1;
if (probes > probe_limit) {
self.resize();
- return self.add(s);
+ return self.add(alloc, s);
}
};
- const sec = &self.buckets[idx][s_idx];
+ const sec = sector(self, idx, s_idx);
return sec.insert(alloc, hash, s);
}
-fn resize() void {
+fn resize(self: *Set) void {
+ _ = self;
@panic("IstrSet resize not implemented.");
}
diff --git a/src/zisp/io/Parser.zig b/src/zisp/io/Parser.zig
index ce61817..f92a308 100644
--- a/src/zisp/io/Parser.zig
+++ b/src/zisp/io/Parser.zig
@@ -152,7 +152,12 @@ fn read(p: *Parser) !?u8 {
}
fn readNoEof(p: *Parser, comptime emsg: []const u8) !u8 {
- return if (try p.read()) |c| c else p.err(.UnexpectedEof, emsg);
+ return try p.read() orelse p.err(.UnexpectedEof, emsg);
+}
+
+// Fake optional, for use in: while (readToEof()) |c| { }
+fn readNoEof2(p: *Parser, comptime emsg: []const u8) !?u8 {
+ return try p.read() orelse p.err(.UnexpectedEof, emsg);
}
fn unread(p: *Parser, c: u8) void {
@@ -187,12 +192,13 @@ fn addUnicode(p: *Parser, uc: u21) !void {
fn getCharsAsString(p: *Parser) !Value {
defer p.chars.clearRetainingCapacity();
const s = p.chars.items;
- if (value.sstr.isValidSstr(s))
+ if (value.sstr.isValidSstr(s)) {
return value.sstr.pack(s);
- else if (value.istr.isValidIstr(s))
+ } else if (value.istr.isValidIstr(s)) {
return value.istr.intern(s);
- else
- return @panic("not implemented"); // TODO
+ } else {
+ @panic("not implemented"); // TODO
+ }
}
fn getCharsAsRune(p: *Parser) Value {
@@ -445,7 +451,7 @@ fn parseCladDatum(p: *Parser, c: u8, next: Fn) !void {
fn getString(p: *Parser, comptime close: u8) !Value {
const msg = "string(" ++ .{close} ++ ")";
- while (try p.readNoEof(msg)) |c| sw: switch (c) {
+ while (try p.readNoEof2(msg)) |c| sw: switch (c) {
close => break,
'\\' => switch (try p.readNoEof("string backslash escape")) {
'\\', '|', '"' => |c2| try p.addChar(c2),
@@ -475,33 +481,36 @@ fn getString(p: *Parser, comptime close: u8) !Value {
fn getAtString(p: *Parser) !Value {
const sentinel = try p.readNoEof("at-string");
- while (try p.readNoEof("at-string")) |c| {
+ while (try p.readNoEof2("at-string")) |c| {
if (c == sentinel) break;
try p.addChar(c);
}
- const s = p.getCharsAsString();
+ const s = try p.getCharsAsString();
return p.cons(ATSTR, s);
}
fn skipStringLfEscape(p: *Parser) !u8 {
const msg = "string linefeed escape";
- while (try p.readNoEof(msg)) |c| switch (c) {
- '\t', ' ' => {},
+ while (try p.readNoEof2(msg)) |c| switch (c) {
+ '\t', ' ' => continue,
'\n' => return p.skipStringIndent(),
else => return p.err(.InvalidCharacter, msg),
};
+ unreachable;
}
fn skipStringIndent(p: *Parser) !u8 {
- while (try p.readNoEof("string newline escape")) |c| switch (c) {
- '\t', ' ' => {},
+ const msg = "string newline escape";
+ while (try p.readNoEof2(msg)) |c| switch (c) {
+ '\t', ' ' => continue,
else => return c,
};
+ unreachable;
}
fn parseStringRawHexEsc(p: *Parser) !void {
const msg = "string raw hex escape";
- while (try p.readNoEof(msg)) |c1| {
+ while (try p.readNoEof2(msg)) |c1| {
if (c1 == ';') break;
const c2 = try p.readNoEof(msg);
const hi = try p.parseHexDigit(c1, msg);
diff --git a/src/zisp/io/print.zig b/src/zisp/io/print.zig
index 65082c4..5e3bd15 100644
--- a/src/zisp/io/print.zig
+++ b/src/zisp/io/print.zig
@@ -3,14 +3,16 @@ const std = @import("std");
const gc = @import("../gc.zig");
const value = @import("../value.zig");
+const Parser = @import("Parser.zig");
const Value = value.Value;
+const Array = value.array.Array;
pub fn toWriter(w: *std.Io.Writer, v: Value) anyerror!void {
// zig fmt: off
try if (v.isDouble()) double(w, v)
else if (v.isFixnum()) fixnum(w, v)
else if (v.getPtr(.pair)) |p| pair(w, @ptrCast(p))
- else if (v.getPtr(.seq)) |p| seq(w, @ptrCast(p))
+ else if (v.getPtr(.array)) |p| array(w, @ptrCast(p))
else if (v.isRune()) rune(w, v)
else if (v.isChar()) char(w, v)
else if (v.isMisc()) misc(w, v)
@@ -68,6 +70,26 @@ pub fn srat(w: *std.Io.Writer, v: Value) !void {
}
pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void {
+ const car = p[0];
+ //const cdr = p[1];
+ if (car.eq(Parser.PQSTR)) {
+ //try pipeString(w, cdr);
+ @panic("");
+ } else if (car.eq(Parser.DQSTR)) {
+ //try quotString(w, cdr);
+ @panic("");
+ } else if (car.eq(Parser.ATSTR)) {
+ //try atString(w, cdr);
+ @panic("");
+ } else if (car.eq(Parser.LABEL)) {
+ //try label(w, cdr);
+ @panic("");
+ } else {
+ try list(w, p);
+ }
+}
+
+pub fn list(w: *std.Io.Writer, p: *[2]Value) !void {
try w.writeByte('(');
try toWriter(w, p[0]);
var cdr = p[1];
@@ -84,16 +106,16 @@ pub fn pair(w: *std.Io.Writer, p: *[2]Value) !void {
try w.writeByte(')');
}
-pub fn seq(w: *std.Io.Writer, s: *value.seq.Header) !void {
+pub fn array(w: *std.Io.Writer, s: Array) !void {
switch (s.type) {
.string => try string(w, s),
else => @panic("not implemented"),
}
}
-pub fn string(w: *std.Io.Writer, s: *value.seq.Header) !void {
+pub fn string(w: *std.Io.Writer, s: Array) !void {
// TODO: Check if pipes/escapes necessary.
try w.writeByte('|');
- try w.writeAll(s.bytes());
+ try w.writeAll(s.str());
try w.writeByte('|');
}
diff --git a/src/zisp/value/array.zig b/src/zisp/value/array.zig
index b2faf94..6e37013 100644
--- a/src/zisp/value/array.zig
+++ b/src/zisp/value/array.zig
@@ -94,13 +94,13 @@ pub const Array = *align(8) packed struct(u64) {
fn bufContent(self: *@This()) [*]u8 {
std.debug.assert(!self.is_ptr);
- return @ptrCast(self.bufU64 + 1);
+ return @ptrCast(self.bufU64() + 1);
}
fn bufPointer(self: *@This()) [*]u8 {
std.debug.assert(self.is_ptr);
std.debug.assert(self.len_or_ptr == 0);
- return @ptrCast(self.bufU64[1]);
+ return @ptrFromInt(self.bufU64()[1]);
}
fn eltSize(self: *@This()) u16 {
@@ -116,7 +116,7 @@ pub const Array = *align(8) packed struct(u64) {
fn arrPointer(self: *@This()) ?Array {
std.debug.assert(self.is_ptr);
const p = self.len_or_ptr;
- return if (p != 0) @ptrCast(p) else null;
+ return if (p != 0) @ptrFromInt(p) else null;
}
fn sliceInfo(self: *@This()) [2]u64 {
@@ -124,13 +124,13 @@ pub const Array = *align(8) packed struct(u64) {
const ptr = self.len_or_ptr;
const buf = self.bufU64();
if (ptr != 0) {
- return buf[1..2];
+ return .{ buf[1], buf[2] };
} else {
- return buf[2..3];
+ return .{ buf[2], buf[3] };
}
}
- pub fn buf(self: *@This()) [*]u8 {
+ pub fn bufU8(self: *@This()) [*]u8 {
if (self.is_ptr) {
if (self.arrPointer()) |a| {
return a.bufContent();
@@ -144,7 +144,7 @@ pub const Array = *align(8) packed struct(u64) {
pub fn str(self: *@This()) []u8 {
if (self.is_slice) {
- const buf = self.buf();
+ const buf = self.bufU8();
const start, const end = self.sliceInfo();
return buf[start..end];
}
diff --git a/src/zisp/value/istr.zig b/src/zisp/value/istr.zig
index dec3b09..8f70725 100644
--- a/src/zisp/value/istr.zig
+++ b/src/zisp/value/istr.zig
@@ -3,6 +3,8 @@ const std = @import("std");
const gc = @import("../gc.zig");
const value = @import("../value.zig");
+const Alloc = std.mem.Allocator;
+
const Value = value.Value;
// Zig API
@@ -22,25 +24,26 @@ pub const Istr = *align(8) packed union {
return @ptrCast(self);
}
- pub fn new(alloc: *Allocator, hash: u64, s: []const u8) Istr {
- const istr: Istr = @ptrCast(alloc.alloc(u8, 8 + s.len));
- istr.hash = hash;
- istr.putStr(s);
- return istr;
- }
-
pub fn putStr(self: *@This(), s: []const u8) void {
std.debug.assert(s.len <= 255);
- @memcpy(istr.bufU8()[8 .. 8 + s.len], s);
+ @memcpy(self.bufU8()[8 .. 8 + s.len], s);
}
pub fn str(self: *@This()) []const u8 {
const buf = self.bufU8();
- const len = self.meta.len;
- return buf[8 .. 8 + len];
+ return buf[8 .. 8 + self.meta.len];
}
};
+pub fn new(alloc: Alloc, hash: u64, s: []const u8) !Istr {
+ const aln = std.mem.Alignment.of(Istr);
+ const ptr = try alloc.alignedAlloc(u8, aln, 8 + s.len);
+ const istr: Istr = @ptrCast(ptr);
+ istr.hash = hash;
+ istr.putStr(s);
+ return istr;
+}
+
pub fn check(v: Value) ?Istr {
if (v.getPtr(.istr)) |p| {
return @ptrCast(p);