Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a U64 backed BitPacker #1

Merged
merged 6 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions src/BitPacker.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ const std = @import("std");
// initial_bit_size: The initial bit size of packed values; it determines the initial expected max value.
// reserved_bits: Skip this number of bits in each array item. This reduces the packer's efficiency, but lets it produce valid values for your target encoding.
pub fn BitPacker(comptime _UnderlyingType: type, comptime _ValueType: type, comptime initial_bit_size: u8, comptime reserved_bits: u8) type {
std.debug.assert(@bitSizeOf(_UnderlyingType) <= @bitSizeOf(_ValueType)); // We can probably support it, we just don't right now.

return struct {
arr: std.ArrayList(UnderlyingType),

Expand Down Expand Up @@ -96,10 +94,16 @@ pub fn BitPacker(comptime _UnderlyingType: type, comptime _ValueType: type, comp

const to_write = @min(remaining_bits, @bitSizeOf(UnderlyingType) - self.bit);

var shifted: ValueType = value << @intCast(@bitSizeOf(ValueType) - remaining_bits); // "Mask" high bits
shifted >>= @intCast(self.bit + (@bitSizeOf(ValueType) - @bitSizeOf(UnderlyingType)));
self.arr.items[self.arr.items.len - 1] |= @intCast(shifted);

// FIXME: This can probably be simplified
if (comptime (@bitSizeOf(ValueType) < @bitSizeOf(UnderlyingType))) {
var shifted: UnderlyingType = @as(UnderlyingType, @intCast(value)) << @intCast(@bitSizeOf(UnderlyingType) - remaining_bits); // "Mask" high bits
shifted >>= @intCast(self.bit);
self.arr.items[self.arr.items.len - 1] |= shifted;
} else {
var shifted: ValueType = value << @intCast(@bitSizeOf(ValueType) - remaining_bits); // "Mask" high bits
shifted >>= @intCast(self.bit + (@bitSizeOf(ValueType) - @bitSizeOf(UnderlyingType)));
self.arr.items[self.arr.items.len - 1] |= @intCast(shifted);
}
remaining_bits -= to_write;

self.bit += to_write;
Expand Down
2 changes: 1 addition & 1 deletion src/lzwPacked.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const bp = @import("./BitPacker.zig");

const impl = @import("lzw.zig");

pub const BitPacker = bp.BitPacker(u16, u20, 9, 0);
pub const BitPacker = bp.BitPacker(u64, u20, 9, 0);
pub const sentinel_token = std.math.maxInt(BitPacker.ValueType);

pub fn compressPacked(data: []const u8, allocator: std.mem.Allocator) !BitPacker {
Expand Down
19 changes: 10 additions & 9 deletions src/wasmPacked.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,26 @@ comptime {
@export(wasmAllocator.free, .{ .name = "free", .linkage = .Strong });
}

comptime {
std.debug.assert(@bitSizeOf(usize) <= @bitSizeOf(u32));
}

export fn compressPacked(ptr: [*]u8, length: usize) i32 {
const allocator = std.heap.page_allocator;
const data: []const u8 = ptr[0..length];
var output = packed_impl.compressPacked(data, allocator) catch {
return 0;
};

const content_length = output.arr.items.len + 2;
output.arr.ensureTotalCapacity(output.arr.items.len + 6) catch {
const content_length = output.arr.items.len * 4 + 2; // In u16
output.arr.ensureTotalCapacity(output.arr.items.len + 2) catch {
return 0;
};
output.arr.appendAssumeCapacity(@intCast(output.size >> 16));
output.arr.appendAssumeCapacity(@intCast(output.size & 0xFFFF));
output.arr.appendAssumeCapacity(@intCast(content_length >> 16));
output.arr.appendAssumeCapacity(@intCast(content_length & 0xFFFF));
output.arr.appendAssumeCapacity(@intCast(output.arr.capacity >> 16));
output.arr.appendAssumeCapacity(@intCast(output.arr.capacity & 0xFFFF));
// Token Count followed by the usual footer
output.arr.appendAssumeCapacity((@as(u64, @intCast(content_length)) << 32) | output.size);
output.arr.appendAssumeCapacity((@as(u64, @intCast(output.arr.capacity * 2))));

return @intCast(@intFromPtr(output.arr.items.ptr + content_length));
return @intCast(@intFromPtr(output.arr.items.ptr + output.arr.items.len - 2) + 4);
}

export fn decompressPacked(ptr: [*]packed_impl.BitPacker.UnderlyingType, length: usize, token_count: usize) i32 {
Expand Down
26 changes: 23 additions & 3 deletions ts-lib/dist/smol-string-packed.js

Large diffs are not rendered by default.

14 changes: 13 additions & 1 deletion ts-lib/dist/smol-string-worker-packed.js

Large diffs are not rendered by default.

Binary file modified ts-lib/src/module-packed.wasm
Binary file not shown.
9 changes: 5 additions & 4 deletions ts-lib/src/smol-string-packed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ export function compressPacked(str: string) {
const footer = new Uint16Array(
exports.memory.buffer.slice(ptrToFooter, ptrToFooter + 8)
);
const streamLength = (footer.at(0)! << 16) + footer.at(1)!;
const capacity = (footer.at(2)! << 16) + footer.at(3)!;
// FIXME: Little endian while all the others are big endian...
const streamLength = (footer.at(1)! << 16) + footer.at(0)!;
const capacity = (footer.at(3)! << 16) + footer.at(2)!;
const start = ptrToFooter - 2 * streamLength;

// Includes the tokenCount at the end of the stream (2 * u16).
Expand All @@ -45,8 +46,8 @@ export function compressPacked(str: string) {

export function decompressPacked(compressedStr: string) {
const tokenCount =
(compressedStr.charCodeAt(compressedStr.length - 2)! << 16) +
compressedStr.charCodeAt(compressedStr.length - 1);
(compressedStr.charCodeAt(compressedStr.length - 1)! << 16) +
compressedStr.charCodeAt(compressedStr.length - 2);

let ptrToCompressed = exports.allocUint16(compressedStr.length - 2);
let compressed_buffer = new Uint16Array(
Expand Down
Loading