Skip to content

Commit

Permalink
Store metadata at the end of the stream in the interface between WASM and JS.
Browse files Browse the repository at this point in the history

This avoids an allocation, a copy and searching for a null terminator.
  • Loading branch information
Senryoku committed Nov 1, 2023
1 parent b736d43 commit 2947349
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 169 deletions.
25 changes: 21 additions & 4 deletions src/wasm.zig
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ export fn compress(ptr: [*]const u8, length: usize) i32 {
var output = impl.compress(u16, 0, 0xFFFE, data, allocator) catch {
return 0;
};
const r = output.toOwnedSliceSentinel(0) catch {
const content_length = output.items.len;
output.ensureTotalCapacity(output.items.len + 4) catch {
return 0;
};
return @intCast(@intFromPtr(r.ptr));
output.appendAssumeCapacity(@intCast(content_length >> 16));
output.appendAssumeCapacity(@intCast(content_length & 0xFFFF));
output.appendAssumeCapacity(@intCast(output.capacity >> 16));
output.appendAssumeCapacity(@intCast(output.capacity & 0xFFFF));

return @intCast(@intFromPtr(output.items.ptr + content_length));
}

export fn decompress(ptr: [*]const u16, length: usize) i32 {
Expand All @@ -27,8 +33,19 @@ export fn decompress(ptr: [*]const u16, length: usize) i32 {
var output = impl.decompress(u16, 0, 0xFFFE, data, allocator) catch {
return 0;
};
const r = output.toOwnedSliceSentinel(0) catch {

const content_length = output.items.len;
output.ensureTotalCapacity(output.items.len + 8) catch {
return 0;
};
return @intCast(@intFromPtr(r.ptr));
output.appendAssumeCapacity(@intCast((content_length >> 24) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 16) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 8) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 0) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 24) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 16) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 8) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 0) & 0xFF));

return @intCast(@intFromPtr(output.items.ptr + content_length));
}
35 changes: 22 additions & 13 deletions src/wasmPacked.zig
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,19 @@ export fn compressPacked(ptr: [*]u8, length: usize) i32 {
var output = packed_impl.compressPacked(data, allocator) catch {
return 0;
};
defer output.deinit();

// We could preallocate the array and leave space for our 'header' to avoid this copy.
// However this should only be the responsibility of this WASM interface, not compressPacked.
// We could also ask for an additional buffer to output these information...
var r = allocator.alloc(u16, output.arr.items.len + 4) catch {
const content_length = output.arr.items.len + 2;
output.arr.ensureTotalCapacity(output.arr.items.len + 6) catch {
return 0;
};
r[0] = @intCast(r.len >> 16);
r[1] = @intCast(r.len & 0xFFFF);
r[2] = @intCast(output.size >> 16);
r[3] = @intCast(output.size & 0xFFFF);
std.mem.copy(u16, r[4..], output.arr.items);
output.arr.appendAssumeCapacity(@intCast(output.size >> 16));
output.arr.appendAssumeCapacity(@intCast(output.size & 0xFFFF));
output.arr.appendAssumeCapacity(@intCast(content_length >> 16));
output.arr.appendAssumeCapacity(@intCast(content_length & 0xFFFF));
output.arr.appendAssumeCapacity(@intCast(output.arr.capacity >> 16));
output.arr.appendAssumeCapacity(@intCast(output.arr.capacity & 0xFFFF));

return @intCast(@intFromPtr(r.ptr));
return @intCast(@intFromPtr(output.arr.items.ptr + content_length));
}

export fn decompressPacked(ptr: [*]packed_impl.BitPacker.UnderlyingType, length: usize, token_count: usize) i32 {
Expand All @@ -52,8 +50,19 @@ export fn decompressPacked(ptr: [*]packed_impl.BitPacker.UnderlyingType, length:
var output = impl.decompress(packed_impl.BitPacker.ValueType, 0, packed_impl.sentinel_token, unpackedData, allocator) catch {
return 0;
};
const r = output.toOwnedSliceSentinel(0) catch {

const content_length = output.items.len;
output.ensureTotalCapacity(output.items.len + 8) catch {
return 0;
};
return @intCast(@intFromPtr(r.ptr));
output.appendAssumeCapacity(@intCast((content_length >> 24) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 16) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 8) & 0xFF));
output.appendAssumeCapacity(@intCast((content_length >> 0) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 24) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 16) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 8) & 0xFF));
output.appendAssumeCapacity(@intCast((output.capacity >> 0) & 0xFF));

return @intCast(@intFromPtr(output.items.ptr + content_length));
}
64 changes: 30 additions & 34 deletions ts-lib/dist/smol-string-packed.js

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions ts-lib/dist/smol-string-worker-packed.js

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions ts-lib/dist/smol-string-worker.js

Large diffs are not rendered by default.

66 changes: 30 additions & 36 deletions ts-lib/dist/smol-string.js

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
function s(e, n) {
const t = new TextEncoder().encode(e), a = n.allocUint8(t.length + 1), r = new Uint8Array(
const t = new TextEncoder().encode(e), a = n.allocUint8(t.length);
return new Uint8Array(
n.memory.buffer,
a,
t.length + 1
);
return r.set(t), r[t.length] = 0, { ptr: a, length: t.length + 1 };
t.length
).set(t), { ptr: a, length: t.length };
}
function f(e) {
const n = new Array(e.length);
Expand Down
Binary file modified ts-lib/src/module-packed.wasm
Binary file not shown.
Binary file modified ts-lib/src/module.wasm
Binary file not shown.
70 changes: 40 additions & 30 deletions ts-lib/src/smol-string-packed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,33 @@ const exports = instance.exports as exportsType;
export function compressPacked(str: string) {
const { ptr, length } = copyToWasmBuffer(str, exports);

const ptrToCompressed = exports.compressPacked(ptr, length);
const ptrToFooter = exports.compressPacked(ptr, length);

exports.free(ptr, length);

const buffer = new Uint16Array(
exports.memory.buffer.slice(
ptrToCompressed,
ptrToCompressed +
(exports.memory.buffer.byteLength - ptrToCompressed)
)
const footer = new Uint16Array(
exports.memory.buffer.slice(ptrToFooter, ptrToFooter + 8)
);
const streamLength = (buffer.at(0)! << 16) + buffer.at(1)!;
// Includes the tokenCount at the start of the stream (2 * u16).
const compressedBuffer = buffer.slice(2, streamLength);
const streamLength = (footer.at(0)! << 16) + footer.at(1)!;
const capacity = (footer.at(2)! << 16) + footer.at(3)!;
const start = ptrToFooter - 2 * streamLength;

const compressed = Uint16ArraytoString(compressedBuffer);
// Includes the tokenCount at the end of the stream (2 * u16).
const compressed = new Uint16Array(
exports.memory.buffer.slice(start, ptrToFooter)
);

const r = Uint16ArraytoString(compressed);

exports.free(ptrToCompressed, streamLength);
exports.free(start, capacity);

return compressed;
return r;
}

export function decompressPacked(compressedStr: string) {
const tokenCount =
(compressedStr.charCodeAt(0)! << 16) + compressedStr.charCodeAt(1);
(compressedStr.charCodeAt(compressedStr.length - 2)! << 16) +
compressedStr.charCodeAt(compressedStr.length - 1);

let ptrToCompressed = exports.allocUint16(compressedStr.length - 2);
let compressed_buffer = new Uint16Array(
Expand All @@ -53,31 +55,39 @@ export function decompressPacked(compressedStr: string) {
compressedStr.length - 2
);

for (let i = 2; i < compressedStr.length; i++)
compressed_buffer[i - 2] = compressedStr.charCodeAt(i);
for (let i = 0; i < compressedStr.length - 2; i++)
compressed_buffer[i] = compressedStr.charCodeAt(i);

const ptrToDecompressedNullTerminated = exports.decompressPacked(
const ptrToFooter = exports.decompressPacked(
ptrToCompressed,
compressedStr.length,
compressedStr.length - 2,
tokenCount
);

exports.free(ptrToCompressed, compressedStr.length);
exports.free(ptrToCompressed, compressedStr.length - 2);

const decompressed_buffer = new Uint8Array(
exports.memory.buffer.slice(
ptrToDecompressedNullTerminated,
ptrToDecompressedNullTerminated +
(exports.memory.buffer.byteLength -
ptrToDecompressedNullTerminated)
)
const footer = new Uint8Array(
exports.memory.buffer.slice(ptrToFooter, ptrToFooter + 8)
);
const decompressed_end = decompressed_buffer.indexOf(0);
const r = new TextDecoder().decode(
decompressed_buffer.slice(0, decompressed_end)
const streamLength =
(footer.at(0)! << 24) +
(footer.at(1)! << 16) +
(footer.at(2)! << 8) +
footer.at(3)!;
const capacity =
(footer.at(4)! << 24) +
(footer.at(5)! << 16) +
(footer.at(6)! << 8) +
footer.at(7)!;
const start = ptrToFooter - streamLength;

const decompressed = new Uint8Array(
exports.memory.buffer.slice(start, ptrToFooter)
);

exports.free(ptrToDecompressedNullTerminated, decompressed_end + 1);
const r = new TextDecoder().decode(decompressed);

exports.free(start, capacity);

return r;
}
54 changes: 31 additions & 23 deletions ts-lib/src/smol-string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,24 @@ const exports = instance.exports as exportsType;
export function compress(str: string) {
const { ptr, length } = copyToWasmBuffer(str, exports);

const ptrToCompressed = exports.compress(ptr, length);
const ptrToFooter = exports.compress(ptr, length);

exports.free(ptr, length);

const buffer = new Uint16Array(
exports.memory.buffer.slice(
ptrToCompressed,
ptrToCompressed +
(exports.memory.buffer.byteLength - ptrToCompressed)
)
const footer = new Uint16Array(
exports.memory.buffer.slice(ptrToFooter, ptrToFooter + 8)
);
const streamLength = (footer.at(0)! << 16) + footer.at(1)!;
const capacity = (footer.at(2)! << 16) + footer.at(3)!;
const start = ptrToFooter - 2 * streamLength;

const compressed = new Uint16Array(
exports.memory.buffer.slice(start, ptrToFooter)
);
const end = buffer.indexOf(0);
const compressed = buffer.slice(0, end);

const r = Uint16ArraytoString(compressed);

exports.free(ptrToCompressed, end + 1);
exports.free(start, capacity);

return r;
}
Expand All @@ -47,28 +48,35 @@ export function decompress(compressedStr: string) {
for (let i = 0; i < compressedStr.length; i++)
compressed_buffer[i] = compressedStr.charCodeAt(i);

const ptrToDecompressedNullTerminated = exports.decompress(
const ptrToFooter = exports.decompress(
ptrToCompressed,
compressedStr.length
);

exports.free(ptrToCompressed, compressedStr.length);

const decompressed_buffer = new Uint8Array(
exports.memory.buffer.slice(
ptrToDecompressedNullTerminated,
ptrToDecompressedNullTerminated +
(exports.memory.buffer.byteLength -
ptrToDecompressedNullTerminated)
)
const footer = new Uint8Array(
exports.memory.buffer.slice(ptrToFooter, ptrToFooter + 8)
);
const streamLength =
(footer.at(0)! << 24) +
(footer.at(1)! << 16) +
(footer.at(2)! << 8) +
footer.at(3)!;
const capacity =
(footer.at(4)! << 24) +
(footer.at(5)! << 16) +
(footer.at(6)! << 8) +
footer.at(7)!;
const start = ptrToFooter - streamLength;

const decompressed = new Uint8Array(
exports.memory.buffer.slice(start, ptrToFooter)
);
const decompressed_end = decompressed_buffer.indexOf(0);

exports.free(ptrToDecompressedNullTerminated, decompressed_end + 1);
const r = new TextDecoder().decode(decompressed);

const r = new TextDecoder().decode(
decompressed_buffer.slice(0, decompressed_end)
);
exports.free(start, capacity);

return r;
}

0 comments on commit 2947349

Please sign in to comment.