Skip to content

Commit

Permalink
More tests on the zig side. Rudimentary fuzzing. Handle empty strings.
Browse files Browse the repository at this point in the history
  • Loading branch information
Senryoku committed Nov 1, 2023
1 parent 2300c55 commit 7700f83
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 46 deletions.
71 changes: 47 additions & 24 deletions src/lzw.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
const std = @import("std");

pub fn compress(comptime TokenType: type, comptime reserved_codepoints: TokenType, comptime sentinel_token: TokenType, data: []const u8, allocator: std.mem.Allocator) !std.ArrayList(TokenType) {
if (data.len == 0) return std.ArrayList(TokenType).init(allocator);

const first_allocated_token: TokenType = comptime std.math.maxInt(u8) + 1 + reserved_codepoints;
var next_value: TokenType = first_allocated_token;
var context = std.StringHashMap(TokenType).init(allocator);
Expand Down Expand Up @@ -49,6 +51,8 @@ pub fn compress(comptime TokenType: type, comptime reserved_codepoints: TokenTyp
}

pub fn decompress(comptime TokenType: type, comptime reserved_codepoints: TokenType, comptime sentinel_token: TokenType, data: []const TokenType, allocator: std.mem.Allocator) !std.ArrayList(u8) {
if (data.len == 0) return std.ArrayList(u8).init(allocator);

const first_allocated_token: TokenType = comptime std.math.maxInt(u8) + 1 + reserved_codepoints;
var next_value: TokenType = first_allocated_token;
var context = std.ArrayList(?[]u8).init(allocator);
Expand All @@ -57,7 +61,7 @@ pub fn decompress(comptime TokenType: type, comptime reserved_codepoints: TokenT
context.appendNTimesAssumeCapacity(null, std.math.maxInt(TokenType));

// FIXME: We need to make sure pointers to that buffer will be stable for the slices in context to stay valid.
var output = try std.ArrayList(u8).initCapacity(allocator, 20 * data.len);
var output = try std.ArrayList(u8).initCapacity(allocator, 24 * data.len);
output.appendAssumeCapacity(@intCast(data[0] - reserved_codepoints));

context.items[data[0]] = output.items[0..1];
Expand Down Expand Up @@ -119,6 +123,8 @@ fn testRound(str: []const u8) !void {
}

test "basic" {
try testRound("");
try testRound("a");
try testRound("aa");
try testRound("aaaa");
try testRound("aaaaaa");
Expand All @@ -132,40 +138,57 @@ test "basic" {
try testRound("33337373737");
try testRound("3333737373700000000000000000000");
try testRound("3333773737373777777373773737373");
}

test "fuzzing" {
// Doesn't ensure that the string is valid UTF-8, but it should not matter.
var rng = std.rand.DefaultPrng.init(42);

{
var str: [100]u8 = undefined;
for (0..100) |i| {
str[i] = rng.random().int(u8);
}
try testRound(&str);
}
{
var str: [1000]u8 = undefined;
for (0..1000) |i| {
str[i] = rng.random().int(u8);
}
try testRound(&str);
// Note: In the future, use std.testing.random_seed. See https://github.com/ziglang/zig/issues/17609.
const seed = std.crypto.random.int(u64);
errdefer std.debug.print("\nFuzzing Test FAILED\n\tSeed: {d}\n", .{seed});
var rng = std.rand.DefaultPrng.init(seed);
for (0..10) |_| {
const length = rng.random().intRangeAtMost(usize, 0, 10_000_000); // Up to ~10MB
var str = try std.testing.allocator.alloc(u8, length);
defer std.testing.allocator.free(str);
rng.fill(str);
try testRound(str);
}
}

test "json small" {
const str = try std.fs.cwd().readFileAlloc(std.testing.allocator, "test/data/json_small.json", 1e8);
// Reads the file at `path` (relative to the current working directory,
// at most 1e8 bytes) and hands its contents to testRound.
fn testFile(path: []const u8) !void {
    const max_bytes = 100_000_000;
    const allocator = std.testing.allocator;
    const contents = try std.fs.cwd().readFileAlloc(allocator, path, max_bytes);
    defer allocator.free(contents);
    try testRound(contents);
}

// Runs the file-based round-trip check on the 64KB JSON fixture.
test "json 64KB" {
    const fixture_path = "test/data/64KB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the 128KB JSON fixture.
test "json 128KB" {
    const fixture_path = "test/data/128KB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the 256KB JSON fixture.
test "json 256KB" {
    const fixture_path = "test/data/256KB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the 512KB JSON fixture.
test "json 512KB" {
    const fixture_path = "test/data/512KB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the 1MB JSON fixture.
test "json 1MB" {
    const fixture_path = "test/data/1MB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the 5MB JSON fixture.
test "json 5MB" {
    const fixture_path = "test/data/5MB.json";
    try testFile(fixture_path);
}

// Runs the file-based round-trip check on the "real world medium" JSON fixture.
// Delegates to testFile instead of repeating the read/free/round-trip steps
// inline, so the fixture is loaded and checked exactly once.
test "real world medium" {
    try testFile("test/data/rw_medium.json");
}

// Runs the file-based round-trip check on the "real world large" JSON fixture.
// Delegates to testFile instead of repeating the read/free/round-trip steps
// inline, so the fixture is loaded and checked exactly once.
test "real world large" {
    try testFile("test/data/rw_large.json");
}
80 changes: 58 additions & 22 deletions src/lzwPacked.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ pub const BitPacker = bp.BitPacker(u16, u20, 9, 0);
pub const sentinel_token = std.math.maxInt(BitPacker.ValueType);

pub fn compressPacked(data: []const u8, allocator: std.mem.Allocator) !BitPacker {
if (data.len == 0) return BitPacker.init(allocator);

const first_allocated_token: BitPacker.ValueType = comptime std.math.maxInt(u8) + 1;
var next_value: BitPacker.ValueType = first_allocated_token;
var context = std.StringHashMap(BitPacker.ValueType).init(allocator);
Expand Down Expand Up @@ -56,9 +58,7 @@ pub fn compressPacked(data: []const u8, allocator: std.mem.Allocator) !BitPacker
return output;
}

test "json small, packed" {
const str = try std.fs.cwd().readFileAlloc(std.testing.allocator, "test/data/json_small.json", 1e8);
defer std.testing.allocator.free(str);
fn testRound(str: []const u8) !void {
var compressed = try compressPacked(str, std.testing.allocator);
defer compressed.deinit();
const unpacked = try compressed.unpackWithReset(std.testing.allocator, sentinel_token);
Expand All @@ -68,26 +68,62 @@ test "json small, packed" {
try std.testing.expectEqualSlices(u8, str, decompressed.items);
}

test "real world medium, packed" {
const str = try std.fs.cwd().readFileAlloc(std.testing.allocator, "test/data/rw_medium.json", 1e8);
fn testFile(path: []const u8) !void {
const str = try std.fs.cwd().readFileAlloc(std.testing.allocator, path, 1e8);
defer std.testing.allocator.free(str);
var compressed = try compressPacked(str, std.testing.allocator);
defer compressed.deinit();
const unpacked = try compressed.unpackWithReset(std.testing.allocator, sentinel_token);
defer std.testing.allocator.free(unpacked);
const decompressed = try impl.decompress(BitPacker.ValueType, 0, sentinel_token, unpacked, std.testing.allocator);
defer decompressed.deinit();
try std.testing.expectEqualSlices(u8, str, decompressed.items);
try testRound(str);
}

test "real world large, packed" {
const str = try std.fs.cwd().readFileAlloc(std.testing.allocator, "test/data/rw_large.json", 1e8);
defer std.testing.allocator.free(str);
var compressed = try compressPacked(str, std.testing.allocator);
defer compressed.deinit();
const unpacked = try compressed.unpackWithReset(std.testing.allocator, sentinel_token);
defer std.testing.allocator.free(unpacked);
const decompressed = try impl.decompress(BitPacker.ValueType, 0, sentinel_token, unpacked, std.testing.allocator);
defer decompressed.deinit();
try std.testing.expectEqualSlices(u8, str, decompressed.items);
// Round-trips the empty string and short runs of a single repeated byte.
test "basic" {
    const inputs = [_][]const u8{ "", "a", "aa", "aaa" };
    for (inputs) |input| {
        try testRound(input);
    }
}

// Round-trips ten randomly generated byte strings through testRound.
// Doesn't ensure that the string is valid UTF-8, but it should not matter.
// Note: In the future, use std.testing.random_seed. See https://github.com/ziglang/zig/issues/17609.
test "fuzzing" {
    // A fresh seed is drawn on every run and printed only if the test fails.
    const seed = std.crypto.random.int(u64);
    errdefer std.debug.print("\nFuzzing Test FAILED\n\tSeed: {d}\n", .{seed});
    var rng = std.rand.DefaultPrng.init(seed);
    for (0..10) |_| {
        const length = rng.random().intRangeAtMost(usize, 0, 10_000_000); // Up to ~10MB
        // `const`: the slice itself is never reassigned; only the bytes it points to are written.
        const str = try std.testing.allocator.alloc(u8, length);
        defer std.testing.allocator.free(str);
        rng.fill(str);
        try testRound(str);
    }
}

// Runs the file-based packed round-trip check on the 64KB JSON fixture.
test "json 64KB" {
    const fixture_path = "test/data/64KB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the 128KB JSON fixture.
test "json 128KB" {
    const fixture_path = "test/data/128KB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the 256KB JSON fixture.
test "json 256KB" {
    const fixture_path = "test/data/256KB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the 512KB JSON fixture.
test "json 512KB" {
    const fixture_path = "test/data/512KB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the 1MB JSON fixture.
test "json 1MB" {
    const fixture_path = "test/data/1MB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the 5MB JSON fixture.
test "json 5MB" {
    const fixture_path = "test/data/5MB.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the "real world medium" JSON fixture.
test "real world medium" {
    const fixture_path = "test/data/rw_medium.json";
    try testFile(fixture_path);
}

// Runs the file-based packed round-trip check on the "real world large" JSON fixture.
test "real world large" {
    const fixture_path = "test/data/rw_large.json";
    try testFile(fixture_path);
}

0 comments on commit 7700f83

Please sign in to comment.