Skip to content

Commit

Permalink
Merge pull request #105 from shepmaster/xxh3-128
Browse files Browse the repository at this point in the history
Add XXH3 128-bit implementation
  • Loading branch information
shepmaster authored Dec 9, 2024
2 parents 725da3f + f382d8d commit e7c956f
Show file tree
Hide file tree
Showing 33 changed files with 4,154 additions and 1,938 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
runs-on: ubuntu-latest

env:
IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64
IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 xxhash3_128
FEATURE_SET: random serialize std alloc

steps:
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ members = [
#END-[workspace]

[features]
default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"]
default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"]

random = ["dep:rand"]

Expand All @@ -34,6 +34,7 @@ serialize = ["dep:serde"]
xxhash32 = []
xxhash64 = []
xxhash3_64 = []
xxhash3_128 = []

std = ["alloc"]
alloc = []
Expand Down
2 changes: 1 addition & 1 deletion comparison/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ harness = false
criterion = { version = "0.5.1", features = [] }
proptest = "1.5.0"
rand = "0.8.5"
twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] }
twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"] }
xx_hash-sys = { path = "../xx_hash-sys" }
96 changes: 84 additions & 12 deletions comparison/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ graphs are boring flat lines, so a table is used instead.

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 13.5 |
| C | 13.5 |
| Rust | 13.4 |
| C | 13.4 |

## x86_64

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 16.5 |
| C | 16.5 |
| Rust | 16.7 |
| C | 16.6 |


## Streaming data
Expand Down Expand Up @@ -75,7 +75,6 @@ Compares the **time taken** to hash 0 to 32 bytes of data.
/>
</a>


# xxHash3 (64-bit)

## Oneshot hashing
Expand All @@ -88,20 +87,21 @@ graphs are boring flat lines, so a table is used instead.

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 35.2 |
| Rust | 35.0 |
| C | 35.0 |
| C (scalar) | 21.2 |
| C (NEON) | 35.1 |
| C (NEON) | 35.0 |

### x86_64

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 58.6 |
| C | 25.0 |
| C (scalar) | 7.5 |
| Rust | 58.9 |
| C | 25.1 |
| C (scalar) | 7.6 |
| C (SSE2) | 25.1 |
| C (AVX2) | 57.8 |
| C (AVX2) | 58.4 |


## Streaming data

Expand Down Expand Up @@ -150,6 +150,78 @@ cluttering the graph and wasting benchmarking time.
/>
</a>

# xxHash3 (128-bit)

## Oneshot hashing

Compares the **speed** of hashing an entire buffer of data in one
function call. Data sizes from 256 KiB to 4 MiB are tested. These
graphs are boring flat lines, so a table is used instead.

### aarch64

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 34.4 |
| C | 34.8 |
| C (scalar) | 21.3 |
| C (NEON) | 34.6 |

### x86_64

| Implementation | Throughput (GiB/s) |
|----------------|--------------------|
| Rust | 58.3 |
| C | 25.6 |
| C (scalar) | 7.6 |
| C (SSE2) | 25.5 |
| C (AVX2) | 57.4 |

## Streaming data

Compares the **speed** of hashing a 1 MiB buffer of data split into
various chunk sizes.

### aarch64

<a href="./results/xxhash3_128-streaming-aarch64.svg">
<img
src="./results/xxhash3_128-streaming-aarch64.svg"
alt="xxHash3, 128-bit, streaming data, on an aarch64 processor"
/>
</a>

### x86_64

<a href="./results/xxhash3_128-streaming-x86_64.svg">
<img
src="./results/xxhash3_128-streaming-x86_64.svg"
alt="xxHash3, 128-bit, streaming data, on an x86_64 processor"
/>
</a>

## Small amounts of data

Compares the **time taken** to hash 0 to 230 bytes of
data. Representative samples are taken from similar times to avoid
cluttering the graph and wasting benchmarking time.

### aarch64

<a href="./results/xxhash3_128-tiny_data-aarch64.svg">
<img
src="./results/xxhash3_128-tiny_data-aarch64.svg"
alt="xxHash3, 128-bit, small data, on an aarch64 processor"
/>
</a>

### x86_64

<a href="./results/xxhash3_128-tiny_data-x86_64.svg">
<img
src="./results/xxhash3_128-tiny_data-x86_64.svg"
alt="xxHash3, 128-bit, small data, on an x86_64 processor"
/>
</a>

# Benchmark machines

## Overview
Expand All @@ -159,7 +231,7 @@ cluttering the graph and wasting benchmarking time.
| Apple M1 Max | 64 GiB | clang 16.0.0 |
| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 |

Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`.
Tests were run with `rustc 1.82.0 (f6e511eec 2024-10-15)`.

## Details

Expand Down
199 changes: 198 additions & 1 deletion comparison/benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,201 @@ mod xxhash3_64 {
criterion_group!(benches, tiny_data, oneshot, streaming);
}

criterion_main!(xxhash64::benches, xxhash3_64::benches);
mod xxhash3_128 {
use super::*;

/// Benchmarks one-shot XXH3 128-bit hashing of small inputs.
///
/// For every sampled input length, one benchmark is registered per
/// implementation (the C default, C scalar, the architecture-specific C
/// variants, and Rust) under an id of the form `impl-<name>/size-<NNN>`,
/// where the length is zero-padded to three digits.
fn tiny_data(criterion: &mut Criterion) {
    let (seed, data) = gen_data(240);
    let mut group = criterion.my_benchmark_group("xxhash3_128", "tiny_data");

    // Every length in `0..=data.len()` could be benchmarked instead, but
    // visual inspection of all the data points showed these lengths to be
    // examples of their nearby neighbors.
    let categories = [
        0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230,
    ];

    for size in categories {
        // Only the first `size` bytes of the generated buffer are hashed.
        let input = &data[..size];
        group.throughput(Throughput::Bytes(input.len() as _));

        group.bench_function(format!("impl-c/size-{size:03}"), |bencher| {
            bencher.iter(|| c::XxHash3_128::oneshot_with_seed(seed, input))
        });

        group.bench_function(format!("impl-c-scalar/size-{size:03}"), |bencher| {
            bencher.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, input))
        });

        // The NEON variant is only benchmarked on aarch64 builds.
        #[cfg(target_arch = "aarch64")]
        {
            group.bench_function(format!("impl-c-neon/size-{size:03}"), |bencher| {
                bencher.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, input))
            });
        }

        // The AVX2 and SSE2 variants are only benchmarked on x86_64 builds.
        #[cfg(target_arch = "x86_64")]
        {
            group.bench_function(format!("impl-c-avx2/size-{size:03}"), |bencher| {
                bencher.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, input))
            });

            group.bench_function(format!("impl-c-sse2/size-{size:03}"), |bencher| {
                bencher.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, input))
            });
        }

        group.bench_function(format!("impl-rust/size-{size:03}"), |bencher| {
            bencher.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, input))
        });
    }

    group.finish();
}

/// Benchmarks one-shot XXH3 128-bit hashing of large buffers.
///
/// A buffer of `BIG_DATA_SIZE` bytes is generated once; each benchmarked
/// size hashes a prefix of it. One benchmark is registered per
/// implementation and size under an id of the form
/// `impl-<name>/size-<NNNNNNN>` (length zero-padded to seven digits).
fn oneshot(criterion: &mut Criterion) {
    let (seed, data) = gen_data(BIG_DATA_SIZE);
    let mut group = criterion.my_benchmark_group("xxhash3_128", "oneshot");

    // Walk the sizes yielded by `half_sizes`, stopping at the first one that
    // falls below `MIN_BIG_DATA_SIZE`.
    let sizes = half_sizes(data.len()).take_while(|&candidate| candidate >= MIN_BIG_DATA_SIZE);

    for size in sizes {
        let input = &data[..size];
        group.throughput(Throughput::Bytes(input.len() as _));

        group.bench_function(format!("impl-c/size-{size:07}"), |bencher| {
            bencher.iter(|| c::XxHash3_128::oneshot_with_seed(seed, input))
        });

        group.bench_function(format!("impl-c-scalar/size-{size:07}"), |bencher| {
            bencher.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, input))
        });

        // The NEON variant is only benchmarked on aarch64 builds.
        #[cfg(target_arch = "aarch64")]
        {
            group.bench_function(format!("impl-c-neon/size-{size:07}"), |bencher| {
                bencher.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, input))
            });
        }

        // The AVX2 and SSE2 variants are only benchmarked on x86_64 builds.
        #[cfg(target_arch = "x86_64")]
        {
            group.bench_function(format!("impl-c-avx2/size-{size:07}"), |bencher| {
                bencher.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, input))
            });

            group.bench_function(format!("impl-c-sse2/size-{size:07}"), |bencher| {
                bencher.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, input))
            });
        }

        group.bench_function(format!("impl-rust/size-{size:07}"), |bencher| {
            bencher.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, input))
        });
    }

    group.finish();
}

/// Benchmarks streaming XXH3 128-bit hashing of a fixed-size buffer that is
/// fed to the hasher in chunks.
///
/// The same `1024 * 1024`-byte buffer is hashed for every chunk size yielded
/// by `half_sizes(size)`. Each implementation is registered under an id of
/// the form `impl-<name>/size-<NNNNNNN>/chunk_size-<NNNNNNN>`.
fn streaming(criterion: &mut Criterion) {
    let mut group = criterion.my_benchmark_group("xxhash3_128", "streaming");

    let size = 1024 * 1024;
    let (seed, data) = gen_data(size);

    for chunk_size in half_sizes(size) {
        // Split the buffer up front so the measured closures only pay for
        // hasher construction, the writes, and finalization.
        let chunks: Vec<_> = data.chunks(chunk_size).collect();

        // Throughput is reported against the whole buffer, not per chunk.
        group.throughput(Throughput::Bytes(size as _));

        // NOTE(review): the C wrappers below are finalized with `finish()`
        // while the Rust hasher uses `finish_128()`; presumably both yield
        // the full 128-bit result - confirm against the wrapper crate.

        group.bench_function(
            format!("impl-c/size-{size:07}/chunk_size-{chunk_size:07}"),
            |bencher| {
                bencher.iter(|| {
                    let mut state = c::XxHash3_128::with_seed(seed);
                    for chunk in &chunks {
                        state.write(chunk);
                    }
                    state.finish()
                })
            },
        );

        group.bench_function(
            format!("impl-c-scalar/size-{size:07}/chunk_size-{chunk_size:07}"),
            |bencher| {
                bencher.iter(|| {
                    let mut state = c::scalar::XxHash3_128::with_seed(seed);
                    for chunk in &chunks {
                        state.write(chunk);
                    }
                    state.finish()
                })
            },
        );

        // The NEON variant is only benchmarked on aarch64 builds.
        #[cfg(target_arch = "aarch64")]
        {
            group.bench_function(
                format!("impl-c-neon/size-{size:07}/chunk_size-{chunk_size:07}"),
                |bencher| {
                    bencher.iter(|| {
                        let mut state = c::neon::XxHash3_128::with_seed(seed);
                        for chunk in &chunks {
                            state.write(chunk);
                        }
                        state.finish()
                    })
                },
            );
        }

        // The AVX2 and SSE2 variants are only benchmarked on x86_64 builds.
        #[cfg(target_arch = "x86_64")]
        {
            group.bench_function(
                format!("impl-c-avx2/size-{size:07}/chunk_size-{chunk_size:07}"),
                |bencher| {
                    bencher.iter(|| {
                        let mut state = c::avx2::XxHash3_128::with_seed(seed);
                        for chunk in &chunks {
                            state.write(chunk);
                        }
                        state.finish()
                    })
                },
            );

            group.bench_function(
                format!("impl-c-sse2/size-{size:07}/chunk_size-{chunk_size:07}"),
                |bencher| {
                    bencher.iter(|| {
                        let mut state = c::sse2::XxHash3_128::with_seed(seed);
                        for chunk in &chunks {
                            state.write(chunk);
                        }
                        state.finish()
                    })
                },
            );
        }

        group.bench_function(
            format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:07}"),
            |bencher| {
                bencher.iter(|| {
                    let mut state = rust::XxHash3_128::with_seed(seed);
                    for chunk in &chunks {
                        state.write(chunk);
                    }
                    state.finish_128()
                })
            },
        );
    }

    group.finish();
}

criterion_group!(benches, tiny_data, oneshot, streaming);
}

criterion_main!(xxhash64::benches, xxhash3_64::benches, xxhash3_128::benches);
2 changes: 1 addition & 1 deletion comparison/generate-graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ cpus = c(aarch64 = "Apple M1 Max", x86_64 = "AMD Ryzen 9 3950X")

common_theme = theme(legend.position = "inside", legend.position.inside = c(0.8, 0.2), plot.margin = unit(c(0.1, 1, 0.1, 0.1), 'cm'))

for (algo in c("xxhash64", "xxhash3_64")) {
for (algo in c("xxhash64", "xxhash3_64", "xxhash3_128")) {
message("# ", algo)

algo_data = data[data$algo == algo,]
Expand Down
Loading

0 comments on commit e7c956f

Please sign in to comment.