diff --git a/benches/throughput.rs b/benches/throughput.rs index fbe9baf..be5147a 100644 --- a/benches/throughput.rs +++ b/benches/throughput.rs @@ -5,20 +5,20 @@ use std::alloc::{alloc, dealloc, Layout}; use std::slice; use criterion::measurement::{WallTime}; -use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId}; use gxhash::*; use rand::Rng; fn benchmark(c: &mut BenchmarkGroup, data: &[u8], name: &str, delegate: F) where F: Fn(&[u8], i32) -> u64 { - for i in 1..8 { - let len = usize::pow(4, i); + for i in 1..16 { + let len = usize::pow(2, i); c.throughput(Throughput::Bytes(len as u64)); let aligned_slice = &data[0..len]; - c.bench_with_input(format!("{} bytes", len), aligned_slice, |bencher, input| { + c.bench_with_input(BenchmarkId::new(name, len), aligned_slice, |bencher, input| { bencher.iter(|| black_box(delegate(input, 0))) }); @@ -40,12 +40,19 @@ fn benchmark_all(c: &mut Criterion) { // Fill with random bytes rng.fill(slice); - let mut group = c.benchmark_group("hash algos"); + let mut group = c.benchmark_group("all"); let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic); group.plot_config(plot_config); - // GxHash - benchmark(&mut group, slice, "gxhash", gxhash64); + // GxHash0 + benchmark(&mut group, slice, "gxhash0", |data: &[u8], _: i32| -> u64 { + gxhash0_64(data, 0) + }); + + // GxHash1 + benchmark(&mut group, slice, "gxhash1", |data: &[u8], _: i32| -> u64 { + gxhash1_64(data, 0) + }); // AHash let build_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0); @@ -59,7 +66,7 @@ fn benchmark_all(c: &mut Criterion) { }); // XxHash (twox-hash) - benchmark(&mut group, slice, "xxhash (twox-hash)", |data: &[u8], seed: i32| -> u64 { + benchmark(&mut group, slice, "xxhash", |data: &[u8], seed: i32| -> u64 { twox_hash::xxh3::hash64_with_seed(data, seed as u64) }); diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs index 6ec6d9d..c0ac0c8 100644 --- a/src/gxhash/mod.rs +++ b/src/gxhash/mod.rs @@ -5,23 +5,39 @@ mod platform; pub use platform::*; #[inline] // To be disabled when profiling -pub fn gxhash32(input: &[u8], seed: i32) -> u32 { +pub fn gxhash0_32(input: &[u8], seed: i32) -> u32 { unsafe { - let p = &gxhash(input, seed) as *const state as *const u32; + let p = &gxhash::<0>(input, seed) as *const state as *const u32; *p } } #[inline] // To be disabled when profiling -pub fn gxhash64(input: &[u8], seed: i32) -> u64 { +pub fn gxhash0_64(input: &[u8], seed: i32) -> u64 { unsafe { - let p = &gxhash(input, seed) as *const state as *const u64; + let p = &gxhash::<0>(input, seed) as *const state as *const u64; + *p + } +} + +#[inline] // To be disabled when profiling +pub fn gxhash1_32(input: &[u8], seed: i32) -> u32 { + unsafe { + let p = &gxhash::<1>(input, seed) as *const state as *const u32; + *p + } +} + +#[inline] // To be disabled when profiling +pub fn gxhash1_64(input: &[u8], seed: i32) -> u64 { + unsafe { + let p = &gxhash::<1>(input, seed) as *const state as *const u64; *p } } #[inline] -fn gxhash(input: &[u8], seed: i32) -> state { +fn gxhash(input: &[u8], seed: i32) -> state { unsafe { const VECTOR_SIZE: isize = std::mem::size_of::() as isize; @@ -40,6 +56,14 @@ fn gxhash(input: &[u8], seed: i32) -> state { let mut remaining_blocks_count: isize = len / VECTOR_SIZE; let mut hash_vector: state = create_empty(); + // Choose compression function depending on version. + // Lower is faster, higher is more collision resistant. + let c = match N { + 0 => compress_0, + 1 => compress_1, + _ => compress_1 + }; + macro_rules! load_unaligned { ($($var:ident),+) => { $( @@ -64,19 +88,19 @@ fn gxhash(input: &[u8], seed: i32) -> state { prefetch(v); - s0 = compress(s0, v0); - s1 = compress(s1, v1); - s2 = compress(s2, v2); - s3 = compress(s3, v3); - s4 = compress(s4, v4); - s5 = compress(s5, v5); - s6 = compress(s6, v6); - s7 = compress(s7, v7); + s0 = c(s0, v0); + s1 = c(s1, v1); + s2 = c(s2, v2); + s3 = c(s3, v3); + s4 = c(s4, v4); + s5 = c(s5, v5); + s6 = c(s6, v6); + s7 = c(s7, v7); } - let a = compress(compress(s0, s1), compress(s2, s3)); - let b = compress(compress(s4, s5), compress(s6, s7)); - hash_vector = compress(a, b); + let a = c(c(s0, s1), c(s2, s3)); + let b = c(c(s4, s5), c(s6, s7)); + hash_vector = c(a, b); remaining_blocks_count -= unrollable_blocks_count; } @@ -85,13 +109,13 @@ fn gxhash(input: &[u8], seed: i32) -> state { while likely((v as usize) < end_address) { load_unaligned!(v0); - hash_vector = compress(hash_vector, v0); + hash_vector = c(hash_vector, v0); } let remaining_bytes = len & (VECTOR_SIZE - 1); if likely(remaining_bytes > 0) { let partial_vector = get_partial(v, remaining_bytes); - hash_vector = compress(hash_vector, partial_vector); + hash_vector = c(hash_vector, partial_vector); } finalize(hash_vector, seed) @@ -108,12 +132,12 @@ mod tests { fn all_blocks_are_consumed() { let mut bytes = [42u8; 1200]; - let ref_hash = gxhash32(&bytes, 0); + let ref_hash = gxhash0_32(&bytes, 0); for i in 0..bytes.len() { let swap = bytes[i]; bytes[i] = 82; - let new_hash = gxhash32(&bytes, 0); + let new_hash = gxhash0_32(&bytes, 0); bytes[i] = swap; assert_ne!(ref_hash, new_hash, "byte {i} not processed"); @@ -130,13 +154,14 @@ mod tests { let mut ref_hash = 0; for i in 32..100 { - let new_hash = gxhash32(&mut bytes[..i], 0); + let new_hash = gxhash0_32(&mut bytes[..i], 0); assert_ne!(ref_hash, new_hash, "Same hash at size {i} ({new_hash})"); ref_hash = new_hash; } } #[test] + // Test collisions for all possible inputs of size n bits with m bits set fn test_collisions_bits() { let mut bytes = [0u8; 120]; let bits_to_set = 2; @@ -160,7 +185,7 @@ mod tests { } i += 1; - set.insert(gxhash64(&bytes, 0)); + set.insert(gxhash0_64(&bytes, 0)); // for &byte in bytes.iter() { // print!("{:08b}", byte); // } @@ -198,8 +223,8 @@ mod tests { #[test] fn hash_of_zero_is_not_zero() { - assert_ne!(0, gxhash32(&[0u8; 0], 0)); - assert_ne!(0, gxhash32(&[0u8; 1], 0)); - assert_ne!(0, gxhash32(&[0u8; 1200], 0)); + assert_ne!(0, gxhash0_32(&[0u8; 0], 0)); + assert_ne!(0, gxhash0_32(&[0u8; 1], 0)); + assert_ne!(0, gxhash0_32(&[0u8; 1200], 0)); } } \ No newline at end of file diff --git a/src/gxhash/platform/arm_128.rs b/src/gxhash/platform/arm_128.rs index d63f499..e888226 100644 --- a/src/gxhash/platform/arm_128.rs +++ b/src/gxhash/platform/arm_128.rs @@ -63,7 +63,7 @@ unsafe fn get_partial_safe(data: *const i8, len: usize) -> state { } #[inline(always)] -pub unsafe fn compress(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub unsafe fn compress_1(a: int8x16_t, b: int8x16_t) -> int8x16_t { // 37 GiB/s let keys_1 = vld1q_u32([0xFC3BC28E, 0x89C222E5, 0xB09D3E21, 0xF2784542].as_ptr()); let keys_2 = vld1q_u32([0x03FCE279, 0xCB6B2E9B, 0xB361DC58, 0x39136BD9].as_ptr()); @@ -128,6 +128,11 @@ pub unsafe fn compress(a: int8x16_t, b: int8x16_t) -> int8x16_t { //ve } +#[inline(always)] +pub unsafe fn compress_0(a: int8x16_t, b: int8x16_t) -> int8x16_t { + vreinterpretq_s8_u8(aes_encrypt(vreinterpretq_u8_s8(a), vreinterpretq_u8_s8(b))) +} + #[inline(always)] // See https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a unsafe fn aes_encrypt(data: uint8x16_t, keys: uint8x16_t) -> uint8x16_t { diff --git a/src/main.rs b/src/main.rs index 80347e5..26961b9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ fn main() { let mut sum: u32 = 0; for _ in 0..100_000_000 { - sum = sum.wrapping_add(gxhash32(&random_bytes, 0)); + sum = sum.wrapping_add(gxhash0_32(&random_bytes, 0)); } println!("{}", sum);