From 510a31086706a442568b670b06a50a203feefede Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 23 Oct 2024 16:51:52 -0400 Subject: [PATCH 01/20] Move common XXH3 code to a shared location --- src/lib.rs | 4 + src/xxhash3.rs | 347 +++++++++++ src/xxhash3/large.rs | 313 ++++++++++ src/{xxhash3_64 => xxhash3/large}/avx2.rs | 0 src/{xxhash3_64 => xxhash3/large}/neon.rs | 3 +- src/{xxhash3_64 => xxhash3/large}/scalar.rs | 3 +- src/{xxhash3_64 => xxhash3/large}/sse2.rs | 0 src/{xxhash3_64 => xxhash3}/secret.rs | 0 src/xxhash3_64.rs | 650 +------------------- 9 files changed, 679 insertions(+), 641 deletions(-) create mode 100644 src/xxhash3.rs create mode 100644 src/xxhash3/large.rs rename src/{xxhash3_64 => xxhash3/large}/avx2.rs (100%) rename src/{xxhash3_64 => xxhash3/large}/neon.rs (98%) rename src/{xxhash3_64 => xxhash3/large}/scalar.rs (96%) rename src/{xxhash3_64 => xxhash3/large}/sse2.rs (100%) rename src/{xxhash3_64 => xxhash3}/secret.rs (100%) diff --git a/src/lib.rs b/src/lib.rs index ca6003851..09976b8b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![doc = include_str!("../README.md")] #![deny(rust_2018_idioms)] #![deny(missing_docs)] +#![deny(unnameable_types)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] @@ -26,6 +27,9 @@ pub mod xxhash64; #[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub use xxhash64::Hasher as XxHash64; +#[cfg(feature = "xxhash3_64")] +mod xxhash3; + #[cfg(feature = "xxhash3_64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub mod xxhash3_64; diff --git a/src/xxhash3.rs b/src/xxhash3.rs new file mode 100644 index 000000000..b3aeb0749 --- /dev/null +++ b/src/xxhash3.rs @@ -0,0 +1,347 @@ +use core::slice; + +use crate::IntoU128 as _; + +pub mod large; + +pub(crate) use large::dispatch; +pub use large::{Algorithm, StripeAccumulator, Vector}; + +pub mod secret; + +pub use secret::Secret; + +pub mod primes { + pub const PRIME32_1: u64 = 0x9E3779B1; + pub const PRIME32_2: 
u64 = 0x85EBCA77; + pub const PRIME32_3: u64 = 0xC2B2AE3D; + pub const PRIME64_1: u64 = 0x9E3779B185EBCA87; + pub const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; + pub const PRIME64_3: u64 = 0x165667B19E3779F9; + pub const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; + pub const PRIME64_5: u64 = 0x27D4EB2F165667C5; + pub const PRIME_MX1: u64 = 0x165667919E3779F9; + pub const PRIME_MX2: u64 = 0x9FB21C651E98DF25; +} + +pub const CUTOFF: usize = 240; + +pub const DEFAULT_SEED: u64 = 0; + +/// The length of the default secret. +pub const DEFAULT_SECRET_LENGTH: usize = 192; + +pub type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; + +pub const DEFAULT_SECRET_RAW: DefaultSecret = [ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +]; + +// Safety: The default secret is long enough +pub const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; + +/// # 
Correctness +/// +/// This function assumes that the incoming buffer has been populated +/// with the default secret. +#[inline] +pub fn derive_secret(seed: u64, secret: &mut DefaultSecret) { + if seed == DEFAULT_SEED { + return; + } + + let (words, _) = secret.bp_as_chunks_mut(); + let (pairs, _) = words.bp_as_chunks_mut(); + + for [a_p, b_p] in pairs { + let a = u64::from_le_bytes(*a_p); + let b = u64::from_le_bytes(*b_p); + + let a = a.wrapping_add(seed); + let b = b.wrapping_sub(seed); + + *a_p = a.to_le_bytes(); + *b_p = b.to_le_bytes(); + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +#[derive(Debug)] +pub struct OneshotWithSecretError(pub(crate) secret::Error); + +impl core::error::Error for OneshotWithSecretError {} + +impl core::fmt::Display for OneshotWithSecretError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) + } +} + +macro_rules! assert_input_range { + ($min:literal.., $len:expr) => { + assert!($min <= $len); + }; + ($min:literal..=$max:literal, $len:expr) => { + assert!($min <= $len); + assert!($len <= $max); + }; +} +pub(crate) use assert_input_range; + +#[inline] +pub fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { + let data_words = to_u64s(data); + let secret_words = to_u64s(secret); + + let mul_result = { + let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); + let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); + + a.wrapping_mul(b) + }; + + mul_result.lower_half() ^ mul_result.upper_half() +} + +#[inline] +pub fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { + let (pair, _) = bytes.bp_as_chunks::<8>(); + [pair[0], pair[1]].map(u64::from_le_bytes) +} + +#[inline] +pub fn avalanche(mut x: u64) -> u64 { + x ^= x >> 37; + x = x.wrapping_mul(primes::PRIME_MX1); + x ^= x >> 32; + x +} + +#[inline] +pub fn avalanche_xxh64(mut x: u64) -> u64 { + x ^= x >> 33; + x = x.wrapping_mul(primes::PRIME64_2); + x ^= x 
>> 29; + x = x.wrapping_mul(primes::PRIME64_3); + x ^= x >> 32; + x +} + +#[inline] +pub fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { + match block.bp_as_chunks() { + ([stripes @ .., last], []) => (stripes, last), + (stripes, last) => (stripes, last), + } +} + +pub trait Halves { + type Output; + + fn upper_half(self) -> Self::Output; + fn lower_half(self) -> Self::Output; +} + +impl Halves for u64 { + type Output = u32; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 32) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +impl Halves for u128 { + type Output = u64; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 64) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +pub trait U8SliceExt { + fn first_u32(&self) -> Option; + + fn last_u32(&self) -> Option; + + fn first_u64(&self) -> Option; + + fn last_u64(&self) -> Option; +} + +impl U8SliceExt for [u8] { + #[inline] + fn first_u32(&self) -> Option { + self.first_chunk().copied().map(u32::from_le_bytes) + } + + #[inline] + fn last_u32(&self) -> Option { + self.last_chunk().copied().map(u32::from_le_bytes) + } + + #[inline] + fn first_u64(&self) -> Option { + self.first_chunk().copied().map(u64::from_le_bytes) + } + + #[inline] + fn last_u64(&self) -> Option { + self.last_chunk().copied().map(u64::from_le_bytes) + } +} + +pub trait SliceBackport { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); + + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); +} + +impl SliceBackport for [T] { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { + assert_ne!(N, 0); + let len = self.len() / N; + // Safety: `(len / N) * N` has to be less-than-or-equal to `len` + let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid 
elements are less-than-or-equal to the original + // slice. + let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; + (head, tail) + } + + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { + assert_ne!(N, 0); + let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` + let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. + let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; + (head, tail) + } + + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + assert_ne!(N, 0); + let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` + let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + // Safety: (1) `tail` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. + let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + (head, tail) + } +} + +#[cfg(test)] +pub mod test { + use std::array; + + use super::*; + + macro_rules! bytes { + ($($n: literal),* $(,)?) => { + &[$(&crate::xxhash3::test::gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] + }; + } + pub(crate) use bytes; + + pub fn gen_bytes() -> [u8; N] { + // Picking 251 as it's a prime number, which will hopefully + // help avoid incidental power-of-two alignment. 
+ array::from_fn(|i| (i % 251) as u8) + } + + #[test] + fn default_secret_is_valid() { + assert!(DEFAULT_SECRET.is_valid()) + } + + #[test] + fn backported_as_chunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_chunks::<1>(); + assert_eq!(a, &[[1], [2], [3], [4], [5]]); + assert_eq!(b, &[] as &[i32]); + + let (a, b) = x.bp_as_chunks::<2>(); + assert_eq!(a, &[[1, 2], [3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<3>(); + assert_eq!(a, &[[1, 2, 3]]); + assert_eq!(b, &[4, 5]); + + let (a, b) = x.bp_as_chunks::<4>(); + assert_eq!(a, &[[1, 2, 3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<5>(); + assert_eq!(a, &[[1, 2, 3, 4, 5]]); + assert_eq!(b, &[] as &[i32]); + + let (a, b) = x.bp_as_chunks::<6>(); + assert_eq!(a, &[] as &[[i32; 6]]); + assert_eq!(b, &[1, 2, 3, 4, 5]); + } + + #[test] + fn backported_as_rchunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_rchunks::<1>(); + assert_eq!(a, &[] as &[i32]); + assert_eq!(b, &[[1], [2], [3], [4], [5]]); + + let (a, b) = x.bp_as_rchunks::<2>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3], [4, 5]]); + + let (a, b) = x.bp_as_rchunks::<3>(); + assert_eq!(a, &[1, 2]); + assert_eq!(b, &[[3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<4>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<5>(); + assert_eq!(a, &[] as &[i32]); + assert_eq!(b, &[[1, 2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<6>(); + assert_eq!(a, &[1, 2, 3, 4, 5]); + assert_eq!(b, &[] as &[[i32; 6]]); + } +} diff --git a/src/xxhash3/large.rs b/src/xxhash3/large.rs new file mode 100644 index 000000000..13b1392f4 --- /dev/null +++ b/src/xxhash3/large.rs @@ -0,0 +1,313 @@ +use super::{ + assert_input_range, avalanche, primes::*, stripes_with_tail, Halves, Secret, SliceBackport as _, +}; + +use crate::{IntoU128, IntoU64}; + +// This module is not `cfg`-gated because it is used by some of the +// SIMD implementations. 
+pub mod scalar; + +#[cfg(target_arch = "aarch64")] +pub mod neon; + +#[cfg(target_arch = "x86_64")] +pub mod avx2; + +#[cfg(target_arch = "x86_64")] +pub mod sse2; + +macro_rules! dispatch { + ( + fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)? + [$($wheres:tt)*] + ) => { + #[inline] + fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name($crate::xxhash3::large::scalar::Impl, $($arg_name),*) + } + + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + #[cfg(all(target_arch = "aarch64", feature = "std"))] + unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + // Safety: The caller has ensured we have the NEON feature + unsafe { + $fn_name($crate::xxhash3::large::neon::Impl::new_unchecked(), $($arg_name),*) + } + } + + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] + unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + // Safety: The caller has ensured we have the AVX2 feature + unsafe { + $fn_name($crate::xxhash3::large::avx2::Impl::new_unchecked(), $($arg_name),*) + } + } + + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] + unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
+ where + $($wheres)* + { + // Safety: The caller has ensured we have the SSE2 feature + unsafe { + $fn_name($crate::xxhash3::large::sse2::Impl::new_unchecked(), $($arg_name),*) + } + } + + // Now we invoke the right function + + #[cfg(_internal_xxhash3_force_neon)] + return unsafe { do_neon($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_avx2)] + return unsafe { do_avx2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_sse2)] + return unsafe { do_sse2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_scalar)] + return do_scalar($($arg_name),*); + + // This code can be unreachable if one of the `*_force_*` cfgs + // are set above, but that's the point. + #[allow(unreachable_code)] + { + #[cfg(all(target_arch = "aarch64", feature = "std"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + // Safety: We just ensured we have the NEON feature + return unsafe { do_neon($($arg_name),*) }; + } + } + + #[cfg(all(target_arch = "x86_64", feature = "std"))] + { + if is_x86_feature_detected!("avx2") { + // Safety: We just ensured we have the AVX2 feature + return unsafe { do_avx2($($arg_name),*) }; + } else if is_x86_feature_detected!("sse2") { + // Safety: We just ensured we have the SSE2 feature + return unsafe { do_sse2($($arg_name),*) }; + } + } + do_scalar($($arg_name),*) + } + }; +} +pub(crate) use dispatch; + +pub trait Vector: Copy { + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); + + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); +} + +#[rustfmt::skip] +const INITIAL_ACCUMULATORS: [u64; 8] = [ + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, +]; + +pub struct Algorithm(pub V); + +impl Algorithm +where + V: Vector, +{ + #[inline] + pub fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { + assert_input_range!(241.., input.len()); + let mut acc = INITIAL_ACCUMULATORS; + + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * 
stripes_per_block; + + let mut blocks = input.chunks_exact(block_size); + + let last_block = if blocks.remainder().is_empty() { + // Safety: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. + unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + + self.rounds(&mut acc, blocks, secret); + + let len = input.len(); + + let last_stripe = input.last_chunk().unwrap(); + self.finalize(acc, last_block, last_stripe, secret, len) + } + + #[inline] + fn rounds<'a>( + &self, + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &Secret, + ) { + for block in blocks { + let (stripes, _) = block.bp_as_chunks(); + + self.round(acc, stripes, secret); + } + } + + #[inline] + fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { + let secret_end = secret.last_stripe(); + + self.round_accumulate(acc, stripes, secret); + self.0.round_scramble(acc, secret_end); + } + + #[inline] + fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. 
+ unsafe { secret.stripe(i) } + }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + } + + #[inline] + pub fn finalize( + &self, + mut acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + len: usize, + ) -> u64 { + debug_assert!(!last_block.is_empty()); + self.last_round(&mut acc, last_block, last_stripe, secret); + + self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret) + } + + #[inline] + fn last_round( + &self, + acc: &mut [u64; 8], + block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + ) { + // Accumulation steps are run for the stripes in the last block, + // except for the last stripe (whether it is full or not) + let (stripes, _) = stripes_with_tail(block); + + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + unsafe { secret.stripe(i) } + }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + + let last_stripe_secret = secret.last_stripe_secret_better_name(); + self.0.accumulate(acc, last_stripe, last_stripe_secret); + } + + #[inline] + fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &Secret) -> u64 { + let secret = secret.final_secret(); + let (secrets, _) = secret.bp_as_chunks(); + let mut result = init_value; + for i in 0..4 { + // 64-bit by 64-bit multiplication to 128-bit full result + let mul_result = { + let sa = u64::from_le_bytes(secrets[i * 2]); + let sb = u64::from_le_bytes(secrets[i * 2 + 1]); + + let a = (acc[i * 2] ^ sa).into_u128(); + let b = (acc[i * 2 + 1] ^ sb).into_u128(); + a.wrapping_mul(b) + }; + result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + } + avalanche(result) + } +} + +/// Tracks which stripe we are currently on to know which part of the +/// secret we should be using. 
+#[derive(Copy, Clone)] +pub struct StripeAccumulator { + pub accumulator: [u64; 8], + current_stripe: usize, +} + +impl StripeAccumulator { + pub fn new() -> Self { + Self { + accumulator: INITIAL_ACCUMULATORS, + current_stripe: 0, + } + } + + #[inline] + pub fn process_stripe( + &mut self, + vector: impl Vector, + stripe: &[u8; 64], + n_stripes: usize, + secret: &Secret, + ) { + let Self { + accumulator, + current_stripe, + .. + } = self; + + // For each stripe + + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + let secret_stripe = unsafe { secret.stripe(*current_stripe) }; + vector.accumulate(accumulator, stripe, secret_stripe); + + *current_stripe += 1; + + // After a full block's worth + if *current_stripe == n_stripes { + let secret_end = secret.last_stripe(); + vector.round_scramble(accumulator, secret_end); + + *current_stripe = 0; + } + } +} diff --git a/src/xxhash3_64/avx2.rs b/src/xxhash3/large/avx2.rs similarity index 100% rename from src/xxhash3_64/avx2.rs rename to src/xxhash3/large/avx2.rs diff --git a/src/xxhash3_64/neon.rs b/src/xxhash3/large/neon.rs similarity index 98% rename from src/xxhash3_64/neon.rs rename to src/xxhash3/large/neon.rs index f86da1522..fc49726ab 100644 --- a/src/xxhash3_64/neon.rs +++ b/src/xxhash3/large/neon.rs @@ -1,6 +1,7 @@ use core::arch::aarch64::*; -use super::{SliceBackport as _, Vector, PRIME32_1}; +use super::Vector; +use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _}; #[derive(Copy, Clone)] pub struct Impl(()); diff --git a/src/xxhash3_64/scalar.rs b/src/xxhash3/large/scalar.rs similarity index 96% rename from src/xxhash3_64/scalar.rs rename to src/xxhash3/large/scalar.rs index 64f6f9fa4..77da53a51 100644 --- a/src/xxhash3_64/scalar.rs +++ b/src/xxhash3/large/scalar.rs @@ -1,4 +1,5 @@ -use super::{SliceBackport as _, Vector, PRIME32_1}; +use super::Vector; +use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _}; #[derive(Copy, Clone)] pub 
struct Impl; diff --git a/src/xxhash3_64/sse2.rs b/src/xxhash3/large/sse2.rs similarity index 100% rename from src/xxhash3_64/sse2.rs rename to src/xxhash3/large/sse2.rs diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3/secret.rs similarity index 100% rename from src/xxhash3_64/secret.rs rename to src/xxhash3/secret.rs diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 34327143d..c8d059f95 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -6,66 +6,16 @@ unsafe_op_in_unsafe_fn )] -use core::{hash, hint::assert_unchecked, slice}; +use core::{hash, hint::assert_unchecked}; -use crate::{IntoU128, IntoU32, IntoU64}; +use crate::{ + xxhash3::{primes::*, *}, + IntoU128 as _, IntoU32 as _, IntoU64 as _, +}; -mod secret; - -use secret::Secret; - -pub use secret::SECRET_MINIMUM_LENGTH; - -// This module is not `cfg`-gated because it is used by some of the -// SIMD implementations. -mod scalar; - -#[cfg(target_arch = "aarch64")] -mod neon; - -#[cfg(target_arch = "x86_64")] -mod avx2; - -#[cfg(target_arch = "x86_64")] -mod sse2; - -const PRIME32_1: u64 = 0x9E3779B1; -const PRIME32_2: u64 = 0x85EBCA77; -const PRIME32_3: u64 = 0xC2B2AE3D; -const PRIME64_1: u64 = 0x9E3779B185EBCA87; -const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; -const PRIME64_3: u64 = 0x165667B19E3779F9; -const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; -const PRIME64_5: u64 = 0x27D4EB2F165667C5; -const PRIME_MX1: u64 = 0x165667919E3779F9; -const PRIME_MX2: u64 = 0x9FB21C651E98DF25; - -const CUTOFF: usize = 240; - -const DEFAULT_SEED: u64 = 0; - -/// The length of the default secret. 
-pub const DEFAULT_SECRET_LENGTH: usize = 192; - -type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; - -const DEFAULT_SECRET_RAW: DefaultSecret = [ - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, -]; - -// Safety: The default secret is long enough -const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; +pub use crate::xxhash3::{ + secret::SECRET_MINIMUM_LENGTH, OneshotWithSecretError, DEFAULT_SECRET_LENGTH, +}; /// Calculates the 64-bit hash. #[derive(Clone)] @@ -131,19 +81,6 @@ impl Hasher { } } -/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] -/// bytes. 
-#[derive(Debug)] -pub struct OneshotWithSecretError(secret::Error); - -impl core::error::Error for OneshotWithSecretError {} - -impl core::fmt::Display for OneshotWithSecretError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.0.fmt(f) - } -} - const STRIPE_BYTES: usize = 64; const BUFFERED_STRIPES: usize = 4; const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; @@ -459,55 +396,6 @@ impl core::fmt::Display for SecretWithSeedError { } } -/// Tracks which stripe we are currently on to know which part of the -/// secret we should be using. -#[derive(Copy, Clone)] -struct StripeAccumulator { - accumulator: [u64; 8], - current_stripe: usize, -} - -impl StripeAccumulator { - fn new() -> Self { - Self { - accumulator: INITIAL_ACCUMULATORS, - current_stripe: 0, - } - } - - #[inline] - fn process_stripe( - &mut self, - vector: impl Vector, - stripe: &[u8; 64], - n_stripes: usize, - secret: &Secret, - ) { - let Self { - accumulator, - current_stripe, - .. - } = self; - - // For each stripe - - // Safety: The number of stripes is determined by the - // block size, which is determined by the secret size. - let secret_stripe = unsafe { secret.stripe(*current_stripe) }; - vector.accumulate(accumulator, stripe, secret_stripe); - - *current_stripe += 1; - - // After a full block's worth - if *current_stripe == n_stripes { - let secret_end = secret.last_stripe(); - vector.round_scramble(accumulator, secret_end); - - *current_stripe = 0; - } - } -} - /// A lower-level interface for computing a hash from streaming data. /// /// The algorithm requires a secret which can be a reasonably large @@ -542,108 +430,6 @@ impl RawHasher { } } -macro_rules! dispatch { - ( - fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)? - [$($wheres:tt)*] - ) => { - #[inline] - fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
- where - $($wheres)* - { - $fn_name(scalar::Impl, $($arg_name),*) - } - - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - #[cfg(all(target_arch = "aarch64", feature = "std"))] - unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? - where - $($wheres)* - { - // Safety: The caller has ensured we have the NEON feature - unsafe { - $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - #[cfg(all(target_arch = "x86_64", feature = "std"))] - unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? - where - $($wheres)* - { - // Safety: The caller has ensured we have the AVX2 feature - unsafe { - $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - #[cfg(all(target_arch = "x86_64", feature = "std"))] - unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? - where - $($wheres)* - { - // Safety: The caller has ensured we have the SSE2 feature - unsafe { - $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) - } - } - - // Now we invoke the right function - - #[cfg(_internal_xxhash3_force_neon)] - return unsafe { do_neon($($arg_name),*) }; - - #[cfg(_internal_xxhash3_force_avx2)] - return unsafe { do_avx2($($arg_name),*) }; - - #[cfg(_internal_xxhash3_force_sse2)] - return unsafe { do_sse2($($arg_name),*) }; - - #[cfg(_internal_xxhash3_force_scalar)] - return do_scalar($($arg_name),*); - - // This code can be unreachable if one of the `*_force_*` cfgs - // are set above, but that's the point. 
- #[allow(unreachable_code)] - { - #[cfg(all(target_arch = "aarch64", feature = "std"))] - { - if std::arch::is_aarch64_feature_detected!("neon") { - // Safety: We just ensured we have the NEON feature - return unsafe { do_neon($($arg_name),*) }; - } - } - - #[cfg(all(target_arch = "x86_64", feature = "std"))] - { - if is_x86_feature_detected!("avx2") { - // Safety: We just ensured we have the AVX2 feature - return unsafe { do_avx2($($arg_name),*) }; - } else if is_x86_feature_detected!("sse2") { - // Safety: We just ensured we have the SSE2 feature - return unsafe { do_sse2($($arg_name),*) }; - } - } - do_scalar($($arg_name),*) - } - }; -} - impl hash::Hasher for RawHasher where S: FixedBuffer, @@ -824,31 +610,6 @@ where } } -/// # Correctness -/// -/// This function assumes that the incoming buffer has been populated -/// with the default secret. -#[inline] -fn derive_secret(seed: u64, secret: &mut DefaultSecret) { - if seed == DEFAULT_SEED { - return; - } - - let (words, _) = secret.bp_as_chunks_mut(); - let (pairs, _) = words.bp_as_chunks_mut(); - - for [a_p, b_p] in pairs { - let a = u64::from_le_bytes(*a_p); - let b = u64::from_le_bytes(*b_p); - - let a = a.wrapping_add(seed); - let b = b.wrapping_sub(seed); - - *a_p = a.to_le_bytes(); - *b_p = b.to_le_bytes(); - } -} - #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { @@ -868,16 +629,6 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { } } -macro_rules! 
assert_input_range { - ($min:literal.., $len:expr) => { - assert!($min <= $len); - }; - ($min:literal..=$max:literal, $len:expr) => { - assert!($min <= $len); - assert!($len <= $max); - }; -} - #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.words_for_0(); @@ -1009,33 +760,6 @@ fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { avalanche(acc) } -#[inline] -fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { - #[inline] - fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { - let (pair, _) = bytes.bp_as_chunks::<8>(); - [pair[0], pair[1]].map(u64::from_le_bytes) - } - - let data_words = to_u64s(data); - let secret_words = to_u64s(secret); - - let mul_result = { - let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); - let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); - - a.wrapping_mul(b) - }; - - mul_result.lower_half() ^ mul_result.upper_half() -} - -#[rustfmt::skip] -const INITIAL_ACCUMULATORS: [u64; 8] = [ - PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, - PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, -]; - #[inline] fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { assert_input_range!(241.., input.len()); @@ -1050,288 +774,11 @@ fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } -struct Algorithm(V); - -impl Algorithm -where - V: Vector, -{ - #[inline] - fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { - assert_input_range!(241.., input.len()); - let mut acc = INITIAL_ACCUMULATORS; - - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; - - let mut blocks = input.chunks_exact(block_size); - - let last_block = if blocks.remainder().is_empty() { - // Safety: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. 
That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. - unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; - - self.rounds(&mut acc, blocks, secret); - - let len = input.len(); - - let last_stripe = input.last_chunk().unwrap(); - self.finalize(acc, last_block, last_stripe, secret, len) - } - - #[inline] - fn rounds<'a>( - &self, - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &Secret, - ) { - for block in blocks { - let (stripes, _) = block.bp_as_chunks(); - - self.round(acc, stripes, secret); - } - } - - #[inline] - fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - let secret_end = secret.last_stripe(); - - self.round_accumulate(acc, stripes, secret); - self.0.round_scramble(acc, secret_end); - } - - #[inline] - fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - let secrets = (0..stripes.len()).map(|i| { - // Safety: The number of stripes is determined by the - // block size, which is determined by the secret size. 
- unsafe { secret.stripe(i) } - }); - - for (stripe, secret) in stripes.iter().zip(secrets) { - self.0.accumulate(acc, stripe, secret); - } - } - - #[inline] - fn finalize( - &self, - mut acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &Secret, - len: usize, - ) -> u64 { - debug_assert!(!last_block.is_empty()); - self.last_round(&mut acc, last_block, last_stripe, secret); - - self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret) - } - - #[inline] - fn last_round( - &self, - acc: &mut [u64; 8], - block: &[u8], - last_stripe: &[u8; 64], - secret: &Secret, - ) { - // Accumulation steps are run for the stripes in the last block, - // except for the last stripe (whether it is full or not) - let (stripes, _) = stripes_with_tail(block); - - let secrets = (0..stripes.len()).map(|i| { - // Safety: The number of stripes is determined by the - // block size, which is determined by the secret size. - unsafe { secret.stripe(i) } - }); - - for (stripe, secret) in stripes.iter().zip(secrets) { - self.0.accumulate(acc, stripe, secret); - } - - let last_stripe_secret = secret.last_stripe_secret_better_name(); - self.0.accumulate(acc, last_stripe, last_stripe_secret); - } - - #[inline] - fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &Secret) -> u64 { - let secret = secret.final_secret(); - let (secrets, _) = secret.bp_as_chunks(); - let mut result = init_value; - for i in 0..4 { - // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result = { - let sa = u64::from_le_bytes(secrets[i * 2]); - let sb = u64::from_le_bytes(secrets[i * 2 + 1]); - - let a = (acc[i * 2] ^ sa).into_u128(); - let b = (acc[i * 2 + 1] ^ sb).into_u128(); - a.wrapping_mul(b) - }; - result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); - } - avalanche(result) - } -} - -#[inline] -fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { - match block.bp_as_chunks() { - ([stripes @ .., last], []) => 
(stripes, last), - (stripes, last) => (stripes, last), - } -} - -trait Vector: Copy { - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); - - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); -} - -#[inline] -fn avalanche(mut x: u64) -> u64 { - x ^= x >> 37; - x = x.wrapping_mul(PRIME_MX1); - x ^= x >> 32; - x -} - -#[inline] -fn avalanche_xxh64(mut x: u64) -> u64 { - x ^= x >> 33; - x = x.wrapping_mul(PRIME64_2); - x ^= x >> 29; - x = x.wrapping_mul(PRIME64_3); - x ^= x >> 32; - x -} - -trait Halves { - type Output; - - fn upper_half(self) -> Self::Output; - fn lower_half(self) -> Self::Output; -} - -impl Halves for u64 { - type Output = u32; - - #[inline] - fn upper_half(self) -> Self::Output { - (self >> 32) as _ - } - - #[inline] - fn lower_half(self) -> Self::Output { - self as _ - } -} - -impl Halves for u128 { - type Output = u64; - - #[inline] - fn upper_half(self) -> Self::Output { - (self >> 64) as _ - } - - #[inline] - fn lower_half(self) -> Self::Output { - self as _ - } -} - -trait U8SliceExt { - fn first_u32(&self) -> Option; - - fn last_u32(&self) -> Option; - - fn first_u64(&self) -> Option; - - fn last_u64(&self) -> Option; -} - -impl U8SliceExt for [u8] { - #[inline] - fn first_u32(&self) -> Option { - self.first_chunk().copied().map(u32::from_le_bytes) - } - - #[inline] - fn last_u32(&self) -> Option { - self.last_chunk().copied().map(u32::from_le_bytes) - } - - #[inline] - fn first_u64(&self) -> Option { - self.first_chunk().copied().map(u64::from_le_bytes) - } - - #[inline] - fn last_u64(&self) -> Option { - self.last_chunk().copied().map(u64::from_le_bytes) - } -} - -trait SliceBackport { - fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - - fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); - - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); -} - -impl SliceBackport for [T] { - fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { - assert_ne!(N, 0); - let len = self.len() / N; - // Safety: `(len 
/ N) * N` has to be less-than-or-equal to `len` - let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; - // Safety: (1) `head` points to valid data, (2) the alignment - // of an array and the individual type are the same, (3) the - // valid elements are less-than-or-equal to the original - // slice. - let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; - (head, tail) - } - - fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { - assert_ne!(N, 0); - let len = self.len() / N; - // Safety: `(len / N) * N` has to be less than or equal to `len` - let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; - // Safety: (1) `head` points to valid data, (2) the alignment - // of an array and the individual type are the same, (3) the - // valid elements are less-than-or-equal to the original - // slice. - let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; - (head, tail) - } - - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { - assert_ne!(N, 0); - let len = self.len() / N; - // Safety: `(len / N) * N` has to be less than or equal to `len` - let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; - // Safety: (1) `tail` points to valid data, (2) the alignment - // of an array and the individual type are the same, (3) the - // valid elements are less-than-or-equal to the original - // slice. - let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; - (head, tail) - } -} - #[cfg(test)] mod test { - use std::{array, hash::Hasher as _}; + use std::hash::Hasher as _; + + use crate::xxhash3::test::bytes; use super::*; @@ -1342,11 +789,6 @@ mod test { const EMPTY_BYTES: [u8; 0] = []; - #[test] - fn default_secret_is_valid() { - assert!(DEFAULT_SECRET.is_valid()) - } - #[test] fn secret_buffer_default_is_valid() { assert!(SecretBuffer::default().is_valid()); @@ -1362,18 +804,6 @@ mod test { assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) } - macro_rules! 
bytes { - ($($n: literal),* $(,)?) => { - &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] - }; - } - - fn gen_bytes() -> [u8; N] { - // Picking 251 as it's a prime number, which will hopefully - // help avoid incidental power-of-two alignment. - array::from_fn(|i| (i % 251) as u8) - } - fn hash_byte_by_byte(input: &[u8]) -> u64 { let mut hasher = Hasher::new(); for byte in input.chunks(1) { @@ -1622,62 +1052,4 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } - - #[test] - fn backported_as_chunks() { - let x = [1, 2, 3, 4, 5]; - - let (a, b) = x.bp_as_chunks::<1>(); - assert_eq!(a, &[[1], [2], [3], [4], [5]]); - assert_eq!(b, &[] as &[i32]); - - let (a, b) = x.bp_as_chunks::<2>(); - assert_eq!(a, &[[1, 2], [3, 4]]); - assert_eq!(b, &[5]); - - let (a, b) = x.bp_as_chunks::<3>(); - assert_eq!(a, &[[1, 2, 3]]); - assert_eq!(b, &[4, 5]); - - let (a, b) = x.bp_as_chunks::<4>(); - assert_eq!(a, &[[1, 2, 3, 4]]); - assert_eq!(b, &[5]); - - let (a, b) = x.bp_as_chunks::<5>(); - assert_eq!(a, &[[1, 2, 3, 4, 5]]); - assert_eq!(b, &[] as &[i32]); - - let (a, b) = x.bp_as_chunks::<6>(); - assert_eq!(a, &[] as &[[i32; 6]]); - assert_eq!(b, &[1, 2, 3, 4, 5]); - } - - #[test] - fn backported_as_rchunks() { - let x = [1, 2, 3, 4, 5]; - - let (a, b) = x.bp_as_rchunks::<1>(); - assert_eq!(a, &[] as &[i32]); - assert_eq!(b, &[[1], [2], [3], [4], [5]]); - - let (a, b) = x.bp_as_rchunks::<2>(); - assert_eq!(a, &[1]); - assert_eq!(b, &[[2, 3], [4, 5]]); - - let (a, b) = x.bp_as_rchunks::<3>(); - assert_eq!(a, &[1, 2]); - assert_eq!(b, &[[3, 4, 5]]); - - let (a, b) = x.bp_as_rchunks::<4>(); - assert_eq!(a, &[1]); - assert_eq!(b, &[[2, 3, 4, 5]]); - - let (a, b) = x.bp_as_rchunks::<5>(); - assert_eq!(a, &[] as &[i32]); - assert_eq!(b, &[[1, 2, 3, 4, 5]]); - - let (a, b) = x.bp_as_rchunks::<6>(); - assert_eq!(a, &[1, 2, 3, 4, 5]); - assert_eq!(b, &[] as &[[i32; 6]]); - } } From 1a08cec2ee822ee54bf8c620c048cb0a803292bb Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Mon, 28 Oct 2024 14:18:19 -0400 Subject: [PATCH 02/20] Move streaming code to a shared location Beyond moving code, this also includes 1. Small renaming steps to clarify operations that are 64-bit specific. 1. Allow choosing the finalization function as that differs between 64-bit and 128-bit. --- src/xxhash3.rs | 14 +- src/xxhash3/large.rs | 63 +---- src/xxhash3/streaming.rs | 561 +++++++++++++++++++++++++++++++++++++++ src/xxhash3_64.rs | 483 +++------------------------------ 4 files changed, 614 insertions(+), 507 deletions(-) create mode 100644 src/xxhash3/streaming.rs diff --git a/src/xxhash3.rs b/src/xxhash3.rs index b3aeb0749..aa57f73e7 100644 --- a/src/xxhash3.rs +++ b/src/xxhash3.rs @@ -5,11 +5,21 @@ use crate::IntoU128 as _; pub mod large; pub(crate) use large::dispatch; -pub use large::{Algorithm, StripeAccumulator, Vector}; +pub use large::{Algorithm, Vector}; pub mod secret; -pub use secret::Secret; +pub use secret::{Secret, SECRET_MINIMUM_LENGTH}; + +mod streaming; + +pub use streaming::{ + Finalize, FixedBuffer, FixedMutBuffer, RawHasherCore, SecretBuffer, SecretTooShortError, + SecretWithSeedError, +}; + +#[cfg(feature = "alloc")] +pub use streaming::AllocRawHasher; pub mod primes { pub const PRIME32_1: u64 = 0x9E3779B1; diff --git a/src/xxhash3/large.rs b/src/xxhash3/large.rs index 13b1392f4..26a99891d 100644 --- a/src/xxhash3/large.rs +++ b/src/xxhash3/large.rs @@ -127,7 +127,7 @@ pub trait Vector: Copy { } #[rustfmt::skip] -const INITIAL_ACCUMULATORS: [u64; 8] = [ +pub const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; @@ -139,7 +139,10 @@ where V: Vector, { #[inline] - pub fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { + pub fn oneshot(&self, secret: &Secret, input: &[u8], finalize: F) -> F::Output + where + F: super::Finalize, + { assert_input_range!(241.., input.len()); let mut acc = INITIAL_ACCUMULATORS; @@ -163,7 +166,7 @@ where let len 
= input.len(); let last_stripe = input.last_chunk().unwrap(); - self.finalize(acc, last_block, last_stripe, secret, len) + finalize.large(self.0, acc, last_block, last_stripe, secret, len) } #[inline] @@ -201,8 +204,9 @@ where } } - #[inline] - pub fn finalize( + #[inline(always)] + #[cfg(feature = "xxhash3_64")] + pub fn finalize_64( &self, mut acc: [u64; 8], last_block: &[u8], @@ -262,52 +266,3 @@ where avalanche(result) } } - -/// Tracks which stripe we are currently on to know which part of the -/// secret we should be using. -#[derive(Copy, Clone)] -pub struct StripeAccumulator { - pub accumulator: [u64; 8], - current_stripe: usize, -} - -impl StripeAccumulator { - pub fn new() -> Self { - Self { - accumulator: INITIAL_ACCUMULATORS, - current_stripe: 0, - } - } - - #[inline] - pub fn process_stripe( - &mut self, - vector: impl Vector, - stripe: &[u8; 64], - n_stripes: usize, - secret: &Secret, - ) { - let Self { - accumulator, - current_stripe, - .. - } = self; - - // For each stripe - - // Safety: The number of stripes is determined by the - // block size, which is determined by the secret size. - let secret_stripe = unsafe { secret.stripe(*current_stripe) }; - vector.accumulate(accumulator, stripe, secret_stripe); - - *current_stripe += 1; - - // After a full block's worth - if *current_stripe == n_stripes { - let secret_end = secret.last_stripe(); - vector.round_scramble(accumulator, secret_end); - - *current_stripe = 0; - } - } -} diff --git a/src/xxhash3/streaming.rs b/src/xxhash3/streaming.rs new file mode 100644 index 000000000..f8e484bee --- /dev/null +++ b/src/xxhash3/streaming.rs @@ -0,0 +1,561 @@ +use core::hint::assert_unchecked; + +use super::{large::INITIAL_ACCUMULATORS, *}; + +/// A buffer containing the secret bytes. +/// +/// # Safety +/// +/// Must always return a slice with the same number of elements. +pub unsafe trait FixedBuffer: AsRef<[u8]> {} + +/// A mutable buffer to contain the secret bytes. 
+/// +/// # Safety +/// +/// Must always return a slice with the same number of elements. The +/// slice must always be the same as that returned from +/// [`AsRef::as_ref`][]. +pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {} + +// Safety: An array will never change size. +unsafe impl FixedBuffer for [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedBuffer for &[u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedBuffer for &mut [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for &mut [u8; N] {} + +const STRIPE_BYTES: usize = 64; +const BUFFERED_STRIPES: usize = 4; +const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; +type Buffer = [u8; BUFFERED_BYTES]; + +// Ensure that a full buffer always implies we are in the 241+ byte case. +const _: () = assert!(BUFFERED_BYTES > CUTOFF); + +/// Holds secret and temporary buffers that are ensured to be +/// appropriately sized. +#[derive(Clone)] +pub struct SecretBuffer { + seed: u64, + secret: S, + buffer: Buffer, +} + +impl SecretBuffer { + /// Returns the secret. + pub fn into_secret(self) -> S { + self.secret + } +} + +impl SecretBuffer +where + S: FixedBuffer, +{ + /// Takes the seed, secret, and buffer and performs no + /// modifications to them, only validating that the sizes are + /// appropriate. 
+ pub fn new(seed: u64, secret: S) -> Result> { + match Secret::new(secret.as_ref()) { + Ok(_) => Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }), + Err(e) => Err(SecretTooShortError(e, secret)), + } + } + + #[inline(always)] + #[cfg(test)] + fn is_valid(&self) -> bool { + let secret = self.secret.as_ref(); + + secret.len() >= SECRET_MINIMUM_LENGTH + } + + #[inline] + fn n_stripes(&self) -> usize { + Self::secret(&self.secret).n_stripes() + } + + #[inline] + fn parts(&self) -> (u64, &Secret, &Buffer) { + (self.seed, Self::secret(&self.secret), &self.buffer) + } + + #[inline] + fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { + (self.seed, Self::secret(&self.secret), &mut self.buffer) + } + + fn secret(secret: &S) -> &Secret { + let secret = secret.as_ref(); + // Safety: We established the length at construction and the + // length is not allowed to change. + unsafe { Secret::new_unchecked(secret) } + } +} + +impl SecretBuffer +where + S: FixedMutBuffer, +{ + /// Fills the secret buffer with a secret derived from the seed + /// and the default secret. The secret must be exactly + /// [`DEFAULT_SECRET_LENGTH`][] bytes long. + pub fn with_seed(seed: u64, mut secret: S) -> Result> { + match <&mut DefaultSecret>::try_from(secret.as_mut()) { + Ok(secret_slice) => { + *secret_slice = DEFAULT_SECRET_RAW; + derive_secret(seed, secret_slice); + + Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }) + } + Err(_) => Err(SecretWithSeedError(secret)), + } + } +} + +impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { + /// Use the default seed and secret values while allocating nothing. 
+ #[inline] + pub const fn default() -> Self { + SecretBuffer { + seed: DEFAULT_SEED, + secret: &DEFAULT_SECRET_RAW, + buffer: [0; BUFFERED_BYTES], + } + } +} + +#[derive(Clone)] +pub struct RawHasherCore { + secret_buffer: SecretBuffer, + buffer_usage: usize, + stripe_accumulator: StripeAccumulator, + total_bytes: usize, +} + +impl RawHasherCore { + pub fn new(secret_buffer: SecretBuffer) -> Self { + Self { + secret_buffer, + buffer_usage: 0, + stripe_accumulator: StripeAccumulator::new(), + total_bytes: 0, + } + } + + pub fn into_secret(self) -> S { + self.secret_buffer.into_secret() + } +} + +impl RawHasherCore +where + S: FixedBuffer, +{ + #[inline] + pub fn write(&mut self, input: &[u8]) { + let this = self; + dispatch! { + fn write_impl(this: &mut RawHasherCore, input: &[u8]) + [S: FixedBuffer] + } + } + + #[inline] + pub fn finish(&self, finalize: F) -> F::Output + where + F: Finalize, + { + let this = self; + dispatch! { + fn finish_impl(this: &RawHasherCore, finalize: F) -> F::Output + [S: FixedBuffer, F: Finalize] + } + } +} + +#[inline(always)] +fn write_impl(vector: impl Vector, this: &mut RawHasherCore, mut input: &[u8]) +where + S: FixedBuffer, +{ + if input.is_empty() { + return; + } + + let RawHasherCore { + secret_buffer, + buffer_usage, + stripe_accumulator, + total_bytes, + .. + } = this; + + let n_stripes = secret_buffer.n_stripes(); + let (_, secret, buffer) = secret_buffer.parts_mut(); + + *total_bytes += input.len(); + + // Safety: This is an invariant of the buffer. 
+ unsafe { + debug_assert!(*buffer_usage <= buffer.len()); + assert_unchecked(*buffer_usage <= buffer.len()) + }; + + // We have some previous data saved; try to fill it up and process it first + if !buffer.is_empty() { + let remaining = &mut buffer[*buffer_usage..]; + let n_to_copy = usize::min(remaining.len(), input.len()); + + let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); + let (input_head, input_tail) = input.split_at(n_to_copy); + + remaining_head.copy_from_slice(input_head); + *buffer_usage += n_to_copy; + + input = input_tail; + + // We did not fill up the buffer + if !remaining_tail.is_empty() { + return; + } + + // We don't know this isn't the last of the data + if input.is_empty() { + return; + } + + let (stripes, _) = buffer.bp_as_chunks(); + for stripe in stripes { + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); + } + *buffer_usage = 0; + } + + debug_assert!(*buffer_usage == 0); + + // Process as much of the input data in-place as possible, + // while leaving at least one full stripe for the + // finalization. + if let Some(len) = input.len().checked_sub(STRIPE_BYTES) { + let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES; + // Safety: We know that `full_block_point` must be less than + // `input.len()` as we subtracted and then integer-divided + // (which rounds down) and then multiplied back. That's not + // evident to the compiler and `split_at` results in a + // potential panic. + // + // https://github.com/llvm/llvm-project/issues/104827 + let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; + let (stripes, _) = stripes.bp_as_chunks(); + + for stripe in stripes { + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) + } + input = remainder; + } + + // Any remaining data has to be less than the buffer, and the + // buffer is empty so just fill up the buffer. 
+ debug_assert!(*buffer_usage == 0); + debug_assert!(!input.is_empty()); + + // Safety: We have parsed all the full blocks of input except one + // and potentially a full block minus one byte. That amount of + // data must be less than the buffer. + let buffer_head = unsafe { + debug_assert!(input.len() < 2 * STRIPE_BYTES); + debug_assert!(2 * STRIPE_BYTES < buffer.len()); + buffer.get_unchecked_mut(..input.len()) + }; + + buffer_head.copy_from_slice(input); + *buffer_usage = input.len(); +} + +#[inline(always)] +fn finish_impl(vector: impl Vector, this: &RawHasherCore, finalize: F) -> F::Output +where + S: FixedBuffer, + F: Finalize, +{ + let RawHasherCore { + ref secret_buffer, + buffer_usage, + mut stripe_accumulator, + total_bytes, + } = *this; + + let n_stripes = secret_buffer.n_stripes(); + let (seed, secret, buffer) = secret_buffer.parts(); + + // Safety: This is an invariant of the buffer. + unsafe { + debug_assert!(buffer_usage <= buffer.len()); + assert_unchecked(buffer_usage <= buffer.len()) + }; + + if total_bytes > CUTOFF { + let input = &buffer[..buffer_usage]; + + // Ingest final stripes + let (stripes, remainder) = stripes_with_tail(input); + for stripe in stripes { + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); + } + + let mut temp = [0; 64]; + + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; + + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); + + &temp + } + }; + + finalize.large( + vector, + stripe_accumulator.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) + } else { + finalize.small(DEFAULT_SECRET, seed, &buffer[..total_bytes]) + } +} + +pub trait Finalize { + type Output; + + fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output; + + fn large( + &self, + vector: impl 
Vector, + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + len: usize, + ) -> Self::Output; +} + +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub mod with_alloc { + use ::alloc::boxed::Box; + + use super::*; + + // Safety: A plain slice will never change size. + unsafe impl FixedBuffer for Box<[u8]> {} + + // Safety: A plain slice will never change size. + unsafe impl FixedMutBuffer for Box<[u8]> {} + + type AllocSecretBuffer = SecretBuffer>; + + impl AllocSecretBuffer { + /// Allocates the secret and temporary buffers and fills them + /// with the default seed and secret values. + pub fn allocate_default() -> Self { + Self { + seed: DEFAULT_SEED, + secret: DEFAULT_SECRET_RAW.to_vec().into(), + buffer: [0; BUFFERED_BYTES], + } + } + + /// Allocates the secret and temporary buffers and uses the + /// provided seed to construct the secret value. + pub fn allocate_with_seed(seed: u64) -> Self { + let mut secret = DEFAULT_SECRET_RAW; + derive_secret(seed, &mut secret); + + Self { + seed, + secret: secret.to_vec().into(), + buffer: [0; BUFFERED_BYTES], + } + } + + /// Allocates the temporary buffer and uses the provided seed + /// and secret buffer. + pub fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Self::new(seed, secret.into()) + } + } + + pub type AllocRawHasher = RawHasherCore>; + + impl AllocRawHasher { + pub fn allocate_default() -> Self { + Self::new(SecretBuffer::allocate_default()) + } + + pub fn allocate_with_seed(seed: u64) -> Self { + Self::new(SecretBuffer::allocate_with_seed(seed)) + } + + pub fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new) + } + } +} + +#[cfg(feature = "alloc")] +pub use with_alloc::AllocRawHasher; + +/// Tracks which stripe we are currently on to know which part of the +/// secret we should be using. 
+#[derive(Copy, Clone)] +pub struct StripeAccumulator { + pub accumulator: [u64; 8], + current_stripe: usize, +} + +impl StripeAccumulator { + pub fn new() -> Self { + Self { + accumulator: INITIAL_ACCUMULATORS, + current_stripe: 0, + } + } + + #[inline] + pub fn process_stripe( + &mut self, + vector: impl Vector, + stripe: &[u8; 64], + n_stripes: usize, + secret: &Secret, + ) { + let Self { + accumulator, + current_stripe, + .. + } = self; + + // For each stripe + + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + let secret_stripe = unsafe { secret.stripe(*current_stripe) }; + vector.accumulate(accumulator, stripe, secret_stripe); + + *current_stripe += 1; + + // After a full block's worth + if *current_stripe == n_stripes { + let secret_end = secret.last_stripe(); + vector.round_scramble(accumulator, secret_end); + + *current_stripe = 0; + } + } +} + +/// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][] +/// bytes. +pub struct SecretWithSeedError(S); + +impl SecretWithSeedError { + /// Returns the secret. + pub fn into_secret(self) -> S { + self.0 + } +} + +impl core::error::Error for SecretWithSeedError {} + +impl core::fmt::Debug for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretWithSeedError").finish() + } +} + +impl core::fmt::Display for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes" + ) + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +pub struct SecretTooShortError(secret::Error, S); + +impl SecretTooShortError { + /// Returns the secret. 
+ pub fn into_secret(self) -> S { + self.1 + } +} + +impl core::error::Error for SecretTooShortError {} + +impl core::fmt::Debug for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretTooShortError").finish() + } +} + +impl core::fmt::Display for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn secret_buffer_default_is_valid() { + assert!(SecretBuffer::default().is_valid()); + } + + #[test] + fn secret_buffer_allocate_default_is_valid() { + assert!(SecretBuffer::allocate_default().is_valid()) + } + + #[test] + fn secret_buffer_allocate_with_seed_is_valid() { + assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) + } +} diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c8d059f95..cba9c2382 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -6,7 +6,7 @@ unsafe_op_in_unsafe_fn )] -use core::{hash, hint::assert_unchecked}; +use core::hash; use crate::{ xxhash3::{primes::*, *}, @@ -14,14 +14,15 @@ use crate::{ }; pub use crate::xxhash3::{ - secret::SECRET_MINIMUM_LENGTH, OneshotWithSecretError, DEFAULT_SECRET_LENGTH, + FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError, + SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH, }; /// Calculates the 64-bit hash. #[derive(Clone)] pub struct Hasher { #[cfg(feature = "alloc")] - inner: with_alloc::AllocRawHasher, + inner: AllocRawHasher, _private: (), } @@ -81,122 +82,6 @@ impl Hasher { } } -const STRIPE_BYTES: usize = 64; -const BUFFERED_STRIPES: usize = 4; -const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; -type Buffer = [u8; BUFFERED_BYTES]; - -// Ensure that a full buffer always implies we are in the 241+ byte case. -const _: () = assert!(BUFFERED_BYTES > CUTOFF); - -/// A buffer containing the secret bytes. 
-/// -/// # Safety -/// -/// Must always return a slice with the same number of elements. -pub unsafe trait FixedBuffer: AsRef<[u8]> {} - -/// A mutable buffer to contain the secret bytes. -/// -/// # Safety -/// -/// Must always return a slice with the same number of elements. The -/// slice must always be the same as that returned from -/// [`AsRef::as_ref`][]. -pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {} - -// Safety: An array will never change size. -unsafe impl FixedBuffer for [u8; N] {} - -// Safety: An array will never change size. -unsafe impl FixedMutBuffer for [u8; N] {} - -// Safety: An array will never change size. -unsafe impl FixedBuffer for &[u8; N] {} - -// Safety: An array will never change size. -unsafe impl FixedBuffer for &mut [u8; N] {} - -// Safety: An array will never change size. -unsafe impl FixedMutBuffer for &mut [u8; N] {} - -/// Holds secret and temporary buffers that are ensured to be -/// appropriately sized. -#[derive(Clone)] -pub struct SecretBuffer { - seed: u64, - secret: S, - buffer: Buffer, -} - -impl SecretBuffer -where - S: FixedBuffer, -{ - /// Takes the seed, secret, and buffer and performs no - /// modifications to them, only validating that the sizes are - /// appropriate. 
- pub fn new(seed: u64, secret: S) -> Result> { - match Secret::new(secret.as_ref()) { - Ok(_) => Ok(Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - }), - Err(e) => Err(SecretTooShortError(e, secret)), - } - } - - #[inline(always)] - #[cfg(test)] - fn is_valid(&self) -> bool { - let secret = self.secret.as_ref(); - - secret.len() >= SECRET_MINIMUM_LENGTH - } - - #[inline] - fn n_stripes(&self) -> usize { - Self::secret(&self.secret).n_stripes() - } - - #[inline] - fn parts(&self) -> (u64, &Secret, &Buffer) { - (self.seed, Self::secret(&self.secret), &self.buffer) - } - - #[inline] - fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { - (self.seed, Self::secret(&self.secret), &mut self.buffer) - } - - fn secret(secret: &S) -> &Secret { - let secret = secret.as_ref(); - // Safety: We established the length at construction and the - // length is not allowed to change. - unsafe { Secret::new_unchecked(secret) } - } -} - -impl SecretBuffer { - /// Returns the secret. - pub fn into_secret(self) -> S { - self.secret - } -} - -impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { - /// Use the default seed and secret values while allocating nothing. - #[inline] - pub const fn default() -> Self { - SecretBuffer { - seed: DEFAULT_SEED, - secret: &DEFAULT_SECRET_RAW, - buffer: [0; BUFFERED_BYTES], - } - } -} - #[cfg(feature = "alloc")] #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] mod with_alloc { @@ -204,17 +89,11 @@ mod with_alloc { use super::*; - // Safety: A plain slice will never change size. - unsafe impl FixedBuffer for Box<[u8]> {} - - // Safety: A plain slice will never change size. - unsafe impl FixedMutBuffer for Box<[u8]> {} - impl Hasher { /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { - inner: RawHasher::allocate_default(), + inner: RawHasherCore::allocate_default(), _private: (), } } @@ -223,7 +102,7 @@ mod with_alloc { /// derived from the seed. 
pub fn with_seed(seed: u64) -> Self { Self { - inner: RawHasher::allocate_with_seed(seed), + inner: RawHasherCore::allocate_with_seed(seed), _private: (), } } @@ -234,7 +113,7 @@ mod with_alloc { secret: impl Into>, ) -> Result>> { Ok(Self { - inner: RawHasher::allocate_with_seed_and_secret(seed, secret)?, + inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?, _private: (), }) } @@ -259,143 +138,12 @@ mod with_alloc { #[inline] fn finish(&self) -> u64 { - self.inner.finish() - } - } - - type AllocSecretBuffer = SecretBuffer>; - - impl AllocSecretBuffer { - /// Allocates the secret and temporary buffers and fills them - /// with the default seed and secret values. - pub fn allocate_default() -> Self { - Self { - seed: DEFAULT_SEED, - secret: DEFAULT_SECRET_RAW.to_vec().into(), - buffer: [0; BUFFERED_BYTES], - } - } - - /// Allocates the secret and temporary buffers and uses the - /// provided seed to construct the secret value. - pub fn allocate_with_seed(seed: u64) -> Self { - let mut secret = DEFAULT_SECRET_RAW; - derive_secret(seed, &mut secret); - - Self { - seed, - secret: secret.to_vec().into(), - buffer: [0; BUFFERED_BYTES], - } - } - - /// Allocates the temporary buffer and uses the provided seed - /// and secret buffer. - pub fn allocate_with_seed_and_secret( - seed: u64, - secret: impl Into>, - ) -> Result>> { - Self::new(seed, secret.into()) - } - } - - pub type AllocRawHasher = RawHasher>; - - impl AllocRawHasher { - fn allocate_default() -> Self { - Self::new(SecretBuffer::allocate_default()) - } - - fn allocate_with_seed(seed: u64) -> Self { - Self::new(SecretBuffer::allocate_with_seed(seed)) - } - - fn allocate_with_seed_and_secret( - seed: u64, - secret: impl Into>, - ) -> Result>> { - SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new) - } - } -} - -impl SecretBuffer -where - S: FixedMutBuffer, -{ - /// Fills the secret buffer with a secret derived from the seed - /// and the default secret. 
The secret must be exactly - /// [`DEFAULT_SECRET_LENGTH`][] bytes long. - pub fn with_seed(seed: u64, mut secret: S) -> Result> { - match <&mut DefaultSecret>::try_from(secret.as_mut()) { - Ok(secret_slice) => { - *secret_slice = DEFAULT_SECRET_RAW; - derive_secret(seed, secret_slice); - - Ok(Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - }) - } - Err(_) => Err(SecretWithSeedError(secret)), + self.inner.finish(Finalize64) } } } -/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] -/// bytes. -pub struct SecretTooShortError(secret::Error, S); - -impl SecretTooShortError { - /// Returns the secret. - pub fn into_secret(self) -> S { - self.1 - } -} - -impl core::error::Error for SecretTooShortError {} - -impl core::fmt::Debug for SecretTooShortError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_tuple("SecretTooShortError").finish() - } -} - -impl core::fmt::Display for SecretTooShortError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.0.fmt(f) - } -} - -/// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][] -/// bytes. -pub struct SecretWithSeedError(S); - -impl SecretWithSeedError { - /// Returns the secret. - pub fn into_secret(self) -> S { - self.0 - } -} - -impl core::error::Error for SecretWithSeedError {} - -impl core::fmt::Debug for SecretWithSeedError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_tuple("SecretWithSeedError").finish() - } -} - -impl core::fmt::Display for SecretWithSeedError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!( - f, - "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes" - ) - } -} - +#[derive(Clone)] /// A lower-level interface for computing a hash from streaming data. 
/// /// The algorithm requires a secret which can be a reasonably large @@ -404,29 +152,18 @@ impl core::fmt::Display for SecretWithSeedError { /// usages may desire more flexibility. This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a /// generic type. -#[derive(Clone)] -pub struct RawHasher { - secret_buffer: SecretBuffer, - buffer_usage: usize, - stripe_accumulator: StripeAccumulator, - total_bytes: usize, -} +pub struct RawHasher(RawHasherCore); impl RawHasher { /// Construct the hasher with the provided seed, secret, and /// temporary buffer. pub fn new(secret_buffer: SecretBuffer) -> Self { - Self { - secret_buffer, - buffer_usage: 0, - stripe_accumulator: StripeAccumulator::new(), - total_bytes: 0, - } + Self(RawHasherCore::new(secret_buffer)) } /// Returns the secret. pub fn into_secret(self) -> S { - self.secret_buffer.into_secret() + self.0.into_secret() } } @@ -436,177 +173,36 @@ where { #[inline] fn write(&mut self, input: &[u8]) { - let this = self; - dispatch! { - fn write_impl(this: &mut RawHasher, input: &[u8]) - [S: FixedBuffer] - } + self.0.write(input); } #[inline] fn finish(&self) -> u64 { - let this = self; - dispatch! { - fn finish_impl(this: &RawHasher) -> u64 - [S: FixedBuffer] - } + self.0.finish(Finalize64) } } -#[inline(always)] -fn write_impl(vector: impl Vector, this: &mut RawHasher, mut input: &[u8]) -where - S: FixedBuffer, -{ - if input.is_empty() { - return; - } - - let RawHasher { - secret_buffer, - buffer_usage, - stripe_accumulator, - total_bytes, - .. - } = this; - - let n_stripes = secret_buffer.n_stripes(); - let (_, secret, buffer) = secret_buffer.parts_mut(); - - *total_bytes += input.len(); - - // Safety: This is an invariant of the buffer. 
- unsafe { - debug_assert!(*buffer_usage <= buffer.len()); - assert_unchecked(*buffer_usage <= buffer.len()) - }; - - // We have some previous data saved; try to fill it up and process it first - if !buffer.is_empty() { - let remaining = &mut buffer[*buffer_usage..]; - let n_to_copy = usize::min(remaining.len(), input.len()); - - let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); - let (input_head, input_tail) = input.split_at(n_to_copy); - - remaining_head.copy_from_slice(input_head); - *buffer_usage += n_to_copy; +struct Finalize64; - input = input_tail; +impl Finalize for Finalize64 { + type Output = u64; - // We did not fill up the buffer - if !remaining_tail.is_empty() { - return; - } - - // We don't know this isn't the last of the data - if input.is_empty() { - return; - } - - let (stripes, _) = buffer.bp_as_chunks(); - for stripe in stripes { - stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); - } - *buffer_usage = 0; - } - - debug_assert!(*buffer_usage == 0); - - // Process as much of the input data in-place as possible, - // while leaving at least one full stripe for the - // finalization. - if let Some(len) = input.len().checked_sub(STRIPE_BYTES) { - let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES; - // Safety: We know that `full_block_point` must be less than - // `input.len()` as we subtracted and then integer-divided - // (which rounds down) and then multiplied back. That's not - // evident to the compiler and `split_at` results in a - // potential panic. 
- // - // https://github.com/llvm/llvm-project/issues/104827 - let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; - let (stripes, _) = stripes.bp_as_chunks(); - - for stripe in stripes { - stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) - } - input = remainder; + #[inline(always)] + fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output { + impl_oneshot(secret, seed, input) } - // Any remaining data has to be less than the buffer, and the - // buffer is empty so just fill up the buffer. - debug_assert!(*buffer_usage == 0); - debug_assert!(!input.is_empty()); - - // Safety: We have parsed all the full blocks of input except one - // and potentially a full block minus one byte. That amount of - // data must be less than the buffer. - let buffer_head = unsafe { - debug_assert!(input.len() < 2 * STRIPE_BYTES); - debug_assert!(2 * STRIPE_BYTES < buffer.len()); - buffer.get_unchecked_mut(..input.len()) - }; - - buffer_head.copy_from_slice(input); - *buffer_usage = input.len(); -} - -#[inline(always)] -fn finish_impl(vector: impl Vector, this: &RawHasher) -> u64 -where - S: FixedBuffer, -{ - let RawHasher { - ref secret_buffer, - buffer_usage, - mut stripe_accumulator, - total_bytes, - } = *this; - - let n_stripes = secret_buffer.n_stripes(); - let (seed, secret, buffer) = secret_buffer.parts(); - - // Safety: This is an invariant of the buffer. 
- unsafe { - debug_assert!(buffer_usage <= buffer.len()); - assert_unchecked(buffer_usage <= buffer.len()) - }; - - if total_bytes > CUTOFF { - let input = &buffer[..buffer_usage]; - - // Ingest final stripes - let (stripes, remainder) = stripes_with_tail(input); - for stripe in stripes { - stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); - } - - let mut temp = [0; 64]; - - let last_stripe = match input.last_chunk() { - Some(chunk) => chunk, - None => { - let n_to_reuse = 64 - input.len(); - let to_reuse = buffer.len() - n_to_reuse; - - let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); - temp_head.copy_from_slice(&buffer[to_reuse..]); - temp_tail.copy_from_slice(input); - - &temp - } - }; - - Algorithm(vector).finalize( - stripe_accumulator.accumulator, - remainder, - last_stripe, - secret, - total_bytes, - ) - } else { - impl_oneshot(DEFAULT_SECRET, seed, &buffer[..total_bytes]) + #[inline(always)] + fn large( + &self, + vector: impl Vector, + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + len: usize, + ) -> Self::Output { + Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len) } } @@ -771,7 +367,7 @@ fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { #[inline] fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { - Algorithm(vector).oneshot(secret, input) + Algorithm(vector).oneshot(secret, input, Finalize64) } #[cfg(test)] @@ -789,21 +385,6 @@ mod test { const EMPTY_BYTES: [u8; 0] = []; - #[test] - fn secret_buffer_default_is_valid() { - assert!(SecretBuffer::default().is_valid()); - } - - #[test] - fn secret_buffer_allocate_default_is_valid() { - assert!(SecretBuffer::allocate_default().is_valid()) - } - - #[test] - fn secret_buffer_allocate_with_seed_is_valid() { - assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) - } - fn hash_byte_by_byte(input: &[u8]) -> u64 { let mut hasher = Hasher::new(); for byte in input.chunks(1) { 
From e063393cf966229fd256e419917e1b1cf1c25ea4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 28 Oct 2024 16:24:13 -0400 Subject: [PATCH 03/20] Extract common code from the 1 to 3 bytes implementation --- src/xxhash3.rs | 13 ++++++++++++- src/xxhash3_64.rs | 9 ++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/xxhash3.rs b/src/xxhash3.rs index aa57f73e7..e23d3f0ec 100644 --- a/src/xxhash3.rs +++ b/src/xxhash3.rs @@ -1,6 +1,6 @@ use core::slice; -use crate::IntoU128 as _; +use crate::{IntoU128 as _, IntoU32 as _}; pub mod large; @@ -110,6 +110,17 @@ macro_rules! assert_input_range { } pub(crate) use assert_input_range; +#[inline(always)] +pub fn impl_1_to_3_bytes_combined(input: &[u8]) -> u32 { + assert_input_range!(1..=3, input.len()); + let input_length = input.len() as u8; // OK as we checked that the length fits + + input[input.len() - 1].into_u32() + | input_length.into_u32() << 8 + | input[0].into_u32() << 16 + | input[input.len() >> 1].into_u32() << 24 +} + #[inline] pub fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { let data_words = to_u64s(data); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index cba9c2382..37255ffb8 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -10,7 +10,7 @@ use core::hash; use crate::{ xxhash3::{primes::*, *}, - IntoU128 as _, IntoU32 as _, IntoU64 as _, + IntoU128 as _, IntoU64 as _, }; pub use crate::xxhash3::{ @@ -234,12 +234,7 @@ fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { #[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(1..=3, input.len()); - let input_length = input.len() as u8; // OK as we checked that the length fits - - let combined = input[input.len() - 1].into_u32() - | input_length.into_u32() << 8 - | input[0].into_u32() << 16 - | input[input.len() >> 1].into_u32() << 24; + let combined = impl_1_to_3_bytes_combined(input); let secret_words = secret.words_for_1_to_3(); From 
94d82a53ff6fb26e487c5d39e763d265631f7daa Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 28 Oct 2024 16:24:47 -0400 Subject: [PATCH 04/20] Extract common code from the 17 to 128 bytes implementation --- src/xxhash3.rs | 28 ++++++++++++++++++++++++++++ src/xxhash3_64.rs | 28 ++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/xxhash3.rs b/src/xxhash3.rs index e23d3f0ec..a472ac6fa 100644 --- a/src/xxhash3.rs +++ b/src/xxhash3.rs @@ -121,6 +121,34 @@ pub fn impl_1_to_3_bytes_combined(input: &[u8]) -> u32 { | input[input.len() >> 1].into_u32() << 24 } +#[inline] +pub fn impl_17_to_128_bytes_iter( + secret: &Secret, + input: &[u8], + mut f: impl FnMut(&[u8; 16], &[u8; 16], &[[u8; 16]; 2]), +) { + let secret = secret.words_for_17_to_128(); + let (secret, _) = secret.bp_as_chunks::<2>(); + let (fwd, _) = input.bp_as_chunks(); + let (_, bwd) = input.bp_as_rchunks(); + + let q = bwd.len(); + + if input.len() > 32 { + if input.len() > 64 { + if input.len() > 96 { + f(&fwd[3], &bwd[q - 4], &secret[3]); + } + + f(&fwd[2], &bwd[q - 3], &secret[2]); + } + + f(&fwd[1], &bwd[q - 2], &secret[1]); + } + + f(&fwd[0], &bwd[q - 1], &secret[0]); +} + #[inline] pub fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { let data_words = to_u64s(data); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 37255ffb8..24747cbd5 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -296,30 +296,10 @@ fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(17..=128, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let secret = secret.words_for_17_to_128(); - let (secret, _) = secret.bp_as_chunks::<2>(); - let (fwd, _) = input.bp_as_chunks(); - let (_, bwd) = input.bp_as_rchunks(); - - let q = bwd.len(); - - if input.len() > 32 { - if input.len() > 64 { - if input.len() > 96 { - acc = acc.wrapping_add(mix_step(&fwd[3], &secret[3][0], seed)); - acc = 
acc.wrapping_add(mix_step(&bwd[q - 4], &secret[3][1], seed)); - } - - acc = acc.wrapping_add(mix_step(&fwd[2], &secret[2][0], seed)); - acc = acc.wrapping_add(mix_step(&bwd[q - 3], &secret[2][1], seed)); - } - - acc = acc.wrapping_add(mix_step(&fwd[1], &secret[1][0], seed)); - acc = acc.wrapping_add(mix_step(&bwd[q - 2], &secret[1][1], seed)); - } - - acc = acc.wrapping_add(mix_step(&fwd[0], &secret[0][0], seed)); - acc = acc.wrapping_add(mix_step(&bwd[q - 1], &secret[0][1], seed)); + impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| { + acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed)); + acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed)); + }); avalanche(acc) } From c9094ed763f6f3d033b4c633fbefc8b05d1101af Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 28 Oct 2024 13:36:21 -0400 Subject: [PATCH 05/20] Pass secret as a slice to final_merge This will allow us to call it with different arguments for the 128-bit hash. --- src/xxhash3/large.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xxhash3/large.rs b/src/xxhash3/large.rs index 26a99891d..2845b7931 100644 --- a/src/xxhash3/large.rs +++ b/src/xxhash3/large.rs @@ -217,7 +217,8 @@ where debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); - self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret) + let low = len.into_u64().wrapping_mul(PRIME64_1); + self.final_merge(&acc, low, secret.final_secret()) } #[inline] @@ -247,8 +248,7 @@ where } #[inline] - fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &Secret) -> u64 { - let secret = secret.final_secret(); + fn final_merge(&self, acc: &[u64; 8], init_value: u64, secret: &[u8; 64]) -> u64 { let (secrets, _) = secret.bp_as_chunks(); let mut result = init_value; for i in 0..4 { From 3e03234239cda59f18fd348971dcdf212e27322f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 4 Nov 2024 08:52:32 -0500 Subject: [PATCH 06/20] 
Clarify which secret methods are 64-bit specific --- src/xxhash3/secret.rs | 121 ++++++++++++++++++++++++------------------ src/xxhash3_64.rs | 14 ++--- 2 files changed, 76 insertions(+), 59 deletions(-) diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index e7210b977..70a971f94 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -34,35 +34,9 @@ impl Secret { } #[inline] - pub fn words_for_0(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[56..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_le_bytes) - } - - #[inline] - pub fn words_for_1_to_3(&self) -> [u32; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0.bp_as_chunks(); - [q[0], q[1]].map(u32::from_le_bytes) - } - - #[inline] - pub fn words_for_4_to_8(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[8..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_le_bytes) - } - - #[inline] - pub fn words_for_9_to_16(&self) -> [u64; 4] { - self.reassert_preconditions(); - - let (q, _) = self.0[24..].bp_as_chunks(); - [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) + #[cfg(feature = "xxhash3_64")] + pub fn for_64(&self) -> Secret64BitView<'_> { + Secret64BitView(self) } #[inline] @@ -73,29 +47,6 @@ impl Secret { words } - #[inline] - pub fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0.bp_as_chunks(); - ss - } - - #[inline] - pub fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0[3..].bp_as_chunks(); - ss - } - - #[inline] - pub fn words_for_127_to_240_part3(&self) -> &[u8; 16] { - self.reassert_preconditions(); - - self.0[119..].first_chunk().unwrap() - } - /// # Safety /// /// `i` must be less than the number of stripes in the secret @@ -160,6 +111,72 @@ impl Secret { } } +#[derive(Copy, Clone)] +#[cfg(feature = "xxhash3_64")] +pub struct Secret64BitView<'a>(&'a Secret); + +#[cfg(feature = "xxhash3_64")] 
+impl<'a> Secret64BitView<'a> { + #[inline] + pub fn words_for_0(self) -> [u64; 2] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[56..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_le_bytes) + } + + #[inline] + pub fn words_for_1_to_3(self) -> [u32; 2] { + self.0.reassert_preconditions(); + + let (q, _) = self.b().bp_as_chunks(); + [q[0], q[1]].map(u32::from_le_bytes) + } + + #[inline] + pub fn words_for_4_to_8(self) -> [u64; 2] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[8..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_le_bytes) + } + + #[inline] + pub fn words_for_9_to_16(self) -> [u64; 4] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[24..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) + } + + #[inline] + pub fn words_for_127_to_240_part1(self) -> &'a [[u8; 16]] { + self.0.reassert_preconditions(); + + let (ss, _) = self.b().bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part2(self) -> &'a [[u8; 16]] { + self.0.reassert_preconditions(); + + let (ss, _) = self.b()[3..].bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part3(self) -> &'a [u8; 16] { + self.0.reassert_preconditions(); + + self.b()[119..].first_chunk().unwrap() + } + + fn b(self) -> &'a [u8] { + &(self.0).0 + } +} + #[derive(Debug)] pub struct Error(()); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 24747cbd5..2e967e476 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -227,7 +227,7 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { - let secret_words = secret.words_for_0(); + let secret_words = secret.for_64().words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } @@ -236,7 +236,7 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(1..=3, input.len()); let combined = impl_1_to_3_bytes_combined(input); - let 
secret_words = secret.words_for_1_to_3(); + let secret_words = secret.for_64().words_for_1_to_3(); let value = { let secret = (secret_words[0] ^ secret_words[1]).into_u64(); @@ -254,7 +254,7 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_last = input.last_u32().unwrap(); let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); - let secret_words = secret.words_for_4_to_8(); + let secret_words = secret.for_64().words_for_4_to_8(); let combined = input_last.into_u64() | (input_first.into_u64() << 32); @@ -277,7 +277,7 @@ fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = input.first_u64().unwrap(); let input_last = input.last_u64().unwrap(); - let secret_words = secret.words_for_9_to_16(); + let secret_words = secret.for_64().words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; let mul_result = low.into_u128().wrapping_mul(high.into_u128()); @@ -312,20 +312,20 @@ fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let (head, _) = input.bp_as_chunks(); let mut head = head.iter(); - let ss = secret.words_for_127_to_240_part1(); + let ss = secret.for_64().words_for_127_to_240_part1(); for (chunk, secret) in head.by_ref().zip(ss).take(8) { acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } acc = avalanche(acc); - let ss = secret.words_for_127_to_240_part2(); + let ss = secret.for_64().words_for_127_to_240_part2(); for (chunk, secret) in head.zip(ss) { acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } let last_chunk = input.last_chunk().unwrap(); - let ss = secret.words_for_127_to_240_part3(); + let ss = secret.for_64().words_for_127_to_240_part3(); acc = acc.wrapping_add(mix_step(last_chunk, ss, seed)); avalanche(acc) From c4977291ba9f1fe080d5b5be2580ca2c4778111c Mon Sep 17 00:00:00 2001 From: Jake Goulding 
Date: Fri, 1 Nov 2024 12:48:46 -0400 Subject: [PATCH 07/20] Upgrade xxHash to a development version A [bug][] was found in the implementation of `XXH3_128bits_withSecretandSeed`. It has been fixed but not yet released, so we are bumping to the current development version. [bug]: https://github.com/Cyan4973/xxHash/pull/894 --- xx_hash-sys/xxHash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xx_hash-sys/xxHash b/xx_hash-sys/xxHash index bbb27a5ef..7546e25c9 160000 --- a/xx_hash-sys/xxHash +++ b/xx_hash-sys/xxHash @@ -1 +1 @@ -Subproject commit bbb27a5efb85b92a0486cf361a8635715a53f6ba +Subproject commit 7546e25c96c736896f6ff25e30042de523926182 From e4553d545bb0c17a85f3b2ef6617249856a70146 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 23 Oct 2024 17:13:59 -0400 Subject: [PATCH 08/20] Add C bindings for the XXH3_128 family of functions --- xx_hash-sys/src/lib.rs | 187 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index cbe655c40..47324b050 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -135,6 +135,36 @@ pub struct XXH3_state_t { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } +#[repr(C)] +pub struct XXH128_hash_t { + low64: XXH64_hash_t, + high64: XXH64_hash_t, +} + +impl From for u128 { + fn from(value: XXH128_hash_t) -> Self { + u128::from(value.high64) << 64 | u128::from(value.low64) + } +} + +/// Constructs a wrapper around the XXH3_* familiy of functions as we +/// compile the library in multiple modes to performance test against. +macro_rules! xxh3_template { + () => { crate::xxh3_template!(@ XXH3); }; + + ($prefix: ident) => { ::paste::paste! { crate::xxh3_template!(@ [< $prefix _XXH3 >]); } }; + + (@ $prefix: ident) => { + ::paste::paste! 
{ + extern "C" { + fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; + fn [<$prefix _freeState>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + } + } + }; +} +pub(crate) use xxh3_template; + /// Constructs a wrapper around the XXH3_64bit familiy of functions as /// we compile the library in multiple modes to performance test /// against. @@ -166,7 +196,6 @@ macro_rules! xxh3_64b_template { seed: crate::XXH64_hash_t, ) -> crate::XXH64_hash_t; - fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; fn [<$prefix _64bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; fn [<$prefix _64bits_reset_withSeed>]( state: *mut crate::XXH3_state_t, @@ -184,7 +213,6 @@ macro_rules! xxh3_64b_template { length: libc::size_t, ) -> crate::XXH_errorcode; fn [<$prefix _64bits_digest>](state: *mut crate::XXH3_state_t) -> crate::XXH64_hash_t; - fn [<$prefix _freeState>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; } pub struct XxHash3_64(*mut crate::XXH3_state_t); @@ -282,23 +310,178 @@ macro_rules! xxh3_64b_template { } pub(crate) use xxh3_64b_template; +/// Constructs a wrapper around the XXH3_128bit familiy of functions as +/// we compile the library in multiple modes to performance test +/// against. +macro_rules! xxh3_128b_template { + () => { crate::xxh3_128b_template!(@ XXH3); }; + + ($prefix: ident) => { ::paste::paste! { crate::xxh3_128b_template!(@ [< $prefix _XXH3 >]); } }; + + (@ $prefix: ident) => { + ::paste::paste! 
{ + extern "C" { + fn [<$prefix _128bits>](input: *const libc::c_void, length: libc::size_t) -> crate::XXH128_hash_t; + fn [<$prefix _128bits_withSeed>]( + input: *const libc::c_void, + length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH128_hash_t; + fn [<$prefix _128bits_withSecret>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> crate::XXH128_hash_t; + fn [<$prefix _128bits_withSecretandSeed>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH128_hash_t; + + fn [<$prefix _128bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + fn [<$prefix _128bits_reset_withSeed>]( + state: *mut crate::XXH3_state_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _128bits_reset_withSecretandSeed>]( + state: *mut crate::XXH3_state_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _128bits_update>]( + state: *mut crate::XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _128bits_digest>](state: *mut crate::XXH3_state_t) -> crate::XXH128_hash_t; + } + + pub struct XxHash3_128(*mut crate::XXH3_state_t); + + impl XxHash3_128 { + #[inline] + pub fn oneshot(data: &[u8]) -> u128 { + unsafe { [<$prefix _128bits>](data.as_ptr().cast(), data.len()) }.into() + } + + #[inline] + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u128 { + unsafe { [<$prefix _128bits_withSeed>](data.as_ptr().cast(), data.len(), seed) }.into() + } + + #[inline] + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u128 { + unsafe { + [<$prefix _128bits_withSecret>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + }.into() + } + + #[inline] + pub fn 
oneshot_with_seed_and_secret(seed: u64, secret: &[u8], data: &[u8]) -> u128 { + unsafe { + [<$prefix _128bits_withSecretandSeed>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + seed, + ) + }.into() + } + + #[inline] + pub fn new() -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _128bits_reset>](state); + state + }; + + Self(state) + } + + #[inline] + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _128bits_reset_withSeed>](state, seed); + state + }; + + Self(state) + } + + #[inline] + pub fn with_seed_and_secret(seed: u64, secret: &[u8]) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _128bits_reset_withSecretandSeed>](state, secret.as_ptr().cast(), secret.len(), seed); + state + }; + + Self(state) + } + + #[inline] + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { [<$prefix _128bits_update>](self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, crate::XXH_OK); + } + + #[inline] + pub fn finish(&mut self) -> u128 { + unsafe { [<$prefix _128bits_digest>](self.0) }.into() + } + } + + impl Drop for XxHash3_128 { + fn drop(&mut self) { + let retval = unsafe { [<$prefix _freeState>](self.0) }; + assert_eq!(retval, crate::XXH_OK); + } + } + } + }; +} +pub(crate) use xxh3_128b_template; + +xxh3_template!(); xxh3_64b_template!(); +xxh3_128b_template!(); pub mod scalar { + crate::xxh3_template!(scalar); crate::xxh3_64b_template!(scalar); + crate::xxh3_128b_template!(scalar); } #[cfg(target_arch = "aarch64")] pub mod neon { + crate::xxh3_template!(neon); crate::xxh3_64b_template!(neon); + crate::xxh3_128b_template!(neon); } #[cfg(target_arch = "x86_64")] pub mod avx2 { + crate::xxh3_template!(avx2); crate::xxh3_64b_template!(avx2); + crate::xxh3_128b_template!(avx2); } #[cfg(target_arch = "x86_64")] pub mod sse2 { + crate::xxh3_template!(sse2); 
crate::xxh3_64b_template!(sse2); + crate::xxh3_128b_template!(sse2); } From 3626817b4111f3f6bb4a988f29a8aef5ed845eb3 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 24 Oct 2024 11:59:37 -0400 Subject: [PATCH 09/20] Initial skeleton of XXH3 128-bit Rust implementation --- .github/workflows/ci.yml | 2 +- Cargo.toml | 3 ++- comparison/Cargo.toml | 2 +- src/lib.rs | 15 +++++++++++++-- src/xxhash3_128.rs | 41 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 src/xxhash3_128.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49331ba65..92f64ccb6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,7 +102,7 @@ jobs: runs-on: ubuntu-latest env: - IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 + IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 xxhash3_128 FEATURE_SET: random serialize std alloc steps: diff --git a/Cargo.toml b/Cargo.toml index a9a1608b1..2256572b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ members = [ #END-[workspace] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"] random = ["dep:rand"] @@ -34,6 +34,7 @@ serialize = ["dep:serde"] xxhash32 = [] xxhash64 = [] xxhash3_64 = [] +xxhash3_128 = [] std = ["alloc"] alloc = [] diff --git a/comparison/Cargo.toml b/comparison/Cargo.toml index 80cc76cb6..170f49e60 100644 --- a/comparison/Cargo.toml +++ b/comparison/Cargo.toml @@ -14,5 +14,5 @@ harness = false criterion = { version = "0.5.1", features = [] } proptest = "1.5.0" rand = "0.8.5" -twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } +twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"] } xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/src/lib.rs b/src/lib.rs index 09976b8b5..9d94ce18a 
100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,10 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] -#[cfg(all(feature = "alloc", feature = "xxhash3_64"))] +#[cfg(all( + feature = "alloc", + any(feature = "xxhash3_64", feature = "xxhash3_128") +))] extern crate alloc; #[cfg(any(feature = "std", doc, test))] @@ -27,7 +30,7 @@ pub mod xxhash64; #[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub use xxhash64::Hasher as XxHash64; -#[cfg(feature = "xxhash3_64")] +#[cfg(any(feature = "xxhash3_64", feature = "xxhash3_128"))] mod xxhash3; #[cfg(feature = "xxhash3_64")] @@ -38,6 +41,14 @@ pub mod xxhash3_64; #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::Hasher as XxHash3_64; +#[cfg(feature = "xxhash3_128")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))] +pub mod xxhash3_128; + +#[cfg(feature = "xxhash3_128")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))] +pub use xxhash3_128::Hasher as XxHash3_128; + #[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU32 { fn into_u32(self) -> u32; diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs new file mode 100644 index 000000000..bb4fa7540 --- /dev/null +++ b/src/xxhash3_128.rs @@ -0,0 +1,41 @@ +//! The implementation of XXH3_128. + +#![deny( + clippy::missing_safety_doc, + clippy::undocumented_unsafe_blocks, + unsafe_op_in_unsafe_fn +)] + +pub use crate::xxhash3::{DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; + +/// Calculates the 128-bit hash. +#[derive(Clone)] +pub struct Hasher; + +impl Hasher { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. 
+ #[must_use] + #[inline] + pub fn oneshot(_input: &[u8]) -> u128 { + 0x99aa06d3014798d86001c324468d497f + } +} + +#[cfg(test)] +mod test { + use super::*; + + const _: () = { + const fn is_clone() {} + is_clone::(); + }; + + const EMPTY_BYTES: [u8; 0] = []; + + #[test] + fn oneshot_empty() { + let hash = Hasher::oneshot(&EMPTY_BYTES); + assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f); + } +} From 461a65f89acbfd259a3882bbf01a2b98a9862492 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 24 Oct 2024 13:23:04 -0400 Subject: [PATCH 10/20] Implement oneshot of 0 bytes --- src/xxhash3/secret.rs | 25 +++++++++++++++++++++++++ src/xxhash3_128.rs | 25 +++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index 70a971f94..00ca3d95d 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -39,6 +39,12 @@ impl Secret { Secret64BitView(self) } + #[inline] + #[cfg(feature = "xxhash3_128")] + pub fn for_128(&self) -> Secret128BitView<'_> { + Secret128BitView(self) + } + #[inline] pub fn words_for_17_to_128(&self) -> &[[u8; 16]] { self.reassert_preconditions(); @@ -177,6 +183,25 @@ impl<'a> Secret64BitView<'a> { } } +#[derive(Copy, Clone)] +#[cfg(feature = "xxhash3_128")] +pub struct Secret128BitView<'a>(&'a Secret); + +#[cfg(feature = "xxhash3_128")] +impl<'a> Secret128BitView<'a> { + #[inline] + pub fn words_for_0(self) -> [u64; 4] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[64..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) + } + + fn b(self) -> &'a [u8] { + &(self.0).0 + } +} + #[derive(Debug)] pub struct Error(()); diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index bb4fa7540..22d887632 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -6,6 +6,8 @@ unsafe_op_in_unsafe_fn )] +use crate::{xxhash3::*, IntoU128 as _}; + pub use crate::xxhash3::{DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; /// Calculates the 
128-bit hash. @@ -17,11 +19,30 @@ impl Hasher { /// see noticable speed gains for certain types of input. #[must_use] #[inline] - pub fn oneshot(_input: &[u8]) -> u128 { - 0x99aa06d3014798d86001c324468d497f + pub fn oneshot(input: &[u8]) -> u128 { + impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) + } +} + +#[inline(always)] +fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + match input.len() { + 0 => impl_0_bytes(secret, seed), + + _ => unimplemented!(), } } +#[inline(always)] +fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 { + let secret_words = secret.for_128().words_for_0(); + + let low = avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]); + let high = avalanche_xxh64(seed ^ secret_words[2] ^ secret_words[3]); + + high.into_u128() << 64 | low.into_u128() +} + #[cfg(test)] mod test { use super::*; From 914ce5c882fe69b5ef39284e66d3ef0da0b188fb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 24 Oct 2024 15:34:22 -0400 Subject: [PATCH 11/20] Implement oneshot of 1 to 3 bytes --- src/xxhash3/secret.rs | 8 ++++++ src/xxhash3_128.rs | 63 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index 00ca3d95d..405d70830 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -197,6 +197,14 @@ impl<'a> Secret128BitView<'a> { [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } + #[inline] + pub fn words_for_1_to_3(self) -> [u32; 4] { + self.0.reassert_preconditions(); + + let (q, _) = self.b().bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u32::from_le_bytes) + } + fn b(self) -> &'a [u8] { &(self.0).0 } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index 22d887632..ccf0a5106 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -6,7 +6,7 @@ unsafe_op_in_unsafe_fn )] -use crate::{xxhash3::*, IntoU128 as _}; +use crate::{xxhash3::*, IntoU128 as _, IntoU64 as _}; pub use crate::xxhash3::{DEFAULT_SECRET_LENGTH, 
SECRET_MINIMUM_LENGTH}; @@ -27,12 +27,26 @@ impl Hasher { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 0 => impl_0_bytes(secret, seed), _ => unimplemented!(), } } +#[derive(Copy, Clone)] +struct X128 { + low: u64, + high: u64, +} + +impl From for u128 { + fn from(value: X128) -> Self { + value.high.into_u128() << 64 | value.low.into_u128() + } +} + #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 { let secret_words = secret.for_128().words_for_0(); @@ -40,11 +54,35 @@ fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 { let low = avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]); let high = avalanche_xxh64(seed ^ secret_words[2] ^ secret_words[3]); - high.into_u128() << 64 | low.into_u128() + X128 { low, high }.into() +} + +#[inline(always)] +fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + assert_input_range!(1..=3, input.len()); + + let combined = impl_1_to_3_bytes_combined(input); + let secret_words = secret.for_128().words_for_1_to_3(); + + let low = { + let secret = (secret_words[0] ^ secret_words[1]).into_u64(); + secret.wrapping_add(seed) ^ combined.into_u64() + }; + let high = { + let secret = (secret_words[2] ^ secret_words[3]).into_u64(); + secret.wrapping_sub(seed) ^ combined.swap_bytes().rotate_left(13).into_u64() + }; + + let low = avalanche_xxh64(low); + let high = avalanche_xxh64(high); + + X128 { low, high }.into() } #[cfg(test)] mod test { + use crate::xxhash3::test::bytes; + use super::*; const _: () = { @@ -59,4 +97,25 @@ mod test { let hash = Hasher::oneshot(&EMPTY_BYTES); assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f); } + + #[test] + fn oneshot_1_to_3_bytes() { + test_1_to_3_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let inputs = bytes![1, 2, 3]; + + let expected = [ + 
0xa6cd_5e93_9200_0f6a_c44b_dff4_074e_ecdb, + 0x6a4a_5274_c1b0_d3ad_d664_5fc3_051a_9457, + 0xe3b5_5f57_945a_17cf_5f42_99fc_161c_9cbb, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = f(input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From c8aa792d702824b2d5598f852e3ea77c04879983 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 24 Oct 2024 16:37:51 -0400 Subject: [PATCH 12/20] Implement oneshot of 4 to 8 bytes --- src/xxhash3/secret.rs | 8 ++++++ src/xxhash3_128.rs | 63 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index 405d70830..d6bc75809 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -205,6 +205,14 @@ impl<'a> Secret128BitView<'a> { [q[0], q[1], q[2], q[3]].map(u32::from_le_bytes) } + #[inline] + pub fn words_for_4_to_8(self) -> [u64; 2] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[16..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_le_bytes) + } + fn b(self) -> &'a [u8] { &(self.0).0 } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index ccf0a5106..e12853520 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -6,7 +6,10 @@ unsafe_op_in_unsafe_fn )] -use crate::{xxhash3::*, IntoU128 as _, IntoU64 as _}; +use crate::{ + xxhash3::{primes::*, *}, + IntoU128 as _, IntoU64 as _, +}; pub use crate::xxhash3::{DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; @@ -27,6 +30,8 @@ impl Hasher { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), 0 => impl_0_bytes(secret, seed), @@ -79,6 +84,39 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { X128 { low, high }.into() } +#[inline(always)] +fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + 
assert_input_range!(4..=8, input.len()); + let input_first = input.first_u32().unwrap(); + let input_last = input.last_u32().unwrap(); + + let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); + let secret_words = secret.for_128().words_for_4_to_8(); + + let combined = input_first.into_u64() | (input_last.into_u64() << 32); + let lhs = { + let a = secret_words[0] ^ secret_words[1]; + let b = a.wrapping_add(modified_seed); + b ^ combined + }; + let rhs = PRIME64_1.wrapping_add(input.len().into_u64() << 2); + let mul_result = lhs.into_u128().wrapping_mul(rhs.into_u128()); + + let mut high = mul_result.upper_half(); + let mut low = mul_result.lower_half(); + + high = high.wrapping_add(low << 1); + + low ^= high >> 3; + low ^= low >> 35; + low = low.wrapping_mul(PRIME_MX2); + low ^= low >> 28; + + high = avalanche(high); + + X128 { low, high }.into() +} + #[cfg(test)] mod test { use crate::xxhash3::test::bytes; @@ -118,4 +156,27 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } + + #[test] + fn oneshot_4_to_8_bytes() { + test_4_to_8_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let inputs = bytes![4, 5, 6, 7, 8]; + + let expected = [ + 0xeb70_bf5f_c779_e9e6_a611_1d53_e80a_3db5, + 0x9434_5321_06a7_c141_c920_d234_7a85_929b, + 0x545f_093d_32b1_68fe_a6b5_2f4d_ea38_96a3, + 0x61ce_291b_c3a4_357d_dbb2_0782_1e6d_5efe, + 0xe1e4_432a_6221_7fe4_cfd5_0c61_c8bb_98c1, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = f(input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From 098a9c5a2d03a3f06a2a8316f64493f751655596 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 24 Oct 2024 18:05:43 -0400 Subject: [PATCH 13/20] Implement oneshot of 9 to 16 bytes --- src/xxhash3/secret.rs | 8 ++++++ src/xxhash3_128.rs | 67 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git 
a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index d6bc75809..9d41e9cc0 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -213,6 +213,14 @@ impl<'a> Secret128BitView<'a> { [q[0], q[1]].map(u64::from_le_bytes) } + #[inline] + pub fn words_for_9_to_16(self) -> [u64; 4] { + self.0.reassert_preconditions(); + + let (q, _) = self.b()[32..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) + } + fn b(self) -> &'a [u8] { &(self.0).0 } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index e12853520..f9f18aadf 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -30,6 +30,8 @@ impl Hasher { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 9..=16 => impl_9_to_16_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), 1..=3 => impl_1_to_3_bytes(secret, seed, input), @@ -52,6 +54,12 @@ impl From for u128 { } } +impl crate::IntoU128 for X128 { + fn into_u128(self) -> u128 { + self.into() + } +} + #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 { let secret_words = secret.for_128().words_for_0(); @@ -117,6 +125,39 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { X128 { low, high }.into() } +#[inline(always)] +fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + assert_input_range!(9..=16, input.len()); + let input_first = input.first_u64().unwrap(); + let input_last = input.last_u64().unwrap(); + + let secret_words = secret.for_128().words_for_9_to_16(); + let val1 = ((secret_words[0] ^ secret_words[1]).wrapping_sub(seed)) ^ input_first ^ input_last; + let val2 = ((secret_words[2] ^ secret_words[3]).wrapping_add(seed)) ^ input_last; + let mul_result = val1.into_u128().wrapping_mul(PRIME64_1.into_u128()); + let low = mul_result + .lower_half() + .wrapping_add((input.len() - 1).into_u64() << 54); + + // Algorithm describes this in two ways + let high = mul_result + .upper_half() + 
.wrapping_add(val2.upper_half().into_u64() << 32) + .wrapping_add(val2.lower_half().into_u64().wrapping_mul(PRIME32_2)); + + let low = low ^ high.swap_bytes(); + + // Algorithm describes this multiplication in two ways. + let q = X128 { low, high } + .into_u128() + .wrapping_mul(PRIME64_2.into_u128()); + + let low = avalanche(q.lower_half()); + let high = avalanche(q.upper_half()); + + X128 { low, high }.into() +} + #[cfg(test)] mod test { use crate::xxhash3::test::bytes; @@ -179,4 +220,30 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } + + #[test] + fn oneshot_9_to_16_bytes() { + test_9_to_16_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; + + let expected = [ + 0x16c7_69d8_3e4a_ebce_9079_3197_9dca_3746, + 0xbd93_0669_a87b_4b37_e67b_f1ad_8dcf_73a8, + 0xacad_8071_8f47_d494_7d67_cfc1_730f_22a3, + 0x38f9_2247_a7f7_3cc5_7780_eb31_198f_13ca, + 0xae92_e123_e947_2408_bd79_5526_1902_66c0, + 0x5f91_e6bf_7418_cfaa_55d6_5715_e2a5_7c31, + 0x301a_9f75_4e8f_569a_0017_ea4b_e19b_c787, + 0x7295_0631_8276_07e2_8428_12cc_870d_cae2, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = f(input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From 91f2ea49c65ff4dadd2a33e97e9c77372c95fc90 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 25 Oct 2024 18:20:17 -0400 Subject: [PATCH 14/20] Implement oneshot of 17 to 128 bytes --- src/xxhash3_128.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index f9f18aadf..3950f5bf6 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -30,6 +30,8 @@ impl Hasher { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 9..=16 => 
impl_9_to_16_bytes(secret, seed, input), 4..=8 => impl_4_to_8_bytes(secret, seed, input), @@ -158,6 +160,44 @@ fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { X128 { low, high }.into() } +#[inline] +fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + assert_input_range!(17..=128, input.len()); + let mut acc = [input.len().into_u64().wrapping_mul(PRIME64_1), 0]; + + impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| { + mix_two_chunks(&mut acc, fwd, bwd, secret, seed); + }); + + let low = acc[0].wrapping_add(acc[1]); + let high = acc[0] + .wrapping_mul(PRIME64_1) + .wrapping_add(acc[1].wrapping_mul(PRIME64_4)) + .wrapping_add((input.len().into_u64().wrapping_sub(seed)).wrapping_mul(PRIME64_2)); + + let low = avalanche(low); + let high = avalanche(high).wrapping_neg(); + + X128 { low, high }.into() +} + +#[inline] +fn mix_two_chunks( + acc: &mut [u64; 2], + data1: &[u8; 16], + data2: &[u8; 16], + secret: &[[u8; 16]; 2], + seed: u64, +) { + let data_words1 = to_u64s(data1); + let data_words2 = to_u64s(data2); + + acc[0] = acc[0].wrapping_add(mix_step(data1, &secret[0], seed)); + acc[1] = acc[1].wrapping_add(mix_step(data2, &secret[1], seed)); + acc[0] ^= data_words2[0].wrapping_add(data_words2[1]); + acc[1] ^= data_words1[0].wrapping_add(data_words1[1]); +} + #[cfg(test)] mod test { use crate::xxhash3::test::bytes; @@ -246,4 +286,41 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } + + #[test] + fn oneshot_17_to_128_bytes() { + test_17_to_128_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let lower_boundary = bytes![17, 18, 19]; + let chunk_boundary = bytes![31, 32, 33]; + let upper_boundary = bytes![126, 127, 128]; + + let inputs = lower_boundary + .iter() + .chain(chunk_boundary) + .chain(upper_boundary); + + let expected = [ + // lower_boundary + 0x685b_c458_b37d_057f_c06e_233d_f772_9217, + 
0x87ce_996b_b557_6d8d_e3a3_c96b_b0af_2c23, + 0x7619_bcef_2e31_1cd8_c47d_dc58_8737_93df, + // chunk_boundary + 0x4ed3_946d_393b_687b_b54d_e399_3874_ed20, + 0x25e7_c9b3_424c_eed2_457d_9566_b6fc_d697, + 0x0217_5c3a_abb0_0637_e08d_8495_1339_de86, + // upper_boundary + 0x0abc_2062_87ce_2afe_5181_0be2_9323_2106, + 0xd5ad_d870_c9c9_e00f_060c_2e3d_df0f_2fb9, + 0x1479_2fc3_af88_dc6c_0532_1a0b_64d6_7b41, + ]; + + for (input, expected) in inputs.zip(expected) { + let hash = f(input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From 514343e154702fc86490e12ef60666cee5850c2b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 27 Oct 2024 11:48:17 -0400 Subject: [PATCH 15/20] Implement oneshot of 129 to 240 bytes --- src/xxhash3.rs | 8 ++++ src/xxhash3/secret.rs | 24 ++++++++++++ src/xxhash3_128.rs | 87 ++++++++++++++++++++++++++++++++++++++----- 3 files changed, 110 insertions(+), 9 deletions(-) diff --git a/src/xxhash3.rs b/src/xxhash3.rs index a472ac6fa..14251e089 100644 --- a/src/xxhash3.rs +++ b/src/xxhash3.rs @@ -170,6 +170,14 @@ pub fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { [pair[0], pair[1]].map(u64::from_le_bytes) } +#[inline] +#[cfg(feature = "xxhash3_128")] +pub fn pairs_of_u64_bytes(bytes: &[u8]) -> &[[[u8; 16]; 2]] { + let (u64_bytes, _) = bytes.bp_as_chunks::<16>(); + let (pairs, _) = u64_bytes.bp_as_chunks::<2>(); + pairs +} + #[inline] pub fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index 9d41e9cc0..95d02c640 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -2,6 +2,9 @@ use core::{hint::assert_unchecked, mem}; use super::SliceBackport as _; +#[cfg(feature = "xxhash3_128")] +use super::pairs_of_u64_bytes; + /// The minimum length of a secret. 
pub const SECRET_MINIMUM_LENGTH: usize = 136; @@ -221,6 +224,27 @@ impl<'a> Secret128BitView<'a> { [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } + #[inline] + pub fn words_for_127_to_240_part1(self) -> &'a [[[u8; 16]; 2]] { + self.0.reassert_preconditions(); + + pairs_of_u64_bytes(self.b()) + } + + #[inline] + pub fn words_for_127_to_240_part2(self) -> &'a [[[u8; 16]; 2]] { + self.0.reassert_preconditions(); + + pairs_of_u64_bytes(&self.b()[3..]) + } + + #[inline] + pub fn words_for_127_to_240_part3(self) -> &'a [[u8; 16]; 2] { + self.0.reassert_preconditions(); + + pairs_of_u64_bytes(&self.b()[103..]).first().unwrap() + } + fn b(self) -> &'a [u8] { &(self.0).0 } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index 3950f5bf6..7b2f7075f 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -30,6 +30,8 @@ impl Hasher { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), @@ -163,22 +165,46 @@ fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { #[inline] fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { assert_input_range!(17..=128, input.len()); - let mut acc = [input.len().into_u64().wrapping_mul(PRIME64_1), 0]; + let input_len = input.len().into_u64(); + let mut acc = [input_len.wrapping_mul(PRIME64_1), 0]; impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| { mix_two_chunks(&mut acc, fwd, bwd, secret, seed); }); - let low = acc[0].wrapping_add(acc[1]); - let high = acc[0] - .wrapping_mul(PRIME64_1) - .wrapping_add(acc[1].wrapping_mul(PRIME64_4)) - .wrapping_add((input.len().into_u64().wrapping_sub(seed)).wrapping_mul(PRIME64_2)); + finalize_medium(acc, input_len, seed) +} - let low = avalanche(low); - let high = avalanche(high).wrapping_neg(); +#[inline] +fn 
impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 { + assert_input_range!(129..=240, input.len()); + let input_len = input.len().into_u64(); + let mut acc = [input_len.wrapping_mul(PRIME64_1), 0]; - X128 { low, high }.into() + let head = pairs_of_u64_bytes(input); + let mut head = head.iter(); + + let ss = secret.for_128().words_for_127_to_240_part1(); + for (input, secret) in head.by_ref().zip(ss).take(4) { + mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed); + } + + let mut acc = acc.map(avalanche); + + let ss = secret.for_128().words_for_127_to_240_part2(); + for (input, secret) in head.zip(ss) { + mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed); + } + + let (_, tail) = input.bp_as_rchunks::<16>(); + let (_, tail) = tail.bp_as_rchunks::<2>(); + let tail = tail.last().unwrap(); + let ss = secret.for_128().words_for_127_to_240_part3(); + + // note that the half-chunk order and the seed is different here + mix_two_chunks(&mut acc, &tail[1], &tail[0], ss, seed.wrapping_neg()); + + finalize_medium(acc, input_len, seed) } #[inline] @@ -198,6 +224,20 @@ fn mix_two_chunks( acc[1] ^= data_words1[0].wrapping_add(data_words1[1]); } +#[inline] +fn finalize_medium(acc: [u64; 2], input_len: u64, seed: u64) -> u128 { + let low = acc[0].wrapping_add(acc[1]); + let high = acc[0] + .wrapping_mul(PRIME64_1) + .wrapping_add(acc[1].wrapping_mul(PRIME64_4)) + .wrapping_add((input_len.wrapping_sub(seed)).wrapping_mul(PRIME64_2)); + + let low = avalanche(low); + let high = avalanche(high).wrapping_neg(); + + X128 { low, high }.into() +} + #[cfg(test)] mod test { use crate::xxhash3::test::bytes; @@ -323,4 +363,33 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } + + #[test] + fn oneshot_129_to_240_bytes() { + test_129_to_240_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let lower_boundary = bytes![129, 130, 131]; + let upper_boundary = 
bytes![238, 239, 240]; + + let inputs = lower_boundary.iter().chain(upper_boundary); + + let expected = [ + // lower_boundary + 0xdd5e_74ac_6b45_f54e_bc30_b633_82b0_9a3b, + 0x6cd2_e56a_10f1_e707_3ec5_f135_d0a7_d28f, + 0x6da7_92f1_702d_4494_5609_cfc7_9dba_18fd, + // upper_boundary + 0x73a9_e8f7_bd32_83c8_2a9b_ddd0_e5c4_014c, + 0x9843_ab31_a06b_e0df_fe21_3746_28fc_c539, + 0x65b5_be86_da55_40e7_c92b_68e1_6f83_bbb6, + ]; + + for (input, expected) in inputs.zip(expected) { + let hash = f(input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From e47968f436dd459508b6cf21c69dd0aef735d59f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 28 Oct 2024 13:33:43 -0400 Subject: [PATCH 16/20] Implement oneshot of 241 and up bytes --- src/xxhash3.rs | 21 +++++++++++ src/xxhash3/large.rs | 27 ++++++++++++++ src/xxhash3/secret.rs | 8 ++++ src/xxhash3_128.rs | 85 +++++++++++++++++++++++++++++++++---------- 4 files changed, 121 insertions(+), 20 deletions(-) diff --git a/src/xxhash3.rs b/src/xxhash3.rs index 14251e089..0067abeab 100644 --- a/src/xxhash3.rs +++ b/src/xxhash3.rs @@ -204,6 +204,27 @@ pub fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { } } +/// This exists just to easily map the XXH3 algorithm to Rust as the +/// algorithm describes 128-bit results as a pair of high and low u64 +/// values. 
+#[derive(Copy, Clone)] +pub(crate) struct X128 { + pub low: u64, + pub high: u64, +} + +impl From for u128 { + fn from(value: X128) -> Self { + value.high.into_u128() << 64 | value.low.into_u128() + } +} + +impl crate::IntoU128 for X128 { + fn into_u128(self) -> u128 { + self.into() + } +} + pub trait Halves { type Output; diff --git a/src/xxhash3/large.rs b/src/xxhash3/large.rs index 2845b7931..df1a7d0dd 100644 --- a/src/xxhash3/large.rs +++ b/src/xxhash3/large.rs @@ -2,6 +2,9 @@ use super::{ assert_input_range, avalanche, primes::*, stripes_with_tail, Halves, Secret, SliceBackport as _, }; +#[cfg(feature = "xxhash3_128")] +use super::X128; + use crate::{IntoU128, IntoU64}; // This module is not `cfg`-gated because it is used by some of the @@ -221,6 +224,30 @@ where self.final_merge(&acc, low, secret.final_secret()) } + #[inline] + #[cfg(feature = "xxhash3_128")] + pub fn finalize_128( + &self, + mut acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + len: usize, + ) -> u128 { + debug_assert!(!last_block.is_empty()); + self.last_round(&mut acc, last_block, last_stripe, secret); + + let len = len.into_u64(); + + let low = len.wrapping_mul(PRIME64_1); + let low = self.final_merge(&acc, low, secret.final_secret()); + + let high = !len.wrapping_mul(PRIME64_2); + let high = self.final_merge(&acc, high, secret.for_128().final_secret()); + + X128 { low, high }.into() + } + #[inline] fn last_round( &self, diff --git a/src/xxhash3/secret.rs b/src/xxhash3/secret.rs index 95d02c640..63e55b2fd 100644 --- a/src/xxhash3/secret.rs +++ b/src/xxhash3/secret.rs @@ -245,6 +245,14 @@ impl<'a> Secret128BitView<'a> { pairs_of_u64_bytes(&self.b()[103..]).first().unwrap() } + #[inline] + pub fn final_secret(self) -> &'a [u8; 64] { + self.0.reassert_preconditions(); + + let b = self.b(); + b[b.len() - 75..].first_chunk().unwrap() + } + fn b(self) -> &'a [u8] { &(self.0).0 } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index 7b2f7075f..0365c36de 
100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -27,9 +27,35 @@ impl Hasher { } } +struct Finalize128; + +impl Finalize for Finalize128 { + type Output = u128; + + #[inline] + fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output { + impl_oneshot(secret, seed, input) + } + + #[inline] + fn large( + &self, + vector: impl Vector, + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + len: usize, + ) -> Self::Output { + Algorithm(vector).finalize_128(acc, last_block, last_stripe, secret, len) + } +} + #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { match input.len() { + 241.. => impl_241_plus_bytes(secret, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), 17..=128 => impl_17_to_128_bytes(secret, seed, input), @@ -41,26 +67,6 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 { 1..=3 => impl_1_to_3_bytes(secret, seed, input), 0 => impl_0_bytes(secret, seed), - - _ => unimplemented!(), - } -} - -#[derive(Copy, Clone)] -struct X128 { - low: u64, - high: u64, -} - -impl From for u128 { - fn from(value: X128) -> Self { - value.high.into_u128() << 64 | value.low.into_u128() - } -} - -impl crate::IntoU128 for X128 { - fn into_u128(self) -> u128 { - self.into() } } @@ -238,6 +244,20 @@ fn finalize_medium(acc: [u64; 2], input_len: u64, seed: u64) -> u128 { X128 { low, high }.into() } +#[inline] +fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u128 { + assert_input_range!(241.., input.len()); + dispatch! 
{ + fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u128 + [] + } +} + +#[inline] +fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u128 { + Algorithm(vector).oneshot(secret, input, Finalize128) +} + #[cfg(test)] mod test { use crate::xxhash3::test::bytes; @@ -392,4 +412,29 @@ mod test { assert_eq!(hash, expected, "input was {} bytes", input.len()); } } + + #[test] + fn oneshot_241_plus_bytes() { + test_241_plus_bytes(Hasher::oneshot) + } + + #[track_caller] + fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u128) { + let inputs = bytes![241, 242, 243, 244, 1024, 10240]; + + let expected = [ + 0x1da1_cb61_bcb8_a2a1_02e8_cd95_421c_6d02, + 0x1623_84cb_44d1_d806_ddcb_33c4_9405_1832, + 0xbd2e_9fcf_378c_35e9_8835_f952_9193_e3dc, + 0x3ff4_93d7_a813_7ab6_bc17_c91e_c3cf_8d7f, + 0xd0ac_1f7b_93bf_57b9_e5d7_8baf_a45b_2aa5, + 0x4f63_75cc_a7ec_e1e1_bcd6_3266_df6e_2244, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = f(input); + eprintln!("{hash:032x}\n{expected:032x}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } } From a9536eddd5394494c9d1436cafa0ebae55fa2bda Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 29 Oct 2024 08:49:58 -0400 Subject: [PATCH 17/20] Implement seed and secret entrypoints --- comparison/src/lib.rs | 57 +++++++++++++++++++++++++++++++++++++++++++ src/xxhash3_128.rs | 51 +++++++++++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index e37bd4429..65fccd8fe 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -363,6 +363,63 @@ mod xxhash3_64 { } } +mod xxhash3_128 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use twox_hash::xxhash3_128::SECRET_MINIMUM_LENGTH; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot(seed in seed_64(), data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed in seed_64(), (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_with_a_secret(secret in secret(), data: Vec) { + oneshot_with_secret_impl(&secret, &data)?; + } + + #[test] + fn oneshot_with_a_seed_and_secret(seed in seed_64(), secret in secret(), data: Vec) { + oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; + } + } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_128::oneshot_with_seed(seed, data); + let rust = rust::XxHash3_128::oneshot_with_seed(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_128::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_128::oneshot_with_secret(secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_128::oneshot_with_seed_and_secret(seed, secret, data); + let rust = rust::XxHash3_128::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn secret() -> impl Strategy> { + prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) + } +} + fn seed_32() -> impl Strategy { prop_oneof![Just(0), Just(u32::MAX), num::u32::ANY] } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index 0365c36de..8f098abdb 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -11,7 +11,7 @@ use crate::{ IntoU128 as _, IntoU64 as _, }; -pub use crate::xxhash3::{DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; +pub use crate::xxhash3::{OneshotWithSecretError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; /// Calculates the 128-bit hash. 
#[derive(Clone)] @@ -25,6 +25,55 @@ impl Hasher { pub fn oneshot(input: &[u8]) -> u128 { impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } + + /// Hash all data at once using the provided seed and a secret + /// derived from the seed. If you can use this function, you may + /// see noticeable speed gains for certain types of input. + #[must_use] + #[inline] + pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u128 { + let mut secret = DEFAULT_SECRET_RAW; + + // We know that the secret will only be used if we have more + // than 240 bytes, so don't waste time computing it otherwise. + if input.len() > CUTOFF { + derive_secret(seed, &mut secret); + } + + let secret = Secret::new(&secret).expect("The default secret length is invalid"); + + impl_oneshot(secret, seed, input) + } + + /// Hash all data at once using the provided secret and the + /// default seed. If you can use this function, you may see + /// noticeable speed gains for certain types of input. + #[inline] + pub fn oneshot_with_secret( + secret: &[u8], + input: &[u8], + ) -> Result { + let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; + Ok(impl_oneshot(secret, DEFAULT_SEED, input)) + } + + /// Hash all data at once using the provided seed and secret. If + /// you can use this function, you may see noticeable speed gains + /// for certain types of input. + #[inline] + pub fn oneshot_with_seed_and_secret( + seed: u64, + secret: &[u8], + input: &[u8], + ) -> Result { + let secret = if input.len() > CUTOFF { + Secret::new(secret).map_err(OneshotWithSecretError)? 
+ } else { + DEFAULT_SECRET + }; + + Ok(impl_oneshot(secret, seed, input)) + } } struct Finalize128; From f629e883d74ac1eb2036ee4b87ae7e8c934adbb0 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 30 Oct 2024 11:42:45 -0400 Subject: [PATCH 18/20] Implement streaming entrypoints --- comparison/src/lib.rs | 102 ++++++++++++++++++++++++++ src/xxhash3_128.rs | 163 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 263 insertions(+), 2 deletions(-) diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 65fccd8fe..7f6973426 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -370,6 +370,21 @@ mod xxhash3_128 { use super::*; proptest! { + #[test] + fn oneshot_same_as_one_chunk(seed in seed_64(), data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed in seed_64(), (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed in seed_64(), (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + #[test] fn oneshot(seed in seed_64(), data: Vec) { oneshot_impl(seed, &data)?; @@ -389,6 +404,51 @@ mod xxhash3_128 { fn oneshot_with_a_seed_and_secret(seed in seed_64(), secret in secret(), data: Vec) { oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; } + + #[test] + fn streaming_one_chunk(seed in seed_64(), data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed in seed_64(), (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_with_a_seed_and_secret(seed in seed_64(), secret in secret(), data: Vec) { + streaming_with_seed_and_secret_impl(seed, &secret, &data)?; + } + } + + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = 
rust::XxHash3_128::oneshot_with_seed(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash3_128::with_seed(seed); + hasher.write(data); + hasher.finish_128() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash3_128::oneshot_with_seed(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash3_128::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish_128() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { @@ -415,6 +475,48 @@ mod xxhash3_128 { Ok(()) } + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_128::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_128::with_seed(seed); + hasher.write(data); + hasher.finish_128() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_with_seed_and_secret_impl( + seed: u64, + secret: &[u8], + data: &[u8], + ) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_128::with_seed_and_secret(seed, secret); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_128::with_seed_and_secret(seed, secret).unwrap(); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish_128() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + fn secret() -> impl Strategy> { prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) } diff --git a/src/xxhash3_128.rs b/src/xxhash3_128.rs index 8f098abdb..d7624378c 100644 --- a/src/xxhash3_128.rs +++ b/src/xxhash3_128.rs @@ -11,11 +11,19 @@ use crate::{ IntoU128 as _, IntoU64 as _, }; -pub use crate::xxhash3::{OneshotWithSecretError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH}; 
+pub use crate::xxhash3::{ + FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError, + SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH, +}; /// Calculates the 128-bit hash. #[derive(Clone)] -pub struct Hasher; +/// TODO: does not implement hash. +pub struct Hasher { + #[cfg(feature = "alloc")] + inner: AllocRawHasher, + _private: (), +} impl Hasher { /// Hash all data at once. If you can use this function, you may @@ -75,6 +83,113 @@ impl Hasher { Ok(impl_oneshot(secret, seed, input)) } } +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +mod with_alloc { + use ::alloc::boxed::Box; + + use super::*; + + impl Hasher { + /// Constructs the hasher using the default seed and secret values. + pub fn new() -> Self { + Self { + inner: RawHasherCore::allocate_default(), + _private: (), + } + } + + /// Constructs the hasher using the provided seed and a secret + /// derived from the seed. + pub fn with_seed(seed: u64) -> Self { + Self { + inner: RawHasherCore::allocate_with_seed(seed), + _private: (), + } + } + + /// Constructs the hasher using the provided seed and secret. + pub fn with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Ok(Self { + inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?, + _private: (), + }) + } + + /// Returns the secret. + pub fn into_secret(self) -> Box<[u8]> { + self.inner.into_secret() + } + + /// Writes some data into this `Hasher`. + #[inline] + pub fn write(&mut self, input: &[u8]) { + self.inner.write(input); + } + + /// Returns the hash value for the values written so + /// far. Unlike [`std::hash::Hasher::finish`][], this method + /// returns the complete 128-bit value calculated, not a + /// 64-bit value. 
+ #[inline] + pub fn finish_128(&self) -> u128 { + self.inner.finish(Finalize128) + } + } + + impl Default for Hasher { + fn default() -> Self { + Self::new() + } + } +} + +#[derive(Clone)] +/// A lower-level interface for computing a hash from streaming data. +/// +/// The algorithm requires a secret which can be a reasonably large +/// piece of data. [`Hasher`][] makes one concrete implementation +/// decision that uses dynamic memory allocation, but specialized +/// usages may desire more flexibility. This type, combined with +/// [`SecretBuffer`][], offers that flexibility at the cost of a +/// generic type. +pub struct RawHasher(RawHasherCore); + +impl RawHasher { + /// Construct the hasher with the provided seed, secret, and + /// temporary buffer. + pub fn new(secret_buffer: SecretBuffer) -> Self { + Self(RawHasherCore::new(secret_buffer)) + } + + /// Returns the secret. + pub fn into_secret(self) -> S { + self.0.into_secret() + } +} + +impl RawHasher +where + S: FixedBuffer, +{ + /// Writes some data into this `Hasher`. + #[inline] + pub fn write(&mut self, input: &[u8]) { + self.0.write(input); + } + + /// Returns the hash value for the values written so + /// far. Unlike [`std::hash::Hasher::finish`][], this method + /// returns the complete 128-bit value calculated, not a + /// 64-bit value. 
+ #[inline] + pub fn finish_128(&self) -> u128 { + self.0.finish(Finalize128) + } +} struct Finalize128; @@ -320,17 +435,36 @@ mod test { const EMPTY_BYTES: [u8; 0] = []; + fn hash_byte_by_byte(input: &[u8]) -> u128 { + let mut hasher = Hasher::new(); + for byte in input.chunks(1) { + hasher.write(byte) + } + hasher.finish_128() + } + #[test] fn oneshot_empty() { let hash = Hasher::oneshot(&EMPTY_BYTES); assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f); } + #[test] + fn streaming_empty() { + let hash = hash_byte_by_byte(&EMPTY_BYTES); + assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f); + } + #[test] fn oneshot_1_to_3_bytes() { test_1_to_3_bytes(Hasher::oneshot) } + #[test] + fn streaming_1_to_3_bytes() { + test_1_to_3_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u128) { let inputs = bytes![1, 2, 3]; @@ -352,6 +486,11 @@ mod test { test_4_to_8_bytes(Hasher::oneshot) } + #[test] + fn streaming_4_to_8_bytes() { + test_4_to_8_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u128) { let inputs = bytes![4, 5, 6, 7, 8]; @@ -375,6 +514,11 @@ mod test { test_9_to_16_bytes(Hasher::oneshot) } + #[test] + fn streaming_9_to_16_bytes() { + test_9_to_16_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u128) { let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; @@ -401,6 +545,11 @@ mod test { test_17_to_128_bytes(Hasher::oneshot) } + #[test] + fn streaming_17_to_128_bytes() { + test_17_to_128_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u128) { let lower_boundary = bytes![17, 18, 19]; @@ -438,6 +587,11 @@ mod test { test_129_to_240_bytes(Hasher::oneshot) } + #[test] + fn streaming_129_to_240_bytes() { + test_129_to_240_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u128) { let lower_boundary = bytes![129, 130, 
131]; @@ -467,6 +621,11 @@ mod test { test_241_plus_bytes(Hasher::oneshot) } + #[test] + fn streaming_241_plus_bytes() { + test_241_plus_bytes(hash_byte_by_byte) + } + #[track_caller] fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u128) { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; From e8e3dc32d2bb7e1681c6539e8a9dccbbdba5037a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 31 Oct 2024 11:52:30 -0400 Subject: [PATCH 19/20] Add benchmarks --- comparison/benches/benchmark.rs | 199 +++++++++++++++++++++++++++++++- comparison/generate-graph.R | 2 +- 2 files changed, 199 insertions(+), 2 deletions(-) diff --git a/comparison/benches/benchmark.rs b/comparison/benches/benchmark.rs index 961465ee0..45b2194ae 100644 --- a/comparison/benches/benchmark.rs +++ b/comparison/benches/benchmark.rs @@ -314,4 +314,201 @@ mod xxhash3_64 { criterion_group!(benches, tiny_data, oneshot, streaming); } -criterion_main!(xxhash64::benches, xxhash3_64::benches); +mod xxhash3_128 { + use super::*; + + fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(240); + let mut g = c.my_benchmark_group("xxhash3_128", "tiny_data"); + + // let categories = 0..=data.len(); + + // Visual inspection of all the data points showed these as + // examples of their nearby neighbors. 
+ let categories = [ + 0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230, + ]; + + for size in categories { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-scalar/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, data)) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-sse2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, data)) + }); + } + + let id = format!("impl-rust/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, data)) + }); + } + + g.finish(); + } + + fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(BIG_DATA_SIZE); + let mut g = c.my_benchmark_group("xxhash3_128", "oneshot"); + + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-scalar/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, data)) + 
}); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-sse2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, data)) + }); + } + + let id = format!("impl-rust/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, data)) + }); + } + + g.finish(); + } + + fn streaming(c: &mut Criterion) { + let mut g = c.my_benchmark_group("xxhash3_128", "streaming"); + + let size = 1024 * 1024; + let (seed, data) = gen_data(size); + + for chunk_size in half_sizes(size) { + let chunks = data.chunks(chunk_size).collect::>(); + + g.throughput(Throughput::Bytes(size as _)); + + let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + + let id = format!("impl-c-scalar/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::scalar::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::neon::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::avx2::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + + let id = 
format!("impl-c-sse2/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::sse2::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + } + + let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash3_128::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish_128() + }) + }); + } + + g.finish(); + } + + criterion_group!(benches, tiny_data, oneshot, streaming); +} + +criterion_main!(xxhash64::benches, xxhash3_64::benches, xxhash3_128::benches); diff --git a/comparison/generate-graph.R b/comparison/generate-graph.R index aacf9b09b..903f44b3d 100755 --- a/comparison/generate-graph.R +++ b/comparison/generate-graph.R @@ -45,7 +45,7 @@ cpus = c(aarch64 = "Apple M1 Max", x86_64 = "AMD Ryzen 9 3950X") common_theme = theme(legend.position = "inside", legend.position.inside = c(0.8, 0.2), plot.margin = unit(c(0.1, 1, 0.1, 0.1), 'cm')) -for (algo in c("xxhash64", "xxhash3_64")) { +for (algo in c("xxhash64", "xxhash3_64", "xxhash3_128")) { message("# ", algo) algo_data = data[data$algo == algo,] From f382d8d1077e7a94ab9c1b349b0e5be1d4d0b240 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 1 Nov 2024 09:57:10 -0400 Subject: [PATCH 20/20] Update benchmark results --- comparison/README.md | 96 +++++++- .../results/xxhash3_128-streaming-aarch64.svg | 174 ++++++++++++++ .../results/xxhash3_128-streaming-x86_64.svg | 200 ++++++++++++++++ .../results/xxhash3_128-tiny_data-aarch64.svg | 122 ++++++++++ .../results/xxhash3_128-tiny_data-x86_64.svg | 142 +++++++++++ .../results/xxhash3_64-streaming-aarch64.svg | 176 +++++++------- .../results/xxhash3_64-streaming-x86_64.svg | 220 +++++++++--------- .../results/xxhash3_64-tiny_data-aarch64.svg | 141 ++++++----- .../results/xxhash3_64-tiny_data-x86_64.svg | 173 +++++++------- 
.../results/xxhash64-streaming-aarch64.svg | 107 +++++---- .../results/xxhash64-streaming-x86_64.svg | 107 +++++---- .../results/xxhash64-tiny_data-aarch64.svg | 151 ++++++------ .../results/xxhash64-tiny_data-x86_64.svg | 149 ++++++------ 13 files changed, 1331 insertions(+), 627 deletions(-) create mode 100644 comparison/results/xxhash3_128-streaming-aarch64.svg create mode 100644 comparison/results/xxhash3_128-streaming-x86_64.svg create mode 100644 comparison/results/xxhash3_128-tiny_data-aarch64.svg create mode 100644 comparison/results/xxhash3_128-tiny_data-x86_64.svg diff --git a/comparison/README.md b/comparison/README.md index 5bd5f11dc..73b6b73d0 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -19,15 +19,15 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 13.5 | -| C | 13.5 | +| Rust | 13.4 | +| C | 13.4 | ## x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 16.5 | -| C | 16.5 | +| Rust | 16.7 | +| C | 16.6 | ## Streaming data @@ -75,7 +75,6 @@ Compares the **time taken** to hash 0 to 32 bytes of data. /> - # xxHash3 (64-bit) ## Oneshot hashing @@ -88,20 +87,21 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 35.2 | +| Rust | 35.0 | | C | 35.0 | | C (scalar) | 21.2 | -| C (NEON) | 35.1 | +| C (NEON) | 35.0 | ### x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 58.6 | -| C | 25.0 | -| C (scalar) | 7.5 | +| Rust | 58.9 | +| C | 25.1 | +| C (scalar) | 7.6 | | C (SSE2) | 25.1 | -| C (AVX2) | 57.8 | +| C (AVX2) | 58.4 | + ## Streaming data @@ -150,6 +150,78 @@ cluttering the graph and wasting benchmarking time. /> +# xxHash3 (128-bit) + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. 
Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 34.4 | +| C | 34.8 | +| C (scalar) | 21.3 | +| C (NEON) | 34.6 | + +### x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 58.3 | +| C | 25.6 | +| C (scalar) | 7.6 | +| C (SSE2) | 25.5 | +| C (AVX2) | 57.4 | + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + xxHash3, 128-bit, streaming data, on an aarch64 processor + + +### x86_64 + + + xxHash3, 128-bit, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 230 bytes of +data. Representative samples are taken from similar times to avoid +cluttering the graph and wasting benchmarking time. + +### aarch64 + + + xxHash3, 128-bit, small data, on an aarch64 processor + + +### x86_64 + + + xxHash3, 128-bit, small data, on an x86_64 processor + + # Benchmark machines ## Overview @@ -159,7 +231,7 @@ cluttering the graph and wasting benchmarking time. | Apple M1 Max | 64 GiB | clang 16.0.0 | | AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | -Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. +Tests were run with `rustc 1.82.0 (f6e511eec 2024-10-15)`. 
## Details diff --git a/comparison/results/xxhash3_128-streaming-aarch64.svg b/comparison/results/xxhash3_128-streaming-aarch64.svg new file mode 100644 index 000000000..15cb7e2ce --- /dev/null +++ b/comparison/results/xxhash3_128-streaming-aarch64.svg @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_128] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/comparison/results/xxhash3_128-streaming-x86_64.svg b/comparison/results/xxhash3_128-streaming-x86_64.svg new file mode 100644 index 000000000..7ec2719a4 --- /dev/null +++ b/comparison/results/xxhash3_128-streaming-x86_64.svg @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + 
+ + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_128] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/comparison/results/xxhash3_128-tiny_data-aarch64.svg b/comparison/results/xxhash3_128-tiny_data-aarch64.svg new file mode 100644 index 000000000..8d66f2958 --- /dev/null +++ b/comparison/results/xxhash3_128-tiny_data-aarch64.svg @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10ns + 20ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_128] Hashing small amounts of bytes (lower is better) + + diff --git a/comparison/results/xxhash3_128-tiny_data-x86_64.svg b/comparison/results/xxhash3_128-tiny_data-x86_64.svg new file mode 100644 index 000000000..a16135f96 --- /dev/null +++ b/comparison/results/xxhash3_128-tiny_data-x86_64.svg @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10ns + 20ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_128] Hashing small amounts of bytes (lower is better) + + diff --git a/comparison/results/xxhash3_64-streaming-aarch64.svg b/comparison/results/xxhash3_64-streaming-aarch64.svg index dc46b30db..a8075662e 100644 --- a/comparison/results/xxhash3_64-streaming-aarch64.svg +++ b/comparison/results/xxhash3_64-streaming-aarch64.svg @@ -21,94 +21,94 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-streaming-x86_64.svg b/comparison/results/xxhash3_64-streaming-x86_64.svg index 6a3997ed9..938d03d94 100644 --- a/comparison/results/xxhash3_64-streaming-x86_64.svg +++ b/comparison/results/xxhash3_64-streaming-x86_64.svg @@ -21,116 +21,116 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-tiny_data-aarch64.svg b/comparison/results/xxhash3_64-tiny_data-aarch64.svg index 753a36cc5..d4154de35 100644 --- a/comparison/results/xxhash3_64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash3_64-tiny_data-aarch64.svg @@ -20,80 +20,79 @@ - - - - - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - + 4ns + 8ns + 12ns + 16ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash3_64-tiny_data-x86_64.svg b/comparison/results/xxhash3_64-tiny_data-x86_64.svg index 81b4fc317..a29058139 100644 --- a/comparison/results/xxhash3_64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash3_64-tiny_data-x86_64.svg @@ -20,96 +20,95 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - + 4ns + 8ns + 12ns + 16ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash64-streaming-aarch64.svg b/comparison/results/xxhash64-streaming-aarch64.svg index 1f58161a6..ba0b061e3 100644 --- a/comparison/results/xxhash64-streaming-aarch64.svg +++ b/comparison/results/xxhash64-streaming-aarch64.svg @@ -20,63 +20,62 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 128 MiB/sec - 256 MiB/sec - 512 MiB/sec - 1 GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-streaming-x86_64.svg b/comparison/results/xxhash64-streaming-x86_64.svg index 064188466..a2223dac3 100644 --- a/comparison/results/xxhash64-streaming-x86_64.svg +++ b/comparison/results/xxhash64-streaming-x86_64.svg @@ -20,63 +20,62 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 128 MiB/sec - 256 MiB/sec - 512 MiB/sec - 1 GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-tiny_data-aarch64.svg b/comparison/results/xxhash64-tiny_data-aarch64.svg index df15121a7..34b041ab4 100644 --- a/comparison/results/xxhash64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash64-tiny_data-aarch64.svg @@ -20,84 +20,83 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 0 B 10 B 20 B diff --git a/comparison/results/xxhash64-tiny_data-x86_64.svg b/comparison/results/xxhash64-tiny_data-x86_64.svg index ca5185f8c..8ea93460b 100644 --- a/comparison/results/xxhash64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash64-tiny_data-x86_64.svg @@ -20,84 +20,83 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 0 B 10 B 20 B