From db63372d27f832fba30674b0033fea0ba01a2f57 Mon Sep 17 00:00:00 2001 From: Olivier Giniaux Date: Sun, 29 Oct 2023 02:29:28 +0200 Subject: [PATCH] Fix quality for 256-bit state version --- .github/workflows/rust.yml | 4 +++- Cargo.toml | 2 +- src/gxhash/mod.rs | 28 ---------------------------- src/gxhash/platform/x86_256.rs | 3 ++- 4 files changed, 6 insertions(+), 31 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2bcc45a..88031f8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -22,4 +22,6 @@ jobs: - name: Build run: cargo build --verbose - name: Run tests - run: cargo test --verbose \ No newline at end of file + run: cargo test --verbose + - name: Run tests (AVX2) + run: cargo test --verbose --features avx2 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index ad59ed9..29d512b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ version = "0.1.0" edition = "2021" [features] -# The 256-bit state GxHash is faster for large inputs than the default 128-bit state implementation. +# The 256-bit state GxHash is faster for large inputs than the default 128-bit state implementation, but slower on smaller inputs. # Please not however that the 256-bit GxHash and the 128-bit GxHash don't generate the same hashes for a same input. # Requires AVX2 and VAES (X86). 
avx2 = [] diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs index 5723c3d..4e79def 100644 --- a/src/gxhash/mod.rs +++ b/src/gxhash/mod.rs @@ -1,7 +1,6 @@ mod platform; use platform::*; -#[cfg(not(feature = "avx2"))] #[inline(always)] pub fn gxhash32(input: &[u8], seed: i32) -> u32 { unsafe { @@ -10,20 +9,6 @@ pub fn gxhash32(input: &[u8], seed: i32) -> u32 { } } -// Since the 256-bit runs AES operations on two 128-bit lanes, we need to extract -// the hash from the center, picking the same entropy amount from the two lanes -#[cfg(feature = "avx2")] -#[inline(always)] -pub fn gxhash32(input: &[u8], seed: i32) -> u32 { - unsafe { - let p = &gxhash(input, seed) as *const state as *const u8; - let offset = std::mem::size_of::<state>() / 2 - std::mem::size_of::<u32>() / 2 - 1; - let shifted_ptr = p.offset(offset as isize) as *const u32; - *shifted_ptr - } -} - -#[cfg(not(feature = "avx2"))] #[inline(always)] pub fn gxhash64(input: &[u8], seed: i32) -> u64 { unsafe { @@ -32,19 +17,6 @@ pub fn gxhash64(input: &[u8], seed: i32) -> u64 { } } -// Since the 256-bit runs AES operations on two 128-bit lanes, we need to extract -// the hash from the center, picking the same entropy amount from the two lanes -#[cfg(feature = "avx2")] -#[inline(always)] -pub fn gxhash64(input: &[u8], seed: i32) -> u64 { - unsafe { - let p = &gxhash(input, seed) as *const state as *const u8; - let offset = std::mem::size_of::<state>() / 2 - std::mem::size_of::<u64>() / 2 - 1; - let shifted_ptr = p.offset(offset as isize) as *const u64; - *shifted_ptr - } -} - const VECTOR_SIZE: isize = std::mem::size_of::<state>() as isize; #[inline(always)] diff --git a/src/gxhash/platform/x86_256.rs b/src/gxhash/platform/x86_256.rs index b3e855f..c037fe1 100644 --- a/src/gxhash/platform/x86_256.rs +++ b/src/gxhash/platform/x86_256.rs @@ -87,5 +87,6 @@ pub unsafe fn finalize(hash: state, seed: i32) -> state { hash = _mm256_aesenc_epi128(hash, keys_2); hash = _mm256_aesenclast_epi128(hash, keys_3); - hash + let permuted = 
_mm256_permute2x128_si256(hash, hash, 0x21); + _mm256_xor_si256(hash, permuted) } \ No newline at end of file