Fix quality for 256-bit state version

ogxd · Oct 29, 2023 · db63372 · db63372
1 parent 2a875c5
commit db63372
Show file tree

Hide file tree

Showing 4 changed files with 6 additions and 31 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -22,4 +22,6 @@ jobs:
     - name: Build
       run: cargo build --verbose
     - name: Run tests
-      run: cargo test --verbose
+      run: cargo test --verbose
+    - name: Run tests (AVX2)
+      run: cargo test --verbose --features avx2
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [features]
-# The 256-bit state GxHash is faster for large inputs than the default 128-bit state implementation.
+# The 256-bit state GxHash is faster for large inputs than the default 128-bit state implementation, but slower for small inputs.
 # Please note, however, that the 256-bit GxHash and the 128-bit GxHash don't generate the same hashes for the same input.
 # Requires AVX2 and VAES (X86).
 avx2 = []

diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs
@@ -1,7 +1,6 @@
 mod platform;
 use platform::*;
 
-#[cfg(not(feature = "avx2"))]
 #[inline(always)]
 pub fn gxhash32(input: &[u8], seed: i32) -> u32 {
     unsafe {
@@ -10,20 +9,6 @@ pub fn gxhash32(input: &[u8], seed: i32) -> u32 {
     }
 }
 
-// Since the 256-bit runs AES operations on two 128-bit lanes, we need to extract
-// the hash from the center, picking the same entropy amount from the two lanes
-#[cfg(feature = "avx2")]
-#[inline(always)]
-pub fn gxhash32(input: &[u8], seed: i32) -> u32 {
-    unsafe {
-        let p = &gxhash(input, seed) as *const state as *const u8;
-        let offset = std::mem::size_of::<state>() / 2 - std::mem::size_of::<u32>() / 2 - 1;
-        let shifted_ptr = p.offset(offset as isize) as *const u32;
-        *shifted_ptr
-    }
-}
-
-#[cfg(not(feature = "avx2"))]
 #[inline(always)]
 pub fn gxhash64(input: &[u8], seed: i32) -> u64 {
     unsafe {
@@ -32,19 +17,6 @@ pub fn gxhash64(input: &[u8], seed: i32) -> u64 {
     }
 }
 
-// Since the 256-bit runs AES operations on two 128-bit lanes, we need to extract
-// the hash from the center, picking the same entropy amount from the two lanes
-#[cfg(feature = "avx2")]
-#[inline(always)]
-pub fn gxhash64(input: &[u8], seed: i32) -> u64 {
-    unsafe {
-        let p = &gxhash(input, seed) as *const state as *const u8;
-        let offset = std::mem::size_of::<state>() / 2 - std::mem::size_of::<u64>() / 2 - 1;
-        let shifted_ptr = p.offset(offset as isize) as *const u64;
-        *shifted_ptr
-    }
-}
-
 const VECTOR_SIZE: isize = std::mem::size_of::<state>() as isize;
 
 #[inline(always)]

diff --git a/src/gxhash/platform/x86_256.rs b/src/gxhash/platform/x86_256.rs
@@ -87,5 +87,6 @@ pub unsafe fn finalize(hash: state, seed: i32) -> state {
     hash = _mm256_aesenc_epi128(hash, keys_2);
     hash = _mm256_aesenclast_epi128(hash, keys_3);
 
-    hash
+    let permuted = _mm256_permute2x128_si256(hash, hash, 0x21);
+    _mm256_xor_si256(hash, permuted)
 }