Skip to content

Commit

Permalink
Return pos
Browse files: browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 12, 2024
1 parent 2eaeabf commit 517c01e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 14 deletions.
23 changes: 12 additions & 11 deletions src/gxhash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ macro_rules! load_unaligned {

pub(crate) use load_unaligned;

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;

#[inline(always)]
pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State {
Expand All @@ -90,31 +90,32 @@ pub(crate) unsafe fn gxhash_no_finish(input: &[u8], seed: State) -> State {
'p0: {
'p1: {
'p2: {
// This seems ultra efficient
// C-style fallthrough alternative
if lzcnt == 64 {
break 'p0;
} else if lzcnt >= 60 {
break 'p1;
} else if lzcnt >= 55 {
} else if lzcnt >= 56 {
break 'p2;
}

state = compress_8(ptr, whole_vector_count, state, len);

whole_vector_count %= 8;
// Process vectors in batches of 8
// This function is deliberately not inlined: len is large enough here that inlining is not worth it, and keeping it out of line keeps the code size small
(state, ptr, whole_vector_count) = compress_8(ptr, whole_vector_count, state, len);
}

// Process remaining vectors
let end_address = ptr.add(whole_vector_count) as usize;

while (ptr as usize) < end_address {
load_unaligned!(ptr, v0);
state = aes_encrypt(state, v0);
}
}

// Process remaining bytes
let len_partial = len % VECTOR_SIZE;
let partial = get_partial(ptr, len_partial);
state = _mm_add_epi8(state, partial);
state = vaddq_s8(state, partial);
}

return state;
Expand Down
9 changes: 6 additions & 3 deletions src/gxhash/platform/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ pub unsafe fn ld(array: *const u32) -> State {
vreinterpretq_s8_u32(vld1q_u32(array))
}

#[inline(always)]
pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: State, len: usize) -> State {
#[inline(never)]
pub unsafe fn compress_8(mut ptr: *const State, whole_vector_count: usize, hash_vector: State, len: usize) -> (State, *const State, usize) {

let end_address = ptr.add((whole_vector_count / 8) * 8) as usize;

// Disambiguation vectors
let mut t1: State = create_empty();
Expand Down Expand Up @@ -113,8 +115,9 @@ pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector:
let len_vec = vreinterpretq_s8_u32(vdupq_n_u32(len as u32));
lane1 = vaddq_s8(lane1, len_vec);
lane2 = vaddq_s8(lane2, len_vec);

// Merge lanes
aes_encrypt(lane1, lane2)
(aes_encrypt(lane1, lane2), ptr, whole_vector_count % 8)
}

#[inline(always)]
Expand Down

0 comments on commit 517c01e

Please sign in to comment.