Skip to content

Commit

Permalink
Fix performance bottleneck on compression order on ARM
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Oct 31, 2023
1 parent 24922ab commit c1d438b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion benches/throughput.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn benchmark_all(c: &mut Criterion) {
let mut rng = rand::thread_rng();

// Allocate 32-bytes-aligned
- let layout = Layout::from_size_align(50_000, 32).unwrap();
+ let layout = Layout::from_size_align(100_000, 32).unwrap();
let ptr = unsafe { alloc(layout) };
let slice: &mut [u8] = unsafe { slice::from_raw_parts_mut(ptr, layout.size()) };

Expand Down
2 changes: 1 addition & 1 deletion src/gxhash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn gxhash(input: &[u8], seed: i32) -> state {

// Lower sizes first, as comparison/branching overhead will become negligible as input size grows.
let hash_vector = if len <= VECTOR_SIZE {
- gxhash_process_last(ptr, create_empty(), len)
+ get_partial(ptr, len)
} else if len <= VECTOR_SIZE * 2 {
gxhash_process_last(ptr.offset(1), compress(load_unaligned(ptr), create_empty()), len - VECTOR_SIZE)
} else if len < VECTOR_SIZE * 8 {
Expand Down
7 changes: 4 additions & 3 deletions src/gxhash/platform/arm_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ pub unsafe fn compress(a: int8x16_t, b: int8x16_t) -> int8x16_t {
let keys_1 = vld1q_u32([0xFC3BC28E, 0x89C222E5, 0xB09D3E21, 0xF2784542].as_ptr());
let keys_2 = vld1q_u32([0x03FCE279, 0xCB6B2E9B, 0xB361DC58, 0x39136BD9].as_ptr());

- let b = aes_encrypt(vreinterpretq_u8_s8(b), vreinterpretq_u8_u32(keys_1));
- let a = aes_encrypt(vreinterpretq_u8_s8(a), vreinterpretq_u8_u32(keys_2));
+ let mut bs = vreinterpretq_u8_s8(b);
+ bs = aes_encrypt(bs, vreinterpretq_u8_u32(keys_1));
+ bs = aes_encrypt(bs, vreinterpretq_u8_u32(keys_2));

- vreinterpretq_s8_u8(aes_encrypt_last(a, b))
+ vreinterpretq_s8_u8(aes_encrypt_last(vreinterpretq_u8_s8(a), bs))
}

#[inline(always)]
Expand Down

0 comments on commit c1d438b

Please sign in to comment.