Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add non-copying version of murmur3_128 that reads directly from a byte buffer #23

Merged
merged 4 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ fn bench_x86_128(b: &mut Bencher) {
});
}

// Throughput benchmark for the slice-based x86 128-bit hash: one fixed ASCII
// input, seed 0, with the input length reported to the harness via `b.bytes`.
#[bench]
fn bench_x86_128_slice(b: &mut Bencher) {
    const SEED: u32 = 0;
    let payload: &[u8] =
        test::black_box(b"Lorem ipsum dolor sit amet, consectetur adipisicing elit");
    b.bytes = payload.len() as u64;
    b.iter(|| murmur3_x86_128_of_slice(payload, SEED));
}

#[bench]
fn bench_c_x86_128(b: &mut Bencher) {
let string: &[u8] =
Expand Down Expand Up @@ -93,6 +101,14 @@ fn bench_x64_128(b: &mut Bencher) {
});
}

// Throughput benchmark for the slice-based x64 128-bit hash: one fixed ASCII
// input, seed 0, with the input length reported to the harness via `b.bytes`.
#[bench]
fn bench_x64_128_slice(b: &mut Bencher) {
    const SEED: u32 = 0;
    let payload: &[u8] =
        test::black_box(b"Lorem ipsum dolor sit amet, consectetur adipisicing elit");
    b.bytes = payload.len() as u64;
    b.iter(|| murmur3_x64_128_of_slice(payload, SEED));
}

#[bench]
fn bench_c_x64_128(b: &mut Bencher) {
let string: &[u8] =
Expand Down
118 changes: 118 additions & 0 deletions src/murmur3_x64_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::cmp::min;
use std::io::{Read, Result};
use std::ops::Shl;

Expand Down Expand Up @@ -119,6 +120,123 @@ pub fn murmur3_x64_128<T: Read>(source: &mut T, seed: u32) -> Result<u128> {
}
}

/// Use the x64 variant of the 128 bit murmur3 to hash byte slice without copying the buffer.
///
/// # Example
/// ```
/// use murmur3::murmur3_x64_128_of_slice;
/// let hash_result = murmur3_x64_128_of_slice(b"hello world", 0);
/// ```
pub fn murmur3_x64_128_of_slice(source: &[u8], seed: u32) -> u128 {
const C1: u64 = 0x87c3_7b91_1142_53d5;
const C2: u64 = 0x4cf5_ad43_2745_937f;
const C3: u64 = 0x52dc_e729;
const C4: u64 = 0x3849_5ab5;
const R1: u32 = 27;
const R2: u32 = 31;
const R3: u32 = 33;
const M: u64 = 5;
let mut h1: u64 = seed as u64;
let mut h2: u64 = seed as u64;
let mut buf = source;
let mut processed: usize = 0;
loop {
match min(buf.len(), 16) {
16 => {
processed += 16;

let k1 = u64::from_le_bytes(copy_into_array(&buf[0..8]));
let k2 = u64::from_le_bytes(copy_into_array(&buf[8..16]));
h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2);
h1 = h1
.rotate_left(R1)
.wrapping_add(h2)
.wrapping_mul(M)
.wrapping_add(C3);
h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1);
h2 = h2
.rotate_left(R2)
.wrapping_add(h1)
.wrapping_mul(M)
.wrapping_add(C4);

buf = &buf[16..];
}
0 => {
h1 ^= processed as u64;
h2 ^= processed as u64;
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
return ((h2 as u128) << 64) | (h1 as u128);
}
_ => {
let read = buf.len();
processed += read;

let mut k1 = 0;
let mut k2 = 0;
if read >= 15 {
k2 ^= (buf[14] as u64).shl(48);
}
if read >= 14 {
k2 ^= (buf[13] as u64).shl(40);
}
if read >= 13 {
k2 ^= (buf[12] as u64).shl(32);
}
if read >= 12 {
k2 ^= (buf[11] as u64).shl(24);
}
if read >= 11 {
k2 ^= (buf[10] as u64).shl(16);
}
if read >= 10 {
k2 ^= (buf[9] as u64).shl(8);
}
if read >= 9 {
k2 ^= buf[8] as u64;
k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1);
h2 ^= k2;
}
if read >= 8 {
k1 ^= (buf[7] as u64).shl(56);
}
if read >= 7 {
k1 ^= (buf[6] as u64).shl(48);
}
if read >= 6 {
k1 ^= (buf[5] as u64).shl(40);
}
if read >= 5 {
k1 ^= (buf[4] as u64).shl(32);
}
if read >= 4 {
k1 ^= (buf[3] as u64).shl(24);
}
if read >= 3 {
k1 ^= (buf[2] as u64).shl(16);
}
if read >= 2 {
k1 ^= (buf[1] as u64).shl(8);
}
if read >= 1 {
k1 ^= buf[0] as u64;
}
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(31);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;

buf = &buf[read..]
}
}
}
}

fn fmix64(k: u64) -> u64 {
const C1: u64 = 0xff51_afd7_ed55_8ccd;
const C2: u64 = 0xc4ce_b9fe_1a85_ec53;
Expand Down
157 changes: 157 additions & 0 deletions src/murmur3_x86_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::cmp::min;
use std::io::{Read, Result};
use std::ops::Shl;

Expand Down Expand Up @@ -156,6 +157,162 @@ pub fn murmur3_x86_128<T: Read>(source: &mut T, seed: u32) -> Result<u128> {
}
}

/// Use the x86 variant of the 128 bit murmur3 to hash byte slice without copying buffer.
///
/// # Example
/// ```
/// use murmur3::murmur3_x86_128_of_slice;
/// let hash_result = murmur3_x86_128_of_slice(b"hello world", 0);
/// ```
pub fn murmur3_x86_128_of_slice(source: &[u8], seed: u32) -> u128 {
const C1: u32 = 0x239b_961b;
const C2: u32 = 0xab0e_9789;
const C3: u32 = 0x38b3_4ae5;
const C4: u32 = 0xa1e3_8b93;
const C5: u32 = 0x561c_cd1b;
const C6: u32 = 0x0bca_a747;
const C7: u32 = 0x96cd_1c35;
const C8: u32 = 0x32ac_3b17;
const M: u32 = 5;

let mut h1: u32 = seed;
let mut h2: u32 = seed;
let mut h3: u32 = seed;
let mut h4: u32 = seed;

let mut buf = source;
let mut processed: usize = 0;
loop {
match min(buf.len(), 16) {
16 => {
processed += 16;

let k1 = u32::from_le_bytes(copy_into_array(&buf[0..4]));
let k2 = u32::from_le_bytes(copy_into_array(&buf[4..8]));
let k3 = u32::from_le_bytes(copy_into_array(&buf[8..12]));
let k4 = u32::from_le_bytes(copy_into_array(&buf[12..16]));
h1 ^= k1.wrapping_mul(C1).rotate_left(15).wrapping_mul(C2);
h1 = h1
.rotate_left(19)
.wrapping_add(h2)
.wrapping_mul(M)
.wrapping_add(C5);
h2 ^= k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3);
h2 = h2
.rotate_left(17)
.wrapping_add(h3)
.wrapping_mul(M)
.wrapping_add(C6);
h3 ^= k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4);
h3 = h3
.rotate_left(15)
.wrapping_add(h4)
.wrapping_mul(M)
.wrapping_add(C7);
h4 ^= k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1);
h4 = h4
.rotate_left(13)
.wrapping_add(h1)
.wrapping_mul(M)
.wrapping_add(C8);

buf = &buf[16..];
}
0 => {
h1 ^= processed as u32;
h2 ^= processed as u32;
h3 ^= processed as u32;
h4 ^= processed as u32;
h1 = h1.wrapping_add(h2);
h1 = h1.wrapping_add(h3);
h1 = h1.wrapping_add(h4);
h2 = h2.wrapping_add(h1);
h3 = h3.wrapping_add(h1);
h4 = h4.wrapping_add(h1);
h1 = fmix32(h1);
h2 = fmix32(h2);
h3 = fmix32(h3);
h4 = fmix32(h4);
h1 = h1.wrapping_add(h2);
h1 = h1.wrapping_add(h3);
h1 = h1.wrapping_add(h4);
h2 = h2.wrapping_add(h1);
h3 = h3.wrapping_add(h1);
h4 = h4.wrapping_add(h1);
let x =
((h4 as u128) << 96) | ((h3 as u128) << 64) | ((h2 as u128) << 32) | h1 as u128;
return x;
}
_ => {
let read = buf.len();
processed += read;

let mut k1 = 0;
let mut k2 = 0;
let mut k3 = 0;
let mut k4 = 0;
if read >= 15 {
k4 ^= (buf[14] as u32).shl(16);
}
if read >= 14 {
k4 ^= (buf[13] as u32).shl(8);
}
if read >= 13 {
k4 ^= buf[12] as u32;
k4 = k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1);
h4 ^= k4;
}
if read >= 12 {
k3 ^= (buf[11] as u32).shl(24);
}
if read >= 11 {
k3 ^= (buf[10] as u32).shl(16);
}
if read >= 10 {
k3 ^= (buf[9] as u32).shl(8);
}
if read >= 9 {
k3 ^= buf[8] as u32;
k3 = k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4);
h3 ^= k3;
}
if read >= 8 {
k2 ^= (buf[7] as u32).shl(24);
}
if read >= 7 {
k2 ^= (buf[6] as u32).shl(16);
}
if read >= 6 {
k2 ^= (buf[5] as u32).shl(8);
}
if read >= 5 {
k2 ^= buf[4] as u32;
k2 = k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3);
h2 ^= k2;
}
if read >= 4 {
k1 ^= (buf[3] as u32).shl(24);
}
if read >= 3 {
k1 ^= (buf[2] as u32).shl(16);
}
if read >= 2 {
k1 ^= (buf[1] as u32).shl(8);
}
if read >= 1 {
k1 ^= buf[0] as u32;
}
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(15);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;

buf = &buf[read..]
}
}
}
}

fn fmix32(k: u32) -> u32 {
const C1: u32 = 0x85eb_ca6b;
const C2: u32 = 0xc2b2_ae35;
Expand Down
22 changes: 19 additions & 3 deletions tests/quickcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@

use std::io::Cursor;

use murmur3::{murmur3_32, murmur3_32_of_slice};
use murmur3::{
murmur3_32, murmur3_32_of_slice, murmur3_x64_128_of_slice, murmur3_x86_128_of_slice,
};
use murmur3_sys::MurmurHash3_x86_32;

use murmur3::murmur3_x86_128;
use murmur3_sys::MurmurHash3_x86_128;

use murmur3::murmur3_x64_128;

Check warning on line 24 in tests/quickcheck.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest)

unused import: `murmur3::murmur3_x64_128`
use murmur3_sys::MurmurHash3_x64_128;

quickcheck! {
Expand Down Expand Up @@ -65,15 +67,29 @@
}

quickcheck! {
fn quickcheck_x64_128(input:(u32, Vec<u8>)) -> bool {
fn quickcheck_x86_128_slice(input:(u32, Vec<u8>)) -> bool {
let seed = input.0;
let xs = input.1;
let output_bytes: [u8; 16] = [0; 16];
unsafe {
MurmurHash3_x86_128(xs.as_ptr() as _, xs.len() as i32,seed,output_bytes.as_ptr() as *mut _)
};
let output = u128::from_le_bytes(output_bytes);
let output2 = murmur3_x86_128_of_slice(&xs, seed);
output == output2
}
}

// Property: for an arbitrary seed and byte vector, both the reader-based and the slice-based
// x64 128-bit hashes must equal the result produced by the C implementation exposed by
// `murmur3_sys`. Checking the reader-based variant here keeps `murmur3_x64_128` covered
// (and its import used).
quickcheck! {
    fn quickcheck_x64_128_slice(input:(u32, Vec<u8>)) -> bool {
        let (seed, xs) = input;
        // The C function reports its result through the out-pointer, so the buffer has to be
        // a mutable binding handed over with `as_mut_ptr`. Writing through a pointer obtained
        // from `as_ptr()` on an immutable binding is undefined behavior.
        let mut output_bytes = [0u8; 16];
        // SAFETY: `xs` is valid for reads of `xs.len()` bytes and `output_bytes` for writes of
        // 16 bytes. This assumes the C function reads `len` bytes of input and writes a
        // 128-bit result, per the MurmurHash3 reference API (TODO confirm against murmur3_sys).
        unsafe {
            MurmurHash3_x64_128(
                xs.as_ptr() as _,
                xs.len() as i32,
                seed,
                output_bytes.as_mut_ptr() as *mut _,
            )
        };
        let output = u128::from_le_bytes(output_bytes);
        // Borrow `xs` for the cursor so the vector is still available for the slice variant.
        let from_reader = murmur3_x64_128(&mut Cursor::new(xs.as_slice()), seed).unwrap();
        let from_slice = murmur3_x64_128_of_slice(&xs, seed);
        output == from_reader && output == from_slice
    }
}
Loading
Loading