diff --git a/benches/bench.rs b/benches/bench.rs index 3d7259f..8b6e961 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -63,6 +63,14 @@ fn bench_x86_128(b: &mut Bencher) { }); } +#[bench] +fn bench_x86_128_slice(b: &mut Bencher) { + let string: &[u8] = + test::black_box(b"Lorem ipsum dolor sit amet, consectetur adipisicing elit"); + b.bytes = string.len() as u64; + b.iter(|| murmur3_x86_128_of_slice(string, 0)); +} + #[bench] fn bench_c_x86_128(b: &mut Bencher) { let string: &[u8] = @@ -93,6 +101,14 @@ fn bench_x64_128(b: &mut Bencher) { }); } +#[bench] +fn bench_x64_128_slice(b: &mut Bencher) { + let string: &[u8] = + test::black_box(b"Lorem ipsum dolor sit amet, consectetur adipisicing elit"); + b.bytes = string.len() as u64; + b.iter(|| murmur3_x64_128_of_slice(string, 0)); +} + #[bench] fn bench_c_x64_128(b: &mut Bencher) { let string: &[u8] = diff --git a/src/murmur3_x64_128.rs b/src/murmur3_x64_128.rs index 1cbd777..43eca73 100644 --- a/src/murmur3_x64_128.rs +++ b/src/murmur3_x64_128.rs @@ -6,6 +6,7 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +use std::cmp::min; use std::io::{Read, Result}; use std::ops::Shl; @@ -119,6 +120,123 @@ pub fn murmur3_x64_128(source: &mut T, seed: u32) -> Result { } } +/// Use the x64 variant of the 128 bit murmur3 to hash byte slice without copying the buffer. +/// +/// # Example +/// ``` +/// use murmur3::murmur3_x64_128_of_slice; +/// let hash_result = murmur3_x64_128_of_slice(b"hello world", 0); +/// ``` +pub fn murmur3_x64_128_of_slice(source: &[u8], seed: u32) -> u128 { + const C1: u64 = 0x87c3_7b91_1142_53d5; + const C2: u64 = 0x4cf5_ad43_2745_937f; + const C3: u64 = 0x52dc_e729; + const C4: u64 = 0x3849_5ab5; + const R1: u32 = 27; + const R2: u32 = 31; + const R3: u32 = 33; + const M: u64 = 5; + let mut h1: u64 = seed as u64; + let mut h2: u64 = seed as u64; + let mut buf = source; + let mut processed: usize = 0; + loop { + match min(buf.len(), 16) { + 16 => { + processed += 16; + + let k1 = u64::from_le_bytes(copy_into_array(&buf[0..8])); + let k2 = u64::from_le_bytes(copy_into_array(&buf[8..16])); + h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2); + h1 = h1 + .rotate_left(R1) + .wrapping_add(h2) + .wrapping_mul(M) + .wrapping_add(C3); + h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1); + h2 = h2 + .rotate_left(R2) + .wrapping_add(h1) + .wrapping_mul(M) + .wrapping_add(C4); + + buf = &buf[16..]; + } + 0 => { + h1 ^= processed as u64; + h2 ^= processed as u64; + h1 = h1.wrapping_add(h2); + h2 = h2.wrapping_add(h1); + h1 = fmix64(h1); + h2 = fmix64(h2); + h1 = h1.wrapping_add(h2); + h2 = h2.wrapping_add(h1); + return ((h2 as u128) << 64) | (h1 as u128); + } + _ => { + let read = buf.len(); + processed += read; + + let mut k1 = 0; + let mut k2 = 0; + if read >= 15 { + k2 ^= (buf[14] as u64).shl(48); + } + if read >= 14 { + k2 ^= (buf[13] as u64).shl(40); + } + if read >= 13 { + k2 ^= (buf[12] as u64).shl(32); + } + if read >= 12 { + k2 ^= (buf[11] as u64).shl(24); + } + if read >= 11 { + k2 ^= (buf[10] as u64).shl(16); + } + if read >= 10 { + k2 ^= (buf[9] as u64).shl(8); + } + if read >= 9 { + k2 ^= buf[8] as u64; + k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1); + h2 ^= k2; + } + if read >= 8 { + k1 ^= (buf[7] as u64).shl(56); + } + if read >= 7 { + k1 ^= (buf[6] as u64).shl(48); + } + if read >= 6 { + k1 ^= (buf[5] as u64).shl(40); + } + if read >= 5 { + k1 ^= (buf[4] as u64).shl(32); + } + if read >= 4 { + k1 ^= (buf[3] as u64).shl(24); + } + if read >= 3 { + k1 ^= (buf[2] as u64).shl(16); + } + if read >= 2 { + k1 ^= (buf[1] as u64).shl(8); + } + if read >= 1 { + k1 ^= buf[0] as u64; + } + k1 = k1.wrapping_mul(C1); + k1 = k1.rotate_left(31); + k1 = k1.wrapping_mul(C2); + h1 ^= k1; + + buf = &buf[read..] + } + } + } +} + fn fmix64(k: u64) -> u64 { const C1: u64 = 0xff51_afd7_ed55_8ccd; const C2: u64 = 0xc4ce_b9fe_1a85_ec53; diff --git a/src/murmur3_x86_128.rs b/src/murmur3_x86_128.rs index 9c1184f..c39fbd9 100644 --- a/src/murmur3_x86_128.rs +++ b/src/murmur3_x86_128.rs @@ -6,6 +6,7 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +use std::cmp::min; use std::io::{Read, Result}; use std::ops::Shl; @@ -156,6 +157,162 @@ pub fn murmur3_x86_128(source: &mut T, seed: u32) -> Result { } } +/// Use the x86 variant of the 128 bit murmur3 to hash byte slice without copying buffer. +/// +/// # Example +/// ``` +/// use murmur3::murmur3_x86_128_of_slice; +/// let hash_result = murmur3_x86_128_of_slice(b"hello world", 0); +/// ``` +pub fn murmur3_x86_128_of_slice(source: &[u8], seed: u32) -> u128 { + const C1: u32 = 0x239b_961b; + const C2: u32 = 0xab0e_9789; + const C3: u32 = 0x38b3_4ae5; + const C4: u32 = 0xa1e3_8b93; + const C5: u32 = 0x561c_cd1b; + const C6: u32 = 0x0bca_a747; + const C7: u32 = 0x96cd_1c35; + const C8: u32 = 0x32ac_3b17; + const M: u32 = 5; + + let mut h1: u32 = seed; + let mut h2: u32 = seed; + let mut h3: u32 = seed; + let mut h4: u32 = seed; + + let mut buf = source; + let mut processed: usize = 0; + loop { + match min(buf.len(), 16) { + 16 => { + processed += 16; + + let k1 = u32::from_le_bytes(copy_into_array(&buf[0..4])); + let k2 = u32::from_le_bytes(copy_into_array(&buf[4..8])); + let k3 = u32::from_le_bytes(copy_into_array(&buf[8..12])); + let k4 = u32::from_le_bytes(copy_into_array(&buf[12..16])); + h1 ^= k1.wrapping_mul(C1).rotate_left(15).wrapping_mul(C2); + h1 = h1 + .rotate_left(19) + .wrapping_add(h2) + .wrapping_mul(M) + .wrapping_add(C5); + h2 ^= k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3); + h2 = h2 + .rotate_left(17) + .wrapping_add(h3) + .wrapping_mul(M) + .wrapping_add(C6); + h3 ^= k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4); + h3 = h3 + .rotate_left(15) + .wrapping_add(h4) + .wrapping_mul(M) + .wrapping_add(C7); + h4 ^= k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1); + h4 = h4 + .rotate_left(13) + .wrapping_add(h1) + .wrapping_mul(M) + .wrapping_add(C8); + + buf = &buf[16..]; + } + 0 => { + h1 ^= processed as u32; + h2 ^= processed as u32; + h3 ^= processed as u32; + h4 ^= processed as u32; + h1 = h1.wrapping_add(h2); + h1 = h1.wrapping_add(h3); + h1 = h1.wrapping_add(h4); + h2 = h2.wrapping_add(h1); + h3 = h3.wrapping_add(h1); + h4 = h4.wrapping_add(h1); + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + h1 = h1.wrapping_add(h2); + h1 = h1.wrapping_add(h3); + h1 = h1.wrapping_add(h4); + h2 = h2.wrapping_add(h1); + h3 = h3.wrapping_add(h1); + h4 = h4.wrapping_add(h1); + let x = + ((h4 as u128) << 96) | ((h3 as u128) << 64) | ((h2 as u128) << 32) | h1 as u128; + return x; + } + _ => { + let read = buf.len(); + processed += read; + + let mut k1 = 0; + let mut k2 = 0; + let mut k3 = 0; + let mut k4 = 0; + if read >= 15 { + k4 ^= (buf[14] as u32).shl(16); + } + if read >= 14 { + k4 ^= (buf[13] as u32).shl(8); + } + if read >= 13 { + k4 ^= buf[12] as u32; + k4 = k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1); + h4 ^= k4; + } + if read >= 12 { + k3 ^= (buf[11] as u32).shl(24); + } + if read >= 11 { + k3 ^= (buf[10] as u32).shl(16); + } + if read >= 10 { + k3 ^= (buf[9] as u32).shl(8); + } + if read >= 9 { + k3 ^= buf[8] as u32; + k3 = k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4); + h3 ^= k3; + } + if read >= 8 { + k2 ^= (buf[7] as u32).shl(24); + } + if read >= 7 { + k2 ^= (buf[6] as u32).shl(16); + } + if read >= 6 { + k2 ^= (buf[5] as u32).shl(8); + } + if read >= 5 { + k2 ^= buf[4] as u32; + k2 = k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3); + h2 ^= k2; + } + if read >= 4 { + k1 ^= (buf[3] as u32).shl(24); + } + if read >= 3 { + k1 ^= (buf[2] as u32).shl(16); + } + if read >= 2 { + k1 ^= (buf[1] as u32).shl(8); + } + if read >= 1 { + k1 ^= buf[0] as u32; + } + k1 = k1.wrapping_mul(C1); + k1 = k1.rotate_left(15); + k1 = k1.wrapping_mul(C2); + h1 ^= k1; + + buf = &buf[read..] + } + } + } +} + fn fmix32(k: u32) -> u32 { const C1: u32 = 0x85eb_ca6b; const C2: u32 = 0xc2b2_ae35; diff --git a/tests/quickcheck.rs b/tests/quickcheck.rs index c72d897..b3a9383 100644 --- a/tests/quickcheck.rs +++ b/tests/quickcheck.rs @@ -13,7 +13,9 @@ extern crate murmur3_sys; use std::io::Cursor; -use murmur3::{murmur3_32, murmur3_32_of_slice}; +use murmur3::{ + murmur3_32, murmur3_32_of_slice, murmur3_x64_128_of_slice, murmur3_x86_128_of_slice, +}; use murmur3_sys::MurmurHash3_x86_32; use murmur3::murmur3_x86_128; @@ -65,7 +67,21 @@ quickcheck! { } quickcheck! { - fn quickcheck_x64_128(input:(u32, Vec)) -> bool { + fn quickcheck_x86_128_slice(input:(u32, Vec)) -> bool { + let seed = input.0; + let xs = input.1; + let output_bytes: [u8; 16] = [0; 16]; + unsafe { + MurmurHash3_x86_128(xs.as_ptr() as _, xs.len() as i32,seed,output_bytes.as_ptr() as *mut _) + }; + let output = u128::from_le_bytes(output_bytes); + let output2 = murmur3_x86_128_of_slice(&xs, seed); + output == output2 + } +} + +quickcheck! { + fn quickcheck_x64_128_slice(input:(u32, Vec)) -> bool { let seed = input.0; let xs = input.1; let output_bytes: [u8; 16] = [0; 16]; @@ -73,7 +89,7 @@ quickcheck! { MurmurHash3_x64_128(xs.as_ptr() as _, xs.len() as i32,seed, output_bytes.as_ptr() as *mut _) }; let output = u128::from_le_bytes(output_bytes); - let output2 = murmur3_x64_128(&mut Cursor::new(xs), seed).unwrap(); + let output2 = murmur3_x64_128_of_slice(&xs, seed); output == output2 } } diff --git a/tests/test.rs b/tests/test.rs index e2a596c..6596fbf 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -397,6 +397,12 @@ fn test_static_strings() { "Failed x86_128 chained reader on string {}", test.string ); + let chained_hash2 = murmur3::murmur3_x86_128_of_slice(test.string.as_bytes(), 0); + assert_eq!( + chained_hash2, expected, + "Failed x86_128 chained reader on string {}", + test.string + ); let expected = u128::from_le_bytes(test.hash_128_x64); let hash = murmur3::murmur3_x64_128(&mut str_as_cursor(test.string), 0).unwrap(); @@ -408,5 +414,12 @@ fn test_static_strings() { "Failed x64_128 chained reader on string {}", test.string ); + + let chained_hash2 = murmur3::murmur3_x64_128_of_slice(test.string.as_bytes(), 0); + assert_eq!( + chained_hash2, expected, + "Failed x64_128 chained reader on string {}", + test.string + ); } }