From 53b863b93e200398448c26dc5abfb0faac8f0ad9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 13 Dec 2024 10:05:40 -0700 Subject: [PATCH] Upgrade to twox-hash 2.1 and use oneshot API for improved performance --- parquet/Cargo.toml | 2 +- parquet/src/bloom_filter/mod.rs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 4064baba0947..3056a98a9b97 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -64,7 +64,7 @@ seq-macro = { version = "0.3", default-features = false } futures = { version = "0.3", default-features = false, features = ["std"], optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] } hashbrown = { version = "0.15", default-features = false } -twox-hash = { version = "1.6", default-features = false } +twox-hash = { version = "2.1", default-features = false, features = ["xxhash64"] } paste = { version = "1.0" } half = { version = "2.1", default-features = false, features = ["num-traits"] } sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] } diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs index 7d6dccdd2378..9e7db1296031 100644 --- a/parquet/src/bloom_filter/mod.rs +++ b/parquet/src/bloom_filter/mod.rs @@ -82,7 +82,6 @@ use crate::format::{ }; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; use bytes::Bytes; -use std::hash::Hasher; use std::io::Write; use std::sync::Arc; use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; @@ -397,9 +396,7 @@ const SEED: u64 = 0; #[inline] fn hash_as_bytes<A: AsBytes + ?Sized>(value: &A) -> u64 { - let mut hasher = XxHash64::with_seed(SEED); - hasher.write(value.as_bytes()); - hasher.finish() + XxHash64::oneshot(SEED, value.as_bytes()) } #[cfg(test)]