From 66d99846f5a0786713ebcc78daacd162ec733f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Thu, 11 Apr 2024 17:30:46 +0200 Subject: [PATCH] checksum: add fxhash This is just to test out the performance compared to the xxhash we've been using until now. Early measurements with 4M blobs have shown that it could be worth experimenting with fxhash which is used in the rust compiler. --- betree/src/checksum.rs | 70 ++++++++++++++++++++++++++++++++++++++ betree/src/database/mod.rs | 10 +++--- 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/betree/src/checksum.rs b/betree/src/checksum.rs index fadc9a4a..ee194c03 100644 --- a/betree/src/checksum.rs +++ b/betree/src/checksum.rs @@ -23,6 +23,9 @@ pub trait Checksum: fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> { self.verify_buffer(once(data)) } + + /// Create a valid empty builder for this checksum type. + fn builder() -> Self::Builder; } /// A checksum builder @@ -94,6 +97,10 @@ impl Checksum for XxHash { Err(ChecksumError) } } + + fn builder() -> Self::Builder { + XxHashBuilder + } } /// The corresponding `Builder` for `XxHash`. @@ -122,3 +129,66 @@ impl State for XxHashState { XxHash(self.0.finish()) } } + +/// The hash implementation used by rustc, originally from Firefox. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct FxHash(u64); + +impl StaticSize for FxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for FxHash { + type Builder = FxHashBuilder; + + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = FxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + FxHashBuilder + } +} + +/// The corresponding `Builder` for `FxHash`. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FxHashBuilder; + +impl Builder for FxHashBuilder { + type State = FxHashState; + + fn build(&self) -> Self::State { + FxHashState(FxHasher::default()) + } +} + +use rustc_hash::FxHasher; + +/// The internal state of `FxHash`. +pub struct FxHashState(FxHasher); + +impl State for FxHashState { + type Checksum = FxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + FxHash(self.0.finish()) + } +} diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 50ae4f0c..02fa4fe5 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -2,7 +2,7 @@ use crate::{ atomic_option::AtomicOption, cache::ClockCache, - checksum::{XxHash, XxHashBuilder}, + checksum::{FxHash, FxHashBuilder, XxHash, XxHashBuilder}, compression::CompressionConfiguration, cow_bytes::SlicedCowBytes, data_management::{ @@ -67,6 +67,8 @@ const ROOT_TREE_STORAGE_PREFERENCE: StoragePreference = StoragePreference::FASTE const DEFAULT_CACHE_SIZE: usize = 256 * 1024 * 1024; const DEFAULT_SYNC_INTERVAL_MS: u64 = 1000; +// This is the hash used throughout the entire database. To reconfigure it, +// this type must be changed and the crate recompiled. type Checksum = XxHash; type ObjectPointer = data_management::ObjectPointer; @@ -74,7 +76,7 @@ pub(crate) type ObjectRef = data_management::impls::ObjRef; pub(crate) type Object = Node; type DbHandler = Handler; -pub(crate) type RootSpu = StoragePoolUnit; +pub(crate) type RootSpu = StoragePoolUnit; pub(crate) type RootDmu = Dmu< ClockCache< data_management::impls::ObjectKey, @@ -179,7 +181,7 @@ impl DatabaseConfiguration { impl DatabaseConfiguration { pub fn new_spu(&self) -> Result { - Ok(StoragePoolUnit::::new(&self.storage)?) + Ok(StoragePoolUnit::::new(&self.storage)?) 
} pub fn new_handler(&self, spu: &RootSpu) -> DbHandler { @@ -229,7 +231,7 @@ impl DatabaseConfiguration { Dmu::new( self.compression.to_builder(), - XxHashBuilder, + ::builder(), self.default_storage_class, spu, strategy,