Skip to content

Commit

Permalink
checksum: add fxhash
Browse files Browse the repository at this point in the history
This is just to test out the performance compared to the xxhash we've been
using until now. Early measurements with 4M blobs have shown that it could be
worth experimenting with fxhash, which is used in the Rust compiler.
  • Loading branch information
Johannes Wünsche committed Apr 11, 2024
1 parent 0d678fe commit 66d9984
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 4 deletions.
70 changes: 70 additions & 0 deletions betree/src/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ pub trait Checksum:
fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> {
    // A single slice is verified by presenting it to `verify_buffer` as a
    // one-element sequence of chunks.
    let single_chunk = once(data);
    self.verify_buffer(single_chunk)
}

/// Create a valid empty builder for this checksum type.
///
/// This is an associated function without a `self` receiver, so a builder can
/// be obtained from the checksum type alone.
fn builder() -> Self::Builder;
}

/// A checksum builder
Expand Down Expand Up @@ -94,6 +97,10 @@ impl Checksum for XxHash {
Err(ChecksumError)
}
}

/// Returns the `XxHashBuilder` that produces `XxHash` checksums.
fn builder() -> Self::Builder {
XxHashBuilder
}
}

/// The corresponding `Builder` for `XxHash`.
Expand Down Expand Up @@ -122,3 +129,66 @@ impl State for XxHashState {
XxHash(self.0.finish())
}
}

/// A 64-bit checksum computed with `FxHasher` from the `rustc_hash` crate.
///
/// This is the hash implementation used by rustc; it originally comes from Firefox.
///
/// NOTE(review): FxHash appears to be designed as a fast hash-table hash rather than
/// as an error-detecting checksum — confirm it is acceptable for this use.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
pub struct FxHash(u64);

impl StaticSize for FxHash {
    /// Reports the fixed size of an `FxHash`.
    ///
    /// The checksum wraps a single `u64`, so the value is eight.
    /// NOTE(review): presumably this is the serialized size in bytes — confirm
    /// against the `StaticSize` trait documentation.
    fn static_size() -> usize {
        // Width of the wrapped `u64`.
        const HASH_WIDTH: usize = 8;
        HASH_WIDTH
    }
}

impl Checksum for FxHash {
    type Builder = FxHashBuilder;

    /// Recomputes the hash over every chunk yielded by `data`, in iteration
    /// order, and compares the result with `self`.
    ///
    /// # Errors
    ///
    /// Returns `ChecksumError` when the recomputed hash differs from `self`.
    ///
    /// NOTE(review): `FxHasher::write` may not be invariant under how the input
    /// is split into chunks — confirm that verification always sees the same
    /// chunk boundaries that were used when the checksum was created.
    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
        &self,
        data: I,
    ) -> Result<(), ChecksumError> {
        let mut hasher = FxHashBuilder.build();
        data.into_iter()
            .for_each(|chunk| hasher.ingest(chunk.as_ref()));
        let recomputed = hasher.finish();

        // Guard clause: bail out on the first sign of a mismatch.
        if recomputed != *self {
            return Err(ChecksumError);
        }
        Ok(())
    }

    /// Returns the `FxHashBuilder` that produces `FxHash` checksums.
    fn builder() -> Self::Builder {
        FxHashBuilder
    }
}

/// The corresponding `Builder` for `FxHash`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FxHashBuilder;

impl Builder<FxHash> for FxHashBuilder {
    type State = FxHashState;

    /// Starts a fresh hashing state backed by a default-constructed `FxHasher`.
    fn build(&self) -> Self::State {
        let hasher = FxHasher::default();
        FxHashState(hasher)
    }
}

use rustc_hash::FxHasher;

/// The internal state of `FxHash`.
///
/// Wraps the `FxHasher` that accumulates the data being checksummed.
pub struct FxHashState(FxHasher);

impl State for FxHashState {
    type Checksum = FxHash;

    /// Feeds `data` into the wrapped hasher.
    fn ingest(&mut self, data: &[u8]) {
        let Self(hasher) = self;
        hasher.write(data);
    }

    /// Consumes the state and wraps the hasher's final 64-bit value in an `FxHash`.
    fn finish(self) -> Self::Checksum {
        let Self(hasher) = self;
        let digest = hasher.finish();
        FxHash(digest)
    }
}
10 changes: 6 additions & 4 deletions betree/src/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use crate::{
atomic_option::AtomicOption,
cache::ClockCache,
checksum::{XxHash, XxHashBuilder},
checksum::{FxHash, FxHashBuilder, XxHash, XxHashBuilder},
compression::CompressionConfiguration,
cow_bytes::SlicedCowBytes,
data_management::{
Expand Down Expand Up @@ -67,14 +67,16 @@ const ROOT_TREE_STORAGE_PREFERENCE: StoragePreference = StoragePreference::FASTE
const DEFAULT_CACHE_SIZE: usize = 256 * 1024 * 1024;
const DEFAULT_SYNC_INTERVAL_MS: u64 = 1000;

// This is the checksum type used throughout the entire database. Switching to
// a different one requires changing this alias and recompiling.
type Checksum = XxHash;

type ObjectPointer = data_management::ObjectPointer<Checksum>;
pub(crate) type ObjectRef = data_management::impls::ObjRef<ObjectPointer>;
pub(crate) type Object = Node<ObjectRef>;
type DbHandler = Handler<ObjectRef>;

pub(crate) type RootSpu = StoragePoolUnit<XxHash>;
pub(crate) type RootSpu = StoragePoolUnit<Checksum>;
pub(crate) type RootDmu = Dmu<
ClockCache<
data_management::impls::ObjectKey<Generation>,
Expand Down Expand Up @@ -179,7 +181,7 @@ impl DatabaseConfiguration {

impl DatabaseConfiguration {
pub fn new_spu(&self) -> Result<RootSpu> {
Ok(StoragePoolUnit::<XxHash>::new(&self.storage)?)
Ok(StoragePoolUnit::<Checksum>::new(&self.storage)?)
}

pub fn new_handler(&self, spu: &RootSpu) -> DbHandler {
Expand Down Expand Up @@ -229,7 +231,7 @@ impl DatabaseConfiguration {

Dmu::new(
self.compression.to_builder(),
XxHashBuilder,
<Checksum as crate::checksum::Checksum>::builder(),
self.default_storage_class,
spu,
strategy,
Expand Down

0 comments on commit 66d9984

Please sign in to comment.