From 9192b435571e66b652339d8e7f08197fc39ee903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Mon, 15 Apr 2024 11:58:24 +0200 Subject: [PATCH] checksum: add gxhash This commit required modifying the build context to allow for the AES optimizations of GxHash. It should not prove to be an issue on the systems we use (x86-64 and maybe ARM64), which I've tested before this commit. --- betree/.cargo/config.toml | 2 + betree/Cargo.toml | 1 + betree/src/checksum/gxhash.rs | 69 +++++++++++++++++++++++++++++++++++ betree/src/checksum/mod.rs | 2 + betree/src/checksum/xxhash.rs | 1 + betree/src/database/mod.rs | 2 +- 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 betree/.cargo/config.toml create mode 100644 betree/src/checksum/gxhash.rs diff --git a/betree/.cargo/config.toml b/betree/.cargo/config.toml new file mode 100644 index 00000000..e6ac8df3 --- /dev/null +++ b/betree/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C","target-cpu=native"] diff --git a/betree/Cargo.toml b/betree/Cargo.toml index 63507ced..58806262 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -60,6 +60,7 @@ rand = { version = "0.8", features = ["std_rng"] } pmdk = { path = "./pmdk", optional = true } rustc-hash = "1.1.0" +gxhash = "3.1.1" [dev-dependencies] rand_xorshift = "0.3" diff --git a/betree/src/checksum/gxhash.rs b/betree/src/checksum/gxhash.rs new file mode 100644 index 00000000..5d6b4f1b --- /dev/null +++ b/betree/src/checksum/gxhash.rs @@ -0,0 +1,69 @@ +/// Impl for Checksum for GxHash. +use super::{Builder, Checksum, ChecksumError, State}; +use crate::size::StaticSize; +use gxhash::GxHasher; +use serde::{Deserialize, Serialize}; +use std::hash::Hasher; + +/// A checksum created by `GxHash`. 
+#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct GxHash(u64); + +impl StaticSize for GxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for GxHash { + type Builder = GxHashBuilder; + + fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = GxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + GxHashBuilder + } +} + +/// The corresponding `Builder` for `GxHash`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GxHashBuilder; + +impl Builder for GxHashBuilder { + type State = GxHashState; + + fn build(&self) -> Self::State { + // Due to security concerns the default `GxHasher` is randomized, which + // does not work for us, therefore, use pinned seed. + GxHashState(GxHasher::with_seed(0)) + } +} + +/// The internal state of `GxHash`. +pub struct GxHashState(GxHasher); + +impl State for GxHashState { + type Checksum = GxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + GxHash(self.0.finish()) + } +} diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs index c4d5c0c4..0a72bb4d 100644 --- a/betree/src/checksum/mod.rs +++ b/betree/src/checksum/mod.rs @@ -5,9 +5,11 @@ use serde::{de::DeserializeOwned, Serialize}; use std::{error::Error, fmt, iter::once}; mod fxhash; +mod gxhash; mod xxhash; pub use fxhash::{FxHash, FxHashBuilder}; +pub use gxhash::{GxHash, GxHashBuilder}; pub use xxhash::{XxHash, XxHashBuilder}; /// A checksum to verify data integrity. 
diff --git a/betree/src/checksum/xxhash.rs b/betree/src/checksum/xxhash.rs index de0c0afd..839c0795 100644 --- a/betree/src/checksum/xxhash.rs +++ b/betree/src/checksum/xxhash.rs @@ -6,6 +6,7 @@ use crate::size::StaticSize; use serde::{Deserialize, Serialize}; use std::hash::Hasher; +/// A checksum created by `XxHash`. #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] pub struct XxHash(u64); diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 02fa4fe5..c6071fbe 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -2,7 +2,7 @@ use crate::{ atomic_option::AtomicOption, cache::ClockCache, - checksum::{FxHash, FxHashBuilder, XxHash, XxHashBuilder}, + checksum::XxHash, compression::CompressionConfiguration, cow_bytes::SlicedCowBytes, data_management::{