Skip to content

Commit

Permalink
Addressed and resolved more code smells and updated the test suite
Browse files Browse the repository at this point in the history
  • Loading branch information
LucaCappelletti94 committed Aug 21, 2024
1 parent aaf941e commit 4bfc3c2
Show file tree
Hide file tree
Showing 46 changed files with 1,812 additions and 20,403 deletions.
14 changes: 13 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,21 @@ serde_json = "1.0"
[dev-dependencies]
serde_json = "1.0"
wyhash = "0.5.0"
ahash = "0.8.11"
criterion = { version = "0.5", features = ["html_reports"] }

[features]
default = ["low_precisions", "beta", "plusplus"]
default = ["low_precisions", "beta", "plusplus", "zero_count_correction", "precomputed_beta"]
beta = []
precomputed_beta = ["beta"]
plusplus = []
# Whether to use the standard library's `ln` function instead of a lookup table.
# Enabling this feature prevents the library from compiling in no_std environments.
std_ln = [
"std",
]
# Whether to use the low-cardinality correction based on zero-counts or not.
zero_count_correction = []
# Whether to use local interpolation or kmeans in plus plus bias correction.
plusplus_kmeans = ["plusplus"]
# Whether to use integer or floating point biases and estimates.
Expand Down Expand Up @@ -121,3 +129,7 @@ opt-level = 3
[[bench]]
name = "unique_count_from_sorted_iterators"
harness = false

[[bench]]
name = "hybrid"
harness = false
58 changes: 58 additions & 0 deletions benches/hybrid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//! Benchmark to try and improve performance of the principal hybrid cases.
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use hyperloglog_rs::prelude::*;

/// Benchmarks `estimate_union_cardinality` between a counter that is still
/// hybrid and one that no longer is.
///
/// All 1,000 counter pairs are generated once, from a fixed starting seed,
/// before the benchmark group is created; the timed closure only walks the
/// prepared pairs and accumulates the union estimates.
fn bench_hybrid(c: &mut Criterion) {
    // Every counter in this benchmark is a `Hybrid` wrapper around a `PlusPlus`
    // counter with precision 10 and 6 bits per register, parameterized by `u32`.
    type Counter = Hybrid<
        PlusPlus<Precision10, Bits6, <Precision10 as ArrayRegister<Bits6>>::Array>,
        u32,
    >;

    let mut seed = 76568342984735313_u64;

    let pairs: Vec<(Counter, Counter)> = (0..1_000)
        .map(|_| {
            // First counter: fed a stream sized by its own capacity. It is
            // asserted below to still be hybrid afterwards.
            seed = splitmix64(seed);
            let mut stays_hybrid = Counter::default();
            let within_capacity = stays_hybrid.capacity() as u64;
            stays_hybrid.extend(iter_random_values::<u64>(
                within_capacity,
                None,
                Some(seed),
            ));

            // Second counter: fed a stream sized by twice its capacity, drawn
            // from a fresh seed. It is asserted below to have left hybrid mode.
            seed = splitmix64(seed);
            let mut leaves_hybrid = Counter::default();
            let beyond_capacity = leaves_hybrid.capacity() as u64 * 2;
            leaves_hybrid.extend(iter_random_values::<u64>(
                beyond_capacity,
                None,
                Some(seed),
            ));

            // Guard the benchmark's premise: one side hybrid, the other not.
            assert!(stays_hybrid.is_hybrid());
            assert!(!leaves_hybrid.is_hybrid());

            (stays_hybrid, leaves_hybrid)
        })
        .collect();

    let mut group = c.benchmark_group("hybrid");

    group.bench_function("hybrid_mix_union", |bencher| {
        bencher.iter(|| {
            // The running sum is returned from the closure so the estimates
            // are actually consumed by the benchmark harness.
            let mut union_sum = 0.0;
            for (hybrid_side, plain_side) in &pairs {
                union_sum += black_box(hybrid_side).estimate_union_cardinality(plain_side);
            }
            union_sum
        })
    });

    group.finish();
}

criterion_group!(benches, bench_hybrid);

criterion_main!(benches);
25 changes: 16 additions & 9 deletions benches/unique_count_from_sorted_iterators.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Benchmarks to evaluate improvements on the unique_count_from_sorted_iterators function.
use criterion::{criterion_group, criterion_main, Criterion, black_box};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use hyperloglog_rs::hybrid::unique_count_from_sorted_iterators;
use hyperloglog_rs::prelude::{iter_var_len_random_values, splitmix64};

Expand All @@ -12,13 +12,21 @@ fn bench_unique_count_from_sorted_iterators(b: &mut Criterion) {
let entries: Vec<(Vec<u32>, Vec<u32>)> = (0..200)
.map(|_| {
random_state = splitmix64(random_state);
let mut a =
iter_var_len_random_values::<u32>(0, maximal_possible_size, None, Some(random_state))
.collect::<Vec<u32>>();
let mut a = iter_var_len_random_values::<u32>(
0,
maximal_possible_size,
None,
Some(random_state),
)
.collect::<Vec<u32>>();
random_state = splitmix64(random_state);
let mut b =
iter_var_len_random_values::<u32>(0, maximal_possible_size, None, Some(random_state))
.collect::<Vec<u32>>();
let mut b = iter_var_len_random_values::<u32>(
0,
maximal_possible_size,
None,
Some(random_state),
)
.collect::<Vec<u32>>();
a.sort();
b.sort();

Expand All @@ -37,7 +45,6 @@ fn bench_unique_count_from_sorted_iterators(b: &mut Criterion) {
group.finish();
}


criterion_group!(benches, bench_unique_count_from_sorted_iterators);

criterion_main!(benches);
criterion_main!(benches);
20 changes: 13 additions & 7 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ fn write_weights(precisions: &[usize]) {
.unwrap();
}

#[cfg(feature = "plusplus")]
#[cfg(feature = "zero_count_correction")]
fn write_linear_count_zeros(precisions: &[usize]) {
let linear_count_zeros = precisions
.iter()
Expand Down Expand Up @@ -402,9 +402,12 @@ fn write_alphas(precisions: &[usize]) {
.unwrap();
}

#[cfg(any(
all(feature = "beta", not(feature = "precomputed_beta")),
feature = "plusplus"
#[cfg(all(
not(feature = "std_ln"),
any(
all(feature = "beta", not(feature = "precomputed_beta")),
feature = "plusplus",
)
))]
fn write_ln_values(precisions: &[usize]) {
// Since the ln values are needed up to the maximal number of registers, we
Expand Down Expand Up @@ -529,9 +532,12 @@ fn main() {
write_alphas(&precisions);
write_number_of_registers(&precisions);

#[cfg(any(
all(feature = "beta", not(feature = "precomputed_beta")),
feature = "plusplus"
#[cfg(all(
not(feature = "std_ln"),
any(
all(feature = "beta", not(feature = "precomputed_beta")),
feature = "plusplus"
)
))]
write_ln_values(&precisions);

Expand Down
2 changes: 1 addition & 1 deletion evaluate_composite_hash/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = "2021"

[dependencies]
csv = "1.3.0"
hyperloglog-rs = { path = "../../hyperloglog-rs", default-features=false, features=["all_precisions", "plusplus"] }
hyperloglog-rs = { path = "../../hyperloglog-rs", default-features=false, features=["all_precisions", "plusplus", "std"] }
indicatif = {version="0.17.8", features=["rayon"]}
paste = "1.0.15"
rayon = "1.10.0"
Expand Down
Loading

0 comments on commit 4bfc3c2

Please sign in to comment.