Skip to content

Commit

Permalink
Patched sketching corner cases
Browse files Browse the repository at this point in the history
  • Loading branch information
LucaCappelletti94 committed Dec 5, 2023
1 parent e54a415 commit 5eafc7e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hyperloglog-rs"
version = "0.1.50"
version = "0.1.51"
edition = "2021"
authors = ["Luca Cappelletti <[email protected]>"]
description = "A Rust implementation of HyperLogLog trying to be parsimonious with memory."
Expand Down
45 changes: 23 additions & 22 deletions src/hyper_spheres_sketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ impl<F: Primitive<f32>, PRECISION: Precision + WordType<BITS>, const BITS: usize
union_zeros,
));

// union_estimate = union_estimate.get_min(self_cardinality + other_cardinality);

EstimatedUnionCardinalities::from((self_cardinality, other_cardinality, union_estimate))
}

Expand Down Expand Up @@ -171,6 +173,7 @@ where
right_difference_cardinality_vector[j] = (euc.get_right_difference_cardinality()
- last_right_difference)
.get_max(I::default());

last_right_difference = euc.get_right_difference_cardinality();
}
left_difference_cardinality_vector[i] = (euc.get_left_difference_cardinality()
Expand Down Expand Up @@ -255,42 +258,40 @@ where
);

let maximal_differential_intersection_cardinality =
(euc.get_left_difference_cardinality() - last_left_difference)
.get_max(I::default())
+ (right_cardinality - last_right_cardinality).get_max(I::default());

debug_assert!(
maximal_differential_intersection_cardinality >= differential_intersection,
concat!(
"Expected maximal_differential_intersection_cardinality to be larger than differential_intersection, but it is not. ",
"Got: maximal_differential_intersection_cardinality: {:?}, differential_intersection: {:?}",
),
maximal_differential_intersection_cardinality,
differential_intersection,
);

differential_overlap_cardinality_matrix[i][j] = differential_intersection
/ maximal_differential_intersection_cardinality
(euc.get_left_difference_cardinality() - last_left_difference
+ right_cardinality
- last_right_cardinality)
.get_max(I::non_zero_positive_min_value());

differential_overlap_cardinality_matrix[i][j] = (differential_intersection
/ maximal_differential_intersection_cardinality)
.get_min(I::ONE);
last_row[j] = euc.get_intersection_cardinality().get_max(delta);
comulative_row += differential_intersection;

// We overwrite the right-difference entry on every pass over the left
// sets, so the value that remains at the end is necessarily the one
// computed against the last, and largest, left set.
right_difference_cardinality_vector[j] = (euc.get_right_difference_cardinality()

let differential_right_difference = (euc.get_right_difference_cardinality()
- last_right_difference)
.get_max(I::default())
/ (right_cardinality - last_right_cardinality)
.get_max(I::non_zero_positive_min_value());
.get_max(I::default());
let maximal_differential_right_difference = (right_cardinality
- last_right_cardinality)
.get_max(I::non_zero_positive_min_value());

right_difference_cardinality_vector[j] = (differential_right_difference
/ maximal_differential_right_difference)
.get_min(I::ONE);
last_right_difference = euc.get_right_difference_cardinality();
last_right_cardinality = right_cardinality;
}
left_difference_cardinality_vector[i] = (euc.get_left_difference_cardinality()
left_difference_cardinality_vector[i] = ((euc.get_left_difference_cardinality()
- last_left_difference)
.get_max(I::default())
/ (left_cardinality - last_left_cardinality)
.get_max(I::non_zero_positive_min_value());
.get_max(I::non_zero_positive_min_value()))
.get_min(I::ONE);
last_left_cardinality = left_cardinality;
last_left_difference = euc.get_left_difference_cardinality();
}
Expand Down
14 changes: 9 additions & 5 deletions tests/test_hyper_spheres_sketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fn get_random_hyper_spheres_hll<const N: usize>(

#[test]
fn test_hyper_spheres_sketch() {
let number_of_tests = 1_00;
let number_of_tests = 100;

// We run multiple MSE to have an estimate of how much the
// HyperLogLog approximation is off when compared to the
Expand Down Expand Up @@ -222,9 +222,11 @@ fn test_hyper_spheres_sketch() {
left_diff_normalized_hll[i] <= 1.0,
concat!(
"We expect the left difference cardinality vector to ",
"have values less than or equal to 1.0 but we got {:?} instead."
"have values less than or equal to 1.0 but we got {:?} instead. ",
"This happened in position {:?}."
),
left_diff_normalized_hll[i]
left_diff_normalized_hll[i],
i
);

assert!(
Expand Down Expand Up @@ -255,9 +257,11 @@ fn test_hyper_spheres_sketch() {
right_diff_normalized_hll[i] <= 1.0,
concat!(
"We expect the right difference cardinality vector to ",
"have values less than or equal to 1.0 but we got {:?} instead."
"have values less than or equal to 1.0 but we got {:?} instead. ",
"This happened in position {:?}."
),
right_diff_normalized_hll[i]
right_diff_normalized_hll[i],
i
);

assert!(
Expand Down

0 comments on commit 5eafc7e

Please sign in to comment.