Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unit tests to clarify HyperLogLog behavior #55

Merged
merged 1 commit into from
Jun 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,35 @@

import java.util.List;

import static com.facebook.airlift.stats.cardinality.TestUtils.createHashForBucket;
import static com.facebook.airlift.stats.cardinality.TestUtils.sequence;
import static com.facebook.airlift.stats.cardinality.Utils.numberOfBuckets;
import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
import static org.testng.Assert.assertEquals;

public class TestDenseHll
{
/**
 * Inserts one hash per bucket, where the hash for bucket {@code b} is built with
 * {@code b} leading zeros, then checks the value reported for every bucket.
 */
@Test(dataProvider = "bits")
public void testCorrectNumberOfZeros(int indexBitLength)
{
    DenseHll hll = new DenseHll(indexBitLength);

    // populate no more buckets than exist, and no more than there are bits left after the index
    int bitsAfterIndex = Long.SIZE - indexBitLength;
    int populated = Math.min(bitsAfterIndex, Utils.numberOfBuckets(indexBitLength));

    int next = 0;
    while (next < populated) {
        // the hash for bucket `next` carries `next` leading zeros
        hll.insertHash(createHashForBucket(indexBitLength, next, next));
        next++;
    }

    // populated buckets must report index + 1; untouched buckets must report 0
    hll.eachBucket((index, stored) ->
            assertEquals(stored, index < populated ? index + 1 : 0));
}

@Test(dataProvider = "bits")
public void testMultipleMerges(int prefixBitLength)
throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.List;

import static com.facebook.airlift.stats.cardinality.TestUtils.createHashForBucket;
import static com.facebook.airlift.stats.cardinality.TestUtils.sequence;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
Expand All @@ -30,6 +31,45 @@ public class TestSparseHll
{
// Instance size of SparseHll as reported by ClassLayout.parseClass(...).instanceSize().
// NOTE(review): presumably consumed by size-related tests later in this class (not visible here) — confirm.
private static final int SPARSE_HLL_INSTANCE_SIZE = ClassLayout.parseClass(SparseHll.class).instanceSize();

/**
 * Inserts one hash per bucket, where the hash for bucket {@code b} is built with
 * {@code b} leading zeros, then checks every bucket reported by the sparse HLL.
 */
@Test(dataProvider = "bits")
public void testCorrectNumberOfZeros(int indexBitLength)
{
    SparseHll hll = new SparseHll(indexBitLength);

    // Note: the peculiar minus six below reflects a surprising edge case.
    // See https://github.com/prestodb/airlift/issues/56.
    int maxLeadingZeros = Long.SIZE - indexBitLength - 6;
    int populated = Math.min(maxLeadingZeros, Utils.numberOfBuckets(indexBitLength));

    int next = 0;
    while (next < populated) {
        // the hash for bucket `next` carries `next` leading zeros
        hll.insertHash(createHashForBucket(indexBitLength, next, next));
        next++;
    }

    // every reported bucket should have value index + 1
    hll.eachBucket((index, stored) -> {
        assertEquals(stored, index + 1);
    });
}

/**
 * Inserts, for each bucket, first a hash with no leading zeros and then a hash with
 * as many leading zeros as the bucket index, and checks that the largest value
 * reported per bucket is index + 1.
 */
@Test(dataProvider = "bits")
public void testCorrectNumberOfZerosOnUpdate(int indexBitLength)
{
    SparseHll hll = new SparseHll(indexBitLength);
    int bucketCount = Math.min(Long.SIZE - indexBitLength - 6, Utils.numberOfBuckets(indexBitLength));

    // first pass: every bucket receives a hash that has no leading zeros
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        long hash = createHashForBucket(indexBitLength, bucket, 0);
        hll.insertHash(hash);
    }
    // second pass: bucket b receives a hash that has b leading zeros
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        long hash = createHashForBucket(indexBitLength, bucket, bucket);
        hll.insertHash(hash);
    }

    // Note: SparseHll may return multiple values for each bucket, so only the largest is kept.
    int[] largest = new int[bucketCount];
    hll.eachBucket((index, stored) -> {
        if (stored > largest[index]) {
            largest[index] = stored;
        }
    });

    // each bucket from 0 to bucketCount should have value index + 1
    for (int b = 0; b < bucketCount; b++) {
        assertEquals(largest[b], b + 1);
    }
}

@Test(dataProvider = "bits")
public void testMerge(int prefixBitLength)
throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,13 @@ public static List<Long> sequence(int start, int end)

return builder.build();
}

/**
 * Builds a 64-bit hash whose top {@code indexBitLength} bits hold {@code bucket} and whose
 * remaining bits consist of {@code leadingZeros} zero bits, a single 1 bit, and zeros after that.
 *
 * @param indexBitLength number of high-order bits that encode the bucket index; must be in [1, 63]
 * @param bucket bucket index to encode; must be in [0, 2^indexBitLength)
 * @param leadingZeros number of zero bits between the index bits and the marker bit;
 *        must be in [0, 64 - indexBitLength - 1]
 * @return the constructed hash
 * @throws IllegalArgumentException if any argument is outside its documented range
 */
public static long createHashForBucket(int indexBitLength, int bucket, int leadingZeros)
{
    // Java masks long shift distances to their low six bits, so out-of-range arguments would
    // silently wrap around and yield a hash for the wrong bucket; reject them up front.
    if (indexBitLength < 1 || indexBitLength >= Long.SIZE) {
        throw new IllegalArgumentException("indexBitLength must be in [1, 63]: " + indexBitLength);
    }
    // an unsigned shift leaves a non-zero remainder for negative or too-large bucket indexes
    if (((long) bucket >>> indexBitLength) != 0) {
        throw new IllegalArgumentException(
                "bucket " + bucket + " does not fit in " + indexBitLength + " index bits");
    }
    int valueBits = Long.SIZE - indexBitLength;
    if (leadingZeros < 0 || leadingZeros > valueBits - 1) {
        throw new IllegalArgumentException(
                "leadingZeros must be in [0, " + (valueBits - 1) + "]: " + leadingZeros);
    }

    // put a 1 in the (indexBitLength + leadingZeros + 1)-th place, counting from the most significant bit
    long marker = 1L << (valueBits - leadingZeros - 1);
    // set the index bits to the corresponding bucket index
    long index = (long) bucket << valueBits;
    return index | marker;
}
}