1 benchy boi

spiraldb · Jan 24, 2025 · fb0a9be · fb0a9be · a10y · Jan 24, 2025
1 parent 2330277
commit fb0a9be
Show file tree

Hide file tree

Showing 3 changed files with 112 additions and 0 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml
@@ -61,6 +61,7 @@ tokio = { workspace = true, features = ["full"] }
 uuid = { workspace = true, features = ["v4"] }
 vortex = { workspace = true, features = ["object_store", "parquet"] }
 vortex-datafusion = { workspace = true }
+vortex-mask = { workspace = true }
 xshell = { workspace = true }
 
 [dev-dependencies]
@@ -105,3 +106,8 @@ harness = false
 name = "clickbench"
 test = false
 harness = false
+
+[[bench]]
+name = "sel_vec"
+test = false
+harness = false
diff --git a/bench-vortex/benches/sel_vec.rs b/bench-vortex/benches/sel_vec.rs
@@ -0,0 +1,105 @@
+#![allow(unused_imports, unused, dead_code)]
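+//! Benchmarks measuring how the order of applying a selection mask (filter) and
+//! canonicalizing an array affects performance across a range of selectivities.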
+
+use criterion::{BenchmarkId, Criterion};
+use rand::Rng;
+use vortex::array::PrimitiveArray;
+use vortex::compute::filter;
+use vortex::dtype::{DType, Nullability, PType};
+use vortex::encoding::{ArrayEncodingRef, Encoding};
+use vortex::encodings::alp::{ALPArray, ALPEncoding};
+use vortex::sampling_compressor::compressors::alp::ALPCompressor;
+use vortex::sampling_compressor::compressors::bitpacked::{
+    BitPackedCompressor, BITPACK_WITH_PATCHES,
+};
+use vortex::sampling_compressor::compressors::EncodingCompressor;
+use vortex::sampling_compressor::SamplingCompressor;
+use vortex::variants::PrimitiveArrayTrait;
+use vortex::{ArrayData, IntoArrayData, IntoCanonical};
+use vortex_mask::Mask;
+
+/// Criterion entry point comparing two orderings of filtering and canonicalizing.
+///
+/// A 65536-element `f64` array is compressed with a sampling compressor restricted
+/// to the ALP and bit-packing (with patches) compressors. For each selectivity a
+/// random mask is generated **once** and reused by both benchmark groups, so that
+/// `filter_then_canonical` and `canonical_then_filter` are measured against exactly
+/// the same inputs and their results are directly comparable.
+fn bench_sel_vec(c: &mut Criterion) {
+    // Number of elements in the benchmark array, and therefore in every mask.
+    const LEN: usize = 65536;
+    // Fraction of mask entries that are set to `true`.
+    const SELECTIVITIES: [f64; 8] = [0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1.0];
+
+    // Run ALP + BitPacking.
+    let compressor = SamplingCompressor::default().including_only(&[
+        &ALPCompressor as &dyn EncodingCompressor,
+        &BITPACK_WITH_PATCHES,
+    ]);
+
+    // Create a low-precision primitive array of f64
+    let arr = PrimitiveArray::from_iter((0..LEN).map(|x| (x as f64) * 0.2f64));
+    assert_eq!(arr.ptype(), PType::F64);
+
+    let arr = compressor
+        .compress(&arr.into_array(), None)
+        .unwrap()
+        .into_array();
+    // Sanity check: the outermost encoding chosen by the compressor must be ALP.
+    assert_eq!(arr.encoding().id(), ALPEncoding::ID);
+
+    // Build one randomized mask per selectivity up front; both groups share them.
+    let masks: Vec<(f64, Mask)> = SELECTIVITIES
+        .iter()
+        .map(|&selectivity| {
+            let true_count = (selectivity * LEN as f64) as usize;
+            let mask = create_mask(LEN, true_count);
+            assert_eq!(mask.len(), LEN);
+            assert_eq!(mask.true_count(), true_count);
+            (selectivity, mask)
+        })
+        .collect();
+
+    let mut group = c.benchmark_group("filter_then_canonical");
+    for (selectivity, mask) in &masks {
+        group.bench_with_input(
+            BenchmarkId::from_parameter(selectivity),
+            mask,
+            |b, mask| {
+                // Filter then into_canonical
+                b.iter(|| filter_then_canonical(&arr, mask))
+            },
+        );
+    }
+    group.finish();
+
+    let mut group = c.benchmark_group("canonical_then_filter");
+    for (selectivity, mask) in &masks {
+        group.bench_with_input(
+            BenchmarkId::from_parameter(selectivity),
+            mask,
+            |b, mask| {
+                // into_canonical then filter
+                b.iter(|| canonical_then_filter(&arr, mask))
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Applies `mask` to `array` first, then converts the filtered result to its
+/// canonical form and returns it as an `ArrayData`.
+///
+/// Panics if either the filter or the canonicalization fails.
+fn filter_then_canonical(array: &ArrayData, mask: &Mask) -> ArrayData {
+    filter(array, mask)
+        .unwrap()
+        .into_canonical()
+        .unwrap()
+        .into_array()
+}
+
+/// Converts a clone of `array` to its canonical form first, then applies `mask`
+/// to the canonical array and returns the filtered result.
+///
+/// Panics if either the canonicalization or the filter fails.
+fn canonical_then_filter(array: &ArrayData, mask: &Mask) -> ArrayData {
+    let decoded = array.clone().into_canonical().unwrap();
+    let decoded = decoded.into_array();
+    filter(&decoded, mask).unwrap()
+}
+
+/// Builds a mask of `len` entries in which exactly `true_count` randomly chosen
+/// positions are `true`.
+///
+/// Positions are picked by rejection sampling. To keep the number of random
+/// draws small for dense masks, the buffer starts filled with whichever value is
+/// in the majority and only the minority positions are flipped.
+///
+/// # Panics
+///
+/// Panics if `true_count > len`, since no such mask exists (the previous
+/// implementation would loop forever in that case).
+fn create_mask(len: usize, true_count: usize) -> Mask {
+    assert!(
+        true_count <= len,
+        "true_count ({}) must not exceed len ({})",
+        true_count,
+        len
+    );
+
+    // Start from the majority value and flip the minority into place.
+    let fill = true_count > len / 2;
+    let mut remaining = if fill { len - true_count } else { true_count };
+    let mut mask = vec![fill; len];
+
+    let mut rng = rand::thread_rng();
+    // `remaining` is 0 whenever `len` is 0, so `gen_range` never sees an empty range.
+    while remaining > 0 {
+        let index = rng.gen_range(0..len);
+        if mask[index] == fill {
+            mask[index] = !fill;
+            remaining -= 1;
+        }
+    }
+    Mask::from_iter(mask)
+}
+
+criterion::criterion_group!(sel_vec, bench_sel_vec); // Registers `bench_sel_vec` in a benchmark group named `sel_vec`.
+criterion::criterion_main!(sel_vec); // Entry point for the bench target, which is declared with `harness = false`.