Improve throughput benchmark accuracy with more black_box

ogxd · Nov 8, 2024 · 57ddc68 · 57ddc68
1 parent e977257
commit 57ddc68
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -109,11 +109,13 @@ cargo bench --bench throughput
 cargo bench --bench hashset
 ```
 
+Note: The `throughput` benchmark does not rely on criterion for timing measurements. In an attempt to reduce bias in this microbenchmark as much as possible, it shuffles seeds, input data, and alignment. It also has the benefit of being less of a "black box" compared to criterion. There is however a criterion-based throughput benchmark named `throughput_criterion` if you prefer. Results vary slightly between the two benchmarks; don't hesitate to submit an issue if you suspect bias and want to suggest improvements.
+
 ### Throughput
 
 Throughput is measured as the number of bytes hashed per second.
 
-*Some prefer talking **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end as they all boil down to measuring the time it takes to hash some input and then apply different scalar transformation. For instance, if latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.*
+*Some prefer talking of **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end as they all boil down to measuring the time it takes to hash some input and then applying a different scalar transformation. For instance, if latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.*
 
 **Latest Benchmark Results:**    
 ![aarch64](./benches/throughput/aarch64.svg)

diff --git a/benches/throughput/main.rs b/benches/throughput/main.rs
@@ -3,16 +3,18 @@ mod result_processor;
 use result_processor::*;
 
 use std::hash::Hasher;
-use std::hint::black_box;
 use std::time::{Instant, Duration};
 use std::alloc::{alloc, dealloc, Layout};
 use std::slice;
 
+// black_box from std::hint is not as good at preventing bias
+use criterion::black_box;
+
 use rand::Rng;
 
 use gxhash::*;
 
-const ITERATIONS: u32 = 1000;
+const ITERATIONS: u32 = 10000;
 const MAX_RUN_DURATION: Duration = Duration::from_millis(1000);
 const FORCE_NO_INLINING: bool = false;
 
@@ -91,7 +93,7 @@ fn main() {
 }
 
 fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, delegate: F)
-    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize>
+    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
 {
     processor.on_start(name);
     for i in 2.. {
@@ -101,7 +103,7 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
         }
 
         // Warmup
-        black_box(time(ITERATIONS, &|| delegate(&data[..len], S::default()))); 
+        black_box(time(ITERATIONS, &|| delegate(black_box(&data[..len]), black_box(S::default())))); 
 
         let mut durations_s = vec![];
         let now = Instant::now();
@@ -116,7 +118,8 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
             let end = start + len;
             let slice = &data[start..end];
             // Execute method for a new iterations
-            let duration = time(ITERATIONS, &|| delegate(slice, S::default()));
+            let seed_copy = seed.clone();
+            let duration = time(ITERATIONS, &|| black_box(delegate(black_box(slice), black_box(seed_copy))));
             durations_s.push(duration.as_secs_f64());
         }
         let average_duration_s = calculate_average_without_outliers(&mut durations_s);

diff --git a/benches/throughput_criterion.rs b/benches/throughput_criterion.rs
@@ -4,7 +4,7 @@ use std::slice;
 use std::hash::Hasher;
 
 use criterion::measurement::WallTime;
-use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId};
+use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId, black_box};
 use rand::Rng;
 
 use gxhash::*;
@@ -21,9 +21,9 @@ fn benchmark<F>(c: &mut BenchmarkGroup<WallTime>, data: &[u8], name: &str, deleg
         c.throughput(Throughput::Bytes(len as u64));
 
         let slice = &data[0..len]; // Aligned
-        // let slice = &data[1..len]; // Unaligned
+        //let slice = &data[1..len]; // Unaligned
         c.bench_with_input(BenchmarkId::new(name, len), slice, |bencher, input| {
-            bencher.iter(|| delegate(criterion::black_box(input), criterion::black_box(42)))
+            bencher.iter(|| black_box(delegate(black_box(input), black_box(42))))
         });
     }
 }