Skip to content

Commit

Permalink
Improve throughput benchmark accuracy with more black_box
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 8, 2024
1 parent e977257 commit 57ddc68
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,13 @@ cargo bench --bench throughput
cargo bench --bench hashset
```

Note: The `throughput` benchmark does not rely on criterion for timing measurements. In an attempt to reduce bias in this microbenchmark as much as possible, it shuffles seeds, input data, and alignment. It also has the benefit of being less of a "black box" compared to criterion. There is, however, a criterion-based throughput benchmark named `throughput_criterion` if you prefer. Results vary slightly between the two benchmarks; don't hesitate to submit an issue if you suspect bias and want to suggest improvements.

### Throughput

Throughput is measured as the number of bytes hashed per second.

*Some prefer talking **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end as they all boil down to measuring the time it takes to hash some input and then apply different scalar transformation. For instance, if latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.*
*Some prefer talking of **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end, as they all boil down to measuring the time it takes to hash some input and then applying a different scalar transformation. For instance, if the latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.*

**Latest Benchmark Results:**
![aarch64](./benches/throughput/aarch64.svg)
Expand Down
13 changes: 8 additions & 5 deletions benches/throughput/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ mod result_processor;
use result_processor::*;

use std::hash::Hasher;
use std::hint::black_box;
use std::time::{Instant, Duration};
use std::alloc::{alloc, dealloc, Layout};
use std::slice;

// black_box from std::hint is not as good at preventing bias
use criterion::black_box;

use rand::Rng;

use gxhash::*;

const ITERATIONS: u32 = 1000;
const ITERATIONS: u32 = 10000;
const MAX_RUN_DURATION: Duration = Duration::from_millis(1000);
const FORCE_NO_INLINING: bool = false;

Expand Down Expand Up @@ -91,7 +93,7 @@ fn main() {
}

fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, delegate: F)
where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize>
where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
{
processor.on_start(name);
for i in 2.. {
Expand All @@ -101,7 +103,7 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
}

// Warmup
black_box(time(ITERATIONS, &|| delegate(&data[..len], S::default())));
black_box(time(ITERATIONS, &|| delegate(black_box(&data[..len]), black_box(S::default()))));

let mut durations_s = vec![];
let now = Instant::now();
Expand All @@ -116,7 +118,8 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
let end = start + len;
let slice = &data[start..end];
// Execute method for a new iterations
let duration = time(ITERATIONS, &|| delegate(slice, S::default()));
let seed_copy = seed.clone();
let duration = time(ITERATIONS, &|| black_box(delegate(black_box(slice), black_box(seed_copy))));
durations_s.push(duration.as_secs_f64());
}
let average_duration_s = calculate_average_without_outliers(&mut durations_s);
Expand Down
6 changes: 3 additions & 3 deletions benches/throughput_criterion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::slice;
use std::hash::Hasher;

use criterion::measurement::WallTime;
use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId};
use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId, black_box};
use rand::Rng;

use gxhash::*;
Expand All @@ -21,9 +21,9 @@ fn benchmark<F>(c: &mut BenchmarkGroup<WallTime>, data: &[u8], name: &str, deleg
c.throughput(Throughput::Bytes(len as u64));

let slice = &data[0..len]; // Aligned
// let slice = &data[1..len]; // Unaligned
//let slice = &data[1..len]; // Unaligned
c.bench_with_input(BenchmarkId::new(name, len), slice, |bencher, input| {
bencher.iter(|| delegate(criterion::black_box(input), criterion::black_box(42)))
bencher.iter(|| black_box(delegate(black_box(input), black_box(42))))
});
}
}
Expand Down

0 comments on commit 57ddc68

Please sign in to comment.