-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fixed hyperlink in doc * removed useless code from example * added base structure and traits for view * added view types * added view documentation * corrected types to use slices of generics scalar implementation usign the base structure does not seem trivial * completed Dim/Stride traits with basic methods to use in View constructor * added macros for basic dim/stride implem * replaced dim/stride traits with SmallVec type generic traits made stride computing too much of a hassle * moed back data traits to view module * added stride computation for all layout need to add unit test * right layout test * added left stride ciloutation test * removed small vec to use instead const generics * renamed generics for consistency * removed data traits in favor of an enum * added basic dataless constructor * added mirror functions * docs * added Index<> implems for view backend * corrected(?) mirror creation functions * added assertions & comments * added limit case test for 1D views' stride * added basic code for benchmarking gemvoperation * rewrote gemv benches to only use base types criterion produces a nice violin plot with performances for on the layout * added initialization bench * added 2D view init to the bench no variation between cases appear when dim changes * added data access benchmark no performance loss caused by the convoluted Index<> implementation for views * rewrote access bench to use random accesses weird behavior on 3D unchecked accesses; cf. comments in code * access benchmark ok issue lies with the blackbox? * doc update
- Loading branch information
Showing
19 changed files
with
1,387 additions
and
87 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; | ||
|
||
// Currently a partial gemv | ||
// y = Ax / y = xA | ||
// instead of | ||
// y = s1*Au + s2*v | ||
|
||
// Matrix-vector product (y = Ax), written with iterators.
//
// `size` is the base-2 logarithm of the vector length: the vector holds
// 2^size elements and the matrix 2^size * 2^size elements, all set to 1.0.
// The result is passed to `black_box` so the computation is not discarded.
fn f1(size: u32) {
    let n = 2_usize.pow(size);
    let vector: Vec<f64> = vec![1.0; n];
    let matrix: Vec<f64> = vec![1.0; n * n];
    // dot product of one matrix row with the vector
    let dot_with_vector =
        |row: &[f64]| -> f64 { row.iter().zip(vector.iter()).map(|(a, b)| a * b).sum() };
    // the matrix is stored row after row, hence each chunk of `n` elements is
    // one row and Rust's iterators can process it line by line
    let y: Vec<f64> = matrix.chunks(n).map(dot_with_vector).collect();
    black_box(y);
}
|
||
// Matrix-vector product (y = Ax), written with explicit indexes.
//
// Reference implementation for the iterator-based version: same input sizes
// (2^size vector, 2^size * 2^size matrix, all set to 1.0), but every element
// is reached through its index.
fn f1_b(size: u32) {
    let n = 2_usize.pow(size);
    let vector: Vec<f64> = vec![1.0; n];
    let matrix: Vec<f64> = vec![1.0; n * n];
    let mut y: Vec<f64> = vec![0.0; n];
    for i in 0..n {
        // offset of the first element of row `i`
        let row_start = i * n;
        for j in 0..n {
            // Unchecked accesses keep the comparison with the iterator
            // version "fair", as iterators bypass bounds checks.
            // SAFETY: `i < n` and `j < n`, so `i` is a valid index of `y`
            // (length n), `j` of `vector` (length n), and
            // `row_start + j < n * n` of `matrix` (length n * n).
            unsafe {
                let a_ij = *matrix.get_unchecked(row_start + j);
                let x_j = *vector.get_unchecked(j);
                *y.get_unchecked_mut(i) += a_ij * x_j;
            }
        }
    }
    black_box(y);
}
|
||
// Vector-matrix product (y = xA), written with explicit indexes.
//
// The matrix keeps the "row-first" layout (i.e. 2D LayoutRight), so the
// elements of one column are not contiguous and Rust's chunk iterators cannot
// be used: the product falls back to indexes.
fn f2(size: u32) {
    let n = 2_usize.pow(size);
    let vector: Vec<f64> = vec![1.0; n];
    let matrix: Vec<f64> = vec![1.0; n * n];
    let mut y: Vec<f64> = vec![0.0; n];
    // `j` walks the columns of the matrix, `i` its rows
    for j in 0..n {
        for i in 0..n {
            // Unchecked accesses keep the comparison with the iterator
            // versions "fair", as iterators bypass bounds checks.
            // SAFETY: `i < n` and `j < n`, so `j` is a valid index of `y`
            // (length n), `i` of `vector` (length n), and `i * n + j < n * n`
            // of `matrix` (length n * n).
            unsafe {
                let x_i = *vector.get_unchecked(i);
                let a_ij = *matrix.get_unchecked(i * n + j);
                *y.get_unchecked_mut(j) += x_i * a_ij;
            }
        }
    }
    black_box(y);
}
|
||
// Vector-matrix product (y = xA) with an adapted layout, written with
// iterators.
//
// Here the matrix buffer is read as "column first" (i.e. 2D LayoutLeft): each
// chunk of 2^size elements is one column, which makes the iterators usable
// again. The code is essentially the same as the matrix-vector product.
fn f3(size: u32) {
    let n = 2_usize.pow(size);
    let vector: Vec<f64> = vec![1.0; n];
    let matrix: Vec<f64> = vec![1.0; n * n];
    // dot product of the vector with one matrix column
    let dot_with_column =
        |col: &[f64]| -> f64 { vector.iter().zip(col.iter()).map(|(x_i, c_i)| x_i * c_i).sum() };
    let y: Vec<f64> = matrix.chunks(n).map(dot_with_column).collect();
    black_box(y);
}
|
||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
// Generate/Define the input | ||
let data_size: u32 = 11; // 2048 length vector, 2048*2048 matrix | ||
|
||
let mut group = c.benchmark_group("gemv"); | ||
group.bench_with_input( | ||
BenchmarkId::new("Matrix-Vector Product (iterators)", ""), | ||
&data_size, | ||
|b, &n| b.iter(|| f1(n)), | ||
); | ||
group.bench_with_input( | ||
BenchmarkId::new("Matrix-Vector Product (indexes)", ""), | ||
&data_size, | ||
|b, &n| b.iter(|| f1_b(n)), | ||
); | ||
group.bench_with_input( | ||
BenchmarkId::new("Vector-Matrix Product (indexes)", ""), | ||
&data_size, | ||
|b, &n| b.iter(|| f2(n)), | ||
); | ||
group.bench_with_input( | ||
BenchmarkId::new("Vector-Matrix Product w/ adapted layout (iterators)", ""), | ||
&data_size, | ||
|b, &n| b.iter(|| f3(n)), | ||
); | ||
group.finish(); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; | ||
use poc_kokkos_rs::view::{parameters::Layout, ViewOwned}; | ||
use rand::prelude::*; | ||
|
||
// This bench is used to evaluate the cost of accessing views' data. | ||
// Each benched function performs `length` accesses (2^11 = 2048 with the current settings). | ||
|
||
// 1D vector access: reads elements of a zero-filled vector of `length`
// elements at the positions given by the first `length` entries of `indices`.
//
// Panics if `indices` holds fewer than `length` entries, or if one of the
// entries used is not a valid index of the vector (checked indexing).
fn f1(length: usize, indices: &[usize]) {
    let data: Vec<f64> = vec![0.0; length];

    for &i in &indices[..length] {
        // copy the element out so that the read itself reaches `black_box`
        let value = data[i];
        black_box(value);
    }
}
|
||
// 1D view access | ||
fn f1_b(length: usize, indices: &[usize]) { | ||
let v_y: ViewOwned<'_, 1, f64> = | ||
ViewOwned::new_from_data(vec![0.0; length], Layout::Right, [length]); | ||
let idx = &indices[0..length]; | ||
|
||
idx.iter().for_each(|i| { | ||
let tmp = v_y[[*i]]; | ||
black_box(tmp); | ||
}) | ||
} | ||
|
||
// 2D vector access: reads elements of a zero-filled, flattened
// `length * length` vector at the (i, j) positions given by the first
// `length` entries of `indices`.
//
// Panics if `indices` holds fewer than `length` entries. The element accesses
// themselves are unchecked: both components of every entry used must be
// smaller than `length`.
fn f2(length: usize, indices: &[(usize, usize)]) {
    let data: Vec<f64> = vec![0.0; length * length];

    for &(i, j) in &indices[..length] {
        // The dereference is required: without it `value` is only a `&f64`,
        // `black_box` is handed the reference and the element is never
        // actually loaded, which makes this case incomparable with the view
        // benchmark (which copies the element out).
        // SAFETY: sound only when `i < length` and `j < length`, which makes
        // `i * length + j < length * length`, the length of `data`. This is
        // not checked here; callers must provide in-range indices.
        let value = unsafe { *data.get_unchecked(i * length + j) };
        black_box(value);
    }
}
|
||
// 2D view access | ||
fn f2_b(length: usize, indices: &[(usize, usize)]) { | ||
let v_y: ViewOwned<'_, 2, f64> = | ||
ViewOwned::new_from_data(vec![0.0; length * length], Layout::Right, [length, length]); | ||
let idx = &indices[0..length]; | ||
|
||
idx.iter().for_each(|(i, j)| { | ||
let tmp = v_y[[*i, *j]]; | ||
black_box(tmp); | ||
}) | ||
} | ||
|
||
// 3D vector access: reads elements of a zero-filled, flattened
// `length * length * length` vector at the (i, j, k) positions given by the
// first `length` entries of `indices`.
//
// Panics if `indices` holds fewer than `length` entries. The element accesses
// themselves are unchecked: all three components of every entry used must be
// smaller than `length`.
fn f3(length: usize, indices: &[(usize, usize, usize)]) {
    let data: Vec<f64> = vec![0.0; length * length * length];

    for &(i, j, k) in &indices[..length] {
        // flattened position of (i, j, k); `k` is the fastest-varying index
        let offset = i * length * length + j * length + k;
        // WARNING
        // The dereference must stay. Without it `value` is a `&f64`, so
        // `black_box` only receives the reference and the access gets
        // optimized away. This can be verified by running the benchmark twice:
        // - once with the blackbox, without the deref operator *
        // - once without the blackbox, with the deref operator *
        // both yield the same result.
        // SAFETY: sound only when `i`, `j` and `k` are all smaller than
        // `length`, which makes `offset` smaller than the length of `data`.
        // This is not checked here; callers must provide in-range indices.
        let value = unsafe { *data.get_unchecked(offset) };
        black_box(value);
    }
}
|
||
// 3D view access | ||
fn f3_b(length: usize, indices: &[(usize, usize, usize)]) { | ||
let v_y: ViewOwned<'_, 3, f64> = ViewOwned::new_from_data( | ||
vec![0.0; length * length * length], | ||
Layout::Right, | ||
[length, length, length], | ||
); | ||
let idx = &indices[0..length]; | ||
|
||
idx.iter().for_each(|(i, j, k)| { | ||
let tmp = v_y[[*i, *j, *k]]; | ||
black_box(tmp); | ||
}) | ||
} | ||
|
||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
// Generate/Define the input | ||
const DATA_SIZE: u32 = 11; // 2048 length vector, 2048*2048 matrix | ||
let length = 2_usize.pow(DATA_SIZE); | ||
let mut rng = SmallRng::from_entropy(); | ||
let indices1: Vec<usize> = rand::seq::index::sample(&mut rng, length, length).into_vec(); | ||
let indices1b: Vec<usize> = rand::seq::index::sample(&mut rng, length, length).into_vec(); | ||
let indices1bb: Vec<usize> = rand::seq::index::sample(&mut rng, length, length).into_vec(); | ||
|
||
let indices2: Vec<(usize, usize)> = indices1 | ||
.iter() | ||
.zip(indices1b.iter()) | ||
.map(|(i1, i2)| (*i1, *i2)) | ||
.collect(); | ||
|
||
let indices3: Vec<(usize, usize, usize)> = indices1 | ||
.iter() | ||
.zip(indices1b.iter()) | ||
.zip(indices1bb.iter()) | ||
.map(|((i1, i2), i3)| (*i1, *i2, *i3)) | ||
.collect(); | ||
|
||
let mut group1 = c.benchmark_group("1D access"); | ||
group1.bench_with_input( | ||
BenchmarkId::new("Vector Access", ""), | ||
&(length, indices1.clone()), | ||
|b, (n, i)| b.iter(|| f1(*n, i)), | ||
); | ||
group1.bench_with_input( | ||
BenchmarkId::new("View Access", ""), | ||
&(length, indices1), | ||
|b, (n, i)| b.iter(|| f1_b(*n, i)), | ||
); | ||
group1.finish(); | ||
|
||
let mut group2 = c.benchmark_group("2D access"); | ||
group2.bench_with_input( | ||
BenchmarkId::new("Vector Access", ""), | ||
&(length, (indices2.clone())), | ||
|b, (n, i)| b.iter(|| f2(*n, i)), | ||
); | ||
group2.bench_with_input( | ||
BenchmarkId::new("View Access", ""), | ||
&(length, (indices2)), | ||
|b, (n, i)| b.iter(|| f2_b(*n, i)), | ||
); | ||
group2.finish(); | ||
|
||
let mut group3 = c.benchmark_group("3D access"); | ||
group3.bench_with_input( | ||
BenchmarkId::new("Vector Access", ""), | ||
&(length, indices3.clone()), | ||
|b, (n, i)| b.iter(|| f3(*n, i)), | ||
); | ||
group3.bench_with_input( | ||
BenchmarkId::new("View Access", ""), | ||
&(length, indices3), | ||
|b, (n, i)| b.iter(|| f3_b(*n, i)), | ||
); | ||
group3.finish(); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
Oops, something went wrong.