-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* modified functor traits to handle backend specific closures should upcasting ever be stabilized, this version may compile * Serial code compiles again kernels are written in a Box, using an enum as argument this comes at a ~20% perf cost according to the mdrange_populate bench * rayon features compiles need to write a bench with a simple parallel for to make sure the feature works * moved imports inside test functions to fix warnings these warnings are technically erroneous since the auto fix creates errors * added feature bench to check backend usage there is indeed a 2* speedup when using `--features rayon` * docs * docs again * update readme * new PartialEq impl for view data * added a raw_val method to views use for test assertions * moved flat index computation to an inline function * added feature-specific writing interface to views * update mirroring function * finished universal writing interface * updated feature bench; automatic kernel typing now works! * small doc update * modified layout bench to operate on a range of data sizes perf significantly worsens past the size of my L1 caches * new ForKernel type for std::thread usage * better kernel type definition * added cfg_if in dispatch code * added basic code for thread backend; currently doesn't work * indices dispatch ok; need to figure out how to share the kernel * kernel dispatch ok; need to fix lifetime issues * no more static lifetime using std threads; kernel dispatch using raw pointers produces sigsegv :) * small fixes * cloning kernel works with Box<impl ...>; impl trait in type aliases have yet to be stabilized, might switch to nightly * gated gpu impl behind a specific feature * removed DEPTH const generics from parallel_for method it's just bloat at this point * parallel_for works with all features! * feature docs
- Loading branch information
Showing
14 changed files
with
746 additions
and
238 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; | ||
use poc_kokkos_rs::{ | ||
functor::KernelArgs, | ||
routines::{ | ||
parallel_for, | ||
parameters::{ExecutionPolicy, ExecutionSpace, RangePolicy, Schedule}, | ||
}, | ||
view::{parameters::Layout, ViewOwned}, | ||
}; | ||
|
||
// this bench is used to assess whether the parallel_for routine | ||
// switches backend according to the enabled feature. It should be executed | ||
// multiple times by the user, each time with a different feature | ||
|
||
// 1D regular for init & populating | ||
// | ||
// Builds a `length`-element view initialized to 0.0, then dispatches a | ||
// `parallel_for` over `0..length` whose kernel writes 1.0 at every index | ||
// it receives. | ||
fn f1(length: usize) { | ||
// view under test: `length` zeros, created with `Layout::Right` | ||
let mut v_y = ViewOwned::new_from_data(vec![0.0; length], Layout::Right, [length]); | ||
black_box(&mut v_y); // prevents the first init from being optimized away | ||
// execution policy spanning the whole index range `0..length` | ||
let execp = ExecutionPolicy { | ||
space: ExecutionSpace::DeviceCPU, | ||
range: RangePolicy::RangePolicy(0..length), | ||
schedule: Schedule::Static, | ||
}; | ||
|
||
// kernel: only the 1D index variant is handled; the two other variants | ||
// fall through to `unimplemented!()` | ||
let kernel = |arg: KernelArgs<1>| match arg { | ||
KernelArgs::Index1D(i) => { | ||
v_y.set([i], 1.0); | ||
// read the value back through black_box so the write is observed | ||
black_box(&v_y[[i]]); | ||
} | ||
KernelArgs::IndexND(_) => unimplemented!(), | ||
KernelArgs::Handle => unimplemented!(), | ||
}; | ||
// `unwrap`: the bench panics if the dispatch does not succeed | ||
parallel_for(execp, kernel).unwrap(); | ||
black_box(&v_y); | ||
} | ||
|
||
// Registers the "parallel_for" benchmark group: a single benchmark that | ||
// runs `f1` on an input of 2^DATA_SIZE (= 2^20) elements. | ||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
// Generate/Define the input | ||
const DATA_SIZE: u32 = 20; | ||
let length = 2_usize.pow(DATA_SIZE); | ||
|
||
let mut group = c.benchmark_group("parallel_for"); | ||
// the benchmark id carries an empty parameter string; the input size | ||
// is passed to the closure as `n` | ||
group.bench_with_input( | ||
BenchmarkId::new("feature-specific time", ""), | ||
&(length), | ||
|b, n| b.iter(|| f1(*n)), | ||
); | ||
group.finish() | ||
} | ||
|
||
// Criterion harness wiring: declares the `benches` group containing | ||
// `criterion_benchmark` and hands it to `criterion_main!`. | ||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,38 @@ | ||
//! functor & kernel related code | ||
//! | ||
//! | ||
//! | ||
//! | ||
//! This module contains all functor and kernel related code. Its content | ||
//! is highly dependent on the features enabled since the traits that a | ||
//! kernel must satisfy change completely depending on the backend used. | ||
/// Kernel argument types | ||
/// | ||
/// Until some work is done to have a better solution[^sol1][^sol2], this will | ||
/// be an enum and kernels will be written in an idiomatic way. | ||
/// | ||
/// [^sol1]: Current tracking issue for upcasting implementation: <https://github.com/rust-lang/rust/issues/65991> | ||
/// | ||
/// [^sol2]: Current tracking issue to allow impl trait usage in types aliases: <https://github.com/rust-lang/rust/issues/63063> | ||
pub enum KernelArgs<const N: usize> { | ||
/// Arguments of a one-dimensional kernel (e.g. a RangePolicy). | ||
Index1D(usize), | ||
/// Arguments of a `N`-dimensional kernel (e.g. a MDRangePolicy). | ||
IndexND([usize; N]), | ||
/// Arguments of a team-based kernel. | ||
Handle, | ||
} | ||
|
||
// Selects the `ForKernelType` alias at compile time. Branches are checked | ||
// in order: `rayon` first, then `threads`, otherwise the serial fallback. | ||
cfg_if::cfg_if! { | ||
if #[cfg(feature = "rayon")] { | ||
/// `rayon`-specific kernel type. | ||
// boxed `Fn` closure bounded by `Send + Sync` | ||
pub type ForKernelType<'a, const N: usize> = Box<dyn Fn(KernelArgs<N>) + Send + Sync + 'a>; | ||
} else if #[cfg(feature = "threads")] { | ||
/// Standard threads specific kernel type. | ||
// boxed `Fn` closure bounded by `Send` only | ||
pub type ForKernelType<'a, const N: usize> = Box<dyn Fn(KernelArgs<N>) + Send + 'a>; | ||
} else { | ||
/// Fall back kernel type. | ||
// plain alias of `SerialForKernelType` | ||
pub type ForKernelType<'a, const N: usize> = SerialForKernelType<'a, N>; | ||
} | ||
} | ||
|
||
/// Functor trait. Users can implement their own functor by implementing this trait. | ||
/// | ||
/// Any implementor must also satisfy `Fn(Args) -> Output` (supertrait bound). | ||
pub trait Functor<Args, Output>: Fn(Args) -> Output {} | ||
/// Serial kernel type. | ||
/// | ||
/// A boxed `FnMut` closure taking a [`KernelArgs<N>`], valid for lifetime `'a`. | ||
/// The alias carries no `Send` or `Sync` bound. | ||
pub type SerialForKernelType<'a, const N: usize> = Box<dyn FnMut(KernelArgs<N>) + 'a>; |
Oops, something went wrong.