From 83371a534b8a83e5032b27165486948c8b1e1609 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 10 Jan 2024 09:32:02 +0000 Subject: [PATCH] Try with latest rust-cuda without TAIT --- Cargo.lock | 8 +- necsim/core/Cargo.toml | 4 +- necsim/impls/cuda/Cargo.toml | 4 +- necsim/impls/no-std/Cargo.toml | 4 +- rustcoalescence/Cargo.toml | 9 ++ rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/src/lib.rs | 50 ++++-- .../algorithms/cuda/cpu-kernel/src/link.rs | 14 ++ .../algorithms/cuda/cpu-kernel/src/patch.rs | 143 ++++++++++++++++-- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +- .../algorithms/cuda/gpu-kernel/src/lib.rs | 25 +-- rustcoalescence/algorithms/cuda/src/launch.rs | 9 +- rustcoalescence/algorithms/cuda/src/lib.rs | 34 ++++- 14 files changed, 260 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39a949033..e86d2dd06 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=57ed62c#57ed62c6b6de1b9d991aeb96bb4e0124f8f7aa89" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=57ed62c#57ed62c6b6de1b9d991aeb96bb4e0124f8f7aa89" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=57ed62c#57ed62c6b6de1b9d991aeb96bb4e0124f8f7aa89" dependencies = [ "cargo_metadata", "colored", @@ -1594,6 +1594,7 @@ dependencies = [ "log", "necsim-core", "necsim-core-bond", + "necsim-impls-cuda", "necsim-impls-no-std", "necsim-impls-std", "necsim-partitioning-core", @@ -1601,6 +1602,7 @@ dependencies = [ "necsim-partitioning-mpi", "necsim-plugins-core", "ron", + "rust-cuda", "rustcoalescence-algorithms", "rustcoalescence-algorithms-cuda", "rustcoalescence-algorithms-gillespie", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index ef8b0dccc..19f3cb144 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 984ba4a50..299ed07e0 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 07f88df49..388af9a1a 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index ebf41fc18..49d876094 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -10,9 +10,16 @@ edition = "2021" [features] default = [] +necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] + +rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] +rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:necsim-impls-cuda", "dep:rust-cuda"] + [dependencies] necsim-core = { path = "../necsim/core" } necsim-core-bond = { path = "../necsim/core/bond" } +necsim-impls-cuda = { path = "../necsim/impls/cuda", optional = true } necsim-impls-no-std = { path = "../necsim/impls/no-std" } necsim-impls-std = { path = "../necsim/impls/std" } necsim-plugins-core = { path = "../necsim/plugins/core", features = ["import"] } @@ -22,6 +29,8 @@ necsim-partitioning-monolithic = { path = "../necsim/partitioning/monolithic" } rustcoalescence-scenarios = { path = "scenarios" } rustcoalescence-algorithms = { path = "algorithms" } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = [], optional = true } + necsim-partitioning-mpi = { path = "../necsim/partitioning/mpi", optional = true } rustcoalescence-algorithms-gillespie = { path = "algorithms/gillespie", optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index b51090971..32b01c347 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index ae5937ec8..5e657ac19 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index cb570a68c..6ef53f494 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -18,11 +18,20 @@ use necsim_impls_no_std::cogs::{ use rust_cuda::lend::RustToCuda; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; + mod link; mod patch; #[allow(clippy::type_complexity)] pub struct SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M: MathsCore + Sync, H: Habitat + RustToCuda + Sync, G: PrimeableRng + RustToCuda + Sync, @@ -38,20 +47,29 @@ pub struct SimulationKernelPtx< ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - std::marker::PhantomData<( - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - )>, + std::marker::PhantomData< + simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + >, + >, ); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs index d519b533a..dd10532b6 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs @@ -28,6 +28,13 @@ macro_rules! link_kernel { $report_speciation:ty, $report_dispersal:ty ) => { rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -83,6 +90,13 @@ macro_rules! link_kernel { > for SimulationKernelPtx } rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 828562714..686d84bf3 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -29,6 +29,13 @@ extern "C" { #[allow(clippy::trait_duplication_in_bounds)] unsafe impl< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M: MathsCore + Sync, H: Habitat + RustToCuda + Sync, G: PrimeableRng + RustToCuda + Sync, @@ -45,17 +52,133 @@ unsafe impl< ReportDispersal: Boolean, > CompiledKernelPtx< - simulate, - > for SimulationKernelPtx + simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + >, + > + for SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + > where - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, - SimulationKernelPtx: - CompiledKernelPtx>, + SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + False, + >: CompiledKernelPtx< + simulate<'a, 'b, 'c, 'd, 'e, 'f, 'g, M, H, G, S, X, D, C, T, N, E, I, A, False, False>, + >, + SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + True, + >: CompiledKernelPtx< + simulate<'a, 'b, 'c, 'd, 'e, 'f, 'g, M, H, G, S, X, D, C, T, N, E, I, A, False, True>, + >, + SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + False, + >: CompiledKernelPtx< + simulate<'a, 'b, 'c, 'd, 'e, 'f, 'g, M, H, G, S, X, D, C, T, N, E, I, A, True, False>, + >, + SimulationKernelPtx<'a, 'b, 'c, 'd, 'e, 'f, 'g, M, H, G, S, X, D, C, T, N, E, I, A, True, True>: + CompiledKernelPtx< + simulate<'a, 'b, 'c, 'd, 'e, 'f, 'g, M, H, G, S, X, D, C, T, N, E, I, A, True, True>, + >, { default fn get_ptx() -> &'static CStr { unsafe { unreachable_cuda_simulation_linking_reporter() } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index f0a8873c0..15da42535 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "57ed62c", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 292aab1ec..5de7d8dbb 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -1,8 +1,7 @@ #![deny(clippy::pedantic)] #![no_std] -#![feature(type_alias_impl_trait)] #![feature(decl_macro)] -#![feature(c_str_literals)] +#![cfg_attr(not(target_os = "cuda"), feature(c_str_literals))] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] #![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] #![allow(long_running_const_eval)] @@ -39,6 +38,13 @@ pub use rust_cuda::lend::RustToCuda; #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] pub fn simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M: MathsCore + Sync, H: Habitat + RustToCuda + Sync, G: PrimeableRng + RustToCuda + Sync, @@ -54,27 +60,28 @@ pub fn simulate< ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - simulation: &rust_cuda::kernel::param::PtxJit< + #[warn(clippy::type_complexity)] // FIXME: already specified on top + simulation: &'a rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_core::simulation::Simulation, >, >, - task_list: &mut rust_cuda::kernel::param::PtxJit< + task_list: &'b mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer, >, >, - event_buffer_reporter: &mut rust_cuda::kernel::param::PtxJit< + event_buffer_reporter: &'c mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::event_buffer::EventBuffer, >, >, - min_spec_sample_buffer: &mut rust_cuda::kernel::param::PtxJit< + min_spec_sample_buffer: &'d mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer, >, >, - next_event_time_buffer: &mut rust_cuda::kernel::param::PtxJit< + next_event_time_buffer: &'e mut rust_cuda::kernel::param::PtxJit< rust_cuda::kernel::param::DeepPerThreadBorrow< necsim_impls_cuda::value_buffer::ValueBuffer< necsim_core_bond::PositiveF64, @@ -83,10 +90,10 @@ pub fn simulate< >, >, >, - total_time_max: &rust_cuda::kernel::param::ShallowInteriorMutable< + total_time_max: &'f rust_cuda::kernel::param::ShallowInteriorMutable< core::sync::atomic::AtomicU64, >, - total_steps_sum: &rust_cuda::kernel::param::ShallowInteriorMutable< + total_steps_sum: &'g rust_cuda::kernel::param::ShallowInteriorMutable< core::sync::atomic::AtomicU64, >, max_steps: rust_cuda::kernel::param::PerThreadShallowCopy, diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 409cb766b..e4c6fa360 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -57,8 +57,15 @@ pub fn initialise_and_simulate< I: Iterator, L: CudaLineageStoreSampleInitialiser>, O, Error>, Error: From, - Ptx: CompiledKernelPtx< + Ptx: for<'a, 'b, 'c, 'd, 'e, 'f, 'g> CompiledKernelPtx< simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M, O::Habitat, CudaRng>, diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index 6cea35163..174b5d2aa 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -85,7 +85,14 @@ where RustToCuda + Sync, O::TurnoverRate: RustToCuda + Sync, O::SpeciationProbability: RustToCuda + Sync, - SimulationKernelPtx< + for<'a, 'b, 'c, 'd, 'e, 'f, 'g> SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M, O::Habitat, CudaRng>, @@ -125,6 +132,13 @@ where R::ReportDispersal, >: CompiledKernelPtx< simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M, O::Habitat, CudaRng>, @@ -164,7 +178,14 @@ where R::ReportDispersal, >, >, - SimulationKernelPtx< + for<'a, 'b, 'c, 'd, 'e, 'f, 'g> SimulationKernelPtx< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M, O::Habitat, CudaRng>, @@ -222,6 +243,13 @@ where R::ReportDispersal, >: CompiledKernelPtx< simulate< + 'a, + 'b, + 'c, + 'd, + 'e, + 'f, + 'g, M, O::Habitat, CudaRng>, @@ -398,7 +426,7 @@ where _, _, _, - as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ExpEventTimeSampler>, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ConstEventTimeSampler>, _, _, >>(