From 7d2633ca335658d26d3829aa00f8569f64e0397b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 13 Jan 2024 09:24:31 +0000 Subject: [PATCH] Update to rust-cuda with async kernel launch async return --- Cargo.lock | 6 ++-- necsim/core/Cargo.toml | 4 +-- necsim/impls/cuda/Cargo.toml | 4 +-- necsim/impls/no-std/Cargo.toml | 4 +-- rustcoalescence/algorithms/cuda/Cargo.toml | 2 +- .../algorithms/cuda/cpu-kernel/Cargo.toml | 2 +- .../algorithms/cuda/gpu-kernel/Cargo.toml | 4 +-- rustcoalescence/algorithms/cuda/src/launch.rs | 34 ++++++++++--------- .../cuda/src/parallelisation/monolithic.rs | 18 +++++----- 9 files changed, 41 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2552e9173..711fe0465 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=c74b542#c74b542d35007dda960831ef1ce014c7ddb70ef8" +source = "git+https://github.com/juntyr/rust-cuda?rev=4148959#4148959b21ba72881434e6d1f94fd4bd35f27e2f" dependencies = [ "cargo_metadata", "colored", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index 371db683b..3d23d4987 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"], optional = true } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 5a09b89b0..07a1cb4e2 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"] } diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index cd72e882e..3835f58d1 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "final", "host"], optional = true } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index 4773c0a7a..66acfc0e3 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 9c347e4c7..416af0ef3 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 03307a6ea..5a1ffb5f6 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "c74b542", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 5a94abdc9..44e0e66f6 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -177,7 +177,7 @@ where }; let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { - let stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); + let mut stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); let mut kernel = TypedPtxKernel::new::(Some(Box::new(|kernel| { crate::info::print_kernel_function_attributes("simulate", kernel); @@ -190,21 +190,23 @@ where ptx_jit: args.ptx_jit, }; - let launcher = Launcher { - stream: &stream, - kernel: &mut kernel, - config, - }; - - parallelisation::monolithic::simulate( - &mut simulation, - launcher, - (args.dedup_cache, args.step_slice), - lineages, - event_slice, - pause_before, - local_partition, - ) + rust_cuda::host::Stream::with(&mut stream, |stream| { + let launcher = Launcher { + stream, + kernel: &mut kernel, + config, + }; + + parallelisation::monolithic::simulate( + &mut simulation, + launcher, + (args.dedup_cache, args.step_slice), + lineages, + event_slice, + pause_before, + local_partition, + ) + }) }) .map_err(CudaError::from)?; diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index e28cd0ef0..ccf21262b 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -249,14 +249,14 @@ pub fn simulate< let mut task_list_cuda_async = task_list.move_to_device_async(launcher.stream)?; - launcher.launch9_async( - simulation_cuda_repr.as_async(launcher.stream).as_ref(), - task_list_cuda_async.as_mut_async().proj_mut(), - event_buffer_cuda_async.as_mut_async().proj_mut(), - min_spec_sample_buffer_cuda_async.as_mut_async().proj_mut(), - next_event_time_buffer_cuda_async.as_mut_async().proj_mut(), - total_time_max.as_ref().as_async(launcher.stream).as_ref(), - total_steps_sum.as_ref().as_async(launcher.stream).as_ref(), + let launch = launcher.launch9_async( + simulation_cuda_repr.as_async(launcher.stream).extract_ref(), + task_list_cuda_async.as_mut_async(), + event_buffer_cuda_async.as_mut_async(), + min_spec_sample_buffer_cuda_async.as_mut_async(), + next_event_time_buffer_cuda_async.as_mut_async(), + total_time_max.as_ref().as_async(launcher.stream).extract_ref(), + total_steps_sum.as_ref().as_async(launcher.stream).extract_ref(), step_slice.get(), level_time, )?; @@ -276,6 +276,8 @@ pub fn simulate< next_event_time_buffer = next_event_time_buffer_host_async.synchronize()?; min_spec_sample_buffer = min_spec_sample_buffer_host_async.synchronize()?; + launch.synchronize()?; + // Fetch the completion of the tasks for ((mut spec_sample, mut next_event_time), mut task) in min_spec_sample_buffer