diff --git a/CMakeLists.txt b/CMakeLists.txt
index f51f4f8dc..bf3937de3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@
 
 # Set up the project.
 cmake_minimum_required(VERSION 3.11)
-project(detray VERSION 0.81.0 LANGUAGES CXX)
+project(detray VERSION 0.83.0 LANGUAGES CXX)
 
 # Set up the used C++ standard(s).
 set(CMAKE_CXX_STANDARD 20 CACHE STRING "The (host) C++ standard to use")
diff --git a/core/include/detray/core/detail/container_buffers.hpp b/core/include/detray/core/detail/container_buffers.hpp
index 93f67a6a0..82f9c9929 100644
--- a/core/include/detray/core/detail/container_buffers.hpp
+++ b/core/include/detray/core/detail/container_buffers.hpp
@@ -157,11 +157,10 @@ struct dmulti_buffer : public detail::dbase_buffer {
 
 /// @brief Get the buffer representation of a vecmem vector - non-const
 template <class T>
-auto get_buffer(
-    const dvector_view<T>& vec_view, vecmem::memory_resource& mr,
-    vecmem::copy& cpy, detray::copy cpy_type = detray::copy::sync,
-    vecmem::data::buffer_type buff_type = vecmem::data::buffer_type::fixed_size
-    /*, stream*/) {
+auto get_buffer(const dvector_view<T>& vec_view, vecmem::memory_resource& mr,
+                vecmem::copy& cpy, detray::copy cpy_type = detray::copy::sync,
+                vecmem::data::buffer_type buff_type =
+                    vecmem::data::buffer_type::fixed_size) {
 
     // In case the view references a const object, return a non-const buffer
     using ret_buffer_t = dvector_buffer<std::remove_cv_t<T>>;
@@ -171,7 +170,7 @@ auto get_buffer(
     // TODO: Move this to detray copy util, which bundles vecmem copy object and
     // stream handle and gets this switch case right automatically
     if (cpy_type == detray::copy::async) {
-        cpy(vec_view, buff /*, stream*/);
+        cpy(vec_view, buff)->ignore();
     } else {
         cpy(vec_view, buff)->wait();
     }
diff --git a/core/include/detray/definitions/detail/math.hpp b/core/include/detray/definitions/detail/math.hpp
index 2ec8ced1e..51d28bc67 100644
--- a/core/include/detray/definitions/detail/math.hpp
+++ b/core/include/detray/definitions/detail/math.hpp
@@ -1,6 +1,6 @@
 /** Detray library, part of the ACTS project (R&D line)
  *
- * (c) 2023 CERN for the benefit of the ACTS project
+ * (c) 2023-2024 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -9,7 +9,7 @@
 
 // SYCL include(s).
 #if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION)
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #endif
 
 // System include(s).
@@ -19,7 +19,7 @@ namespace detray {
 
 /// Namespace to pick up math functions from
 #if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION)
-namespace math = cl::sycl;
+namespace math = ::sycl;
 #elif IS_SOA
 
 namespace math {
diff --git a/core/include/detray/propagator/detail/jacobian_cartesian.hpp b/core/include/detray/propagator/detail/jacobian_cartesian.hpp
index dbe8340a4..b0b66ed3d 100644
--- a/core/include/detray/propagator/detail/jacobian_cartesian.hpp
+++ b/core/include/detray/propagator/detail/jacobian_cartesian.hpp
@@ -80,9 +80,9 @@ struct jacobian<cartesian2D<algebra_t>> {
         const matrix_type<3, 2> bound_pos_to_free_pos_derivative =
             matrix_operator().template block<3, 2>(frame, 0u, 0u);
 
-        matrix_operator().template set_block(bound_to_free_jacobian,
-                                             bound_pos_to_free_pos_derivative,
-                                             e_free_pos0, e_bound_loc0);
+        matrix_operator().set_block(bound_to_free_jacobian,
+                                    bound_pos_to_free_pos_derivative,
+                                    e_free_pos0, e_bound_loc0);
     }
 
     DETRAY_HOST_DEVICE
@@ -98,9 +98,9 @@ struct jacobian<cartesian2D<algebra_t>> {
         const matrix_type<2, 3> free_pos_to_bound_pos_derivative =
             matrix_operator().template block<2, 3>(frameT, 0, 0);
 
-        matrix_operator().template set_block(free_to_bound_jacobian,
-                                             free_pos_to_bound_pos_derivative,
-                                             e_bound_loc0, e_free_pos0);
+        matrix_operator().set_block(free_to_bound_jacobian,
+                                    free_pos_to_bound_pos_derivative,
+                                    e_bound_loc0, e_free_pos0);
     }
 
     DETRAY_HOST_DEVICE
diff --git a/core/include/detray/propagator/detail/jacobian_cylindrical.hpp b/core/include/detray/propagator/detail/jacobian_cylindrical.hpp
index 23e395bcd..65cfb0b9a 100644
--- a/core/include/detray/propagator/detail/jacobian_cylindrical.hpp
+++ b/core/include/detray/propagator/detail/jacobian_cylindrical.hpp
@@ -102,9 +102,9 @@ struct jacobian<cylindrical2D<algebra_t>> {
         const auto bound_pos_to_free_pos_derivative =
             matrix_operator().template block<3, 2>(frame, 0u, 0u);
 
-        matrix_operator().template set_block(bound_to_free_jacobian,
-                                             bound_pos_to_free_pos_derivative,
-                                             e_free_pos0, e_bound_loc0);
+        matrix_operator().set_block(bound_to_free_jacobian,
+                                    bound_pos_to_free_pos_derivative,
+                                    e_free_pos0, e_bound_loc0);
     }
 
     DETRAY_HOST_DEVICE
@@ -120,9 +120,9 @@ struct jacobian<cylindrical2D<algebra_t>> {
         const auto free_pos_to_bound_pos_derivative =
             matrix_operator().template block<2, 3>(frameT, 0u, 0u);
 
-        matrix_operator().template set_block(free_to_bound_jacobian,
-                                             free_pos_to_bound_pos_derivative,
-                                             e_bound_loc0, e_free_pos0);
+        matrix_operator().set_block(free_to_bound_jacobian,
+                                    free_pos_to_bound_pos_derivative,
+                                    e_bound_loc0, e_free_pos0);
     }
 
     DETRAY_HOST_DEVICE
diff --git a/core/include/detray/propagator/detail/jacobian_line.hpp b/core/include/detray/propagator/detail/jacobian_line.hpp
index 22c0c0dc2..2fc944b39 100644
--- a/core/include/detray/propagator/detail/jacobian_line.hpp
+++ b/core/include/detray/propagator/detail/jacobian_line.hpp
@@ -120,9 +120,9 @@ struct jacobian<line2D<algebra_t>> {
         const auto bound_pos_to_free_pos_derivative =
             matrix_operator().template block<3, 2>(frame, 0u, 0u);
 
-        matrix_operator().template set_block(bound_to_free_jacobian,
-                                             bound_pos_to_free_pos_derivative,
-                                             e_free_pos0, e_bound_loc0);
+        matrix_operator().set_block(bound_to_free_jacobian,
+                                    bound_pos_to_free_pos_derivative,
+                                    e_free_pos0, e_bound_loc0);
     }
 
     DETRAY_HOST_DEVICE
@@ -138,9 +138,9 @@ struct jacobian<line2D<algebra_t>> {
         const auto free_pos_to_bound_pos_derivative =
             matrix_operator().template block<2, 3>(frameT, 0u, 0u);
 
-        matrix_operator().template set_block(free_to_bound_jacobian,
-                                             free_pos_to_bound_pos_derivative,
-                                             e_bound_loc0, e_free_pos0);
+        matrix_operator().set_block(free_to_bound_jacobian,
+                                    free_pos_to_bound_pos_derivative,
+                                    e_bound_loc0, e_free_pos0);
     }
 
     DETRAY_HOST_DEVICE
diff --git a/core/include/detray/propagator/detail/jacobian_polar.hpp b/core/include/detray/propagator/detail/jacobian_polar.hpp
index 0a297010f..bcc2e1396 100644
--- a/core/include/detray/propagator/detail/jacobian_polar.hpp
+++ b/core/include/detray/propagator/detail/jacobian_polar.hpp
@@ -106,9 +106,9 @@ struct jacobian<polar2D<algebra_t>> {
         matrix_operator().template set_block<3, 1>(
             bound_pos_to_free_pos_derivative, col1, e_free_pos0, e_bound_loc1);
 
-        matrix_operator().template set_block(bound_to_free_jacobian,
-                                             bound_pos_to_free_pos_derivative,
-                                             e_free_pos0, e_bound_loc0);
+        matrix_operator().set_block(bound_to_free_jacobian,
+                                    bound_pos_to_free_pos_derivative,
+                                    e_free_pos0, e_bound_loc0);
     }
 
     DETRAY_HOST_DEVICE
@@ -149,9 +149,9 @@ struct jacobian<polar2D<algebra_t>> {
         matrix_operator().template set_block<1, 3>(
             free_pos_to_bound_pos_derivative, row1, e_bound_loc1, e_free_pos0);
 
-        matrix_operator().template set_block(free_to_bound_jacobian,
-                                             free_pos_to_bound_pos_derivative,
-                                             e_bound_loc0, e_free_pos0);
+        matrix_operator().set_block(free_to_bound_jacobian,
+                                    free_pos_to_bound_pos_derivative,
+                                    e_bound_loc0, e_free_pos0);
     }
 
     DETRAY_HOST_DEVICE
diff --git a/extern/vecmem/CMakeLists.txt b/extern/vecmem/CMakeLists.txt
index 6c9722449..e2fb8a924 100644
--- a/extern/vecmem/CMakeLists.txt
+++ b/extern/vecmem/CMakeLists.txt
@@ -18,7 +18,7 @@ message(STATUS "Building VecMem as part of the Detray project")
 
 # Declare where to get VecMem from.
 set(DETRAY_VECMEM_SOURCE
-    "URL;https://github.com/acts-project/vecmem/archive/refs/tags/v1.8.0.tar.gz;URL_MD5;afddf52d9568964f25062e1c887246b7"
+    "URL;https://github.com/acts-project/vecmem/archive/refs/tags/v1.13.0.tar.gz;URL_MD5;02fe327552c21779f501c224b8c95e87"
     CACHE STRING
     "Source for VecMem, when built as part of this project"
 )
diff --git a/io/include/detray/io/common/geometry_reader.hpp b/io/include/detray/io/common/geometry_reader.hpp
index 46534fcfa..b7f14725e 100644
--- a/io/include/detray/io/common/geometry_reader.hpp
+++ b/io/include/detray/io/common/geometry_reader.hpp
@@ -117,7 +117,7 @@ class geometry_reader {
         }
 
-        // @TODO: Implement voume finder IO
+        // @TODO: Implement volume finder IO
-        det_builder.template set_volume_finder();
+        det_builder.set_volume_finder();
     }
 
     /// @returns a surface transform from its io payload @param trf_data
diff --git a/tests/benchmarks/cpu/benchmark_propagator.cpp b/tests/benchmarks/cpu/benchmark_propagator.cpp
index ac3cc4cf6..02d151446 100644
--- a/tests/benchmarks/cpu/benchmark_propagator.cpp
+++ b/tests/benchmarks/cpu/benchmark_propagator.cpp
@@ -76,20 +76,50 @@ auto toy_cfg =
     toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false);
 
 void fill_tracks(vecmem::vector<free_track_parameters<algebra_t>> &tracks,
-                 const std::size_t theta_steps, const std::size_t phi_steps) {
-    // Set momentum of tracks
-    const scalar mom_mag{10.f * unit<scalar>::GeV};
+                 const std::size_t n_tracks, bool do_sort = true) {
+    using scalar_t = dscalar<algebra_t>;
+    using uniform_gen_t =
+        detail::random_numbers<scalar_t,
+                               std::uniform_real_distribution<scalar_t>>;
+    using trk_generator_t =
+        random_track_generator<free_track_parameters<algebra_t>, uniform_gen_t>;
+
+    trk_generator_t::configuration trk_gen_cfg{};
+    trk_gen_cfg.seed(42u);
+    trk_gen_cfg.n_tracks(n_tracks);
+    trk_gen_cfg.randomize_charge(true);
+    trk_gen_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
+    trk_gen_cfg.eta_range(-3.f, 3.f);
+    trk_gen_cfg.mom_range(1.f * unit<scalar_t>::GeV,
+                          100.f * unit<scalar_t>::GeV);
+    trk_gen_cfg.origin({0.f, 0.f, 0.f});
+    trk_gen_cfg.origin_stddev({0.f * unit<scalar_t>::mm,
+                               0.f * unit<scalar_t>::mm,
+                               0.f * unit<scalar_t>::mm});
 
-    // Iterate through uniformly distributed momentum directions
+    // Iterate through the tracks produced by the random track generator
-    for (auto traj : uniform_track_generator<free_track_parameters<algebra_t>>(
-             phi_steps, theta_steps, mom_mag)) {
+    for (auto traj : trk_generator_t{trk_gen_cfg}) {
         tracks.push_back(traj);
     }
+
+    if (do_sort) {
+        // Sort tracks by ascending |pi_2 - theta| of their direction
+        const auto traj_comp = [](const auto &lhs, const auto &rhs) {
+            constexpr auto pi_2{constant<scalar_t>::pi_2};
+            return math::fabs(pi_2 - getter::theta(lhs.dir())) <
+                   math::fabs(pi_2 - getter::theta(rhs.dir()));
+        };
+
+        std::ranges::sort(tracks, traj_comp);
+    }
 }
 
 template <propagate_option opt>
 static void BM_PROPAGATOR_CPU(benchmark::State &state) {
 
+    std::size_t n_tracks{static_cast<std::size_t>(state.range(0)) *
+                         static_cast<std::size_t>(state.range(0))};
+
     // Create the toy geometry and bfield
     auto [det, names] = build_toy_detector(host_mr, toy_cfg);
     test::vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
@@ -109,8 +139,7 @@ static void BM_PROPAGATOR_CPU(benchmark::State &state) {
 
         // Get tracks
         vecmem::vector<free_track_parameters<algebra_t>> tracks(&host_mr);
-        fill_tracks(tracks, static_cast<std::size_t>(state.range(0)),
-                    static_cast<std::size_t>(state.range(0)));
+        fill_tracks(tracks, n_tracks);
 
         total_tracks += tracks.size();
 
diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp
index e6743514c..67745c8db 100644
--- a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp
+++ b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp
@@ -35,30 +35,61 @@ auto toy_cfg =
     toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false);
 
 void fill_tracks(vecmem::vector<free_track_parameters<algebra_t>> &tracks,
-                 const std::size_t theta_steps, const std::size_t phi_steps) {
-    // Set momentum of tracks
-    const scalar mom_mag{10.f * unit<scalar>::GeV};
+                 const std::size_t n_tracks, bool do_sort = true) {
+    using scalar_t = dscalar<algebra_t>;
+    using uniform_gen_t =
+        detail::random_numbers<scalar_t,
+                               std::uniform_real_distribution<scalar_t>>;
+    using trk_generator_t =
+        random_track_generator<free_track_parameters<algebra_t>, uniform_gen_t>;
+
+    trk_generator_t::configuration trk_gen_cfg{};
+    trk_gen_cfg.seed(42u);
+    trk_gen_cfg.n_tracks(n_tracks);
+    trk_gen_cfg.randomize_charge(true);
+    trk_gen_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
+    trk_gen_cfg.eta_range(-3.f, 3.f);
+    trk_gen_cfg.mom_range(1.f * unit<scalar_t>::GeV,
+                          100.f * unit<scalar_t>::GeV);
+    trk_gen_cfg.origin({0.f, 0.f, 0.f});
+    trk_gen_cfg.origin_stddev({0.f * unit<scalar_t>::mm,
+                               0.f * unit<scalar_t>::mm,
+                               0.f * unit<scalar_t>::mm});
 
-    // Iterate through uniformly distributed momentum directions
+    // Iterate through the tracks produced by the random track generator
-    for (auto traj : uniform_track_generator<free_track_parameters<algebra_t>>(
-             phi_steps, theta_steps, mom_mag)) {
+    for (auto traj : trk_generator_t{trk_gen_cfg}) {
         tracks.push_back(traj);
     }
+
+    if (do_sort) {
+        // Sort tracks by ascending |pi_2 - theta| of their direction
+        const auto traj_comp = [](const auto &lhs, const auto &rhs) {
+            constexpr auto pi_2{constant<scalar_t>::pi_2};
+            return math::fabs(pi_2 - getter::theta(lhs.dir())) <
+                   math::fabs(pi_2 - getter::theta(rhs.dir()));
+        };
+
+        std::ranges::sort(tracks, traj_comp);
+    }
 }
 
 template <propagate_option opt>
 static void BM_PROPAGATOR_CUDA(benchmark::State &state) {
 
+    std::size_t n_tracks{static_cast<std::size_t>(state.range(0)) *
+                         static_cast<std::size_t>(state.range(0))};
+
     // Create the toy geometry
-    auto [det, names] = build_toy_detector(bp_mng_mr, toy_cfg);
+    auto [det, names] = build_toy_detector(host_mr, toy_cfg);
     test::vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
     auto bfield = bfield::create_const_field(B);
 
-    // Get detector data
-    auto det_data = detray::get_data(det);
-
     // vecmem copy helper object
-    vecmem::cuda::copy copy;
+    vecmem::cuda::copy cuda_cpy;
+
+    // Copy detector to device
+    auto det_buff = detray::get_buffer(det, dev_mr, cuda_cpy);
+    auto det_view = detray::get_data(det_buff);
 
     std::size_t total_tracks = 0;
 
@@ -68,8 +99,7 @@ static void BM_PROPAGATOR_CUDA(benchmark::State &state) {
 
         // Get tracks
         vecmem::vector<free_track_parameters<algebra_t>> tracks(&bp_mng_mr);
-        fill_tracks(tracks, static_cast<std::size_t>(state.range(0)),
-                    static_cast<std::size_t>(state.range(0)));
+        fill_tracks(tracks, n_tracks);
 
         total_tracks += tracks.size();
 
@@ -79,7 +109,7 @@ static void BM_PROPAGATOR_CUDA(benchmark::State &state) {
         auto tracks_data = vecmem::get_data(tracks);
 
         // Run the propagator test for GPU device
-        propagator_benchmark(det_data, bfield, tracks_data, opt);
+        propagator_benchmark(det_view, bfield, tracks_data, opt);
     }
 
     state.counters["TracksPropagated"] = benchmark::Counter(
diff --git a/tests/include/detray/test/device/cuda/material_validation.hpp b/tests/include/detray/test/device/cuda/material_validation.hpp
index 03ba9a81e..0a3b37139 100644
--- a/tests/include/detray/test/device/cuda/material_validation.hpp
+++ b/tests/include/detray/test/device/cuda/material_validation.hpp
@@ -81,13 +81,13 @@ struct run_material_validation {
         vecmem::data::vector_buffer<material_record_t> mat_records_buffer(
             static_cast<unsigned int>(tracks.size()), *dev_mr,
             vecmem::data::buffer_type::fixed_size);
-        cuda_cpy.setup(mat_records_buffer);
+        cuda_cpy.setup(mat_records_buffer)->wait();
         auto mat_records_view = vecmem::get_data(mat_records_buffer);
 
         // Buffer for the material parameters at every surface per track
         vecmem::data::jagged_vector_buffer<material_params_t> mat_steps_buffer(
             capacities, *dev_mr, host_mr, vecmem::data::buffer_type::resizable);
-        cuda_cpy.setup(mat_steps_buffer);
+        cuda_cpy.setup(mat_steps_buffer)->wait();
         auto mat_steps_view = vecmem::get_data(mat_steps_buffer);
 
         // Run the material tracing on device
@@ -96,10 +96,10 @@ struct run_material_validation {
 
         // Get the results back to the host and pass them on to be checked
         vecmem::vector<material_record_t> mat_records(host_mr);
-        cuda_cpy(mat_records_buffer, mat_records);
+        cuda_cpy(mat_records_buffer, mat_records)->wait();
 
         vecmem::jagged_vector<material_params_t> mat_steps(host_mr);
-        cuda_cpy(mat_steps_buffer, mat_steps);
+        cuda_cpy(mat_steps_buffer, mat_steps)->wait();
 
         return std::make_tuple(mat_records, mat_steps);
     }
diff --git a/tests/include/detray/test/device/cuda/navigation_validation.hpp b/tests/include/detray/test/device/cuda/navigation_validation.hpp
index 7a28ff474..91d57813d 100644
--- a/tests/include/detray/test/device/cuda/navigation_validation.hpp
+++ b/tests/include/detray/test/device/cuda/navigation_validation.hpp
@@ -103,20 +103,20 @@ inline auto run_navigation_validation(
         navigation::detail::candidate_record<intersection_t>>
         recorded_intersections_buffer(capacities, *dev_mr, host_mr,
                                       vecmem::data::buffer_type::resizable);
-    cuda_cpy.setup(recorded_intersections_buffer);
+    cuda_cpy.setup(recorded_intersections_buffer)->wait();
     auto recorded_intersections_view =
         vecmem::get_data(recorded_intersections_buffer);
 
     vecmem::data::vector_buffer<material_record_t> mat_records_buffer(
         static_cast<unsigned int>(truth_intersection_traces_view.size()),
         *dev_mr, vecmem::data::buffer_type::fixed_size);
-    cuda_cpy.setup(mat_records_buffer);
+    cuda_cpy.setup(mat_records_buffer)->wait();
     auto mat_records_view = vecmem::get_data(mat_records_buffer);
 
     // Buffer for the material parameters at every step per track
     vecmem::data::jagged_vector_buffer<material_params_t> mat_steps_buffer(
         capacities, *dev_mr, host_mr, vecmem::data::buffer_type::resizable);
-    cuda_cpy.setup(mat_steps_buffer);
+    cuda_cpy.setup(mat_steps_buffer)->wait();
     auto mat_steps_view = vecmem::get_data(mat_steps_buffer);
 
     // Run the navigation validation test on device
@@ -127,13 +127,13 @@ inline auto run_navigation_validation(
     // Get the results back to the host and pass them on to the checking
     vecmem::jagged_vector<navigation::detail::candidate_record<intersection_t>>
         recorded_intersections(host_mr);
-    cuda_cpy(recorded_intersections_buffer, recorded_intersections);
+    cuda_cpy(recorded_intersections_buffer, recorded_intersections)->wait();
 
     vecmem::vector<material_record_t> mat_records(host_mr);
-    cuda_cpy(mat_records_buffer, mat_records);
+    cuda_cpy(mat_records_buffer, mat_records)->wait();
 
     vecmem::jagged_vector<material_params_t> mat_steps(host_mr);
-    cuda_cpy(mat_steps_buffer, mat_steps);
+    cuda_cpy(mat_steps_buffer, mat_steps)->wait();
 
     return std::make_tuple(std::move(recorded_intersections),
                            std::move(mat_records), std::move(mat_steps));
diff --git a/tests/integration_tests/device/cuda/propagator_cuda_kernel.hpp b/tests/integration_tests/device/cuda/propagator_cuda_kernel.hpp
index 27f8c1227..9a577a8fb 100644
--- a/tests/integration_tests/device/cuda/propagator_cuda_kernel.hpp
+++ b/tests/integration_tests/device/cuda/propagator_cuda_kernel.hpp
@@ -68,7 +68,7 @@ inline auto run_propagation_device(
         steps_buffer(capacities, *mr, nullptr,
                      vecmem::data::buffer_type::resizable);
 
-    copy.setup(steps_buffer);
+    copy.setup(steps_buffer)->wait();
 
     // Run the propagator test for GPU device
     propagator_test<bfield_bknd_t, detector_t>(det_view, cfg, field_data,
@@ -76,7 +76,7 @@ inline auto run_propagation_device(
 
     vecmem::jagged_vector<detail::step_data<algebra_t>> steps(mr);
 
-    copy(steps_buffer, steps);
+    copy(steps_buffer, steps)->wait();
 
     return steps;
 }
diff --git a/tests/integration_tests/device/sycl/propagator_sycl_kernel.hpp b/tests/integration_tests/device/sycl/propagator_sycl_kernel.hpp
index c61deea68..e2b9d4fd4 100644
--- a/tests/integration_tests/device/sycl/propagator_sycl_kernel.hpp
+++ b/tests/integration_tests/device/sycl/propagator_sycl_kernel.hpp
@@ -55,7 +55,7 @@ inline auto run_propagation_device(
         steps_buffer(capacities, *mr, nullptr,
                      vecmem::data::buffer_type::resizable);
 
-    copy.setup(steps_buffer);
+    copy.setup(steps_buffer)->wait();
 
     // Run the propagator test for GPU device
     propagator_test<bfield_bknd_t, detector_t>(
@@ -63,7 +63,7 @@ inline auto run_propagation_device(
 
     vecmem::jagged_vector<detail::step_data<algebra_t>> steps(mr);
 
-    copy(steps_buffer, steps);
+    copy(steps_buffer, steps)->wait();
 
     return steps;
 }
diff --git a/tests/unit_tests/device/cuda/grids_grid2_cuda.cpp b/tests/unit_tests/device/cuda/grids_grid2_cuda.cpp
index 7851cb52a..8e01676c5 100644
--- a/tests/unit_tests/device/cuda/grids_grid2_cuda.cpp
+++ b/tests/unit_tests/device/cuda/grids_grid2_cuda.cpp
@@ -227,7 +227,7 @@ TEST(grids_cuda, grid2_buffer_attach_populator) {
     grid2_buffer<host_grid2_attach> g2_buffer(
         xaxis, yaxis, {100, 200, 300, 400}, mng_mr, nullptr,
         vecmem::data::buffer_type::resizable);
-    copy.setup(g2_buffer._buffer);
+    copy.setup(g2_buffer._buffer)->wait();
 
     // Check if the initialization work well
     // Non-zero starting size not working yet so initial argument for sizes is
@@ -246,7 +246,7 @@ TEST(grids_cuda, grid2_buffer_attach_populator) {
     grid_attach_fill_test(g2_buffer);
 
     host_grid2_attach g2(xaxis, yaxis, mng_mr, test::point3{0.f, 0.f, 0.f});
-    copy(g2_buffer._buffer, g2.data());
+    copy(g2_buffer._buffer, g2.data())->wait();
 
     // Check if each bin has 100 points
     EXPECT_EQ(g2.data()[0].size(), 100u);
@@ -272,7 +272,7 @@ TEST(grids_cuda, grid2_buffer_attach_populator2) {
 
     grid2_buffer<host_grid2_attach> g2_buffer(xaxis, yaxis, {1, 2, 3, 4},
                                               mng_mr);
-    copy.setup(g2_buffer._buffer);
+    copy.setup(g2_buffer._buffer)->wait();
 
     // Check if the initialization works well
     const auto& ptr = g2_buffer._buffer.host_ptr();
@@ -289,7 +289,7 @@ TEST(grids_cuda, grid2_buffer_attach_populator2) {
     grid_attach_assign_test(g2_buffer);
 
     host_grid2_attach g2(xaxis, yaxis, mng_mr, test::point3{0.f, 0.f, 0.f});
-    copy(g2_buffer._buffer, g2.data());
+    copy(g2_buffer._buffer, g2.data())->wait();
 
     // Check the outputs
     auto bin0 = g2.bin(0u);
diff --git a/tests/unit_tests/device/cuda/mask_store_cuda.cpp b/tests/unit_tests/device/cuda/mask_store_cuda.cpp
index 7a5dc2b60..de8c3eac6 100644
--- a/tests/unit_tests/device/cuda/mask_store_cuda.cpp
+++ b/tests/unit_tests/device/cuda/mask_store_cuda.cpp
@@ -92,7 +92,7 @@ TEST(mask_store_cuda, mask_store) {
         {n_points, n_points, n_points, n_points, n_points}, mng_mr, nullptr,
         vecmem::data::buffer_type::resizable);
 
-    copy.setup(output_buffer);
+    copy.setup(output_buffer)->wait();
 
     auto input_point3_data = vecmem::get_data(input_point3);
     auto store_data = get_data(store);
@@ -101,7 +101,7 @@ TEST(mask_store_cuda, mask_store) {
     mask_test(store_data, input_point3_data, output_buffer);
 
     vecmem::jagged_vector<int> output_device(&mng_mr);
-    copy(output_buffer, output_device);
+    copy(output_buffer, output_device)->wait();
 
     // Compare the values
     for (unsigned int i = 0u; i < n_points; i++) {
diff --git a/tests/unit_tests/device/cuda/navigator_cuda.cpp b/tests/unit_tests/device/cuda/navigator_cuda.cpp
index 08624961f..d9d1e9c8f 100644
--- a/tests/unit_tests/device/cuda/navigator_cuda.cpp
+++ b/tests/unit_tests/device/cuda/navigator_cuda.cpp
@@ -1,6 +1,6 @@
 /** Detray library, part of the ACTS project (R&D line)
  *
- * (c) 2022 CERN for the benefit of the ACTS project
+ * (c) 2022-2024 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -110,11 +110,11 @@ TEST(navigator_cuda, navigator) {
 
     vecmem::data::jagged_vector_buffer<dindex> volume_records_buffer(
         capacities, dev_mr, &mng_mr, vecmem::data::buffer_type::resizable);
-    copy.setup(volume_records_buffer);
+    copy.setup(volume_records_buffer)->wait();
 
     vecmem::data::jagged_vector_buffer<point3> position_records_buffer(
         capacities, dev_mr, &mng_mr, vecmem::data::buffer_type::resizable);
-    copy.setup(position_records_buffer);
+    copy.setup(position_records_buffer)->wait();
 
     // Get detector data
     auto det_data = detray::get_data(det);
@@ -127,8 +127,8 @@ TEST(navigator_cuda, navigator) {
                    volume_records_buffer, position_records_buffer);
 
     // Copy volume record buffer into volume & position records device
-    copy(volume_records_buffer, volume_records_device);
-    copy(position_records_buffer, position_records_device);
+    copy(volume_records_buffer, volume_records_device)->wait();
+    copy(position_records_buffer, position_records_device)->wait();
 
     for (unsigned int i = 0u; i < volume_records_host.size(); i++) {
 
diff --git a/tests/unit_tests/device/cuda/utils_ranges_cuda.cpp b/tests/unit_tests/device/cuda/utils_ranges_cuda.cpp
index 921580087..99cbe19fe 100644
--- a/tests/unit_tests/device/cuda/utils_ranges_cuda.cpp
+++ b/tests/unit_tests/device/cuda/utils_ranges_cuda.cpp
@@ -1,6 +1,6 @@
 /** Detray library, part of the ACTS project (R&D line)
  *
- * (c) 2022 CERN for the benefit of the ACTS project
+ * (c) 2022-2024 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -63,14 +63,14 @@ TEST(utils_ranges_cuda, iota) {
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(range[1] -
                                                                     range[0]),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(check_buffer);
+    copy.setup(check_buffer)->wait();
 
     // Run test function
     test_iota(range, check_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<dindex> check{&managed_resource};
-    copy(check_buffer, check);
+    copy(check_buffer, check)->wait();
 
     // Check the result
     ASSERT_EQ(check, reference);
@@ -112,14 +112,14 @@ TEST(utils_ranges_cuda, cartesian_product) {
     buffer_t check_buffer(static_cast<buffer_t::size_type>(size),
                           managed_resource,
                           vecmem::data::buffer_type::resizable);
-    copy.setup(check_buffer);
+    copy.setup(check_buffer)->wait();
 
     // Run test function
     test_cartesian_product(range1, range2, range3, check_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<std::tuple<dindex, dindex, dindex>> check{&managed_resource};
-    copy(check_buffer, check);
+    copy(check_buffer, check)->wait();
 
     // Check the result
     ASSERT_EQ(result.size(), check.size());
@@ -154,22 +154,22 @@ TEST(utils_ranges_cuda, enumerate) {
     vecmem::data::vector_buffer<dindex> idx_buffer(
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(seq.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(idx_buffer);
+    copy.setup(idx_buffer)->wait();
 
     vecmem::data::vector_buffer<dindex> value_buffer(
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(seq.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(value_buffer);
+    copy.setup(value_buffer)->wait();
 
     // Run test function
     test_enumerate(seq_data, idx_buffer, value_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<dindex> idx_vec{&managed_resource};
-    copy(idx_buffer, idx_vec);
+    copy(idx_buffer, idx_vec)->wait();
 
     vecmem::vector<dindex> value_vec{&managed_resource};
-    copy(value_buffer, value_vec);
+    copy(value_buffer, value_vec)->wait();
 
     // Check the result
     for (std::size_t i = 0u; i < idx_vec.size(); i++) {
@@ -200,22 +200,22 @@ TEST(utils_ranges_cuda, pick) {
     vecmem::data::vector_buffer<dindex> idx_buffer(
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(seq.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(idx_buffer);
+    copy.setup(idx_buffer)->wait();
 
     vecmem::data::vector_buffer<dindex> value_buffer(
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(seq.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(value_buffer);
+    copy.setup(value_buffer)->wait();
 
     // Run test function
     test_pick(seq_data, idx_data, idx_buffer, value_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<dindex> idx_vec{&managed_resource};
-    copy(idx_buffer, idx_vec);
+    copy(idx_buffer, idx_vec)->wait();
 
     vecmem::vector<dindex> value_vec{&managed_resource};
-    copy(value_buffer, value_vec);
+    copy(value_buffer, value_vec)->wait();
 
     // Check the result
     for (std::size_t i = 0u; i < idx_vec.size(); i++) {
@@ -247,14 +247,14 @@ TEST(utils_ranges_cuda, join) {
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(
             seq_1.size() + seq_2.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(value_buffer);
+    copy.setup(value_buffer)->wait();
 
     // Run test function
     test_join(seq_data_1, seq_data_2, value_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<dindex> value_vec{&managed_resource};
-    copy(value_buffer, value_vec);
+    copy(value_buffer, value_vec)->wait();
 
     // First sequence
     for (std::size_t i = 0u; i < seq_1.size(); i++) {
@@ -289,14 +289,14 @@ TEST(utils_ranges_cuda, static_join) {
         static_cast<vecmem::data::vector_buffer<dindex>::size_type>(
             seq_1.size() + seq_2.size()),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(value_buffer);
+    copy.setup(value_buffer)->wait();
 
     // Run test function
     test_static_join(seq_data_1, seq_data_2, value_buffer);
 
     // Copy vector buffer to output vector
     vecmem::vector<dindex> value_vec{&managed_resource};
-    copy(value_buffer, value_vec);
+    copy(value_buffer, value_vec)->wait();
 
     // First sequence
     for (std::size_t i = 0u; i < seq_1.size(); i++) {
@@ -330,14 +330,14 @@ TEST(utils_ranges_cuda, subrange) {
     vecmem::data::vector_buffer<int> check_buffer(
         static_cast<vecmem::data::vector_buffer<int>::size_type>(end - begin),
         managed_resource, vecmem::data::buffer_type::resizable);
-    copy.setup(check_buffer);
+    copy.setup(check_buffer)->wait();
 
     // Run test function
     test_subrange(seq_data, check_buffer, begin, end);
 
     // Copy vector buffer to output vector
     vecmem::vector<int> check{&managed_resource};
-    copy(check_buffer, check);
+    copy(check_buffer, check)->wait();
 
     // Check the result
     ASSERT_EQ(check[0], 1);