diff --git a/core/include/detray/definitions/pdg_particle.hpp b/core/include/detray/definitions/pdg_particle.hpp index 36aa99759..95951b372 100644 --- a/core/include/detray/definitions/pdg_particle.hpp +++ b/core/include/detray/definitions/pdg_particle.hpp @@ -28,13 +28,13 @@ struct pdg_particle { m_charge(static_cast(charge)) {} DETRAY_HOST_DEVICE - std::int32_t pdg_num() const { return m_pdg_num; } + constexpr std::int32_t pdg_num() const { return m_pdg_num; } DETRAY_HOST_DEVICE - scalar_type mass() const { return m_mass; } + constexpr scalar_type mass() const { return m_mass; } DETRAY_HOST_DEVICE - scalar_type charge() const { return m_charge; } + constexpr scalar_type charge() const { return m_charge; } private: std::int32_t m_pdg_num; @@ -42,6 +42,23 @@ struct pdg_particle { scalar_type m_charge; }; +/// Apply the charge conjugation operator to a particle hypothesis @param ptc +template +DETRAY_HOST_DEVICE constexpr pdg_particle charge_conjugation( + const pdg_particle& ptc) { + return (ptc.charge() != 0) + ? detray::pdg_particle{-ptc.pdg_num(), ptc.mass(), + -ptc.charge()} + : ptc; +} + +/// @returns an updated particle hypothesis according to the track qop +template +DETRAY_HOST_DEVICE constexpr pdg_particle update_particle_hypothesis( + const pdg_particle& ptc, const track_t& params) { + return (ptc.charge() * params.qop() > 0.f) ? 
ptc : charge_conjugation(ptc); +} + // Macro for declaring the particle #define DETRAY_DECLARE_PARTICLE(PARTICLE_NAME, PDG_NUM, MASS, CHARGE) \ template \ diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt index 846ee04d4..12940e393 100644 --- a/tests/benchmarks/CMakeLists.txt +++ b/tests/benchmarks/CMakeLists.txt @@ -32,7 +32,7 @@ target_include_directories( target_link_libraries( detray_benchmarks - INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_common + INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_utils ) unset(_detray_benchmarks_headers) @@ -46,5 +46,5 @@ endif() # Set up all of the "device" benchmarks. if(DETRAY_BUILD_CUDA) add_subdirectory(cuda) - #add_subdirectory( include/detray/benchmarks/device ) + add_subdirectory(include/detray/benchmarks/device) endif() diff --git a/tests/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/cpu/CMakeLists.txt index cfb38d945..03140c4af 100644 --- a/tests/benchmarks/cpu/CMakeLists.txt +++ b/tests/benchmarks/cpu/CMakeLists.txt @@ -46,6 +46,25 @@ macro(detray_add_cpu_benchmark algebra) PRIVATE DETRAY_BENCHMARK_PRINTOUTS ) endif() + + # Build the benchmark executable for the propagation + detray_add_executable( benchmark_cpu_propagation_${algebra} + "propagation.cpp" + LINK_LIBRARIES detray::benchmark_cpu benchmark::benchmark_main + vecmem::core detray::core_${algebra} detray::test_utils + ) + + target_compile_options( + detray_benchmark_cpu_propagation_${algebra} + PRIVATE "-march=native" "-ftree-vectorize" + ) + + if(OpenMP_CXX_FOUND) + target_link_libraries( + detray_benchmark_cpu_propagation_${algebra} + PRIVATE OpenMP::OpenMP_CXX + ) + endif() endmacro() # Build the array benchmark. 
diff --git a/tests/benchmarks/cpu/propagation.cpp b/tests/benchmarks/cpu/propagation.cpp index d00dc6f1d..ec98ba096 100644 --- a/tests/benchmarks/cpu/propagation.cpp +++ b/tests/benchmarks/cpu/propagation.cpp @@ -6,7 +6,6 @@ */ // Project include(s) -#include "detray/benchmarks/cpu/propagation_benchmark.hpp" #include "detray/detectors/bfield.hpp" #include "detray/navigation/navigator.hpp" #include "detray/propagator/actor_chain.hpp" @@ -17,6 +16,9 @@ #include "detray/propagator/rk_stepper.hpp" #include "detray/tracks/tracks.hpp" +// Detray benchmark include(s) +#include "detray/benchmarks/cpu/propagation_benchmark.hpp" + // Detray test include(s). #include "detray/test/utils/detectors/build_toy_detector.hpp" #include "detray/test/utils/detectors/build_wire_chamber.hpp" @@ -63,7 +65,7 @@ int main(int argc, char** argv) { // Configure toy detector toy_det_config toy_cfg{}; - toy_cfg.use_material_maps(true).n_brl_layers(4u).n_edc_layers(7u); + toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u); std::cout << toy_cfg << std::endl; @@ -80,36 +82,32 @@ int main(int argc, char** argv) { std::cout << prop_cfg << std::endl; // Benchmark config - detray::benchmark_base::configuration bench_cfg{}; + detray::benchmarks::benchmark_base::configuration bench_cfg{}; - std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, - 64 * 64, 128 * 128, 256 * 256}; + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; - int n_trks{*std::max_element(std::begin(n_tracks), std::end(n_tracks))}; - std::cout << n_trks << std::endl; + auto trk_cfg = + detray::benchmarks::get_default_trk_gen_config( + n_tracks); + + // Specific configuration for the random track generation + trk_cfg.seed(42u); + + // Add additional tracks for warmup + std::size_t n_trks{trk_cfg.n_tracks()}; bench_cfg.n_warmup( static_cast(std::ceil(0.1f * static_cast(n_trks)))); // Add tracks for warmup - n_trks += bench_cfg.do_warmup() ? 
bench_cfg.n_warmup() : 0; - - // Generate tracks - track_generator_t::configuration trk_cfg{}; - trk_cfg.seed(42u); + n_trks += static_cast( + bench_cfg.do_warmup() ? bench_cfg.n_warmup() : 0); trk_cfg.n_tracks(n_trks); - trk_cfg.randomize_charge(true); - trk_cfg.phi_range(-constant::pi, constant::pi); - trk_cfg.eta_range(-3.f, 3.f); - trk_cfg.mom_range(1.f * unit::GeV, 100.f * unit::GeV); - trk_cfg.origin({0.f, 0.f, 0.f}); - trk_cfg.origin_stddev({0.f * unit::mm, 0.f * unit::mm, - 0.f * unit::mm}); - - std::cout << trk_cfg << std::endl; // // Prepare data // - auto tracks = generate_tracks(&host_mr, trk_cfg); + auto tracks = detray::benchmarks::generate_tracks( + &host_mr, trk_cfg); const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg); const auto [wire_chamber, _] = @@ -133,24 +131,28 @@ int main(int argc, char** argv) { << "----------------------\n\n"; prop_cfg.stepping.do_covariance_transport = true; - register_benchmark( + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, default_chain>( "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield, - actor_states, tracks, n_tracks); + tracks, n_tracks, &actor_states); prop_cfg.stepping.do_covariance_transport = false; - register_benchmark( - "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, empty_state, - tracks, n_tracks); + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>( + "TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, tracks, n_tracks, + &empty_state); prop_cfg.stepping.do_covariance_transport = true; - register_benchmark( + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, default_chain>( "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber, - bfield, actor_states, tracks, n_tracks); + bfield, tracks, n_tracks, &actor_states); prop_cfg.stepping.do_covariance_transport = false; - register_benchmark( - 
"WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, empty_state, - tracks, n_tracks); + detray::benchmarks::register_benchmark< + detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>( + "WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, tracks, + n_tracks, &empty_state); // Run benchmarks ::benchmark::Initialize(&argc, argv); diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt index d34039a18..a101f0854 100644 --- a/tests/benchmarks/cuda/CMakeLists.txt +++ b/tests/benchmarks/cuda/CMakeLists.txt @@ -26,26 +26,24 @@ if(DETRAY_EIGEN_PLUGIN) endif() foreach(algebra ${algebras}) - detray_add_executable(benchmark_cuda_${algebra} - "benchmark_propagator_cuda_kernel.hpp" - "benchmark_propagator_cuda.cpp" - "benchmark_propagator_cuda_kernel.cu" - LINK_LIBRARIES detray::benchmarks detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils + detray_add_executable(benchmark_cuda_propagation_${algebra} + "propagation.cpp" + LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils ) target_compile_definitions( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE ${algebra}=${algebra} ) target_compile_options( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE "-march=native" "-ftree-vectorize" ) if(OpenMP_CXX_FOUND) target_link_libraries( - detray_benchmark_cuda_${algebra} + detray_benchmark_cuda_propagation_${algebra} PRIVATE OpenMP::OpenMP_CXX ) endif() diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp deleted file mode 100644 index a19739c4e..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - 
*/ - -// Project include(s) -#include "benchmark_propagator_cuda_kernel.hpp" - -// Detray test include(s). -#include "detray/test/utils/detectors/build_toy_detector.hpp" -#include "detray/test/utils/simulation/event_generator/track_generators.hpp" -#include "detray/test/utils/types.hpp" - -// Vecmem include(s) -#include -#include -#include -#include - -// Google include(s). -#include - -using namespace detray; - -// VecMem memory resource(s) -vecmem::host_memory_resource host_mr; -vecmem::cuda::managed_memory_resource mng_mr; -vecmem::cuda::device_memory_resource dev_mr; -vecmem::binary_page_memory_resource bp_mng_mr(mng_mr); - -// detector configuration -auto toy_cfg = toy_det_config{} - .n_brl_layers(4u) - .n_edc_layers(7u) - .do_check(false) - .use_material_maps(true); - -void fill_tracks(vecmem::vector> &tracks, - const std::size_t n_tracks, bool do_sort = true) { - using scalar_t = dscalar; - using uniform_gen_t = - detail::random_numbers>; - using trk_generator_t = - random_track_generator, uniform_gen_t>; - - trk_generator_t::configuration trk_gen_cfg{}; - trk_gen_cfg.seed(42u); - trk_gen_cfg.n_tracks(n_tracks); - trk_gen_cfg.randomize_charge(true); - trk_gen_cfg.phi_range(-constant::pi, constant::pi); - trk_gen_cfg.eta_range(-3.f, 3.f); - trk_gen_cfg.mom_range(1.f * unit::GeV, - 100.f * unit::GeV); - trk_gen_cfg.origin({0.f, 0.f, 0.f}); - trk_gen_cfg.origin_stddev({0.f * unit::mm, - 0.f * unit::mm, - 0.f * unit::mm}); - - // Iterate through uniformly distributed momentum directions - for (auto traj : trk_generator_t{trk_gen_cfg}) { - tracks.push_back(traj); - } - - if (do_sort) { - // Sort by theta angle - const auto traj_comp = [](const auto &lhs, const auto &rhs) { - constexpr auto pi_2{constant::pi_2}; - return math::fabs(pi_2 - vector::theta(lhs.dir())) < - math::fabs(pi_2 - vector::theta(rhs.dir())); - }; - - std::ranges::sort(tracks, traj_comp); - } -} - -template -static void BM_PROPAGATOR_CPU(benchmark::State &state) { - - // Create the toy geometry 
and bfield - auto [det, names] = build_toy_detector(host_mr, toy_cfg); - test::vector3 B{0.f, 0.f, 2.f * unit::T}; - auto bfield = bfield::create_const_field(B); - - // Create propagator - propagation::config cfg{}; - cfg.navigation.search_window = {3u, 3u}; - propagator_host_type p{cfg}; - - std::size_t total_tracks = 0; - - // Get tracks - vecmem::vector> tracks(&host_mr); - fill_tracks(tracks, static_cast(state.range(0)), - static_cast(state.range(0))); - - total_tracks += tracks.size(); - - for (auto _ : state) { - -#pragma omp parallel for - for (auto &track : tracks) { - - parameter_transporter::state transporter_state{}; - pointwise_material_interactor::state interactor_state{}; - parameter_resetter::state resetter_state{}; - - auto actor_states = - tie(transporter_state, interactor_state, resetter_state); - - // Create the propagator state - propagator_host_type::state p_state(track, bfield, det); - - // Run propagation - if constexpr (opt == propagate_option::e_unsync) { - ::benchmark::DoNotOptimize(p.propagate(p_state, actor_states)); - } else if constexpr (opt == propagate_option::e_sync) { - ::benchmark::DoNotOptimize( - p.propagate_sync(p_state, actor_states)); - } - } - } - - state.counters["TracksPropagated"] = benchmark::Counter( - static_cast(total_tracks), benchmark::Counter::kIsRate); -} - -template -static void BM_PROPAGATOR_CUDA(benchmark::State &state) { - - std::size_t n_tracks{static_cast(state.range(0)) * - static_cast(state.range(0))}; - - // Create the toy geometry - auto [det, names] = build_toy_detector(host_mr, toy_cfg); - test::vector3 B{0.f, 0.f, 2.f * unit::T}; - auto bfield = bfield::create_const_field(B); - - // vecmem copy helper object - vecmem::cuda::copy cuda_cpy; - - // Copy detector to device - auto det_buff = detray::get_buffer(det, dev_mr, cuda_cpy); - auto det_view = detray::get_data(det_buff); - - std::size_t total_tracks = 0; - - // Get tracks - vecmem::vector> tracks(&bp_mng_mr); - fill_tracks(tracks, 
static_cast(state.range(0)), - static_cast(state.range(0))); - - total_tracks += tracks.size(); - - for (auto _ : state) { - - // Get tracks data - auto tracks_data = vecmem::get_data(tracks); - - // Run the propagator test for GPU device - propagator_benchmark(det_view, bfield, tracks_data, opt); - } - - state.counters["TracksPropagated"] = benchmark::Counter( - static_cast(total_tracks), benchmark::Counter::kIsRate); -} - -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_unsync) - ->Name("CUDA unsync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); -BENCHMARK_TEMPLATE(BM_PROPAGATOR_CUDA, propagate_option::e_sync) - ->Name("CUDA sync propagation") - ->RangeMultiplier(2) - ->Range(8, 256); - -BENCHMARK_MAIN(); diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu deleted file mode 100644 index 6bba06fd1..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.cu +++ /dev/null @@ -1,70 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -#include "benchmark_propagator_cuda_kernel.hpp" -#include "detray/definitions/detail/cuda_definitions.hpp" - -namespace detray { - -__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( - typename detector_host_type::view_type det_data, - covfie::field_view field_data, - vecmem::data::vector_view> tracks_data, - const propagate_option opt) { - - int gid = threadIdx.x + blockIdx.x * blockDim.x; - - detector_device_type det(det_data); - vecmem::device_vector> tracks(tracks_data); - - if (gid >= tracks.size()) { - return; - } - - // Create propagator - propagation::config cfg{}; - cfg.navigation.search_window = {3u, 3u}; - propagator_device_type p{cfg}; - - parameter_transporter::state transporter_state{}; - pointwise_material_interactor::state interactor_state{}; - parameter_resetter::state 
resetter_state{}; - - // Create the actor states - auto actor_states = - detray::tie(transporter_state, interactor_state, resetter_state); - // Create the propagator state - propagator_device_type::state p_state(tracks.at(gid), field_data, det); - - // Run propagation - if (opt == propagate_option::e_unsync) { - p.propagate(p_state, actor_states); - } else if (opt == propagate_option::e_sync) { - p.propagate_sync(p_state, actor_states); - } -} - -void propagator_benchmark( - typename detector_host_type::view_type det_data, - covfie::field_view field_data, - vecmem::data::vector_view>& tracks_data, - const propagate_option opt) { - - constexpr int thread_dim = 256; - int block_dim = - static_cast(tracks_data.size() + thread_dim - 1) / thread_dim; - - // run the test kernel - propagator_benchmark_kernel<<>>(det_data, field_data, - tracks_data, opt); - - // cuda error check - DETRAY_CUDA_ERROR_CHECK(cudaGetLastError()); - DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); -} - -} // namespace detray diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp b/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp deleted file mode 100644 index ee2f505cb..000000000 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda_kernel.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/** Detray library, part of the ACTS project (R&D line) - * - * (c) 2022-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ - -#pragma once - -// Project include(s) -#include "detray/definitions/detail/algebra.hpp" -#include "detray/definitions/units.hpp" -#include "detray/detectors/bfield.hpp" -#include "detray/detectors/toy_metadata.hpp" -#include "detray/navigation/navigator.hpp" -#include "detray/propagator/actor_chain.hpp" -#include "detray/propagator/actors/aborters.hpp" -#include "detray/propagator/actors/parameter_resetter.hpp" -#include "detray/propagator/actors/parameter_transporter.hpp" -#include 
"detray/propagator/actors/pointwise_material_interactor.hpp" -#include "detray/propagator/base_actor.hpp" -#include "detray/propagator/propagator.hpp" -#include "detray/propagator/rk_stepper.hpp" -#include "detray/tracks/tracks.hpp" - -using algebra_t = ALGEBRA_PLUGIN; - -using detector_host_type = - detray::detector; -using detector_device_type = - detray::detector; - -using navigator_host_type = detray::navigator; -using navigator_device_type = detray::navigator; -using field_type = detray::bfield::const_field_t; -using rk_stepper_type = detray::rk_stepper; -using actor_chain_t = - detray::actor_chain, - detray::pointwise_material_interactor, - detray::parameter_resetter>; -using propagator_host_type = - detray::propagator; -using propagator_device_type = - detray::propagator; - -enum class propagate_option { - e_unsync = 0, - e_sync = 1, -}; - -namespace detray { - -/// test function for propagator with single state -void propagator_benchmark( - typename detector_host_type::view_type det_data, - typename field_type::view_t field_data, - vecmem::data::vector_view>& tracks_data, - const propagate_option opt); - -} // namespace detray diff --git a/tests/benchmarks/cuda/propagation.cpp b/tests/benchmarks/cuda/propagation.cpp new file mode 100644 index 000000000..6ceebd7b7 --- /dev/null +++ b/tests/benchmarks/cuda/propagation.cpp @@ -0,0 +1,139 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include 
"detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" + +// Detray test include(s). +#include "detray/test/utils/detectors/build_toy_detector.hpp" +#include "detray/test/utils/detectors/build_wire_chamber.hpp" +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Vecmem include(s) +#include +#include + +// System include(s) +#include +#include + +using namespace detray; + +int main(int argc, char** argv) { + + using toy_detector_t = detector; + using algebra_t = typename toy_detector_t::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + using field_bknd_t = bfield::const_bknd_t; + + vecmem::host_memory_resource host_mr; + vecmem::cuda::device_memory_resource dev_mr; + + // + // Configuration + // + + // Constant magnetic field + vector3_t B{0.f, 0.f, 2.f * unit::T}; + + // Configure toy detector + toy_det_config toy_cfg{}; + toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u); + + std::cout << toy_cfg << std::endl; + + // Configure wire chamber + wire_chamber_config wire_chamber_cfg{}; + wire_chamber_cfg.half_z(500.f * unit::mm); + + std::cout << wire_chamber_cfg << std::endl; + + // Configure propagation + propagation::config prop_cfg{}; + prop_cfg.navigation.search_window = {3u, 3u}; + + std::cout << prop_cfg << std::endl; + + // Benchmark config + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + auto trk_cfg = + detray::benchmarks::get_default_trk_gen_config( + n_tracks); + + // Specific configuration for the random track generation + trk_cfg.seed(42u); + + // Add additional tracks for warmup + std::size_t 
n_trks{trk_cfg.n_tracks()}; + bench_cfg.n_warmup( + static_cast(std::ceil(0.1f * static_cast(n_trks)))); + // Add tracks for warmup + n_trks += static_cast( + bench_cfg.do_warmup() ? bench_cfg.n_warmup() : 0); + trk_cfg.n_tracks(n_trks); + + // + // Prepare data + // + auto tracks = detray::benchmarks::generate_tracks( + &host_mr, trk_cfg, true); + + const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg); + const auto [wire_chamber, _] = + build_wire_chamber(host_mr, wire_chamber_cfg); + + auto bfield = bfield::create_const_field(B); + + // + // Register benchmarks + // + std::cout << "Propagation Benchmarks\n" + << "----------------------\n\n"; + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type>( + "TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield, + tracks, n_tracks, &dev_mr); + + prop_cfg.stepping.do_covariance_transport = true; + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type>( + "WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber, + bfield, tracks, n_tracks, &dev_mr); + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp index 3d1b79d58..c867b6f19 100644 --- a/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp +++ b/tests/benchmarks/include/detray/benchmarks/benchmark_base.hpp @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2023 CERN for the benefit of the ACTS project + * (c) 2023-2024 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -14,14 +14,14 @@ #include #include -namespace detray { +namespace 
detray::benchmarks { -/// Base type for linear algebra benchmarks with google benchmark +/// Base type for detray benchmarks with google benchmark struct benchmark_base { /// Local configuration type struct configuration { /// Size of data sample to be used in benchmark - int m_samples{100u}; + int m_samples{100}; /// Run a number of operations before the benchmark bool m_warmup = true; // Size of data in warm-up round @@ -46,14 +46,24 @@ struct benchmark_base { /// Getters /// @{ - int n_samples() const { return m_samples; } + constexpr int n_samples() const { return m_samples; } constexpr bool do_warmup() const { return m_warmup; } constexpr int n_warmup() const { return m_n_warmup; } /// @} - /// Print configuration + private: + /// Print the benchmark setup friend std::ostream& operator<<(std::ostream& os, - const configuration& c); + const configuration& cfg) { + os << " -> running:\t " << cfg.n_samples() << " samples" + << std::endl; + if (cfg.do_warmup()) { + os << " -> warmup: \t " << cfg.n_warmup() << " samples" + << std::endl; + } + os << std::endl; + return os; + } }; /// Default construction @@ -63,14 +73,4 @@ struct benchmark_base { virtual ~benchmark_base() = default; }; -std::ostream& operator<<(std::ostream& os, - const benchmark_base::configuration& cfg) { - os << " -> running:\t " << cfg.n_samples() << " samples" << std::endl; - if (cfg.do_warmup()) { - os << " -> warmup: \t " << cfg.n_warmup() << " samples" << std::endl; - } - os << std::endl; - return os; -} - -} // namespace detray +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt index e245ce843..c2bc1a0d8 100644 --- a/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt +++ b/tests/benchmarks/include/detray/benchmarks/cpu/CMakeLists.txt @@ -4,7 +4,7 @@ # # Mozilla Public License Version 2.0 -# Set the CUDA build flags. +# Set the CPU build flags. 
include(detray-compiler-options-cpp) # Set up a test library, which the "new style" benchmarks and tests could use. @@ -12,7 +12,4 @@ add_library(detray_benchmark_cpu INTERFACE "propagation_benchmark.hpp") add_library(detray::benchmark_cpu ALIAS detray_benchmark_cpu) -target_link_libraries( - detray_benchmark_cpu - INTERFACE detray::benchmarks detray::test_common -) +target_link_libraries(detray_benchmark_cpu INTERFACE detray::benchmarks) diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp index d9ead60c9..4e886c90d 100644 --- a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp +++ b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp @@ -5,16 +5,16 @@ * Mozilla Public License Version 2.0 */ +#pragma once + // Project include(s) +#include "detray/definitions/detail/algebra.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) #include "detray/benchmarks/benchmark_base.hpp" #include "detray/benchmarks/propagation_benchmark_config.hpp" #include "detray/benchmarks/propagation_benchmark_utils.hpp" -#include "detray/core/detail/container_views.hpp" -#include "detray/definitions/detail/algebra.hpp" -#include "detray/definitions/detail/containers.hpp" -#include "detray/definitions/units.hpp" -#include "detray/detectors/bfield.hpp" -#include "detray/tracks/tracks.hpp" // Benchmark include #include @@ -25,11 +25,12 @@ #include #include -namespace detray { +namespace detray::benchmarks { template -struct propagation_bm : public benchmark_base { + detray::benchmarks::propagate_option opt = + detray::benchmarks::propagate_option::e_unsync> +struct host_propagation_bm : public benchmark_base { /// Detector dependent types using algebra_t = typename propagator_t::detector_type::algebra_type; using scalar_t = dscalar; @@ -42,41 +43,30 @@ struct propagation_bm : public benchmark_base { configuration m_cfg{}; 
/// Default construction - propagation_bm() = default; + host_propagation_bm() = default; /// Construct from an externally provided configuration @param cfg - propagation_bm(configuration cfg) : m_cfg{cfg} {} + explicit host_propagation_bm(const configuration &cfg) : m_cfg{cfg} {} /// @return the benchmark configuration configuration &config() { return m_cfg; } /// Prepare data and run benchmark loop - inline void operator()( - ::benchmark::State &state, - dvector> *tracks_ptr, - const typename propagator_t::detector_type *det_ptr, - const bfield_t *bfield_ptr, - typename propagator_t::actor_chain_type::state_tuple *actor_states_ptr) - const { + inline void operator()(::benchmark::State &state, + dvector> *tracks, + const typename propagator_t::detector_type *det, + const bfield_t *bfield, + typename propagator_t::actor_chain_type::state_tuple + *input_actor_states) const { using actor_states_t = typename propagator_t::actor_chain_type::state_tuple; - auto &tracks{*tracks_ptr}; - const auto &det{*det_ptr}; - const auto &bfield{*bfield_ptr}; - auto &input_actor_states{*actor_states_ptr}; - const int n_samples{m_cfg.benchmark().n_samples()}; const int n_warmup{m_cfg.benchmark().n_warmup()}; - assert(static_cast(n_samples + n_warmup) <= tracks.size()); - - // Shuffle the sample - std::random_device rd; - std::mt19937 gen(rd()); - - std::shuffle(std::begin(tracks), std::end(tracks), gen); + assert(static_cast(n_samples + n_warmup) <= + tracks->size()); // Create propagator propagator_t p{m_cfg.propagation()}; @@ -85,8 +75,9 @@ struct propagation_bm : public benchmark_base { if (m_cfg.benchmark().do_warmup()) { #pragma omp parallel for for (int i = 0; i < n_warmup; ++i) { + const auto i_u{static_cast(i)}; // Fresh copy of actor states - actor_states_t actor_state_tuple(input_actor_states); + actor_states_t actor_state_tuple(*input_actor_states); // Tuple of references to pass to the propagator typename propagator_t::actor_chain_type::state actor_states = 
setup_actor_states( @@ -95,13 +86,20 @@ struct propagation_bm : public benchmark_base { std::size_t, detail::tuple_size_v>{}); - typename propagator_t::state p_state(tracks[i], bfield, det); + typename propagator_t::state p_state((*tracks)[i_u], *bfield, + *det); + // Particle hypothesis + auto &ptc = p_state._stepping.particle_hypothesis(); + p_state.set_particle( + update_particle_hypothesis(ptc, (*tracks)[i_u])); // Run propagation - if constexpr (opt == propagate_option::e_unsync) { + if constexpr (opt == + detray::benchmarks::propagate_option::e_unsync) { ::benchmark::DoNotOptimize( p.propagate(p_state, actor_states)); - } else if constexpr (opt == propagate_option::e_sync) { + } else if constexpr (opt == detray::benchmarks:: + propagate_option::e_sync) { ::benchmark::DoNotOptimize( p.propagate_sync(p_state, actor_states)); } @@ -109,35 +107,46 @@ struct propagation_bm : public benchmark_base { } // Run the benchmark + std::size_t total_tracks = 0u; for (auto _ : state) { #pragma omp parallel for for (int i = n_warmup; i < n_samples + n_warmup; ++i) { + const auto i_u{static_cast(i)}; + // Fresh copy of actor states - actor_states_t actor_state_tuple(input_actor_states); + actor_states_t actor_state_tuple(*input_actor_states); // Tuple of references to pass to the propagator typename propagator_t::actor_chain_type::state actor_states = - setup_actor_states( + detray::benchmarks::setup_actor_states( actor_state_tuple, std::make_integer_sequence< std::size_t, detail::tuple_size_v>{}); - typename propagator_t::state p_state(tracks[i], bfield, det); + typename propagator_t::state p_state((*tracks)[i_u], *bfield, + *det); + // Particle hypothesis + auto &ptc = p_state._stepping.particle_hypothesis(); + p_state.set_particle( + update_particle_hypothesis(ptc, (*tracks)[i_u])); // Run propagation - if constexpr (opt == propagate_option::e_unsync) { + if constexpr (opt == + detray::benchmarks::propagate_option::e_unsync) { ::benchmark::DoNotOptimize( 
p.propagate(p_state, actor_states)); - } else if constexpr (opt == propagate_option::e_sync) { + } else if constexpr (opt == detray::benchmarks:: + propagate_option::e_sync) { ::benchmark::DoNotOptimize( p.propagate_sync(p_state, actor_states)); } } + total_tracks += static_cast(n_samples); } // Report throughput state.counters["TracksPropagated"] = benchmark::Counter( - static_cast(n_samples), benchmark::Counter::kIsRate); + static_cast(total_tracks), benchmark::Counter::kIsRate); } }; -} // namespace detray +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt new file mode 100644 index 000000000..71bce8dfa --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/CMakeLists.txt @@ -0,0 +1,9 @@ +# Detray library, part of the ACTS project (R&D line) +# +# (c) 2024 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +if(DETRAY_BUILD_CUDA) + add_subdirectory(cuda) +endif() diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt new file mode 100644 index 000000000..9548b0dd7 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt @@ -0,0 +1,29 @@ +# Detray library, part of the ACTS project (R&D line) +# +# (c) 2024 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +# C++17 support for CUDA requires CMake 3.18. +cmake_minimum_required(VERSION 3.18) + +# Enable CUDA as a language. +enable_language(CUDA) + +# Set the CUDA build flags. 
+include(detray-compiler-options-cuda) + +# Set up a benchmark library for CUDA +add_library( + detray_benchmark_cuda + STATIC + "propagation_benchmark.hpp" + "propagation_benchmark.cu" +) + +add_library(detray::benchmark_cuda ALIAS detray_benchmark_cuda) + +target_link_libraries( + detray_benchmark_cuda + PUBLIC detray::benchmarks detray::core_array vecmem::cuda +) diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu new file mode 100644 index 000000000..024698cf6 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu @@ -0,0 +1,104 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" +#include "detray/core/detector_metadata.hpp" +#include "detray/definitions/detail/cuda_definitions.hpp" +#include "detray/detectors/toy_metadata.hpp" + +namespace detray::benchmarks { + +template +__global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( + propagation::config cfg, + typename propagator_t::detector_type::view_type det_view, + typename propagator_t::stepper_type::magnetic_field_type field_view, + vecmem::data::vector_view< + free_track_parameters> + tracks_view, + const detray::benchmarks::propagate_option opt) { + + using detector_device_t = + detector; + using algebra_t = typename detector_device_t::algebra_type; + using propagator_device_t = + propagator, + typename propagator_t::actor_chain_type>; + + detector_device_t det(det_view); + vecmem::device_vector> tracks(tracks_view); + + int gid = threadIdx.x + blockIdx.x * blockDim.x; + if (gid >= tracks.size()) { + return; + } + + // Create propagator + propagator_device_t p{cfg}; + + typename parameter_transporter::state transporter_state{}; + typename 
pointwise_material_interactor::state interactor_state{}; + typename parameter_resetter::state resetter_state{}; + + // Create the actor states + auto actor_states = + tie(transporter_state, interactor_state, resetter_state); + + // Create the propagator state + typename propagator_device_t::state p_state(tracks.at(gid), field_view, + det); + // Particle hypothesis + auto& ptc = p_state._stepping.particle_hypothesis(); + p_state.set_particle(update_particle_hypothesis(ptc, tracks.at(gid))); + + // Run propagation + if (opt == detray::benchmarks::propagate_option::e_unsync) { + p.propagate(p_state, actor_states); + } else if (opt == detray::benchmarks::propagate_option::e_sync) { + p.propagate_sync(p_state, actor_states); + } +} + +template +void run_propagation_kernel( + const propagation::config& cfg, + typename propagator_t::detector_type::view_type det_view, + typename propagator_t::stepper_type::magnetic_field_type field_view, + vecmem::data::vector_view< + free_track_parameters> + tracks_view, + const int n_samples, const detray::benchmarks::propagate_option opt) { + + constexpr int thread_dim = 256; + int block_dim = (n_samples + thread_dim - 1) / thread_dim; + + // run the test kernel + propagator_benchmark_kernel<<>>( + cfg, det_view, field_view, tracks_view, opt); + + // cuda error check + DETRAY_CUDA_ERROR_CHECK(cudaGetLastError()); + DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); +} + +/// Macro declaring the template instantiations for the different detector types +#define DECLARE_PROPAGATION_BENCHMARK(METADATA, FIELD) \ + \ + template void \ + run_propagation_kernel>( \ + const propagation::config&, detector::view_type, \ + covfie::field_view, \ + vecmem::data::vector_view< \ + free_track_parameters::algebra_type>>, \ + const int, const detray::benchmarks::propagate_option); + +DECLARE_PROPAGATION_BENCHMARK(default_metadata, bfield::const_bknd_t) +DECLARE_PROPAGATION_BENCHMARK(toy_metadata, bfield::const_bknd_t) + +} // namespace detray::benchmarks 
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp new file mode 100644 index 000000000..577860123 --- /dev/null +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp @@ -0,0 +1,136 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/definitions/detail/algebra.hpp" +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/propagator.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/benchmark_base.hpp" +#include "detray/benchmarks/propagation_benchmark_config.hpp" +#include "detray/benchmarks/propagation_benchmark_utils.hpp" + +// Vecmem include(s) +#include +#include +#include +#include + +// Benchmark include +#include + +// System include(s) +#include +#include +#include +#include + +namespace detray::benchmarks { + +// Define propagator type +template +using default_chain = actor_chain, + pointwise_material_interactor, + parameter_resetter>; + +template +using cuda_propagator_type = + propagator, + typename detector::algebra_type>, + navigator>, + default_chain::algebra_type>>; + +/// Launch the propagation kernel for benchmarking +/// +/// @param cfg the propagation configuration +/// @param det_view the detector vecmem view +/// @param field_data the magnetic field view (maybe an empty field) +/// 
@param tracks_data the track collection view +/// @param navigation_cache_view the navigation cache vecmem view +/// @param opt which propagation to run (sync vs. unsync) +template +void run_propagation_kernel( + const propagation::config &cfg, + typename propagator_t::detector_type::view_type det_view, + typename propagator_t::stepper_type::magnetic_field_type field_data, + vecmem::data::vector_view< + free_track_parameters> + tracks_data, + const int n_samples, const detray::benchmarks::propagate_option opt); + +/// Device Propagation benchmark +template +struct cuda_propagation_bm : public benchmark_base { + /// Detector dependent types + using algebra_t = typename propagator_t::detector_type::algebra_type; + using scalar_t = dscalar; + using vector3_t = dvector3D; + + /// Local configuration type + using configuration = propagation_benchmark_config; + + /// The benchmark configuration + configuration m_cfg{}; + + /// Default construction + cuda_propagation_bm() = default; + + /// Construct from an externally provided configuration @param cfg + explicit cuda_propagation_bm(const configuration &cfg) : m_cfg{cfg} {} + + /// @return the benchmark configuration + configuration &config() { return m_cfg; } + + /// Prepare data and run benchmark loop + inline void operator()(::benchmark::State &state, + vecmem::memory_resource *dev_mr, + dvector> *tracks, + const typename propagator_t::detector_type *det, + const bfield_bknd_t *bfield) const { + + // Helper object for performing memory copies (to CUDA devices) + vecmem::cuda::copy cuda_cpy; + + const int n_samples{m_cfg.benchmark().n_samples()}; + + // Copy the track collection to device + auto track_buffer = + detray::get_buffer(vecmem::get_data(*tracks), *dev_mr, cuda_cpy); + + // Copy the detector to device and get its view + auto det_buffer = detray::get_buffer(*det, *dev_mr, cuda_cpy); + auto det_view = detray::get_data(det_buffer); + + std::size_t total_tracks = 0u; + for (auto _ : state) { + // Launch the 
propagator test for GPU device + run_propagation_kernel(m_cfg.propagation(), det_view, + *bfield, track_buffer, + n_samples, opt); + + total_tracks += static_cast(n_samples); + } + // Report throughput + state.counters["TracksPropagated"] = benchmark::Counter( + static_cast(total_tracks), benchmark::Counter::kIsRate); + } +}; + +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp index 4c2f52c66..32dbcc617 100644 --- a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_config.hpp @@ -5,14 +5,17 @@ * Mozilla Public License Version 2.0 */ +#pragma once + // Project include(s) #include "detray/benchmarks/benchmark_base.hpp" #include "detray/propagator/propagation_config.hpp" // System include(s) #include +#include -namespace detray { +namespace detray::benchmarks { /// Configuration for propagation benchmarks struct propagation_benchmark_config { @@ -27,8 +30,8 @@ struct propagation_benchmark_config { propagation_benchmark_config() = default; /// Construct from a base configuration - propagation_benchmark_config( - const detray::benchmark_base::configuration& bench_cfg) + explicit propagation_benchmark_config( + const benchmark_base::configuration& bench_cfg) : m_benchmark(bench_cfg) {} /// Getters @@ -44,11 +47,11 @@ struct propagation_benchmark_config { /// Setters /// @{ - propagation_benchmark_config& name(std::string& n) { + propagation_benchmark_config& name(const std::string_view n) { m_name = n; return *this; } /// @} }; -} // namespace detray +} // namespace detray::benchmarks diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp index 80f065be1..dff6f844c 100644 --- 
a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp @@ -5,10 +5,10 @@ * Mozilla Public License Version 2.0 */ +#pragma once + // Project include(s) -#include "detray/core/detail/container_views.hpp" #include "detray/definitions/detail/algebra.hpp" -#include "detray/definitions/detail/containers.hpp" #include "detray/navigation/navigator.hpp" #include "detray/propagator/actor_chain.hpp" #include "detray/propagator/propagator.hpp" @@ -22,10 +22,11 @@ #include // System include(s) +#include #include #include -namespace detray { +namespace detray::benchmarks { /// Which propagate function to run enum class propagate_option { @@ -33,20 +34,42 @@ enum class propagate_option { e_sync = 1, }; -/// Define propagator type -template > -using propagator_t = - propagator, actor_chain_t>; +/// @returns the default track generation configuration for detray benchmarks +template +inline typename track_generator_t::configuration get_default_trk_gen_config( + const std::vector &n_tracks) { + + using track_t = typename track_generator_t::track_type; + using scalar_t = dscalar; + + int n_trks{*std::ranges::max_element(n_tracks)}; + + // Generate tracks + typename track_generator_t::configuration trk_cfg{}; + trk_cfg.n_tracks(static_cast(n_trks)); + trk_cfg.randomize_charge(true); + trk_cfg.phi_range(-constant::pi, constant::pi); + trk_cfg.eta_range(-3.f, 3.f); + trk_cfg.mom_range(1.f * unit::GeV, 100.f * unit::GeV); + trk_cfg.origin({0.f, 0.f, 0.f}); + trk_cfg.origin_stddev({0.f * unit::mm, 0.f * unit::mm, + 0.f * unit::mm}); + + return trk_cfg; +} /// Precompute the tracks template inline auto generate_tracks( vecmem::memory_resource *mr, - const typename track_generator_t::configuration &cfg = {}) { + const typename track_generator_t::configuration &cfg = {}, + bool do_sort = true) { + + using track_t = typename track_generator_t::track_type; + using scalar_t = dscalar; // Track 
collection - dvector tracks(mr); + dvector tracks(mr); // Iterate through uniformly distributed momentum directions for (auto track : track_generator_t{cfg}) { @@ -54,12 +77,23 @@ inline auto generate_tracks( tracks.push_back(track); } + if (do_sort) { + // Sort by theta angle + const auto traj_comp = [](const auto &lhs, const auto &rhs) { + constexpr auto pi_2{constant::pi_2}; + return math::fabs(pi_2 - vector::theta(lhs.dir())) < + math::fabs(pi_2 - vector::theta(rhs.dir())); + }; + + std::ranges::sort(tracks, traj_comp); + } + return tracks; } /// Tie the actor states for the propagation template -inline constexpr auto setup_actor_states( +constexpr auto setup_actor_states( typename propagator_t::actor_chain_type::state_tuple &input_actor_states, std::index_sequence) { @@ -74,9 +108,8 @@ inline constexpr auto setup_actor_states( /// Register a propagation benchmark of type @tparam benchmark_t /// /// @tparam benchmark_t the propagation benchmark functor -/// @tparam stepper_t the stepper to use fro track parameter transport -/// @tparam actor_chain_t types of actors -/// @tparam detector_t detector type +/// @tparam propagator_t full propagator type +/// @tparam detector_t host detector type /// @tparam bfield_t covfie magnetic field type /// /// @param name name for the benchmark @@ -88,34 +121,82 @@ inline constexpr auto setup_actor_states( /// actor_chain_t) /// @param tracks the pre-computed test tracks /// @param n_samples the number of track to run -template