Skip to content

Commit

Permalink
Add CUDA benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
niermann999 committed Dec 9, 2024
1 parent d09d4ba commit 3f4585e
Show file tree
Hide file tree
Showing 18 changed files with 685 additions and 451 deletions.
23 changes: 20 additions & 3 deletions core/include/detray/definitions/pdg_particle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,37 @@ struct pdg_particle {
m_charge(static_cast<scalar_t>(charge)) {}

DETRAY_HOST_DEVICE
std::int32_t pdg_num() const { return m_pdg_num; }
constexpr std::int32_t pdg_num() const { return m_pdg_num; }

DETRAY_HOST_DEVICE
scalar_type mass() const { return m_mass; }
constexpr scalar_type mass() const { return m_mass; }

DETRAY_HOST_DEVICE
scalar_type charge() const { return m_charge; }
constexpr scalar_type charge() const { return m_charge; }

private:
std::int32_t m_pdg_num;
scalar_type m_mass;
scalar_type m_charge;
};

/// Apply the charge conjugation operator to a particle hypothesis.
///
/// @param ptc the particle hypothesis to conjugate
///
/// @returns a hypothesis with negated PDG number and negated charge (same
/// mass) if @c ptc has a non-zero charge, otherwise a copy of @c ptc
template <typename scalar_t>
DETRAY_HOST_DEVICE constexpr pdg_particle<scalar_t> charge_conjugation(
    const pdg_particle<scalar_t>& ptc) {
    // A hypothesis without charge is passed through unmodified
    if (ptc.charge() == 0) {
        return ptc;
    }

    // Flip the sign of both the PDG number and the charge, keep the mass
    return detray::pdg_particle<scalar_t>{-ptc.pdg_num(), ptc.mass(),
                                          -ptc.charge()};
}

/// Match a particle hypothesis to the charge sign of a track.
///
/// @param ptc the current particle hypothesis
/// @param params the track parameters, queried for their @c qop()
///
/// @returns @c ptc if the product of its charge and the track qop is strictly
/// positive, otherwise the result of @c charge_conjugation(ptc)
template <typename scalar_t, typename track_t>
DETRAY_HOST_DEVICE constexpr pdg_particle<scalar_t> update_particle_hypothesis(
    const pdg_particle<scalar_t>& ptc, const track_t& params) {
    // Same sign of hypothesis charge and qop: nothing to update
    if (ptc.charge() * params.qop() > 0.f) {
        return ptc;
    }

    return charge_conjugation(ptc);
}

// Macro for declaring the particle
#define DETRAY_DECLARE_PARTICLE(PARTICLE_NAME, PDG_NUM, MASS, CHARGE) \
template <typename scalar_t> \
Expand Down
4 changes: 2 additions & 2 deletions tests/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ target_include_directories(

target_link_libraries(
detray_benchmarks
INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_common
INTERFACE benchmark::benchmark vecmem::core detray::core detray::test_utils
)

unset(_detray_benchmarks_headers)
Expand All @@ -46,5 +46,5 @@ endif()
# Set up all of the "device" benchmarks.
if(DETRAY_BUILD_CUDA)
add_subdirectory(cuda)
#add_subdirectory( include/detray/benchmarks/device )
add_subdirectory(include/detray/benchmarks/device)
endif()
19 changes: 19 additions & 0 deletions tests/benchmarks/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,25 @@ macro(detray_add_cpu_benchmark algebra)
PRIVATE DETRAY_BENCHMARK_PRINTOUTS
)
endif()

# Build the CPU propagation benchmark executable from "propagation.cpp" for
# the algebra plugin passed to this macro.
# NOTE(review): the commands below address the target with a "detray_" prefix,
# so detray_add_executable presumably prepends it to the given name - confirm.
# NOTE(review): propagation.cpp appears to define its own main(); linking
# benchmark::benchmark_main may therefore be unnecessary - confirm.
detray_add_executable( benchmark_cpu_propagation_${algebra}
"propagation.cpp"
LINK_LIBRARIES detray::benchmark_cpu benchmark::benchmark_main
vecmem::core detray::core_${algebra} detray::test_utils
)

# Tune code generation for the CPU of the build machine and request loop
# auto-vectorization. The resulting binary may not run on a different CPU.
target_compile_options(
detray_benchmark_cpu_propagation_${algebra}
PRIVATE "-march=native" "-ftree-vectorize"
)

# Link against OpenMP only if it was found
if(OpenMP_CXX_FOUND)
target_link_libraries(
detray_benchmark_cpu_propagation_${algebra}
PRIVATE OpenMP::OpenMP_CXX
)
endif()
endmacro()

# Build the array benchmark.
Expand Down
66 changes: 34 additions & 32 deletions tests/benchmarks/cpu/propagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
*/

// Project include(s)
#include "detray/benchmarks/cpu/propagation_benchmark.hpp"
#include "detray/detectors/bfield.hpp"
#include "detray/navigation/navigator.hpp"
#include "detray/propagator/actor_chain.hpp"
Expand All @@ -17,6 +16,9 @@
#include "detray/propagator/rk_stepper.hpp"
#include "detray/tracks/tracks.hpp"

// Detray benchmark include(s)
#include "detray/benchmarks/cpu/propagation_benchmark.hpp"

// Detray test include(s).
#include "detray/test/utils/detectors/build_toy_detector.hpp"
#include "detray/test/utils/detectors/build_wire_chamber.hpp"
Expand Down Expand Up @@ -63,7 +65,7 @@ int main(int argc, char** argv) {

// Configure toy detector
toy_det_config toy_cfg{};
toy_cfg.use_material_maps(true).n_brl_layers(4u).n_edc_layers(7u);
toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);

std::cout << toy_cfg << std::endl;

Expand All @@ -80,36 +82,32 @@ int main(int argc, char** argv) {
std::cout << prop_cfg << std::endl;

// Benchmark config
detray::benchmark_base::configuration bench_cfg{};
detray::benchmarks::benchmark_base::configuration bench_cfg{};

std::vector<int> n_tracks{8 * 8, 16 * 16, 32 * 32,
64 * 64, 128 * 128, 256 * 256};
std::vector<int> n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64,
128 * 128, 256 * 256, 512 * 512};

int n_trks{*std::max_element(std::begin(n_tracks), std::end(n_tracks))};
std::cout << n_trks << std::endl;
auto trk_cfg =
detray::benchmarks::get_default_trk_gen_config<track_generator_t>(
n_tracks);

// Specific configuration for the random track generation
trk_cfg.seed(42u);

// Add additional tracks for warmup
std::size_t n_trks{trk_cfg.n_tracks()};
bench_cfg.n_warmup(
static_cast<int>(std::ceil(0.1f * static_cast<float>(n_trks))));
// Add tracks for warmup
n_trks += bench_cfg.do_warmup() ? bench_cfg.n_warmup() : 0;

// Generate tracks
track_generator_t::configuration trk_cfg{};
trk_cfg.seed(42u);
n_trks += static_cast<std::size_t>(
bench_cfg.do_warmup() ? bench_cfg.n_warmup() : 0);
trk_cfg.n_tracks(n_trks);
trk_cfg.randomize_charge(true);
trk_cfg.phi_range(-constant<scalar_t>::pi, constant<scalar_t>::pi);
trk_cfg.eta_range(-3.f, 3.f);
trk_cfg.mom_range(1.f * unit<scalar_t>::GeV, 100.f * unit<scalar_t>::GeV);
trk_cfg.origin({0.f, 0.f, 0.f});
trk_cfg.origin_stddev({0.f * unit<scalar_t>::mm, 0.f * unit<scalar_t>::mm,
0.f * unit<scalar_t>::mm});

std::cout << trk_cfg << std::endl;

//
// Prepare data
//
auto tracks = generate_tracks<track_generator_t>(&host_mr, trk_cfg);
auto tracks = detray::benchmarks::generate_tracks<track_generator_t>(
&host_mr, trk_cfg);

const auto [toy_det, names] = build_toy_detector(host_mr, toy_cfg);
const auto [wire_chamber, _] =
Expand All @@ -133,24 +131,28 @@ int main(int argc, char** argv) {
<< "----------------------\n\n";

prop_cfg.stepping.do_covariance_transport = true;
register_benchmark<propagation_bm, stepper_t, default_chain>(
detray::benchmarks::register_benchmark<
detray::benchmarks::host_propagation_bm, stepper_t, default_chain>(
"TOY_DETECTOR_W_COV_TRANSPORT", bench_cfg, prop_cfg, toy_det, bfield,
actor_states, tracks, n_tracks);
tracks, n_tracks, &actor_states);

prop_cfg.stepping.do_covariance_transport = false;
register_benchmark<propagation_bm, stepper_t, empty_chain_t>(
"TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, empty_state,
tracks, n_tracks);
detray::benchmarks::register_benchmark<
detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>(
"TOY_DETECTOR", bench_cfg, prop_cfg, toy_det, bfield, tracks, n_tracks,
&empty_state);

prop_cfg.stepping.do_covariance_transport = true;
register_benchmark<propagation_bm, stepper_t, default_chain>(
detray::benchmarks::register_benchmark<
detray::benchmarks::host_propagation_bm, stepper_t, default_chain>(
"WIRE_CHAMBER_W_COV_TRANSPORT", bench_cfg, prop_cfg, wire_chamber,
bfield, actor_states, tracks, n_tracks);
bfield, tracks, n_tracks, &actor_states);

prop_cfg.stepping.do_covariance_transport = false;
register_benchmark<propagation_bm, stepper_t, empty_chain_t>(
"WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, empty_state,
tracks, n_tracks);
detray::benchmarks::register_benchmark<
detray::benchmarks::host_propagation_bm, stepper_t, empty_chain_t>(
"WIRE_CHAMBER", bench_cfg, prop_cfg, wire_chamber, bfield, tracks,
n_tracks, &empty_state);

// Run benchmarks
::benchmark::Initialize(&argc, argv);
Expand Down
14 changes: 6 additions & 8 deletions tests/benchmarks/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,24 @@ if(DETRAY_EIGEN_PLUGIN)
endif()

foreach(algebra ${algebras})
detray_add_executable(benchmark_cuda_${algebra}
"benchmark_propagator_cuda_kernel.hpp"
"benchmark_propagator_cuda.cpp"
"benchmark_propagator_cuda_kernel.cu"
LINK_LIBRARIES detray::benchmarks detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils
detray_add_executable(benchmark_cuda_propagation_${algebra}
"propagation.cpp"
LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils
)

target_compile_definitions(
detray_benchmark_cuda_${algebra}
detray_benchmark_cuda_propagation_${algebra}
PRIVATE ${algebra}=${algebra}
)

target_compile_options(
detray_benchmark_cuda_${algebra}
detray_benchmark_cuda_propagation_${algebra}
PRIVATE "-march=native" "-ftree-vectorize"
)

if(OpenMP_CXX_FOUND)
target_link_libraries(
detray_benchmark_cuda_${algebra}
detray_benchmark_cuda_propagation_${algebra}
PRIVATE OpenMP::OpenMP_CXX
)
endif()
Expand Down
Loading

0 comments on commit 3f4585e

Please sign in to comment.