diff --git a/physics/Disk/include/accretion.hpp b/physics/Disk/include/accretion.hpp index cc22b89e8..9e46d9474 100644 --- a/physics/Disk/include/accretion.hpp +++ b/physics/Disk/include/accretion.hpp @@ -33,7 +33,7 @@ void computeAccretionCondition(size_t first, size_t last, Dataset& d, StarData& template void exchangeAndAccreteOnStar(StarData& star, double minDt_m1, int rank) { - const auto [m_accreted, p_accreted, m_removed, p_removed] = buffered_mpi_allreduce_sum( + const auto [m_accreted, p_accreted, m_removed, p_removed] = buffer::mpiAllreduceSum( star.accreted_local.mass, star.accreted_local.momentum, star.removed_local.mass, star.removed_local.momentum); const double m_star_new = m_accreted + star.m; diff --git a/physics/Disk/include/accretion_gpu.cu b/physics/Disk/include/accretion_gpu.cu index 68786caa7..8e12793d4 100644 --- a/physics/Disk/include/accretion_gpu.cu +++ b/physics/Disk/include/accretion_gpu.cu @@ -3,7 +3,7 @@ // #include "cstone/cuda/cub.hpp" -#include "cstone/cuda/cuda_runtime.hpp +#include "cstone/cuda/cuda_runtime.hpp" #include "cstone/cuda/cuda_utils.cuh" #include "cstone/findneighbors.hpp" diff --git a/physics/Disk/include/buffer_reduce.hpp b/physics/Disk/include/buffer_reduce.hpp index d158d2dce..80bff1f10 100644 --- a/physics/Disk/include/buffer_reduce.hpp +++ b/physics/Disk/include/buffer_reduce.hpp @@ -17,9 +17,11 @@ namespace disk { - -template -requires(N == flattened_size_v) void for_each_buffer(Fn&& f, std::array& buffer, Tuple&& args_tuple) +namespace buffer +{ +template +requires(std::tuple_size_v> == + flattened_size_v) void for_each_buffer(Fn&& f, buffer_type&& buffer, Tuple&& args_tuple) { size_t i_buffer = 0; auto access_buffer = [&i_buffer, &f, &buffer](auto&& arg) @@ -41,24 +43,46 @@ requires(N == flattened_size_v) void for_each_buffer(Fn&& f, std::array(res)); }, args_tuple); } -//! @brief Copy arguments of arithmetic type and of array of this type into a buffer; -//! to collect multiple MPI calls into one. -template -requires same_value_types auto buffered_mpi_allreduce_sum(const T&... args) +template +auto makeBuffer(const T&... args) { using value_type = std::common_type_t>...>; constexpr size_t buffer_size = flattened_size_v>; std::array buffer; - for_each_buffer([](auto& buf_element, const auto& value) { buf_element = value; }, buffer, std::tie(args...)); + return buffer; +} + +template +requires(N == flattened_size_v>) auto extractBuffer(const std::array& buffer) +{ + using RetTuple = std::tuple...>; + if constexpr (sizeof...(Ts) == 1) + { + std::tuple_element_t<0, RetTuple> result; + for_each_buffer([](const auto& buf_element, auto& res) { res = buf_element; }, buffer, std::tie(result)); + return result; + } + else + { + RetTuple result; + for_each_buffer([](const auto& buf_element, auto& res) { res = buf_element; }, buffer, result); + return result; + } +} - MPI_Allreduce(MPI_IN_PLACE, buffer.data(), buffer_size, MpiType{}, MPI_SUM, MPI_COMM_WORLD); +//! @brief Copy arguments of the same arithmetic type and of array of this type into a buffer, +//! to collect multiple MPI calls into one; returns a tuple if there is more than one argument. +template +auto mpiAllreduceSum(const T&... args) +{ + auto buffer = makeBuffer(args...); - std::tuple...> result; - for_each_buffer([](const auto& buf_element, auto& res) { res = buf_element; }, buffer, result); + MPI_Allreduce(MPI_IN_PLACE, buffer.data(), buffer.size(), MpiType>{}, MPI_SUM, + MPI_COMM_WORLD); - if constexpr (sizeof...(T) == 1) { return std::get<0>(result); } - else { return result; } + return extractBuffer(buffer); } +} // namespace buffer } // namespace disk diff --git a/physics/Disk/include/buffer_reduce_concepts.hpp b/physics/Disk/include/buffer_reduce_concepts.hpp index 4d855b63f..6bd56ed7e 100644 --- a/physics/Disk/include/buffer_reduce_concepts.hpp +++ b/physics/Disk/include/buffer_reduce_concepts.hpp @@ -13,14 +13,15 @@ namespace disk { - +namespace buffer +{ template concept array_type = requires(T t, std::size_t i) { t[i]; t.size(); std::tuple_size_v; - typename T::value_type; + typename std::remove_reference_t::value_type; }; template @@ -58,6 +59,9 @@ struct is_same_value_types : std::bool_constant<(std::is_same_v concept same_value_types = is_same_value_types...>::value; +template +concept bufferable_types = arithmetic_or_arrays&& same_value_types; + template struct flattened_size : std::integral_constant { @@ -82,4 +86,5 @@ struct flattened_size> : std::integral_constant inline constexpr std::size_t flattened_size_v = flattened_size>::value; +} // namespace buffer } // namespace disk \ No newline at end of file diff --git a/physics/Disk/include/exchange_star_position.hpp b/physics/Disk/include/exchange_star_position.hpp index 24e8941e5..11a362051 100644 --- a/physics/Disk/include/exchange_star_position.hpp +++ b/physics/Disk/include/exchange_star_position.hpp @@ -4,8 +4,6 @@ #pragma once -#include -#include #include "buffer_reduce.hpp" #include "cstone/primitives/mpi_wrappers.hpp" @@ -18,7 +16,7 @@ void computeAndExchangeStarPosition(StarData& star, double dt, double dt_m1) { if (star.fixed_star == 1) { return; } - const auto global_force = buffered_mpi_allreduce_sum(star.force_local); + const auto global_force = buffer::mpiAllreduceSum(star.force_local); star.potential = global_force[0]; diff --git a/physics/cooling/include/cooling/cooler.cpp b/physics/cooling/include/cooling/cooler.cpp index c5e7e4cba..415817ea9 100644 --- a/physics/cooling/include/cooling/cooler.cpp +++ b/physics/cooling/include/cooling/cooler.cpp @@ -40,8 +40,6 @@ void Cooler::cool_particles(const T dt, const Trho* rho, const Tu* u, const G template void Cooler::cool_particles(double, const float*, const double*, const GrackleFieldPtrs&, double*, const size_t, const size_t); -template void Cooler::cool_particles(double, const double*, const double*, const GrackleFieldPtrs&, double*, - const size_t, const size_t); template template @@ -53,8 +51,6 @@ void Cooler::computeTemperature(const Trho* rho, const Tu* u, const GrackleFi template void Cooler::computeTemperature(const float*, const double*, const GrackleFieldPtrs&, double*, const size_t, const size_t); -template void Cooler::computeTemperature(const double*, const double*, const GrackleFieldPtrs&, double*, - const size_t, const size_t); template template @@ -66,8 +62,6 @@ void Cooler::computePressures(const Trho* rho, const Tu* u, const GrackleFiel template void Cooler::computePressures(const float*, const double*, const GrackleFieldPtrs&, float*, const size_t, const size_t); -template void Cooler::computePressures(const double*, const double*, const GrackleFieldPtrs&, double*, - const size_t, const size_t); template template @@ -79,8 +73,6 @@ void Cooler::computeAdiabaticIndices(const Trho* rho, const Tu* u, const Grac template void Cooler::computeAdiabaticIndices(const float*, const double*, const GrackleFieldPtrs&, float*, const size_t, const size_t); -template void Cooler::computeAdiabaticIndices(const double*, const double*, const GrackleFieldPtrs&, double*, - const size_t, const size_t); template template @@ -92,8 +84,6 @@ double Cooler::cooling_timestep(const Trho* rho, const Tu* u, const GrackleFi template double Cooler::cooling_timestep(const float*, const double*, const GrackleFieldPtrs&, const size_t, const size_t); -template double Cooler::cooling_timestep(const double*, const double*, const GrackleFieldPtrs&, const size_t, - const size_t); template std::vector Cooler::getParameterNames()