diff --git a/CMakeLists.txt b/CMakeLists.txt index c22aaad1..43fe3f21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,13 +118,12 @@ set(LIB_SRC_FILES ${SRC_DIR}/nektar_interface/particle_cell_mapping/map_particles_host.cpp ${SRC_DIR}/nektar_interface/particle_cell_mapping/nektar_graph_local_mapper.cpp ${SRC_DIR}/nektar_interface/utilities.cpp - ${SRC_DIR}/nektar_interface/solver_base/partsys_base.cpp - ) + ${SRC_DIR}/nektar_interface/solver_base/partsys_base.cpp) -#set(LIB_SRC_FILES_IGNORE ${SRC_DIR}/main.cpp) +# set(LIB_SRC_FILES_IGNORE ${SRC_DIR}/main.cpp) check_file_list(${SRC_DIR} cpp "${LIB_SRC_FILES}" "${LIB_SRC_FILES_IGNORE}") -#set(MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) +# set(MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) set(HEADER_FILES ${INC_DIR}/nektar_interface/basis_evaluation.hpp ${INC_DIR}/nektar_interface/basis_reference.hpp @@ -218,8 +217,7 @@ set(HEADER_FILES ${INC_DIR}/particle_utility/particle_initialisation_line.hpp ${INC_DIR}/particle_utility/position_distribution.hpp ${INC_DIR}/solvers/solver_callback_handler.hpp - ${INC_DIR}/solvers/solver_runner.hpp - ) + ${INC_DIR}/solvers/solver_runner.hpp) set(HEADER_FILES_IGNORE ${INC_DIR}/revision.hpp) check_file_list(${INC_DIR} hpp "${HEADER_FILES}" "${HEADER_FILES_IGNORE}") @@ -232,8 +230,7 @@ target_include_directories( ${NESO_LIBRARY_NAME} PUBLIC $ $) -target_compile_definitions(${NESO_LIBRARY_NAME} PUBLIC -D${SYCL_FLAG} - ) +target_compile_definitions(${NESO_LIBRARY_NAME} PUBLIC -D${SYCL_FLAG}) target_compile_options(${NESO_LIBRARY_NAME} PRIVATE ${BUILD_TYPE_COMPILE_FLAGS}) target_link_options(${NESO_LIBRARY_NAME} PUBLIC ${BUILD_TYPE_LINK_FLAGS}) target_link_libraries( diff --git a/include/nektar_interface/coordinate_mapping.hpp b/include/nektar_interface/coordinate_mapping.hpp index fcd5697b..6fecf3b4 100644 --- a/include/nektar_interface/coordinate_mapping.hpp +++ b/include/nektar_interface/coordinate_mapping.hpp @@ -33,8 +33,7 @@ template struct BaseCoordinateMapping3D { inline void loc_coord_to_loc_collapsed(const T xi0, const T xi1, const T xi2, T *eta0, T *eta1, T *eta2) { auto &underlying = static_cast(*this); - underlying.loc_coord_to_loc_collapsed_v(xi0, xi1, xi2, eta0, eta1, - eta2); + underlying.loc_coord_to_loc_collapsed_v(xi0, xi1, xi2, eta0, eta1, eta2); } /** @@ -54,8 +53,7 @@ template struct BaseCoordinateMapping3D { inline void loc_collapsed_to_loc_coord(const T eta0, const T eta1, const T eta2, T *xi0, T *xi1, T *xi2) { auto &underlying = static_cast(*this); - underlying.loc_collapsed_to_loc_coord_v(eta0, eta1, eta2, xi0, xi1, - xi2); + underlying.loc_collapsed_to_loc_coord_v(eta0, eta1, eta2, xi0, xi1, xi2); } /** diff --git a/include/nektar_interface/particle_boundary_conditions.hpp b/include/nektar_interface/particle_boundary_conditions.hpp index 9fd0dd74..37224d78 100644 --- a/include/nektar_interface/particle_boundary_conditions.hpp +++ b/include/nektar_interface/particle_boundary_conditions.hpp @@ -64,21 +64,6 @@ class NektarCartesianPeriodic { void execute(); }; -namespace { -struct NormalType { - REAL x; - REAL y; - REAL z; - - inline NormalType &operator=(const int v) { - this->x = v; - this->y = v; - this->z = v; - return *this; - } -}; -} // namespace - /** * Implementation of a reflection process which truncates the particle * trajectory at the mesh boundary. diff --git a/solvers/Electrostatic2D3V/Diagnostics/kinetic_energy.hpp b/solvers/Electrostatic2D3V/Diagnostics/kinetic_energy.hpp index 79b2c96a..67f9722d 100644 --- a/solvers/Electrostatic2D3V/Diagnostics/kinetic_energy.hpp +++ b/solvers/Electrostatic2D3V/Diagnostics/kinetic_energy.hpp @@ -12,8 +12,6 @@ using namespace NESO::Particles; */ class KineticEnergy { private: - BufferDeviceHost dh_kinetic_energy; - public: /// ParticleGroup of interest. ParticleGroupSharedPtr particle_group; @@ -34,69 +32,40 @@ class KineticEnergy { KineticEnergy(ParticleGroupSharedPtr particle_group, const double particle_mass, MPI_Comm comm = MPI_COMM_WORLD) : particle_group(particle_group), particle_mass(particle_mass), - comm(comm), dh_kinetic_energy(particle_group->sycl_target, 1) { + comm(comm) { int flag; MPICHK(MPI_Initialized(&flag)); - ASSERTL1(flag, "MPI is not initialised"); + NESOASSERT(flag, "MPI is not initialised"); } /** * Compute the current kinetic energy of the ParticleGroup. */ inline double compute() { - auto t0 = profile_timestamp(); - auto sycl_target = this->particle_group->sycl_target; - const double k_half_particle_mass = 0.5 * this->particle_mass; - auto k_V = (*this->particle_group)[Sym("V")]->cell_dat.device_ptr(); - const auto k_ndim_velocity = (*this->particle_group)[Sym("V")]->ncomp; - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - this->dh_kinetic_energy.h_buffer.ptr[0] = 0.0; - this->dh_kinetic_energy.host_to_device(); - - auto k_kinetic_energy = this->dh_kinetic_energy.d_buffer.ptr; - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - double half_mvv = 0.0; - for (int vdimx = 0; vdimx < k_ndim_velocity; vdimx++) { - const double V_vdimx = k_V[cellx][vdimx][layerx]; - half_mvv += (V_vdimx * V_vdimx); - } - half_mvv *= k_half_particle_mass; - - sycl::atomic_ref - kinetic_energy_atomic(k_kinetic_energy[0]); - kinetic_energy_atomic.fetch_add(half_mvv); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - sycl_target->profile_map.inc("KineticEnergy", "Execute", 1, - profile_elapsed(t0, profile_timestamp())); - this->dh_kinetic_energy.device_to_host(); - const double kernel_kinetic_energy = - this->dh_kinetic_energy.h_buffer.ptr[0]; - - MPICHK(MPI_Allreduce(&kernel_kinetic_energy, &(this->energy), 1, MPI_DOUBLE, - MPI_SUM, this->comm)); - + const REAL k_half_particle_mass = 0.5 * this->particle_mass; + const auto k_ndim_velocity = + this->particle_group->get_dat(Sym("V"))->ncomp; + + auto ga_kinetic_energy = std::make_shared>( + this->particle_group->sycl_target, 1, 0.0); + + particle_loop( + "KineticEnergy::compute", this->particle_group, + [=](auto k_V, auto k_kinetic_energy) { + REAL half_mvv = 0.0; + for (int vdimx = 0; vdimx < k_ndim_velocity; vdimx++) { + const REAL V_vdimx = k_V.at(vdimx); + half_mvv += (V_vdimx * V_vdimx); + } + half_mvv *= k_half_particle_mass; + k_kinetic_energy.add(0, half_mvv); + }, + Access::read(Sym("V")), Access::add(ga_kinetic_energy)) + ->execute(); + + this->energy = ga_kinetic_energy->get().at(0); return this->energy; } }; diff --git a/solvers/Electrostatic2D3V/Diagnostics/line_field_evaluations.hpp b/solvers/Electrostatic2D3V/Diagnostics/line_field_evaluations.hpp index 647aee78..cb84f514 100644 --- a/solvers/Electrostatic2D3V/Diagnostics/line_field_evaluations.hpp +++ b/solvers/Electrostatic2D3V/Diagnostics/line_field_evaluations.hpp @@ -205,31 +205,12 @@ template class LineFieldEvaluations { this->field_evaluate->evaluate(Sym("FIELD_EVALUATION")); if (this->mean_shift) { - const double k_mean = this->field_mean->get_mean(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - const auto k_EVAL = (*this->particle_group)[Sym("FIELD_EVALUATION")] - ->cell_dat.device_ptr(); - - this->particle_group->sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), - [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - k_EVAL[cellx][0][layerx] -= k_mean; - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "LineFieldEvaluations::write", this->particle_group, + [=](auto k_EVAL) { k_EVAL.at(0) -= k_mean; }, + Access::write(Sym("FIELD_EVALUATION"))) + ->execute(); } this->h5part->write(this->step); diff --git a/solvers/Electrostatic2D3V/Diagnostics/potential_energy.hpp b/solvers/Electrostatic2D3V/Diagnostics/potential_energy.hpp index a77ed8b3..35b55690 100644 --- a/solvers/Electrostatic2D3V/Diagnostics/potential_energy.hpp +++ b/solvers/Electrostatic2D3V/Diagnostics/potential_energy.hpp @@ -23,7 +23,6 @@ template class PotentialEnergy { Array phys_values; int num_quad_points; - BufferDeviceHost dh_energy; std::shared_ptr> field_evaluate; std::shared_ptr> field_mean; @@ -49,8 +48,7 @@ template class PotentialEnergy { ParticleGroupSharedPtr particle_group, std::shared_ptr cell_id_translation, MPI_Comm comm = MPI_COMM_WORLD) - : field(field), particle_group(particle_group), comm(comm), - dh_energy(particle_group->sycl_target, 1) { + : field(field), particle_group(particle_group), comm(comm) { int flag; MPICHK(MPI_Initialized(&flag)); @@ -79,57 +77,24 @@ template class PotentialEnergy { this->field_evaluate->evaluate(Sym("ELEC_PIC_PE")); auto t0 = profile_timestamp(); - auto sycl_target = this->particle_group->sycl_target; - const auto k_Q = - (*this->particle_group)[Sym("Q")]->cell_dat.device_ptr(); - const auto k_PHI = (*this->particle_group)[Sym("ELEC_PIC_PE")] - ->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - this->dh_energy.h_buffer.ptr[0] = 0.0; - this->dh_energy.host_to_device(); - - auto k_energy = this->dh_energy.d_buffer.ptr; + auto ga_energy = std::make_shared>( + this->particle_group->sycl_target, 1, 0.0); const double k_potential_shift = -this->field_mean->get_mean(); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const double phi = k_PHI[cellx][0][layerx]; - const double q = k_Q[cellx][0][layerx]; - const double tmp_contrib = q * (phi + k_potential_shift); - - sycl::atomic_ref - energy_atomic(k_energy[0]); - energy_atomic.fetch_add(tmp_contrib); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - sycl_target->profile_map.inc("PotentialEnergy", "Execute", 1, - profile_elapsed(t0, profile_timestamp())); - this->dh_energy.device_to_host(); - const double kernel_energy = this->dh_energy.h_buffer.ptr[0]; - - MPICHK(MPI_Allreduce(&kernel_energy, &(this->energy), 1, MPI_DOUBLE, - MPI_SUM, this->comm)); + particle_loop( + "PotentialEnergy::compute", this->particle_group, + [=](auto k_Q, auto k_PHI, auto k_ga_energy) { + const REAL phi = k_PHI.at(0); + const REAL q = k_Q.at(0); + const REAL tmp_contrib = q * (phi + k_potential_shift); + k_ga_energy.add(0, tmp_contrib); + }, + Access::read(Sym("Q")), Access::read(Sym("ELEC_PIC_PE")), + Access::add(ga_energy)) + ->execute(); // The factor of 1/2 in the electrostatic potential energy calculation. - this->energy *= 0.5; - + this->energy = ga_energy->get().at(0) * 0.5; return this->energy; } }; diff --git a/solvers/Electrostatic2D3V/ParticleSystems/boris_integrator.hpp b/solvers/Electrostatic2D3V/ParticleSystems/boris_integrator.hpp index 89f079de..0939b36b 100644 --- a/solvers/Electrostatic2D3V/ParticleSystems/boris_integrator.hpp +++ b/solvers/Electrostatic2D3V/ParticleSystems/boris_integrator.hpp @@ -69,23 +69,6 @@ class IntegratorBorisUniformB { */ inline void boris_2() { auto t0 = profile_timestamp(); - - auto k_P = (*this->particle_group)[Sym("P")]->cell_dat.device_ptr(); - auto k_V = (*this->particle_group)[Sym("V")]->cell_dat.device_ptr(); - const auto k_M = - (*this->particle_group)[Sym("M")]->cell_dat.device_ptr(); - const auto k_E = - (*this->particle_group)[Sym("E")]->cell_dat.device_ptr(); - const auto k_Q = - (*this->particle_group)[Sym("Q")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - const double k_dt = this->dt; const double k_dht = this->dt * 0.5; const double k_B_0 = this->B_0; @@ -93,81 +76,70 @@ class IntegratorBorisUniformB { const double k_B_2 = this->B_2; const REAL k_E_coefficient = this->particle_E_coefficient; - this->sycl_target->profile_map.inc( - "IntegratorBorisUniformB", "VelocityVerlet_2_Prepare", 1, - profile_elapsed(t0, profile_timestamp())); - this->sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL Q = k_Q[cellx][0][layerx]; - const REAL M = k_M[cellx][0][layerx]; - const REAL QoM = Q / M; - - const REAL scaling_t = QoM * k_dht; - const REAL t_0 = k_B_0 * scaling_t; - const REAL t_1 = k_B_1 * scaling_t; - const REAL t_2 = k_B_2 * scaling_t; - - const REAL tmagsq = t_0 * t_0 + t_1 * t_1 + t_2 * t_2; - const REAL scaling_s = 2.0 / (1.0 + tmagsq); - - const REAL s_0 = scaling_s * t_0; - const REAL s_1 = scaling_s * t_1; - const REAL s_2 = scaling_s * t_2; - - const REAL V_0 = k_V[cellx][0][layerx]; - const REAL V_1 = k_V[cellx][1][layerx]; - const REAL V_2 = k_V[cellx][2][layerx]; - - // The E dat contains d(phi)/dx not E -> multiply by -1. - const REAL v_minus_0 = V_0 + (-1.0 * k_E[cellx][0][layerx]) * - scaling_t * k_E_coefficient; - const REAL v_minus_1 = V_1 + (-1.0 * k_E[cellx][1][layerx]) * - scaling_t * k_E_coefficient; - // E is zero in the z direction - const REAL v_minus_2 = V_2; - - REAL v_prime_0, v_prime_1, v_prime_2; - ELEC_PIC_2D3V_CROSS_PRODUCT_3D(v_minus_0, v_minus_1, v_minus_2, - t_0, t_1, t_2, v_prime_0, - v_prime_1, v_prime_2) - - v_prime_0 += v_minus_0; - v_prime_1 += v_minus_1; - v_prime_2 += v_minus_2; - - REAL v_plus_0, v_plus_1, v_plus_2; - ELEC_PIC_2D3V_CROSS_PRODUCT_3D(v_prime_0, v_prime_1, v_prime_2, - s_0, s_1, s_2, v_plus_0, - v_plus_1, v_plus_2) - - v_plus_0 += v_minus_0; - v_plus_1 += v_minus_1; - v_plus_2 += v_minus_2; - - // The E dat contains d(phi)/dx not E -> multiply by -1. - k_V[cellx][0][layerx] = - v_plus_0 + scaling_t * (-1.0 * k_E[cellx][0][layerx]) * - k_E_coefficient; - k_V[cellx][1][layerx] = - v_plus_1 + scaling_t * (-1.0 * k_E[cellx][1][layerx]) * - k_E_coefficient; - // E is zero in the z direction - k_V[cellx][2][layerx] = v_plus_2; - - // update of position to next time step - k_P[cellx][0][layerx] += k_dt * k_V[cellx][0][layerx]; - k_P[cellx][1][layerx] += k_dt * k_V[cellx][1][layerx]; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "IntegratorBorisUniformB::boris_2", this->particle_group, + [=](auto k_P, auto k_V, auto k_M, auto k_E, auto k_Q) { + const REAL Q = k_Q.at(0); + const REAL M = k_M.at(0); + const REAL QoM = Q / M; + + const REAL scaling_t = QoM * k_dht; + const REAL t_0 = k_B_0 * scaling_t; + const REAL t_1 = k_B_1 * scaling_t; + const REAL t_2 = k_B_2 * scaling_t; + + const REAL tmagsq = t_0 * t_0 + t_1 * t_1 + t_2 * t_2; + const REAL scaling_s = 2.0 / (1.0 + tmagsq); + + const REAL s_0 = scaling_s * t_0; + const REAL s_1 = scaling_s * t_1; + const REAL s_2 = scaling_s * t_2; + + const REAL V_0 = k_V.at(0); + const REAL V_1 = k_V.at(1); + const REAL V_2 = k_V.at(2); + + // The E dat contains d(phi)/dx not E -> multiply by -1. + const REAL v_minus_0 = + V_0 + (-1.0 * k_E.at(0)) * scaling_t * k_E_coefficient; + const REAL v_minus_1 = + V_1 + (-1.0 * k_E.at(1)) * scaling_t * k_E_coefficient; + // E is zero in the z direction + const REAL v_minus_2 = V_2; + + REAL v_prime_0, v_prime_1, v_prime_2; + ELEC_PIC_2D3V_CROSS_PRODUCT_3D(v_minus_0, v_minus_1, v_minus_2, t_0, + t_1, t_2, v_prime_0, v_prime_1, + v_prime_2) + + v_prime_0 += v_minus_0; + v_prime_1 += v_minus_1; + v_prime_2 += v_minus_2; + + REAL v_plus_0, v_plus_1, v_plus_2; + ELEC_PIC_2D3V_CROSS_PRODUCT_3D(v_prime_0, v_prime_1, v_prime_2, s_0, + s_1, s_2, v_plus_0, v_plus_1, v_plus_2) + + v_plus_0 += v_minus_0; + v_plus_1 += v_minus_1; + v_plus_2 += v_minus_2; + + // The E dat contains d(phi)/dx not E -> multiply by -1. + k_V.at(0) = + v_plus_0 + scaling_t * (-1.0 * k_E.at(0)) * k_E_coefficient; + k_V.at(1) = + v_plus_1 + scaling_t * (-1.0 * k_E.at(1)) * k_E_coefficient; + // E is zero in the z direction + k_V.at(2) = v_plus_2; + + // update of position to next time step + k_P.at(0) += k_dt * k_V.at(0); + k_P.at(1) += k_dt * k_V.at(1); + }, + Access::write(Sym("P")), Access::write(Sym("V")), + Access::read(Sym("M")), Access::read(Sym("E")), + Access::read(Sym("Q"))) + ->execute(); this->sycl_target->profile_map.inc( "IntegratorBorisUniformB", "Boris_2_Execute", 1, diff --git a/solvers/Electrostatic2D3V/ParticleSystems/charged_particles.hpp b/solvers/Electrostatic2D3V/ParticleSystems/charged_particles.hpp index 1c41a49b..01e2f9ba 100644 --- a/solvers/Electrostatic2D3V/ParticleSystems/charged_particles.hpp +++ b/solvers/Electrostatic2D3V/ParticleSystems/charged_particles.hpp @@ -22,7 +22,6 @@ #include #include "boris_integrator.hpp" -#include "parallel_initialisation.hpp" using namespace Nektar; using namespace NESO; @@ -306,8 +305,8 @@ class ChargedParticles { this->particle_group->add_particles_local(initial_distribution); } - NESO::parallel_advection_initialisation(this->particle_group); - NESO::parallel_advection_store(this->particle_group); + NESO::Particles::parallel_advection_initialisation(this->particle_group); + NESO::Particles::parallel_advection_store(this->particle_group); // auto h5part_local = std::make_shared( // "foo.h5part", this->particle_group, @@ -315,11 +314,12 @@ class ChargedParticles { // Sym("PARTICLE_ID"), Sym("NESO_REFERENCE_POSITIONS")); const int num_steps = 20; for (int stepx = 0; stepx < num_steps; stepx++) { - NESO::parallel_advection_step(this->particle_group, num_steps, stepx); + NESO::Particles::parallel_advection_step(this->particle_group, num_steps, + stepx); this->transfer_particles(); // h5part_local->write(); } - NESO::parallel_advection_restore(this->particle_group); + NESO::Particles::parallel_advection_restore(this->particle_group); // h5part_local->write(); // h5part_local->close(); @@ -556,55 +556,28 @@ class ChargedParticles { * Velocity Verlet - First step. */ inline void velocity_verlet_1() { - + auto t0 = profile_timestamp(); const double k_dt = this->dt; const double k_dht = this->dt * 0.5; - - auto t0 = profile_timestamp(); - - auto k_P = (*this->particle_group)[Sym("P")]->cell_dat.device_ptr(); - auto k_V = (*this->particle_group)[Sym("V")]->cell_dat.device_ptr(); - auto k_M = (*this->particle_group)[Sym("M")]->cell_dat.device_ptr(); - const auto k_E = - (*this->particle_group)[Sym("E")]->cell_dat.device_ptr(); - const auto k_Q = - (*this->particle_group)[Sym("Q")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - sycl_target->profile_map.inc("ChargedParticles", "VelocityVerlet_1_Prepare", - 1, profile_elapsed(t0, profile_timestamp())); - const REAL k_E_coefficient = this->particle_E_coefficient; - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const double Q = k_Q[cellx][0][layerx]; - const double dht_inverse_particle_mass = - k_E_coefficient * k_dht * Q / k_M[cellx][0][layerx]; - k_V[cellx][0][layerx] -= - k_E[cellx][0][layerx] * dht_inverse_particle_mass; - k_V[cellx][1][layerx] -= - k_E[cellx][1][layerx] * dht_inverse_particle_mass; - - k_P[cellx][0][layerx] += k_dt * k_V[cellx][0][layerx]; - k_P[cellx][1][layerx] += k_dt * k_V[cellx][1][layerx]; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "ChargedParticles::velocity_verlet_1", this->particle_group, + [=](auto k_Q, auto k_E, auto k_M, auto k_V, auto k_P) { + const double Q = k_Q.at(0); + const double dht_inverse_particle_mass = + k_E_coefficient * k_dht * Q / k_M.at(0); + k_V.at(0) -= k_E.at(0) * dht_inverse_particle_mass; + k_V.at(1) -= k_E.at(1) * dht_inverse_particle_mass; + + k_P.at(0) += k_dt * k_V.at(0); + k_P.at(1) += k_dt * k_V.at(1); + }, + Access::read(Sym("Q")), Access::read(Sym("E")), + Access::read(Sym("M")), Access::write(Sym("V")), + Access::write(Sym("P"))) + ->execute(); + sycl_target->profile_map.inc("ChargedParticles", "VelocityVerlet_1_Execute", 1, profile_elapsed(t0, profile_timestamp())); @@ -617,48 +590,23 @@ class ChargedParticles { * Velocity Verlet - Second step. */ inline void velocity_verlet_2() { - const double k_dht = this->dt * 0.5; - auto t0 = profile_timestamp(); - - auto k_V = (*this->particle_group)[Sym("V")]->cell_dat.device_ptr(); - const auto k_E = - (*this->particle_group)[Sym("E")]->cell_dat.device_ptr(); - const auto k_Q = - (*this->particle_group)[Sym("Q")]->cell_dat.device_ptr(); - auto k_M = (*this->particle_group)[Sym("M")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - + const double k_dht = this->dt * 0.5; const REAL k_E_coefficient = this->particle_E_coefficient; - sycl_target->profile_map.inc("ChargedParticles", "VelocityVerlet_2_Prepare", - 1, profile_elapsed(t0, profile_timestamp())); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const double Q = k_Q[cellx][0][layerx]; - const double dht_inverse_particle_mass = - k_E_coefficient * k_dht * Q / k_M[cellx][0][layerx]; - k_V[cellx][0][layerx] -= - k_E[cellx][0][layerx] * dht_inverse_particle_mass; - k_V[cellx][1][layerx] -= - k_E[cellx][1][layerx] * dht_inverse_particle_mass; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "ChargedParticles::velocity_verlet_2", this->particle_group, + [=](auto k_Q, auto k_E, auto k_M, auto k_V) { + const double Q = k_Q.at(0); + const double dht_inverse_particle_mass = + k_E_coefficient * k_dht * Q / k_M.at(0); + k_V.at(0) -= k_E.at(0) * dht_inverse_particle_mass; + k_V.at(1) -= k_E.at(1) * dht_inverse_particle_mass; + }, + Access::read(Sym("Q")), Access::read(Sym("E")), + Access::read(Sym("M")), Access::write(Sym("V"))) + ->execute(); + sycl_target->profile_map.inc("ChargedParticles", "VelocityVerlet_2_Execute", 1, profile_elapsed(t0, profile_timestamp())); } diff --git a/solvers/Electrostatic2D3V/ParticleSystems/parallel_initialisation.hpp b/solvers/Electrostatic2D3V/ParticleSystems/parallel_initialisation.hpp deleted file mode 100644 index 9b7a2dfe..00000000 --- a/solvers/Electrostatic2D3V/ParticleSystems/parallel_initialisation.hpp +++ /dev/null @@ -1,198 +0,0 @@ -#ifndef __E2D3V_PARALLEL_INITIALISATION_H_ -#define __E2D3V_PARALLEL_INITIALISATION_H_ - -#include -#include -#include - -namespace NESO { - -inline void get_point_in_local_domain(ParticleGroupSharedPtr particle_group, - double *point) { - - auto domain = particle_group->domain; - const int space_ncomp = particle_group->position_dat->ncomp; - auto mesh = std::dynamic_pointer_cast(domain->mesh); - auto graph = mesh->graph; - - NESOASSERT(space_ncomp == 2, "Expected 2 position components"); - - auto trigeoms = graph->GetAllTriGeoms(); - if (trigeoms.size() > 0) { - - auto tri = trigeoms.begin()->second; - auto v0 = tri->GetVertex(0); - auto v1 = tri->GetVertex(1); - auto v2 = tri->GetVertex(2); - - double mid[2]; - mid[0] = 0.5 * ((*v1)[0] - (*v0)[0]); - mid[1] = 0.5 * ((*v1)[1] - (*v0)[1]); - mid[0] += (*v0)[0]; - mid[1] += (*v0)[1]; - - point[0] = 0.5 * ((*v2)[0] - mid[0]); - point[1] = 0.5 * ((*v2)[1] - mid[1]); - point[0] += mid[0]; - point[1] += mid[1]; - - Array test(3); - test[0] = point[0]; - test[1] = point[1]; - test[2] = 0.0; - NESOASSERT(tri->ContainsPoint(test), "Triangle should contain this point"); - - } else { - auto quadgeoms = graph->GetAllQuadGeoms(); - NESOASSERT(quadgeoms.size() > 0, "could not find any 2D geometry objects"); - - auto quad = quadgeoms.begin()->second; - auto v0 = quad->GetVertex(0); - auto v2 = quad->GetVertex(2); - - Array mid(3); - mid[0] = 0.5 * ((*v2)[0] - (*v0)[0]); - mid[1] = 0.5 * ((*v2)[1] - (*v0)[1]); - mid[2] = 0.0; - mid[0] += (*v0)[0]; - mid[1] += (*v0)[1]; - - NESOASSERT(quad->ContainsPoint(mid), "Quad should contain this point"); - - point[0] = mid[0]; - point[1] = mid[1]; - } -} - -inline void parallel_advection_store(ParticleGroupSharedPtr particle_group) { - - auto sycl_target = particle_group->sycl_target; - const int space_ncomp = particle_group->position_dat->ncomp; - - double local_point[3]; - get_point_in_local_domain(particle_group, local_point); - - auto k_P = particle_group->position_dat->cell_dat.device_ptr(); - auto k_ORIG_POS = - (*particle_group)[Sym("ORIG_POS")]->cell_dat.device_ptr(); - auto pl_iter_range = - particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - auto pl_stride = - particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - auto pl_npart_cell = - particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - sycl::buffer b_local_point(local_point, 3); - - // store the target position - sycl_target->queue - .submit([&](sycl::handler &cgh) { - auto a_local_point = - b_local_point.get_access(cgh); - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - for (int nx = 0; nx < space_ncomp; nx++) { - k_ORIG_POS[cellx][nx][layerx] = k_P[cellx][nx][layerx]; - k_P[cellx][nx][layerx] = a_local_point[nx]; - } - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); -} - -inline void parallel_advection_restore(ParticleGroupSharedPtr particle_group) { - - auto sycl_target = particle_group->sycl_target; - const int space_ncomp = particle_group->position_dat->ncomp; - auto k_P = particle_group->position_dat->cell_dat.device_ptr(); - auto k_ORIG_POS = - (*particle_group)[Sym("ORIG_POS")]->cell_dat.device_ptr(); - auto pl_iter_range = - particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - auto pl_stride = - particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - auto pl_npart_cell = - particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - ErrorPropagate ep(sycl_target); - auto k_ep = ep.device_ptr(); - - // restore the target position - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - for (int nx = 0; nx < space_ncomp; nx++) { - - const bool valid = ABS(k_ORIG_POS[cellx][nx][layerx] - - k_P[cellx][nx][layerx]) < 1.0e-6; - - NESO_KERNEL_ASSERT(valid, k_ep); - k_P[cellx][nx][layerx] = k_ORIG_POS[cellx][nx][layerx]; - } - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - - ep.check_and_throw("Advected particle was very far from intended position."); -} - -inline void parallel_advection_step(ParticleGroupSharedPtr particle_group, - const int num_steps, const int step) { - auto sycl_target = particle_group->sycl_target; - const int space_ncomp = particle_group->position_dat->ncomp; - auto k_P = particle_group->position_dat->cell_dat.device_ptr(); - auto k_ORIG_POS = - (*particle_group)[Sym("ORIG_POS")]->cell_dat.device_ptr(); - auto pl_iter_range = - particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - auto pl_stride = - particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - auto pl_npart_cell = - particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - const double steps_left = ((double)num_steps) - ((double)step); - const double inverse_steps_left = 1.0 / steps_left; - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - for (int nx = 0; nx < space_ncomp; nx++) { - const double offset = - k_ORIG_POS[cellx][nx][layerx] - k_P[cellx][nx][layerx]; - k_P[cellx][nx][layerx] += inverse_steps_left * offset; - } - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); -} - -inline void -parallel_advection_initialisation(ParticleGroupSharedPtr particle_group) { - - const int space_ncomp = particle_group->position_dat->ncomp; - auto domain = particle_group->domain; - auto sycl_target = particle_group->sycl_target; - particle_group->add_particle_dat( - ParticleDat(sycl_target, ParticleProp(Sym("ORIG_POS"), space_ncomp), - domain->mesh->get_cell_count())); -} - -} // namespace NESO - -#endif diff --git a/solvers/H3LAPD/Diagnostics/MassRecorder.hpp b/solvers/H3LAPD/Diagnostics/MassRecorder.hpp index 523ef2e2..bb6ad848 100644 --- a/solvers/H3LAPD/Diagnostics/MassRecorder.hpp +++ b/solvers/H3LAPD/Diagnostics/MassRecorder.hpp @@ -19,8 +19,6 @@ namespace NESO::Solvers::H3LAPD { */ template class MassRecorder { protected: - /// Buffer to store total particle weight - BufferDeviceHost m_dh_particle_total_weight; /// File handle for recording output std::ofstream m_fh; /// Flag to track whether initial fluid mass has been computed @@ -46,7 +44,6 @@ template class MassRecorder { std::shared_ptr n) : m_session(session), m_particle_sys(particle_sys), m_n(n), m_sycl_target(particle_sys->m_sycl_target), - m_dh_particle_total_weight(particle_sys->m_sycl_target, 1), m_initial_fluid_mass_computed(false) { m_session->LoadParameter("mass_recording_step", m_recording_step, 0); @@ -96,48 +93,20 @@ template class MassRecorder { * MPI tasks. */ inline double compute_particle_mass() { - auto particle_group = m_particle_sys->m_particle_group; - auto k_W = (*particle_group)[Sym("COMPUTATIONAL_WEIGHT")] - ->cell_dat.device_ptr(); - - m_dh_particle_total_weight.h_buffer.ptr[0] = 0.0; - m_dh_particle_total_weight.host_to_device(); - auto k_particle_weight = m_dh_particle_total_weight.d_buffer.ptr; - - const auto pl_iter_range = - particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - m_sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const double contrib = k_W[cellx][0][layerx]; - - sycl::atomic_ref - energy_atomic(k_particle_weight[0]); - energy_atomic.fetch_add(contrib); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - - m_dh_particle_total_weight.device_to_host(); - const double tmp_weight = m_dh_particle_total_weight.h_buffer.ptr[0]; - double total_particle_weight; - MPICHK(MPI_Allreduce(&tmp_weight, &total_particle_weight, 1, MPI_DOUBLE, - MPI_SUM, m_sycl_target->comm_pair.comm_parent)); - - return total_particle_weight; + auto ga_total_weight = std::make_shared>( + this->m_particle_sys->m_particle_group->sycl_target, 1, 0.0); + + particle_loop( + "MassRecorder::compute_particle_mass", + this->m_particle_sys->m_particle_group, + [=](auto k_W, auto k_ga_total_weight) { + k_ga_total_weight.add(0, k_W.at(0)); + }, + Access::read(Sym("COMPUTATIONAL_WEIGHT")), + Access::add(ga_total_weight)) + ->execute(); + + return ga_total_weight->get().at(0); } /** diff --git a/solvers/H3LAPD/ParticleSystems/NeutralParticleSystem.hpp b/solvers/H3LAPD/ParticleSystems/NeutralParticleSystem.hpp index df4b3c4e..54e8e90a 100644 --- a/solvers/H3LAPD/ParticleSystems/NeutralParticleSystem.hpp +++ b/solvers/H3LAPD/ParticleSystems/NeutralParticleSystem.hpp @@ -36,15 +36,15 @@ namespace NESO::Solvers::H3LAPD { */ inline double expint_barry_approx(const double x) { constexpr double gamma_Euler_Mascheroni = 0.5772156649015329; - const double G = std::exp(-gamma_Euler_Mascheroni); - const double b = std::sqrt(2 * (1 - G) / G / (2 - G)); - const double h_inf = (1 - G) * (std::pow(G, 2) - 6 * G + 12) / - (3 * G * std::pow(2 - G, 2) * b); - const double q = 20.0 / 47.0 * std::pow(x, std::sqrt(31.0 / 26.0)); - const double h = 1 / (1 + x * std::sqrt(x)) + h_inf * q / (1 + q); + const double G = sycl::exp(-gamma_Euler_Mascheroni); + const double b = sycl::sqrt(2 * (1 - G) / G / (2 - G)); + const double h_inf = + (1 - G) * ((G * G) - 6 * G + 12) / (3 * G * ((2 - G) * (2 - G)) * b); + const double q = 20.0 / 47.0 * sycl::pow(x, sycl::sqrt(31.0 / 26.0)); + const double h = 1 / (1 + x * sycl::sqrt(x)) + h_inf * q / (1 + q); const double logfactor = - std::log(1 + G / x - (1 - G) / std::pow(h + b * x, 2)); - return std::exp(-x) / (G + (1 - G) * std::exp(-(x / (1 - G)))) * logfactor; + sycl::log(1 + G / x - (1 - G) / ((h + b * x) * (h + b * x))); + return sycl::exp(-x) / (G + (1 - G) * sycl::exp(-(x / (1 - G)))) * logfactor; } /** @@ -472,15 +472,16 @@ class NeutralParticleSystem { } m_total_num_particles_added += num_particles_to_add; - parallel_advection_initialisation(m_particle_group); - parallel_advection_store(m_particle_group); + NESO::Particles::parallel_advection_initialisation(m_particle_group); + NESO::Particles::parallel_advection_store(m_particle_group); const int num_steps = 20; for (int stepx = 0; stepx < num_steps; stepx++) { - parallel_advection_step(m_particle_group, num_steps, stepx); + NESO::Particles::parallel_advection_step(m_particle_group, num_steps, + stepx); this->transfer_particles(); } - parallel_advection_restore(m_particle_group); + NESO::Particles::parallel_advection_restore(m_particle_group); // Move particles to the owning ranks and correct cells. this->transfer_particles(); @@ -499,41 +500,20 @@ class NeutralParticleSystem { * Evaluate fields at the particle locations. */ inline void evaluate_fields() { - NESOASSERT(m_field_evaluate_ne != nullptr, "FieldEvaluate object is null. Was setup_evaluate_ne called?"); m_field_evaluate_ne->evaluate(Sym("ELECTRON_DENSITY")); - // Particle property to update - auto k_n = (*m_particle_group)[Sym("ELECTRON_DENSITY")] - ->cell_dat.device_ptr(); - - auto k_n_bg_SI = m_n_bg_SI; - // Unit conversion factors - double k_n_to_SI = m_n_to_SI; - - const auto pl_iter_range = - m_particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - m_particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - m_particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - m_sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - k_n[cellx][0][layerx] = - k_n_bg_SI + k_n[cellx][0][layerx] * k_n_to_SI; - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + const double k_n_to_SI = m_n_to_SI; + const auto k_n_bg_SI = m_n_bg_SI; + + particle_loop( + "NeutralParticleSystem::evaluate_fields", this->m_particle_group, + [=](auto k_n) { k_n.at(0) = k_n_bg_SI + k_n.at(0) * k_n_to_SI; }, + Access::write(Sym("ELECTRON_DENSITY"))) + ->execute(); } /** @@ -542,43 +522,20 @@ class NeutralParticleSystem { * @param dt Time step size. */ inline void forward_euler(const double dt) { - - const double k_dt = dt; - auto t0 = profile_timestamp(); + const double k_dt = dt; - auto k_P = - (*m_particle_group)[Sym("POSITION")]->cell_dat.device_ptr(); - auto k_V = - (*m_particle_group)[Sym("VELOCITY")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - m_particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - m_particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - m_particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - m_sycl_target->profile_map.inc("NeutralParticleSystem", - "ForwardEuler_Prepare", 1, - profile_elapsed(t0, profile_timestamp())); + particle_loop( + "NeutralParticleSystem::forward_euler", this->m_particle_group, + [=](auto k_P, auto k_V) { + k_P.at(0) += k_dt * k_V.at(0); + k_P.at(1) += k_dt * k_V.at(1); + k_P.at(2) += k_dt * k_V.at(2); + }, + Access::write(Sym("POSITION")), + Access::read(Sym("VELOCITY"))) + ->execute(); - m_sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - k_P[cellx][0][layerx] += k_dt * k_V[cellx][0][layerx]; - k_P[cellx][1][layerx] += k_dt * k_V[cellx][1][layerx]; - k_P[cellx][2][layerx] += k_dt * k_V[cellx][2][layerx]; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); m_sycl_target->profile_map.inc("NeutralParticleSystem", "ForwardEuler_Execute", 1, profile_elapsed(t0, profile_timestamp())); @@ -632,71 +589,43 @@ class NeutralParticleSystem { const double k_rate_factor = -k_q_i * 6.7e7 * k_a_i * 1e-6; // 1e-6 to go from cm^3 to m^3 - const INT k_remove_key = m_particle_remove_key; - - auto t0 = profile_timestamp(); - - auto k_ID = - (*m_particle_group)[Sym("PARTICLE_ID")]->cell_dat.device_ptr(); - auto k_n = (*m_particle_group)[Sym("ELECTRON_DENSITY")] - ->cell_dat.device_ptr(); - auto k_SD = - (*m_particle_group)[Sym("SOURCE_DENSITY")]->cell_dat.device_ptr(); - - auto k_V = - (*m_particle_group)[Sym("VELOCITY")]->cell_dat.device_ptr(); - auto k_W = (*m_particle_group)[Sym("COMPUTATIONAL_WEIGHT")] - ->cell_dat.device_ptr(); - - const auto pl_iter_range = - m_particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - m_particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - m_particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - m_sycl_target->profile_map.inc("NeutralParticleSystem", - "Ionisation_Prepare", 1, - profile_elapsed(t0, profile_timestamp())); - const REAL invratio = k_E_i / m_TeV; const REAL rate = -k_rate_factor / (m_TeV * std::sqrt(m_TeV)) * (expint_barry_approx(invratio) / invratio + (k_b_i_expc_i / (invratio + k_c_i)) * expint_barry_approx(invratio + k_c_i)); + const INT k_remove_key = m_particle_remove_key; + + auto t0 = profile_timestamp(); - m_sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - const REAL n_SI = k_n[cellx][0][layerx]; - - const REAL weight = k_W[cellx][0][layerx]; - // note that the rate will be a positive number, so minus sign - // here - REAL deltaweight = -rate * weight * k_dt_SI * n_SI; - - /* Check whether weight is about to drop below zero. - If so, flag particle for removal and adjust deltaweight. - These particles are removed after the project call. - */ - if ((weight + deltaweight) <= 0) { - k_ID[cellx][0][layerx] = k_remove_key; - deltaweight = -weight; - } - - // Mutate the weight on the particle - k_W[cellx][0][layerx] += deltaweight; - // Set value for fluid density source (num / Nektar unit time) - k_SD[cellx][0][layerx] = -deltaweight * k_n_scale / k_dt; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "NeutralParticleSystem::ionise", this->m_particle_group, + [=](auto k_ID, auto k_n, auto k_SD, auto k_W) { + const REAL n_SI = k_n.at(0); + const REAL weight = k_W.at(0); + // note that the rate will be a positive number, so minus sign + // here + REAL deltaweight = -rate * weight * k_dt_SI * n_SI; + + /* Check whether weight is about to drop below zero. + If so, flag particle for removal and adjust deltaweight. + These particles are removed after the project call. + */ + if ((weight + deltaweight) <= 0) { + k_ID.at(0) = k_remove_key; + deltaweight = -weight; + } + + // Mutate the weight on the particle + k_W.at(0) += deltaweight; + // Set value for fluid density source (num / Nektar unit time) + k_SD.at(0) = -deltaweight * k_n_scale / k_dt; + }, + Access::write(Sym("PARTICLE_ID")), + Access::read(Sym("ELECTRON_DENSITY")), + Access::write(Sym("SOURCE_DENSITY")), + Access::write(Sym("COMPUTATIONAL_WEIGHT"))) + ->execute(); m_sycl_target->profile_map.inc("NeutralParticleSystem", "Ionisation_Execute", 1, diff --git a/solvers/SimpleSOL/Diagnostics/mass_conservation.hpp b/solvers/SimpleSOL/Diagnostics/mass_conservation.hpp index f6854cec..7c1bc8a6 100644 --- a/solvers/SimpleSOL/Diagnostics/mass_conservation.hpp +++ b/solvers/SimpleSOL/Diagnostics/mass_conservation.hpp @@ -20,7 +20,6 @@ template class MassRecording { std::shared_ptr rho; SYCLTargetSharedPtr sycl_target; - BufferDeviceHost dh_particle_total_weight; bool initial_mass_computed; double initial_mass_fluid; int mass_recording_step; @@ -32,8 +31,7 @@ template class MassRecording { std::shared_ptr particle_sys, std::shared_ptr rho) : session(session), particle_sys(particle_sys), rho(rho), - sycl_target(particle_sys->sycl_target), - dh_particle_total_weight(sycl_target, 1), initial_mass_computed(false) { + sycl_target(particle_sys->sycl_target), initial_mass_computed(false) { session->LoadParameter("mass_recording_step", mass_recording_step, 0); rank = sycl_target->comm_pair.rank_parent; @@ -50,48 +48,20 @@ template class MassRecording { } inline double compute_particle_mass() { - auto particle_group = this->particle_sys->particle_group; - auto k_W = (*particle_group)[Sym("COMPUTATIONAL_WEIGHT")] - ->cell_dat.device_ptr(); - - this->dh_particle_total_weight.h_buffer.ptr[0] = 0.0; - this->dh_particle_total_weight.host_to_device(); - auto k_particle_weight = this->dh_particle_total_weight.d_buffer.ptr; - - const auto pl_iter_range = - particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const double contrib = k_W[cellx][0][layerx]; - - sycl::atomic_ref - energy_atomic(k_particle_weight[0]); - energy_atomic.fetch_add(contrib); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - - this->dh_particle_total_weight.device_to_host(); - const double tmp_weight = this->dh_particle_total_weight.h_buffer.ptr[0]; - double total_particle_weight; - MPICHK(MPI_Allreduce(&tmp_weight, &total_particle_weight, 1, MPI_DOUBLE, - MPI_SUM, sycl_target->comm_pair.comm_parent)); - - return total_particle_weight; + auto ga_total_weight = + std::make_shared>(this->sycl_target, 1, 0.0); + + particle_loop( + "MassRecording::compute_particle_mass", + this->particle_sys->particle_group, + [=](auto k_W, auto k_ga_total_weight) { + k_ga_total_weight.add(0, k_W.at(0)); + }, + Access::read(Sym("COMPUTATIONAL_WEIGHT")), + Access::add(ga_total_weight)) + ->execute(); + + return ga_total_weight->get().at(0); } inline double compute_fluid_mass() { diff --git a/solvers/SimpleSOL/ParticleSystems/neutral_particles.hpp b/solvers/SimpleSOL/ParticleSystems/neutral_particles.hpp index 6613a4ce..b84e852f 100644 --- a/solvers/SimpleSOL/ParticleSystems/neutral_particles.hpp +++ b/solvers/SimpleSOL/ParticleSystems/neutral_particles.hpp @@ -490,40 +490,22 @@ class NeutralParticleSystem : public PartSysBase { inline void wall_boundary_conditions() { // Find particles that have travelled outside the domain in the x direction. - auto k_P = - (*this->particle_group)[Sym("POSITION")]->cell_dat.device_ptr(); - // reuse this dat for remove flags - auto k_PARTICLE_ID = - (*this->particle_group)[Sym("PARTICLE_ID")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - const REAL k_lower_bound = 0.0; const REAL k_upper_bound = k_lower_bound + this->unrotated_x_max; - const INT k_remove_key = this->particle_remove_key; - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - const REAL px = k_P[cellx][0][layerx]; - if ((px < k_lower_bound) || (px > k_upper_bound)) { - // mark the particle as removed - k_PARTICLE_ID[cellx][0][layerx] = k_remove_key; - } - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + "NeutralParticleSystem::wall_boundary_conditions", this->particle_group, + [=](auto k_P, auto k_PARTICLE_ID) { + const REAL px = k_P.at(0); + if ((px < k_lower_bound) || (px > k_upper_bound)) { + // mark the particle as removed + k_PARTICLE_ID.at(0) = k_remove_key; + } + }, + Access::read(Sym("POSITION")), + Access::write(Sym("PARTICLE_ID"))) + ->execute(); // remove particles marked to remove by the boundary conditions remove_marked_particles(); @@ -592,46 +574,20 @@ class NeutralParticleSystem : public PartSysBase { * @param dt Time step size. */ inline void forward_euler(const double dt) { - - const double k_dt = dt; - auto t0 = profile_timestamp(); - - auto k_P = - (*this->particle_group)[Sym("POSITION")]->cell_dat.device_ptr(); - auto k_V = - (*this->particle_group)[Sym("VELOCITY")]->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - sycl_target->profile_map.inc("NeutralParticleSystem", - "ForwardEuler_Prepare", 1, - profile_elapsed(t0, profile_timestamp())); - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - k_P[cellx][0][layerx] += k_dt * k_V[cellx][0][layerx]; - k_P[cellx][1][layerx] += k_dt * k_V[cellx][1][layerx]; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + const double k_dt = dt; + particle_loop( + "NeutralParticleSystem::forward_euler", this->particle_group, + [=](auto k_P, auto k_V) { + k_P.at(0) += k_dt * k_V.at(0); + k_P.at(1) += k_dt * k_V.at(1); + }, + Access::write(Sym("POSITION")), + Access::read(Sym("VELOCITY"))) + ->execute(); sycl_target->profile_map.inc("NeutralParticleSystem", "ForwardEuler_Execute", 1, profile_elapsed(t0, profile_timestamp())); - // positions were written so we apply boundary conditions and move // particles between ranks this->transfer_particles(); @@ -660,35 +616,25 @@ class NeutralParticleSystem : public PartSysBase { this->field_evaluate_T->evaluate(Sym("ELECTRON_TEMPERATURE")); // Unit conversion - auto k_TeV = (*this->particle_group)[Sym("ELECTRON_TEMPERATURE")] - ->cell_dat.device_ptr(); - auto k_n = (*this->particle_group)[Sym("ELECTRON_DENSITY")] - ->cell_dat.device_ptr(); + const auto k_TeV = + (*this->particle_group)[Sym("ELECTRON_TEMPERATURE")] + ->cell_dat.device_ptr(); + const auto k_n = (*this->particle_group)[Sym("ELECTRON_DENSITY")] + ->cell_dat.device_ptr(); // Unit conversion factors - double k_T_to_eV = this->T_to_eV; - double k_n_scale_fac = this->n_to_SI; - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), - [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - k_TeV[cellx][0][layerx] *= k_T_to_eV; - k_n[cellx][0][layerx] *= k_n_scale_fac; - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + const double k_T_to_eV = this->T_to_eV; + const double k_n_scale_fac = this->n_to_SI; + + particle_loop( + "NeutralParticleSystem::evaluate_fields", this->particle_group, + [=](auto k_TeV, auto k_n) { + k_TeV.at(0) *= k_T_to_eV; + k_n.at(0) *= k_n_scale_fac; + }, + Access::write(Sym("ELECTRON_TEMPERATURE")), + Access::write(Sym("ELECTRON_DENSITY"))) + ->execute(); } /** @@ -724,86 +670,58 @@ class NeutralParticleSystem : public PartSysBase { auto t0 = profile_timestamp(); - auto k_ID = - (*this->particle_group)[Sym("PARTICLE_ID")]->cell_dat.device_ptr(); - auto k_TeV = (*this->particle_group)[Sym("ELECTRON_TEMPERATURE")] - ->cell_dat.device_ptr(); - auto k_n = (*this->particle_group)[Sym("ELECTRON_DENSITY")] - ->cell_dat.device_ptr(); - auto k_SD = (*this->particle_group)[Sym("SOURCE_DENSITY")] - ->cell_dat.device_ptr(); - auto k_SE = (*this->particle_group)[Sym("SOURCE_ENERGY")] - ->cell_dat.device_ptr(); - auto k_SM = (*this->particle_group)[Sym("SOURCE_MOMENTUM")] - ->cell_dat.device_ptr(); - auto k_V = - (*this->particle_group)[Sym("VELOCITY")]->cell_dat.device_ptr(); - auto k_W = (*this->particle_group)[Sym("COMPUTATIONAL_WEIGHT")] - ->cell_dat.device_ptr(); - - const auto pl_iter_range = - this->particle_group->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = - this->particle_group->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = - this->particle_group->mpi_rank_dat->get_particle_loop_npart_cell(); - - sycl_target->profile_map.inc("NeutralParticleSystem", "Ionisation_Prepare", - 1, profile_elapsed(t0, profile_timestamp())); + particle_loop( + "NeutralParticleSystem::ionise", this->particle_group, + [=](auto k_ID, auto k_TeV, auto k_n, auto k_SD, auto k_SE, auto k_SM, + auto k_V, auto k_W) { + // get the temperatue in eV. TODO: ensure not unit conversion is + // required + const REAL TeV = k_TeV.at(0); + const REAL n_SI = k_n.at(0); + const REAL invratio = k_E_i / TeV; + const REAL rate = -k_rate_factor / (TeV * sycl::sqrt(TeV)) * + (expint_barry_approx(invratio) / invratio + + (k_b_i_expc_i / (invratio + k_c_i)) * + expint_barry_approx(invratio + k_c_i)); + const REAL weight = k_W.at(0); + // note that the rate will be a positive number, so minus sign + // here + REAL deltaweight = -rate * weight * k_dt_SI * n_SI; + + /* Check whether weight is about to drop below zero. + If so, flag particle for removal and adjust deltaweight. + These particles are removed after the project call. + */ + if ((weight + deltaweight) <= 0) { + k_ID.at(0) = k_remove_key; + deltaweight = -weight; + } - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - // get the temperatue in eV. TODO: ensure not unit conversion is - // required - const REAL TeV = k_TeV[cellx][0][layerx]; - const REAL n_SI = k_n[cellx][0][layerx]; - const REAL invratio = k_E_i / TeV; - const REAL rate = -k_rate_factor / (TeV * std::sqrt(TeV)) * - (expint_barry_approx(invratio) / invratio + - (k_b_i_expc_i / (invratio + k_c_i)) * - expint_barry_approx(invratio + k_c_i)); - const REAL weight = k_W[cellx][0][layerx]; - // note that the rate will be a positive number, so minus sign - // here - REAL deltaweight = -rate * weight * k_dt_SI * n_SI; - - /* Check whether weight is about to drop below zero. - If so, flag particle for removal and adjust deltaweight. - These particles are removed after the project call. - */ - if ((weight + deltaweight) <= 0) { - k_ID[cellx][0][layerx] = k_remove_key; - deltaweight = -weight; - } - - // Mutate the weight on the particle - k_W[cellx][0][layerx] += deltaweight; - // Set value for fluid density source (num / Nektar unit time) - k_SD[cellx][0][layerx] = -deltaweight * k_n_scale / k_dt; - - // Compute velocity along the SimpleSOL problem axis. - // (No momentum coupling in orthogonal dimensions) - const REAL v_s = k_V[cellx][0][layerx] * k_cos_theta + - k_V[cellx][1][layerx] * k_sin_theta; - - // Set value for fluid momentum density source - k_SM[cellx][0][layerx] = - k_SD[cellx][0][layerx] * v_s * k_cos_theta; - k_SM[cellx][1][layerx] = - k_SD[cellx][0][layerx] * v_s * k_sin_theta; - - // Set value for fluid energy source - k_SE[cellx][0][layerx] = k_SD[cellx][0][layerx] * v_s * v_s / 2; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + // Mutate the weight on the particle + k_W.at(0) += deltaweight; + // Set value for fluid density source (num / Nektar unit time) + k_SD.at(0) = -deltaweight * k_n_scale / k_dt; + + // Compute velocity along the SimpleSOL problem axis. + // (No momentum coupling in orthogonal dimensions) + const REAL v_s = k_V.at(0) * k_cos_theta + k_V.at(1) * k_sin_theta; + + // Set value for fluid momentum density source + k_SM.at(0) = k_SD.at(0) * v_s * k_cos_theta; + k_SM.at(1) = k_SD.at(0) * v_s * k_sin_theta; + + // Set value for fluid energy source + k_SE.at(0) = k_SD.at(0) * v_s * v_s * 0.5; + }, + Access::write(Sym("PARTICLE_ID")), + Access::read(Sym("ELECTRON_TEMPERATURE")), + Access::read(Sym("ELECTRON_DENSITY")), + Access::write(Sym("SOURCE_DENSITY")), + Access::write(Sym("SOURCE_ENERGY")), + Access::write(Sym("SOURCE_MOMENTUM")), + Access::read(Sym("VELOCITY")), + Access::write(Sym("COMPUTATIONAL_WEIGHT"))) + ->execute(); sycl_target->profile_map.inc("NeutralParticleSystem", "Ionisation_Execute", 1, profile_elapsed(t0, profile_timestamp())); diff --git a/test/integration/nektar_interface/test_function_projection_order.cpp b/test/integration/nektar_interface/test_function_projection_order.cpp index 0d8d5146..72f94b74 100644 --- a/test/integration/nektar_interface/test_function_projection_order.cpp +++ b/test/integration/nektar_interface/test_function_projection_order.cpp @@ -122,34 +122,21 @@ TEST(ParticleFunctionProjection, DisContScalarExpQuantity) { cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL two_over_sqrt_pi = 1.1283791670955126; const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] / ((REAL)N_total); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL exp_eval = - two_over_sqrt_pi * exp(-(4.0 * ((x) * (x) + (y) * (y)))); - k_Q[cellx][0][layerx] = exp_eval * reweight; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + A, + [=](auto k_P, auto k_Q) { + const REAL x = k_P.at(0); + const REAL y = k_P.at(1); + const REAL exp_eval = + two_over_sqrt_pi * sycl::exp(-(4.0 * ((x) * (x) + (y) * (y)))); + k_Q.at(0) = exp_eval * reweight; + }, + Access::read(Sym("P")), Access::write(Sym("Q"))) + ->execute(); // create projection object auto field_project = std::make_shared>( @@ -319,34 +306,21 @@ TEST(ParticleFunctionProjection, ContScalarExpQuantity) { cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL two_over_sqrt_pi = 1.1283791670955126; const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] / ((REAL)N_total); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL exp_eval = - two_over_sqrt_pi * exp(-(4.0 * ((x) * (x) + (y) * (y)))); - k_Q[cellx][0][layerx] = exp_eval * reweight; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + A, + [=](auto k_P, auto k_Q) { + const REAL x = k_P.at(0); + const REAL y = k_P.at(1); + const REAL exp_eval = + two_over_sqrt_pi * sycl::exp(-(4.0 * ((x) * (x) + (y) * (y)))); + k_Q.at(0) = exp_eval * reweight; + }, + Access::read(Sym("P")), Access::write(Sym("Q"))) + ->execute(); // create projection object auto field_project = std::make_shared>( @@ -518,37 +492,24 @@ TEST(ParticleFunctionProjection, ContScalarExpQuantityMultiple) { cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - auto k_Q2 = (*A)[Sym("Q2")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL two_over_sqrt_pi = 1.1283791670955126; const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] / ((REAL)N_total); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL exp_eval = - two_over_sqrt_pi * exp(-(4.0 * ((x) * (x) + (y) * (y)))); - k_Q[cellx][0][layerx] = exp_eval * reweight; - k_Q[cellx][1][layerx] = -1.0 * exp_eval * reweight; - k_Q2[cellx][1][layerx] = reweight; - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + A, + [=](auto k_P, auto k_Q, auto k_Q2) { + const REAL x = k_P.at(0); + const REAL y = k_P.at(1); + const REAL exp_eval = + two_over_sqrt_pi * sycl::exp(-(4.0 * ((x) * (x) + (y) * (y)))); + k_Q.at(0) = exp_eval * reweight; + k_Q.at(1) = -1.0 * exp_eval * reweight; + k_Q2.at(1) = reweight; + }, + Access::read(Sym("P")), Access::write(Sym("Q")), + Access::write(Sym("Q2"))) + ->execute(); auto cont_field_u = std::make_shared(session, graph, "u"); auto cont_field_v = std::make_shared(session, graph, "v"); diff --git a/test/integration/nektar_interface/test_function_projection_order_3d.cpp b/test/integration/nektar_interface/test_function_projection_order_3d.cpp index 426b67bd..d573d320 100644 --- a/test/integration/nektar_interface/test_function_projection_order_3d.cpp +++ b/test/integration/nektar_interface/test_function_projection_order_3d.cpp @@ -119,34 +119,18 @@ static inline void projection_wrapper_order_3d(std::string condtions_file_s, cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] * pbc.global_extent[2] / ((REAL)N_total); - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), - [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL z = k_P[cellx][2][layerx]; - - const REAL eval0 = func(x, y, z); - const REAL eval = reweight * eval0; - k_Q[cellx][0][layerx] = eval; - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); + particle_loop( + A, + [=](auto k_P, auto k_Q) { + const REAL eval0 = func(k_P.at(0), k_P.at(1), k_P.at(2)); + const REAL eval = reweight * eval0; + k_Q.at(0) = eval; + }, + Access::read(Sym("P")), Access::write(Sym("Q"))) + ->execute(); auto field = std::make_shared(session, graph, "u"); std::vector> fields = {field}; diff --git a/test/unit/nektar_interface/test_particle_function_projection.cpp b/test/unit/nektar_interface/test_particle_function_projection.cpp index f838ca4b..565f5ff4 100644 --- a/test/unit/nektar_interface/test_particle_function_projection.cpp +++ b/test/unit/nektar_interface/test_particle_function_projection.cpp @@ -122,53 +122,26 @@ TEST(ParticleFunctionProjection, BasisEvalCorrectnessCG) { cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - auto k_Q2 = (*A)[Sym("Q2")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL two_over_sqrt_pi = 1.1283791670955126; const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] / ((REAL)N_total); - BufferDeviceHost dh_local_sum(sycl_target, 1); - dh_local_sum.h_buffer.ptr[0] = 0.0; - dh_local_sum.host_to_device(); - auto k_local_sum = dh_local_sum.d_buffer.ptr; - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL exp_eval = - two_over_sqrt_pi * exp(-(4.0 * ((x) * (x) + (y) * (y)))); - k_Q[cellx][0][layerx] = exp_eval * reweight; - k_Q[cellx][1][layerx] = -1.0 * exp_eval * reweight; - k_Q2[cellx][1][layerx] = reweight; - - sycl::atomic_ref - energy_atomic(k_local_sum[0]); - - energy_atomic.fetch_add(exp_eval * reweight); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - - dh_local_sum.device_to_host(); - const double local_sum = dh_local_sum.h_buffer.ptr[0]; - double global_sum; - MPICHK(MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, - sycl_target->comm_pair.comm_parent)); + auto ga_sum = std::make_shared>(sycl_target, 1, 0.0); + particle_loop( + A, + [=](auto k_ga_sum, auto k_P, auto k_Q, auto k_Q2) { + const REAL x = k_P.at(0); + const REAL y = k_P.at(1); + const REAL exp_eval = + two_over_sqrt_pi * sycl::exp(-(4.0 * ((x) * (x) + (y) * (y)))); + k_Q.at(0) = exp_eval * reweight; + k_Q.at(1) = -1.0 * exp_eval * reweight; + k_Q2.at(1) = reweight; + k_ga_sum.add(0, exp_eval * reweight); + }, + Access::add(ga_sum), Access::read(Sym("P")), + Access::write(Sym("Q")), Access::write(Sym("Q2"))) + ->execute(); auto cont_field_u = std::make_shared(session, graph, "u"); auto cont_field_v = std::make_shared(session, graph, "v"); @@ -202,6 +175,7 @@ TEST(ParticleFunctionProjection, BasisEvalCorrectnessCG) { } const double integral = cont_field_u->Integral(cont_field_u->GetPhys()); + auto global_sum = ga_sum->get().at(0); EXPECT_NEAR(global_sum, integral, 0.005); mesh->free(); @@ -301,53 +275,26 @@ TEST(ParticleFunctionProjection, BasisEvalCorrectnessDG) { cell_id_translation->execute(); A->cell_move(); - const auto k_P = (*A)[Sym("P")]->cell_dat.device_ptr(); - auto k_Q = (*A)[Sym("Q")]->cell_dat.device_ptr(); - auto k_Q2 = (*A)[Sym("Q2")]->cell_dat.device_ptr(); - - const auto pl_iter_range = A->mpi_rank_dat->get_particle_loop_iter_range(); - const auto pl_stride = A->mpi_rank_dat->get_particle_loop_cell_stride(); - const auto pl_npart_cell = A->mpi_rank_dat->get_particle_loop_npart_cell(); const REAL two_over_sqrt_pi = 1.1283791670955126; const REAL reweight = pbc.global_extent[0] * pbc.global_extent[1] / ((REAL)N_total); - BufferDeviceHost dh_local_sum(sycl_target, 1); - dh_local_sum.h_buffer.ptr[0] = 0.0; - dh_local_sum.host_to_device(); - auto k_local_sum = dh_local_sum.d_buffer.ptr; - - sycl_target->queue - .submit([&](sycl::handler &cgh) { - cgh.parallel_for<>(sycl::range<1>(pl_iter_range), [=](sycl::id<1> idx) { - NESO_PARTICLES_KERNEL_START - const INT cellx = NESO_PARTICLES_KERNEL_CELL; - const INT layerx = NESO_PARTICLES_KERNEL_LAYER; - - const REAL x = k_P[cellx][0][layerx]; - const REAL y = k_P[cellx][1][layerx]; - const REAL exp_eval = - two_over_sqrt_pi * exp(-(4.0 * ((x) * (x) + (y) * (y)))); - k_Q[cellx][0][layerx] = exp_eval * reweight; - k_Q[cellx][1][layerx] = -1.0 * exp_eval * reweight; - k_Q2[cellx][1][layerx] = reweight; - - sycl::atomic_ref - energy_atomic(k_local_sum[0]); - - energy_atomic.fetch_add(exp_eval * reweight); - - NESO_PARTICLES_KERNEL_END - }); - }) - .wait_and_throw(); - - dh_local_sum.device_to_host(); - const double local_sum = dh_local_sum.h_buffer.ptr[0]; - double global_sum; - MPICHK(MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, - sycl_target->comm_pair.comm_parent)); + auto ga_sum = std::make_shared>(sycl_target, 1, 0.0); + particle_loop( + A, + [=](auto k_ga_sum, auto k_P, auto k_Q, auto k_Q2) { + const REAL x = k_P.at(0); + const REAL y = k_P.at(1); + const REAL exp_eval = + two_over_sqrt_pi * sycl::exp(-(4.0 * ((x) * (x) + (y) * (y)))); + k_Q.at(0) = exp_eval * reweight; + k_Q.at(1) = -1.0 * exp_eval * reweight; + k_Q2.at(1) = reweight; + k_ga_sum.add(0, exp_eval * reweight); + }, + Access::add(ga_sum), Access::read(Sym("P")), + Access::write(Sym("Q")), Access::write(Sym("Q2"))) + ->execute(); auto dis_cont_field_u = std::make_shared(session, graph, "u"); auto dis_cont_field_v = std::make_shared(session, graph, "v"); @@ -382,6 +329,8 @@ TEST(ParticleFunctionProjection, BasisEvalCorrectnessDG) { const double integral = dis_cont_field_u->Integral(dis_cont_field_u->GetPhys()); + + auto global_sum = ga_sum->get().at(0); EXPECT_NEAR(global_sum, integral, 0.005); mesh->free();