diff --git a/src/axom/core/utilities/BitUtilities.hpp b/src/axom/core/utilities/BitUtilities.hpp index 2ed603983f..e75b5f23c2 100644 --- a/src/axom/core/utilities/BitUtilities.hpp +++ b/src/axom/core/utilities/BitUtilities.hpp @@ -19,12 +19,16 @@ #include "axom/core/Types.hpp" // CUDA intrinsics: https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html -// TODO: Support HIP intrinsics (https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html) +// HIP intrinsics: https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html // Check for and setup defines for platform-specific intrinsics // Note: `__GNUC__` is defined for the gnu, clang and intel compilers #if defined(AXOM_USE_CUDA) // Intrinsics included implicitly + +#elif defined(AXOM_USE_HIP) + #include + #elif defined(_WIN64) && (_MSC_VER >= 1600) #define _AXOM_CORE_USE_INTRINSICS_MSVC #include @@ -89,6 +93,8 @@ AXOM_HOST_DEVICE inline int countr_zero(std::uint64_t word) noexcept /* clang-format off */ #if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA) return word != std::uint64_t(0) ? __ffsll(word) - 1 : 64; +#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP) + return word != std::uint64_t(0) ? __ffsll(static_cast(word)) - 1 : 64; #elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC) unsigned long cnt; return _BitScanForward64(&cnt, word) ? cnt : 64; @@ -127,6 +133,9 @@ AXOM_HOST_DEVICE inline int popcount(std::uint64_t word) noexcept #if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA) // Use CUDA intrinsic for popcount return __popcll(word); +#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP) + // Use HIP intrinsic for popcount + return __popcll(word); #elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC) return __popcnt64(word); #elif defined(_AXOM_CORE_USE_INTRINSICS_GCC) || defined(_AXOM_CORE_USE_INTRINSICS_PPC) @@ -166,6 +175,9 @@ AXOM_HOST_DEVICE inline std::int32_t countl_zero(std::int32_t word) noexcept #if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA) // Use CUDA intrinsic for count leading zeros return __clz(word); +#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP) + // Use HIP intrinsic for count leading zeros + return __clz(word); #elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC) unsigned long cnt; return _BitScanReverse(&cnt, word) ? 31 - cnt : 32; diff --git a/src/axom/quest/examples/quest_candidates_example.cpp b/src/axom/quest/examples/quest_candidates_example.cpp index 932fe5a494..2ec37b59af 100644 --- a/src/axom/quest/examples/quest_candidates_example.cpp +++ b/src/axom/quest/examples/quest_candidates_example.cpp @@ -248,9 +248,21 @@ struct HexMesh BoundingBox m_meshBoundingBox; }; +template HexMesh loadBlueprintHexMesh(const std::string& mesh_path, bool verboseOutput = false) { + using HexArray = axom::Array; + using BBoxArray = axom::Array; + constexpr bool on_device = axom::execution_space::onDevice(); + + // Get ids of necessary allocators + const int host_allocator = + axom::getUmpireResourceAllocatorID(umpire::resource::Host); + const int kernel_allocator = on_device + ? axom::getUmpireResourceAllocatorID(umpire::resource::Device) + : axom::execution_space::allocatorID(); + HexMesh hexMesh; // Load Blueprint mesh into Conduit node @@ -292,37 +304,83 @@ HexMesh loadBlueprintHexMesh(const std::string& mesh_path, } // extract hexes into an axom::Array - int* connectivity = n_load[0]["topologies/topo/elements/connectivity"].value(); + auto x_vals_h = + axom::ArrayView(n_load[0]["coordsets/coords/values/x"].value(), + num_nodes); + auto y_vals_h = + axom::ArrayView(n_load[0]["coordsets/coords/values/y"].value(), + num_nodes); + auto z_vals_h = + axom::ArrayView(n_load[0]["coordsets/coords/values/z"].value(), + num_nodes); + + // Move xyz values onto device + axom::Array x_vals_d = on_device + ? axom::Array(x_vals_h, kernel_allocator) + : axom::Array(); + auto x_vals_view = on_device ? x_vals_d.view() : x_vals_h; + + axom::Array y_vals_d = on_device + ? axom::Array(y_vals_h, kernel_allocator) + : axom::Array(); + auto y_vals_view = on_device ? y_vals_d.view() : y_vals_h; + + axom::Array z_vals_d = on_device + ? axom::Array(z_vals_h, kernel_allocator) + : axom::Array(); + auto z_vals_view = on_device ? z_vals_d.view() : z_vals_h; + + // Move connectivity information onto device + auto connectivity_h = axom::ArrayView( + n_load[0]["topologies/topo/elements/connectivity"].value(), + connectivity_size); + + axom::Array connectivity_d = on_device + ? axom::Array(connectivity_h, kernel_allocator) + : axom::Array(); + auto connectivity_view = on_device ? connectivity_d.view() : connectivity_h; + + // Initialize hex elements and bounding boxes + const int numCells = connectivity_size / HEX_OFFSET; - double* x_vals = n_load[0]["coordsets/coords/values/x"].value(); - double* y_vals = n_load[0]["coordsets/coords/values/y"].value(); - double* z_vals = n_load[0]["coordsets/coords/values/z"].value(); + hexMesh.m_hexes = HexArray(numCells, numCells, kernel_allocator); + auto m_hexes_v = (hexMesh.m_hexes).view(); - const int numCells = connectivity_size / HEX_OFFSET; - hexMesh.m_hexes.reserve(numCells); - HexMesh::Hexahedron hex; - axom::Array hexPoints(HEX_OFFSET); + hexMesh.m_hexBoundingBoxes = BBoxArray(numCells, numCells, kernel_allocator); + auto m_hexBoundingBoxes_v = (hexMesh.m_hexBoundingBoxes).view(); - for(int i = 0; i < numCells; ++i) - { - for(int j = 0; j < HEX_OFFSET; j++) - { - int offset = i * HEX_OFFSET; - hexPoints[j] = HexMesh::Point({x_vals[connectivity[offset + j]], - y_vals[connectivity[offset + j]], - z_vals[connectivity[offset + j]]}); - } - hex = HexMesh::Hexahedron(hexPoints); - hexMesh.m_hexes.emplace_back(hex); - } + axom::for_all( + numCells, + AXOM_LAMBDA(axom::IndexType icell) { + HexMesh::Hexahedron hex; + HexMesh::Point hexPoints[HEX_OFFSET]; + for(int j = 0; j < HEX_OFFSET; j++) + { + int offset = icell * HEX_OFFSET; + hexPoints[j] = + HexMesh::Point({x_vals_view[connectivity_view[offset + j]], + y_vals_view[connectivity_view[offset + j]], + z_vals_view[connectivity_view[offset + j]]}); + } + hex = HexMesh::Hexahedron(hexPoints[0], + hexPoints[1], + hexPoints[2], + hexPoints[3], + hexPoints[4], + hexPoints[5], + hexPoints[6], + hexPoints[7]); + m_hexes_v[icell] = hex; + m_hexBoundingBoxes_v[icell] = axom::primal::compute_bounding_box(hex); + }); - // compute and store hex bounding boxes and mesh bounding box - hexMesh.m_hexBoundingBoxes.reserve(numCells); - for(const auto& hex : hexMesh.hexes()) + // Initialize mesh's bounding box on the host + BBoxArray hexBoundingBoxes_h = on_device + ? BBoxArray(hexMesh.m_hexBoundingBoxes, host_allocator) + : hexMesh.m_hexBoundingBoxes; + for(const auto& hexbb : hexBoundingBoxes_h) { - hexMesh.m_hexBoundingBoxes.emplace_back( - axom::primal::compute_bounding_box(hex)); - hexMesh.m_meshBoundingBox.addBox(hexMesh.m_hexBoundingBoxes.back()); + hexMesh.m_meshBoundingBox.addBox(hexbb); } SLIC_INFO( @@ -336,21 +394,21 @@ HexMesh loadBlueprintHexMesh(const std::string& mesh_path, // Append mesh nodes for(int i = 0; i < num_nodes; i++) { - mesh->appendNode(x_vals[i], y_vals[i], z_vals[i]); + mesh->appendNode(x_vals_h[i], y_vals_h[i], z_vals_h[i]); } // Append mesh cells for(int i = 0; i < numCells; i++) { const axom::IndexType cell[] = { - connectivity[i * HEX_OFFSET], - connectivity[(i * HEX_OFFSET) + 1], - connectivity[(i * HEX_OFFSET) + 2], - connectivity[(i * HEX_OFFSET) + 3], - connectivity[(i * HEX_OFFSET) + 4], - connectivity[(i * HEX_OFFSET) + 5], - connectivity[(i * HEX_OFFSET) + 6], - connectivity[(i * HEX_OFFSET) + 7], + connectivity_h[i * HEX_OFFSET], + connectivity_h[(i * HEX_OFFSET) + 1], + connectivity_h[(i * HEX_OFFSET) + 2], + connectivity_h[(i * HEX_OFFSET) + 3], + connectivity_h[(i * HEX_OFFSET) + 4], + connectivity_h[(i * HEX_OFFSET) + 5], + connectivity_h[(i * HEX_OFFSET) + 6], + connectivity_h[(i * HEX_OFFSET) + 7], }; mesh->appendCell(cell); @@ -382,8 +440,6 @@ axom::Array findCandidatesBVH(const HexMesh& insertMesh, SLIC_INFO("Running BVH candidates algorithm in execution Space: " << axom::execution_space::name()); - using HexArray = axom::Array; - using BBoxArray = axom::Array; using IndexArray = axom::Array; constexpr bool on_device = axom::execution_space::onDevice(); @@ -396,31 +452,9 @@ axom::Array findCandidatesBVH(const HexMesh& insertMesh, ? axom::getUmpireResourceAllocatorID(umpire::resource::Device) : axom::execution_space::allocatorID(); - // Copy the insert-BVH hexes to the device, if necessary - // Either way, insert_hexes_v will be a view w/ data in the correct space - auto& insert_hexes_h = insertMesh.hexes(); - HexArray insert_hexes_d = - on_device ? HexArray(insert_hexes_h, kernel_allocator) : HexArray(); - - // Copy the insert-BVH bboxes to the device, if necessary - // Either way, insert_bbox_v will be a view w/ data in the correct space - auto& insert_bbox_h = insertMesh.hexBoundingBoxes(); - BBoxArray insert_bbox_d = - on_device ? BBoxArray(insert_bbox_h, kernel_allocator) : BBoxArray(); - auto insert_bbox_v = on_device ? insert_bbox_d.view() : insert_bbox_h.view(); - - // Copy the query-BVH hexes to the device, if necessary - // Either way, query_hexes_v will be a view w/ data in the correct space - auto& query_hexes_h = queryMesh.hexes(); - HexArray query_hexes_d = - on_device ? HexArray(query_hexes_h, kernel_allocator) : HexArray(); - - // Copy the query-BVH bboxes to the device, if necessary - // Either way, bbox_v will be a view w/ data in the correct space - auto& query_bbox_h = queryMesh.hexBoundingBoxes(); - BBoxArray query_bbox_d = - on_device ? BBoxArray(query_bbox_h, kernel_allocator) : BBoxArray(); - auto query_bbox_v = on_device ? query_bbox_d.view() : query_bbox_h.view(); + // Get the insert and query bounding boxes + auto insert_bbox_v = (insertMesh.hexBoundingBoxes()).view(); + auto query_bbox_v = (queryMesh.hexBoundingBoxes()).view(); // Initialize a BVH tree over the insert mesh bounding boxes axom::spin::BVH<3, ExecSpace, double> bvh; @@ -527,8 +561,6 @@ axom::Array findCandidatesImplicit(const HexMesh& insertMesh, SLIC_INFO("Running Implicit Grid candidates algorithm in execution Space: " << axom::execution_space::name()); - using HexArray = axom::Array; - using BBoxArray = axom::Array; using IndexArray = axom::Array; constexpr bool on_device = axom::execution_space::onDevice(); @@ -539,35 +571,13 @@ axom::Array findCandidatesImplicit(const HexMesh& insertMesh, ? axom::getUmpireResourceAllocatorID(umpire::resource::Device) : axom::execution_space::allocatorID(); - // Copy the insert hexes to the device, if necessary - // Either way, insert_hexes_v will be a view w/ data in the correct space - auto& insert_hexes_h = insertMesh.hexes(); - HexArray insert_hexes_d = - on_device ? HexArray(insert_hexes_h, kernel_allocator) : HexArray(); - - // Copy the insert bboxes to the device, if necessary - // Either way, insert_bbox_v will be a view w/ data in the correct space - auto& insert_bbox_h = insertMesh.hexBoundingBoxes(); - BBoxArray insert_bbox_d = - on_device ? BBoxArray(insert_bbox_h, kernel_allocator) : BBoxArray(); - auto insert_bbox_v = on_device ? insert_bbox_d.view() : insert_bbox_h.view(); + // Get the insert and query bounding boxes + auto insert_bbox_v = (insertMesh.hexBoundingBoxes()).view(); + auto query_bbox_v = (queryMesh.hexBoundingBoxes()).view(); // Bounding box of entire insert mesh HexMesh::BoundingBox insert_mesh_bbox_h = insertMesh.meshBoundingBox(); - // Copy the query hexes to the device, if necessary - // Either way, query_hexes_v will be a view w/ data in the correct space - auto& query_hexes_h = queryMesh.hexes(); - HexArray query_hexes_d = - on_device ? HexArray(query_hexes_h, kernel_allocator) : HexArray(); - - // Copy the query bboxes to the device, if necessary - // Either way, bbox_v will be a view w/ data in the correct space - auto& query_bbox_h = queryMesh.hexBoundingBoxes(); - BBoxArray query_bbox_d = - on_device ? BBoxArray(query_bbox_h, kernel_allocator) : BBoxArray(); - auto query_bbox_v = on_device ? query_bbox_d.view() : query_bbox_h.view(); - // If given resolution is less than one, use the cube root of the // number of hexes if(resolution < 1) @@ -749,15 +759,65 @@ int main(int argc, char** argv) SLIC_INFO(axom::fmt::format("Reading Blueprint file to insert: '{}'...\n", params.mesh_file_first)); - HexMesh insert_mesh = - loadBlueprintHexMesh(params.mesh_file_first, params.isVerbose()); + HexMesh insert_mesh; + + switch(params.policy) + { +#ifdef AXOM_RUNTIME_POLICY_USE_OPENMP + case RuntimePolicy::omp: + insert_mesh = + loadBlueprintHexMesh(params.mesh_file_first, params.isVerbose()); + break; +#endif +#ifdef AXOM_RUNTIME_POLICY_USE_CUDA + case RuntimePolicy::cuda: + insert_mesh = loadBlueprintHexMesh(params.mesh_file_first, + params.isVerbose()); + break; +#endif +#ifdef AXOM_RUNTIME_POLICY_USE_HIP + case RuntimePolicy::hip: + insert_mesh = + loadBlueprintHexMesh(params.mesh_file_first, params.isVerbose()); + break; +#endif + default: // RuntimePolicy::seq + insert_mesh = + loadBlueprintHexMesh(params.mesh_file_first, params.isVerbose()); + break; + } // Load Blueprint mesh for querying spatial index SLIC_INFO(axom::fmt::format("Reading Blueprint file to query: '{}'...\n", params.mesh_file_second)); - HexMesh query_mesh = - loadBlueprintHexMesh(params.mesh_file_second, params.isVerbose()); + HexMesh query_mesh; + + switch(params.policy) + { +#ifdef AXOM_RUNTIME_POLICY_USE_OPENMP + case RuntimePolicy::omp: + query_mesh = loadBlueprintHexMesh(params.mesh_file_second, + params.isVerbose()); + break; +#endif +#ifdef AXOM_RUNTIME_POLICY_USE_CUDA + case RuntimePolicy::cuda: + query_mesh = loadBlueprintHexMesh(params.mesh_file_second, + params.isVerbose()); + break; +#endif +#ifdef AXOM_RUNTIME_POLICY_USE_HIP + case RuntimePolicy::hip: + query_mesh = loadBlueprintHexMesh(params.mesh_file_second, + params.isVerbose()); + break; +#endif + default: // RuntimePolicy::seq + query_mesh = loadBlueprintHexMesh(params.mesh_file_second, + params.isVerbose()); + break; + } timer.stop();