diff --git a/FastCaloSimAnalyzer/CMakeLists.txt b/FastCaloSimAnalyzer/CMakeLists.txt index 3a276d2..b7c9427 100644 --- a/FastCaloSimAnalyzer/CMakeLists.txt +++ b/FastCaloSimAnalyzer/CMakeLists.txt @@ -23,7 +23,6 @@ set(USE_ALPAKA OFF CACHE BOOL "Use alpaka") set(USE_HIP OFF CACHE BOOL "Use HIP") set(HIP_TARGET "AMD" CACHE STRING "HIP backend. must be either AMD or NVIDIA") - if ( USE_STDPAR ) if ( ${STDPAR_TARGET} STREQUAL "cpu" ) if ( NOT RNDGEN_CPU ) @@ -46,11 +45,13 @@ elseif( USE_KOKKOS ) elseif(USE_ALPAKA) find_package(alpaka REQUIRED) elseif(USE_HIP) - find_package(HIP REQUIRED) if ( ${HIP_TARGET} STREQUAL "NVIDIA" ) + find_package(HIP) if ( NOT RNDGEN_CPU ) message(FATAL_ERROR "when HIP_TARGET=NVIDIA, RNDGEN_CPU must be ON") endif() + else() + find_package(HIP REQUIRED) endif() endif() diff --git a/FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/Rand4Hits.h b/FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/Rand4Hits.h index ab706ea..48c9454 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/Rand4Hits.h +++ b/FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/Rand4Hits.h @@ -170,6 +170,7 @@ class Rand4Hits { unsigned int m_current_hits; void *m_gen{ nullptr }; bool m_useCPU{ false }; + unsigned long long m_seed{0}; // patch in some GPU pointers for cudaMalloc CELL_ENE_T *m_cells_energy{ 0 }; diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt b/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt index bfd1e55..37a6bbe 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt @@ -20,30 +20,32 @@ endif() if(USE_HIP) set(FIND_CUDA OFF) endif() - + +if(ENABLE_OMPGPU) + string(STRIP "${CMAKE_CXX_FLAGS}" OMP_OFFLOAD_TARGET) # quoted: an empty CMAKE_CXX_FLAGS must still be passed as one argument + string(FIND "${CMAKE_CXX_FLAGS}" "gfx" OMP_OFFLOAD_TARGET_AMD) + string(FIND "${CMAKE_CXX_FLAGS}" "sm_" OMP_OFFLOAD_TARGET_NVIDIA) + if(OMP_OFFLOAD_TARGET_NVIDIA GREATER 0) + message(STATUS "OMP_OFFLOAD_TARGET NVIDIA" ) + elseif(OMP_OFFLOAD_TARGET_AMD GREATER 0) + set(FIND_CUDA OFF) + 
message(STATUS "OMP_OFFLOAD_TARGET AMD" ) + else() + if("$ENV{OMP_TARGET_OFFLOAD}" MATCHES "disabled") # quoted: the environment variable may be unset + set(FIND_CUDA OFF) + else() + message(FATAL_ERROR "!! Please specify OpenMP offload target via -DCMAKE_CXX_FLAGS=\"--offload-arch=gfx<>|sm_<>\" or set environment var OMP_TARGET_OFFLOAD=disabled") + endif() + endif() +endif() + if(FIND_CUDA) find_package(CUDA REQUIRED) enable_language( CUDA ) set(CUDA_LIBRARIES PUBLIC ${CUDA_LIBRARIES}) endif() -# Add OpenMP -if(ENABLE_OMPGPU) - find_package(OpenMP) - if(OPENMP_FOUND) - set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -fopenmp-cuda-mode") - set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -foffload-lto") - set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -fopenmp-assume-no-thread-state") - set(OpenMP_OPT_RMRKS "-Rpass=openmp-opt -Rpass-analysis=openmp-opt -Rpass-missed=openmp-opt " ) - set(OpenMP_FLAGS "-fopenmp --offload-arch=sm_86 -lomp") ## nvidia - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_FLAGS} ${OpenMP_OPT_FLAGS} ${OpenMP_OPT_RMRKS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_FLAGS} ${OpenMP_OPT_FLAGS} ${OpenMP_OPT_RMRKS}") - else() - message(WARNING "Configuring with OpenMP GPU but OpenMP is not found!") - endif() -endif() - # Sources if(USE_STDPAR) @@ -51,6 +53,19 @@ if(USE_STDPAR) elseif(USE_KOKKOS) set(FastCaloGpu_Srcs GeoLoadGpu.cxx KernelWrapper_kk.cxx DEV_BigMem_kk.cxx) elseif(ENABLE_OMPGPU) + # Add OpenMP + find_package(OpenMP) + if(OPENMP_FOUND) + set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -fopenmp-cuda-mode") + set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -foffload-lto") + set(OpenMP_OPT_FLAGS "${OpenMP_OPT_FLAGS} -fopenmp-assume-no-thread-state") + set(OpenMP_OPT_RMRKS "-Rpass=openmp-opt -Rpass-analysis=openmp-opt -Rpass-missed=openmp-opt " ) + set(OpenMP_FLAGS "-fopenmp -lomp -lomptarget") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_FLAGS} ${OpenMP_OPT_FLAGS} ${OpenMP_OPT_RMRKS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_FLAGS} ${OpenMP_OPT_FLAGS} ${OpenMP_OPT_RMRKS}") + else() + message(WARNING 
"Configuring with OpenMP GPU but OpenMP is not found!") + endif() set(FastCaloGpu_Srcs KernelWrapper_omp.cxx gpuQ.cxx CaloGpuGeneral.cxx DEV_BigMem_omp.cxx ) elseif(USE_ALPAKA) set(FastCaloGpu_Srcs CaloGpuGeneral.cxx KernelWrapper_al.cxx Rand4Hits_al.cxx ) @@ -66,10 +81,10 @@ elseif(USE_HIP) include_directories( ${ROCM_PATH}/hip/include ) if ( ${HIP_TARGET} STREQUAL "AMD" ) - message(STATUS " Using AMD HIP backend") + message(STATUS "Using AMD HIP backend") set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx906;gfx908") elseif( ${HIP_TARGET} STREQUAL "NVIDIA" ) - message(STATUS " Using NVIDIA HIP backend") + message(STATUS "Using NVIDIA HIP backend") find_package(CUDAToolkit REQUIRED) set(CMAKE_CUDA_ARCHITECTURES "70;75;80;86") set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx906;gfx908") @@ -120,7 +135,20 @@ elseif(USE_HIP) target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -DHIP_TARGET_NVIDIA) target_link_libraries(${FastCaloGpu_LIB} PUBLIC CUDA::cudart) endif() - +elseif(ENABLE_OMPGPU) + if(OMP_OFFLOAD_TARGET_NVIDIA GREATER 0) + target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -DOMP_OFFLOAD_TARGET_NVIDIA) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) + endif() + if(OMP_OFFLOAD_TARGET_AMD GREATER 0) + target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -D__HIP_PLATFORM_AMD__) + target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -DOMP_OFFLOAD_TARGET_AMD) + find_package(HIP REQUIRED) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/rocrand/include ) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/include ) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/librocrand.so) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/libamdhip64.so) + endif() else() target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) endif() @@ -151,14 +179,58 @@ endif() if(RNDGEN_CPU) message(STATUS "Will generate random numbers on 
CPU") target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DRNDGEN_CPU ) - # TODO Link a portable RNG library -else() - if ( ${HIP_TARGET} STREQUAL "AMD" ) - target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/hiprand/include ) +elseif(RNDGEN_OMP) + message(STATUS "Will generate random numbers using Portable OpenMP RNG Library") + target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DRNDGEN_OMP ) + if(NOT DEFINED OMPRNG_HOME) + include(FetchContent) + set(FETCHCONTENT_QUIET OFF) + FetchContent_Declare( + Portable-OpenMP-RNG + GIT_REPOSITORY https://github.com/GKNB/test-benchmark-OpenMP-RNG + GIT_TAG origin/main + ) + FetchContent_Populate(Portable-OpenMP-RNG) + include_directories("${portable-openmp-rng_SOURCE_DIR}") # set by FetchContent_Populate; honours FETCHCONTENT_BASE_DIR overrides + else() + include_directories(${OMPRNG_HOME}) + include_directories(${OMPRNG_HOME}/implementation) + endif() + if(ARCH_CUDA) + target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DARCH_CUDA ) + target_link_libraries(${FastCaloGpu_LIB} PRIVATE ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) + elseif(ARCH_HIP) + target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DARCH_HIP ) target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/rocrand/include ) - target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/libhiprand.so) - elseif( ${HIP_TARGET} STREQUAL "NVIDIA" ) - target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) + elseif(USE_RANDOM123) + if(NOT DEFINED RANDOM123_HOME) + include(FetchContent) + set(FETCHCONTENT_QUIET OFF) + FetchContent_Declare( + random123 + GIT_REPOSITORY https://github.com/DEShawResearch/random123 + GIT_TAG origin/main + ) + FetchContent_Populate(random123) + include_directories("${random123_SOURCE_DIR}/include") # set by FetchContent_Populate; honours FETCHCONTENT_BASE_DIR overrides + else() + include_directories(${RANDOM123_HOME}/include/) + endif() + target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DUSE_RANDOM123 ) + endif() +else() + if(USE_HIP) + if ( ${HIP_TARGET} 
STREQUAL "AMD" ) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/hiprand/include ) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/rocrand/include ) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/libhiprand.so) + elseif( ${HIP_TARGET} STREQUAL "NVIDIA" ) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) + endif() + elseif(ENABLE_OMPGPU) + if("$ENV{OMP_TARGET_OFFLOAD}" MATCHES "disabled") # needs the leading $ to read the environment; a bare ENV{...} is a literal string + message(FATAL_ERROR "when OMP_TARGET_OFFLOAD disabled, RNDGEN_CPU must be ON") + endif() endif() endif() diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral_omp.cxx b/FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral_omp.cxx index 0a6a41c..e8aa457 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral_omp.cxx +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral_omp.cxx @@ -8,16 +8,12 @@ #include "Hit.h" #include "Rand4Hits.h" -#include "gpuQ.h" #include "Args.h" #include "DEV_BigMem.h" -// #include "OMP_BigMem.h" #include #include #include -#include -#include #include #include diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/GeoRegion.cxx b/FastCaloSimAnalyzer/FastCaloGpu/src/GeoRegion.cxx index 81befe1..434f306 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/GeoRegion.cxx +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/GeoRegion.cxx @@ -6,13 +6,13 @@ #include #include -#define PI 3.14159265358979323846 +#define PI_FCS 3.14159265358979323846 #define TWOPI 2 * 3.14159265358979323846 __HOSTDEV__ double Phi_mpi_pi(double x) { - while (x >= PI) + while (x >= PI_FCS) x -= TWOPI; - while (x < -PI) + while (x < -PI_FCS) x += TWOPI; return x; } diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_omp.cxx b/FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_omp.cxx index 00ea772..89db2ad 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_omp.cxx +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_omp.cxx @@ -1,22 +1,35 @@ /* Copyright (C) 2002-2021 
CERN for the benefit of the ATLAS collaboration */ -#include "gpuQ.h" #include "Rand4Hits.h" #include "DEV_BigMem.h" -#include -#include -#include - #include "GpuParams.h" #include "Rand4Hits_cpu.cxx" -#define CURAND_CALL( x ) \ - if ( ( x ) != CURAND_STATUS_SUCCESS ) { \ - printf( "Error at %s:%d\n", __FILE__, __LINE__ ); \ - exit( EXIT_FAILURE ); \ - } +#include +#ifdef RNDGEN_OMP +# include "openmp_rng.h" +#endif + +#ifdef OMP_OFFLOAD_TARGET_NVIDIA +# include "gpuQ.h" +# include +# include +# define CURAND_CALL( x ) \ + if ( ( x ) != CURAND_STATUS_SUCCESS ) { \ + printf( "Error at %s:%d\n", __FILE__, __LINE__ ); \ + exit( EXIT_FAILURE ); \ + } +#elif defined OMP_OFFLOAD_TARGET_AMD +# include "hip/hip_runtime.h" +# include +# define ROCRAND_CALL( x ) \ + if ((x) != ROCRAND_STATUS_SUCCESS) { \ + printf("Error at %s:%d\n", __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } +#endif void Rand4Hits::allocate_simulation( int maxbins, int maxhitct, unsigned long n_cells ) { @@ -67,12 +80,23 @@ Rand4Hits::~Rand4Hits() { << " lost: " << DEV_BigMem::bm_ptr->lost() << std::endl; delete DEV_BigMem::bm_ptr; } +#ifdef RNDGEN_OMP omp_target_free( m_rand_ptr, m_select_device ); +#endif if ( m_useCPU ) { destroyCPUGen(); } else { +#ifndef RNDGEN_OMP +#ifndef USE_RANDOM123 +#ifdef OMP_OFFLOAD_TARGET_NVIDIA CURAND_CALL( curandDestroyGenerator( *( (curandGenerator_t*)m_gen ) ) ); delete (curandGenerator_t*)m_gen; +#elif defined OMP_OFFLOAD_TARGET_AMD + ROCRAND_CALL(rocrand_destroy_generator( *( (rocrand_generator*)m_gen))); + delete (rocrand_generator *)m_gen; +#endif +#endif +#endif } }; @@ -84,7 +108,28 @@ void Rand4Hits::rd_regen() { std::cout << "ERROR: copy random numbers from cpu to gpu " << std::endl; } } else { +#ifdef RNDGEN_OMP + auto gen = generator_enum::xorwow; +# ifdef USE_RANDOM123 + float* f_r123 = (float*) malloc ( 3 * m_total_a_hits * sizeof( float ) ); + omp_get_rng_uniform_float(f_r123, 3 * m_total_a_hits, m_seed, gen); + if ( omp_target_memcpy( m_rand_ptr, 
f_r123, 3 * m_total_a_hits * sizeof( float ), m_offset, m_offset, m_select_device, + m_initial_device ) ) { + std::cout << "ERROR: copy random numbers from cpu to gpu " << std::endl; + } + free(f_r123); +# else + omp_get_rng_uniform_float(m_rand_ptr, 3 * m_total_a_hits, m_seed, gen); +# endif +#else +# ifndef RNDGEN_CPU +# ifdef OMP_OFFLOAD_TARGET_NVIDIA CURAND_CALL( curandGenerateUniform( *( (curandGenerator_t*)m_gen ), m_rand_ptr, 3 * m_total_a_hits ) ); +# elif defined OMP_OFFLOAD_TARGET_AMD + ROCRAND_CALL(rocrand_generate_uniform( *( (rocrand_generator*)m_gen), m_rand_ptr, 3 * m_total_a_hits)); +# endif +# endif +#endif } }; @@ -105,12 +150,42 @@ void Rand4Hits::create_gen( unsigned long long seed, size_t num, bool useCPU ) { std::cout << "ERROR: copy random numbers from cpu to gpu " << std::endl; } } else { +#ifdef RNDGEN_OMP + f = (float*)omp_target_alloc( num * sizeof( float ), m_select_device ); + auto gen = generator_enum::xorwow; + #ifdef USE_RANDOM123 + float* f_r123 = (float*) malloc ( num * sizeof( float ) ); + omp_get_rng_uniform_float(f_r123, num, seed, gen); + if ( omp_target_memcpy( f, f_r123, num * sizeof( float ), m_offset, m_offset, m_select_device, + m_initial_device ) ) { + std::cout << "ERROR: copy random numbers from cpu to gpu " << std::endl; + } + free(f_r123); + #else + omp_get_rng_uniform_float(f, num, seed, gen); + #endif + m_gen = (void*)gen; + // We need to save the seed for rd_regen + m_seed = seed; +#else + #ifndef RNDGEN_CPU + #ifdef OMP_OFFLOAD_TARGET_NVIDIA gpuQ( cudaMalloc( &f, num * sizeof( float ) ) ); curandGenerator_t* gen = new curandGenerator_t; CURAND_CALL( curandCreateGenerator( gen, CURAND_RNG_PSEUDO_DEFAULT ) ); CURAND_CALL( curandSetPseudoRandomGeneratorSeed( *gen, seed ) ); CURAND_CALL( curandGenerateUniform( *gen, f, num ) ); m_gen = (void*)gen; + #elif defined OMP_OFFLOAD_TARGET_AMD + hipMalloc(&f, num * sizeof(float)); + rocrand_generator* gen = new rocrand_generator; + ROCRAND_CALL(rocrand_create_generator(gen, 
ROCRAND_RNG_PSEUDO_DEFAULT)); + ROCRAND_CALL(rocrand_set_seed(*gen, seed)); + ROCRAND_CALL(rocrand_generate_uniform(*gen, f, num)); + m_gen = (void*)gen; + #endif + #endif +#endif } m_rand_ptr = f; diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/gpuQ.cxx b/FastCaloSimAnalyzer/FastCaloGpu/src/gpuQ.cxx index 526f532..eb99660 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/gpuQ.cxx +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/gpuQ.cxx @@ -3,6 +3,7 @@ */ #ifdef USE_OMPGPU +#ifdef OMP_OFFLOAD_TARGET_NVIDIA #include "gpuQ.h" #include @@ -13,6 +14,7 @@ void gpu_assert(cudaError_t code, const char *file, const int line) { exit(code); } } +#endif #else #include "gpuQ.cu" #endif diff --git a/README.md b/README.md index 36adcd0..f3669b8 100644 --- a/README.md +++ b/README.md @@ -193,7 +193,11 @@ export HIP_RUNTIME=cuda module load hip export FCS_DATAPATH=/pscratch/sd/a/atif/FastCaloSimInputs source /global/homes/a/atif/packages/root_install/bin/thisroot.sh -cmake ../FastCaloSimAnalyzer/ -DENABLE_XROOTD=Off -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=/global/homes/a/atif/FCS-GPU/scripts/hipcc_nvidia -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DENABLE_GPU=on -DUSE_HIP=on -DHIP_TARGET=NVIDIA -DCMAKE_LIBRARY_PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/math_libs/11.7/lib64/;/global/common/software/nersc/pe/rocm/5.5.1/hip/include/hip/" -DRNDGEN_CPU=on +cmake ../FastCaloSimAnalyzer/ -DENABLE_XROOTD=Off -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=/global/homes/a/atif/FCS-GPU/scripts/hipcc_nvidia \ + -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DENABLE_GPU=on -DUSE_HIP=on \ + -DHIP_TARGET=NVIDIA -DRNDGEN_CPU=on \ + -DCMAKE_LIBRARY_PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/math_libs/11.7/lib64/;/global/common/software/nersc/pe/rocm/5.5.1/hip/include/hip/" ``` ### alpaka @@ -210,7 +214,8 @@ export OMP_TARGET_OFFLOAD=mandatory cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on \ -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 \ 
-DCUDA_CUDART_LIBRARY=/usr/local/cuda/lib64/libcudart.so \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DCMAKE_CXX_FLAGS="--offload-arch=sm_70" ``` ## Build Instructions for Perlmutter @@ -219,7 +224,7 @@ module load clang-16.0.6-omp-nvptx module load cudatoolkit source /global/homes/a/atif/packages/root_install/bin/thisroot.sh export FCS_DATAPATH=/pscratch/sd/a/atif/FastCaloSimInputs -cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=off -DENABLE_OMPGPU=on -DCMAKE_CXX_COMPILER=clang++ -DINPUT_PATH="../../FastCaloSimInputs" -DCMAKE_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/math_libs/11.7/lib64/ +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=off -DENABLE_OMPGPU=on -DCMAKE_CXX_COMPILER=clang++ -DINPUT_PATH="../../FastCaloSimInputs" -DCMAKE_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/math_libs/11.7/lib64/ -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" ## Build Instructions for Cori diff --git a/scripts/fcs-llvm-amdgpu.Dockerfile b/scripts/fcs-llvm-amdgpu.Dockerfile new file mode 100644 index 0000000..e14b9b0 --- /dev/null +++ b/scripts/fcs-llvm-amdgpu.Dockerfile @@ -0,0 +1,26 @@ +FROM dingpf/fcs-rocm + +USER root + +RUN \ + cd /hep-mini-apps && \ + mkdir -p llvm-amdgpu && \ + git clone --depth 1 --branch llvmorg-19.1.0 https://github.com/llvm/llvm-project.git && \ + cd llvm-project && \ + mkdir -p build && \ + cd build && \ + cmake -G "Unix Makefiles" \ + -B /hep-mini-apps/llvm-project/build/ \ + -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;lldb;compiler-rt" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ + -DLLVM_ENABLE_RUNTIMES:STRING="openmp;offload" \ + -DCLANG_DEFAULT_OPENMP_RUNTIME:STRING=libomp \ + -DCMAKE_INSTALL_PREFIX=/hep-mini-apps/llvm-amdgpu \ + -DLLVM_TARGETS_TO_BUILD:STRING="X86;AMDGPU" \ + 
-DLIBOMPTARGET_DEVICE_ARCHITECTURES="gfx906;gfx908;gfx90a" \ + /hep-mini-apps/llvm-project/llvm && \ + make -j128 && \ + make install && \ + cd ../../ && \ + rm -rf llvm-project diff --git a/scripts/script_build_gr_all.sh b/scripts/script_build_gr_all.sh new file mode 100644 index 0000000..df983b0 --- /dev/null +++ b/scripts/script_build_gr_all.sh @@ -0,0 +1,389 @@ +# CPU Exalearn5 + +# CUDA ----------------- +## Nvidia -------------- +### CURAND Exalearn5 +### CPURNG Exalearn5 + +# OpenMP --------------- +## Nvidia -------------- +### CURAND Exalearn5 +### CPURNG Exalearn5 +### OMPRNG --------- +#### ARCH_CUDA Exalearn5 +#### RANDOM123 Exalearn5 +## AMD ----------------- +### ROCRAND Exalearn4 +### CPURNG Exalearn4 +### OMPRNG --------- +#### ARCH_HIP Exalearn4 +#### RANDOM123 Exalearn4 +## Multicore CPU -------- +### CPURNG Exalearn4 +### OMPRNG --------- +#### RANDOM123 Exalearn5 + +# HIP ------------------ +## Nvidia -------------- +### CURAND xxxxxxxxx +### CPURNG Perlmutter +## AMD ----------------- +### HIPRAND Exalearn4 +### CPURNG Exalearn4 + +# STDPAR --------------- +## Nvidia -------------- +### CURAND Exalearn5 +### CPURNG Exalearn5 +## Multicore ----------- +### CPURNG Exalearn5 +## CPU ----------------- +### CPURNG Exalearn5 + +# Alpaka --------------- +## Nvidia CUDA --------- +### CURAND Exalearn5 +### CPURNG Exalearn5 +## AMD HIP ------------- +### HIPRAND +### CPURNG + +# Kokkos --------------- +## Nvidia -------------- +### CURAND Exalearn5 +### CPURNG Exalearn5 + +# Edit this to exalearn4 or 5 accordingly +system="exalearn5" + +rm -rf build-exalearn4-* +rm -rf build-exalearn5-* + +if [ "$system" = "exalearn4" ]; then + source /global/home/users/fmohammad/packages/root-clang15/bin/thisroot.sh + export FCS_DATAPATH=/global/home/users/cgleggett/data/FastCaloSimInputs + module use /global/home/users/fmohammad/modulefiles/ + module load clang-18.0.0-gcc-8.5.0-omp-amdgcn +fi + +if [ "$system" = "exalearn5" ]; then + source 
/global/home/users/fmohammad/packages/root-clang15/bin/thisroot.sh + export FCS_DATAPATH=/global/home/users/cgleggett/data/FastCaloSimInputs + module use /global/home/users/fmohammad/modulefiles/ + #module load clang-15.0.6-gcc-8.5.0-omp-nvptx + module load clang-17.0.0-gcc-8.5.0-omp-nvptx +fi + +# # # # # # # # # # # # # # + +# CPU +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x CPU BUILD x-x-x-x-x" + mkdir -p build-exalearn5-cpu + cd build-exalearn5-cpu + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=off -DCMAKE_CXX_STANDARD=17 + make -j16 + echo "x-x-x-x-x CPU BUILD DONE! x-x-x-x-x" + cd .. +fi + +# # # # # # # # # # # # # # + +# CUDA +## Nvidia +### CURAND +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x CUDA CURAND BUILD x-x-x-x-x" + module load cuda/11.5 + mkdir -p build-exalearn5-cuda-curand + cd build-exalearn5-cuda-curand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DRNDGEN_CPU=Off -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CUDA_ARCHITECTURES=80 + make -j16 + echo "x-x-x-x-x CUDA CURAND BUILD DONE! x-x-x-x-x" + cd .. +fi +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x CUDA CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-cuda-cpurng + cd build-exalearn5-cuda-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DRNDGEN_CPU=On -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CUDA_ARCHITECTURES=80 + make -j16 + echo "x-x-x-x-x CUDA CPURNG BUILD DONE! x-x-x-x-x" + cd .. 
+fi + +# # # # # # # # # # # # # # + +# OpenMP +## Nvidia +### CURAND +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x OpenMP Nvidia CURAND BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn5-openmp-nv-curand + cd build-exalearn5-openmp-nv-curand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" + make -j16 + echo "x-x-x-x-x OpenMP Nvidia CURAND BUILD DONE! x-x-x-x-x" + cd .. +fi +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x OpenMP Nvidia CPURNG BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn5-openmp-nv-cpurng + cd build-exalearn5-openmp-nv-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" + make -j16 + echo "x-x-x-x-x OpenMP Nvidia CPURNG BUILD DONE x-x-x-x-x" + cd .. +fi +### Portable OMP RNG +#### ARCH_CUDA +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x OpenMP Nvidia PortableOMPRNG ARCH_CUDA BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn5-openmp-nv-omprng-archcuda + cd build-exalearn5-openmp-nv-omprng-archcuda +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DRNDGEN_OMP=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" -DARCH_CUDA=on + make -j16 + echo "x-x-x-x-x OpenMP Nvidia PortableOMPRNG ARCH_CUDA DONE x-x-x-x-x" + cd .. 
+fi +#### USE_RANDOM123 +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x OpenMP Nvidia PortableOMPRNG USE_RANDOM123 BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn5-openmp-nv-omprng-random123 + cd build-exalearn5-openmp-nv-omprng-random123 +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DRNDGEN_OMP=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" -DUSE_RANDOM123=on + make -j16 + echo "x-x-x-x-x OpenMP Nvidia PortableOMPRNG USE_RANDOM123 DONE x-x-x-x-x" + cd .. +fi + +# OpenMP +## AMD +### ROCRAND +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x OpenMP AMD ROCRAND BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + export ROCM_PATH=/opt/rocm/ + mkdir -p build-exalearn4-openmp-amd-rocrand + cd build-exalearn4-openmp-amd-rocrand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_FLAGS="--offload-arch=gfx908" + make -j32 + echo "x-x-x-x-x OpenMP AMD ROCRAND BUILD DONE x-x-x-x-x" + cd .. +fi +### CPURNG +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x OpenMP AMD CPURNG BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn4-openmp-amd-cpurng + cd build-exalearn4-openmp-amd-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_FLAGS="--offload-arch=gfx908" + make -j32 + echo "x-x-x-x-x OpenMP AMD CPURNG BUILD DONE x-x-x-x-x" + cd .. 
+fi +### Portable OMP RNG +#### ARCH_HIP +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x OpenMP AMD PortableOMPRNG ARCH_HIP BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn4-openmp-amd-omprng-archhip + cd build-exalearn4-openmp-amd-omprng-archhip +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DRNDGEN_OMP=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 -DCMAKE_CXX_FLAGS="--offload-arch=gfx908" -DARCH_HIP=on + make -j16 + echo "x-x-x-x-x OpenMP AMD PortableOMPRNG ARCH_HIP DONE x-x-x-x-x" + cd .. +fi +#### USE_RANDOM123 +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x OpenMP AMD PortableOMPRNG USE_RANDOM123 BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=mandatory + mkdir -p build-exalearn4-openmp-amd-omprng-random123 + cd build-exalearn4-openmp-amd-omprng-random123 +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DRNDGEN_OMP=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 -DCMAKE_CXX_FLAGS="--offload-arch=gfx908" -DUSE_RANDOM123=on + make -j16 + echo "x-x-x-x-x OpenMP AMD PortableOMPRNG USE_RANDOM123 DONE x-x-x-x-x" + cd .. +fi + + +## Multicore CPU +### CPURNG +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x OpenMP MULTICORE CPU CPURNG BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=disabled + mkdir -p build-exalearn4-openmp-multicorecpu-cpurng + cd build-exalearn4-openmp-multicorecpu-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_FLAGS="--offload-arch=gfx908" + make -j32 + echo "x-x-x-x-x OpenMP MULTICORE CPU CPURNG BUILD DONE x-x-x-x-x" + cd .. 
+fi +### Portable-OMP-RNG +#### USE_RANDOM123 +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x OpenMP MULTICORE CPU PortableOMPRNG USE_RANDOM123 BUILD x-x-x-x-x" + export OMP_TARGET_OFFLOAD=disabled + mkdir -p build-exalearn5-openmp-multicorecpu-omprng-random123 + cd build-exalearn5-openmp-multicorecpu-omprng-random123 +cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DENABLE_OMPGPU=on -DRNDGEN_CPU=Off -DRNDGEN_OMP=On -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_STANDARD=14 -DCMAKE_CXX_FLAGS="--offload-arch=sm_80" -DUSE_RANDOM123=on + make -j16 + echo "x-x-x-x-x OpenMP MULTICORE CPU PortableOMPRNG USE_RANDOM123 DONE x-x-x-x-x" + cd .. +fi + + +# # # # # # # # # # # # # # + +# HIP +## Nvidia +### CURAND +### CPURNG + +## HIP +## AMD +### HIPRAND +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x HIP HIPRAND BUILD x-x-x-x-x" + mkdir -p build-exalearn4-hip-amd-hiprand + cd build-exalearn4-hip-amd-hiprand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DENABLE_GPU=on -DUSE_HIP=on -DHIP_TARGET=AMD -DRNDGEN_CPU=Off + make -j32 + echo "x-x-x-x-x HIP HIPRAND BUILD DONE! x-x-x-x-x" + cd .. +fi +### CPURNG +if [ "$system" = "exalearn4" ]; then + echo "x-x-x-x-x HIP CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn4-hip-amd-cpurng + cd build-exalearn4-hip-amd-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DENABLE_GPU=on -DUSE_HIP=on -DHIP_TARGET=AMD -DRNDGEN_CPU=On + make -j32 + echo "x-x-x-x-x HIP CPURNG BUILD DONE! x-x-x-x-x" + cd .. 
+fi + +# # # # # # # # # # # # # # + +# STDPAR +## Nvidia +### CURAND +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x std::par Nvidia CURAND BUILD x-x-x-x-x" + module purge + module load root/6.24.06-gcc85-c17 + module load nvhpc/22.9 + module load cuda/11.5 + mkdir -p build-exalearn5-stdpar-nv-curand + cd build-exalearn5-stdpar-nv-curand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=/global/home/users/fmohammad/FCS-GPU//scripts/nvc++_p -DENABLE_GPU=on -DUSE_STDPAR=ON -DSTDPAR_TARGET=gpu -DCMAKE_CUDA_ARCHITECTURES=80 -DRNDGEN_CPU=Off + make -j16 + echo "x-x-x-x-x std::par Nvidia CURAND BUILD DONE x-x-x-x-x" + cd .. +fi + +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x std::par Nvidia CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-stdpar-nv-cpurng + cd build-exalearn5-stdpar-nv-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=/global/home/users/fmohammad/FCS-GPU//scripts/nvc++_p -DENABLE_GPU=on -DUSE_STDPAR=ON -DSTDPAR_TARGET=gpu -DCMAKE_CUDA_ARCHITECTURES=80 -DRNDGEN_CPU=On + make -j16 + echo "x-x-x-x-x std::par Nvidia CPURNG BUILD DONE! x-x-x-x-x" + cd .. +fi + +## Multicore +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x std::par Multicore CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-stdpar-multicore + cd build-exalearn5-stdpar-multicore + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=/global/home/users/fmohammad/FCS-GPU//scripts/nvc++_p -DENABLE_GPU=on -DUSE_STDPAR=ON -DSTDPAR_TARGET=multicore -DCMAKE_CUDA_ARCHITECTURES=80 -DRNDGEN_CPU=On + make -j16 + echo "x-x-x-x-x std::par Multicore CPURNG BUILD DONE! x-x-x-x-x" + cd .. 
+fi + +## CPU +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x std::par CPU CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-stdpar-cpu + cd build-exalearn5-stdpar-cpu + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=/global/home/users/fmohammad/FCS-GPU//scripts/nvc++_p -DENABLE_GPU=on -DUSE_STDPAR=ON -DSTDPAR_TARGET=cpu -DCMAKE_CUDA_ARCHITECTURES=80 -DRNDGEN_CPU=On + make -j16 + echo "x-x-x-x-x std::par CPU CPURNG BUILD DONE! x-x-x-x-x" + cd .. +fi + +# # # # # # # # # # # # # # + +# Alpaka +## Nvidia CUDA +### CURAND +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x Alpaka Nvidia CURAND BUILD x-x-x-x-x" + module purge + module load root/6.24.06-gcc85-c17 + module load alpaka/0.9.0 + module load cuda/11.5 + mkdir -p build-exalearn5-alpaka-nv-curand + cd build-exalearn5-alpaka-nv-curand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DCMAKE_CXX_STANDARD=17 -DUSE_ALPAKA=on -Dalpaka_ROOT=/opt/alpaka/0.9.0/ -Dalpaka_ACC_GPU_CUDA_ENABLE=ON -Dalpaka_ACC_GPU_CUDA_ONLY_MODE=ON -DRNDGEN_CPU=Off -DCMAKE_CUDA_ARCHITECTURES=80 + make -j16 + echo "x-x-x-x-x Alpaka Nvidia CURAND BUILD DONE! x-x-x-x-x" + cd .. +fi + +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x Alpaka Nvidia CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-alpaka-nv-cpurng + cd build-exalearn5-alpaka-nv-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=off -DENABLE_GPU=on -DCMAKE_CXX_STANDARD=17 -DUSE_ALPAKA=on -Dalpaka_ROOT=/opt/alpaka/0.9.0/ -Dalpaka_ACC_GPU_CUDA_ENABLE=ON -Dalpaka_ACC_GPU_CUDA_ONLY_MODE=ON -DRNDGEN_CPU=On -DCMAKE_CUDA_ARCHITECTURES=80 + make -j16 + echo "x-x-x-x-x Alpaka Nvidia CPURNG BUILD DONE! x-x-x-x-x" + cd .. 
+fi + +## AMD HIP +### HIPRAND +### CPURNG + + +# # # # # # # # # # # # # # + +# Kokkos +## Nvidia +### CURAND +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x Kokkos Nvidia CURAND BUILD x-x-x-x-x" + module purge + module load root/6.24.06-gcc85-c17 + module load kokkos/4.1-cuda11.5-shlib + mkdir -p build-exalearn5-kokkos-nv-curand + cd build-exalearn5-kokkos-nv-curand + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=nvcc_wrapper -DENABLE_GPU=on -DUSE_KOKKOS=ON -DRNDGEN_CPU=Off + make -j16 + echo "x-x-x-x-x Kokkos Nvidia CURAND BUILD DONE! x-x-x-x-x" + cd .. +fi +### CPURNG +if [ "$system" = "exalearn5" ]; then + echo "x-x-x-x-x Kokkos Nvidia CPURNG BUILD x-x-x-x-x" + mkdir -p build-exalearn5-kokkos-nv-cpurng + cd build-exalearn5-kokkos-nv-cpurng + cmake ../FastCaloSimAnalyzer -DENABLE_XROOTD=Off -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_EXTENSIONS=Off -DCMAKE_CXX_COMPILER=nvcc_wrapper -DENABLE_GPU=on -DUSE_KOKKOS=ON -DRNDGEN_CPU=On + make -j16 + echo "x-x-x-x-x Kokkos Nvidia CPURNG BUILD DONE! x-x-x-x-x" + cd .. +fi + +