Skip to content

Commit

Permalink
Merge branch 'main' into dingpf/packaging
Browse files Browse the repository at this point in the history
  • Loading branch information
dingp authored Oct 3, 2024
2 parents 01e9f09 + 1659d34 commit 10c1a34
Show file tree
Hide file tree
Showing 21 changed files with 530 additions and 59 deletions.
19 changes: 17 additions & 2 deletions FastCaloSimAnalyzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ if ( USE_STDPAR )
SET(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
if ( ${STDPAR_TARGET} STREQUAL "cpu" )
if ( NOT RNDGEN_CPU )
message(FATAL_ERROR "when STDPAR_TARGET=cpu, RNDGEN_CPU must be ON")
message(WARNING "when STDPAR_TARGET=cpu, RNDGEN_CPU must be ON")
message(WARNING "Setting RNDGEN_CPU to ON")
set( RNDGEN_CPU ON )
endif()
set(STDPAR_DIRECTIVE "-nostdpar")
elseif( ${STDPAR_TARGET} STREQUAL "gpu" )
Expand All @@ -38,6 +40,11 @@ if ( USE_STDPAR )
message(WARNING "Setting USE_ATOMIC_ADD to OFF")
set ( USE_ATOMIC_ADD OFF )
endif()
if ( NOT RNDGEN_CPU )
message(WARNING "when STDPAR_TARGET=multicore, RNDGEN_CPU must be ON")
message(WARNING "Setting RNDGEN_CPU to ON")
set( RNDGEN_CPU ON )
endif()
set(STDPAR_DIRECTIVE "-stdpar=multicore")
else()
message(FATAL_ERROR "unknown stdpar target ${STDPAR_TARGET}")
Expand All @@ -51,7 +58,15 @@ elseif(USE_ALPAKA)
find_package(alpaka REQUIRED)

elseif(USE_HIP)
find_package(HIP REQUIRED)
if ( ${HIP_TARGET} STREQUAL "NVIDIA" )
find_package(HIP)
if ( NOT RNDGEN_CPU )
message(FATAL_ERROR "when HIP_TARGET=NVIDIA, RNDGEN_CPU must be ON")
endif()
else()
find_package(HIP REQUIRED)
endif()

endif()

include(FastCaloSim)
Expand Down
10 changes: 10 additions & 0 deletions FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/CountingIterator.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
#ifndef COUNTING_ITERATOR_H
#define COUNTING_ITERATOR_H 1

#include <cstddef>
#include <type_traits>
#include <iterator>

struct counting_iterator {

typedef size_t Index_t;
Expand Down Expand Up @@ -67,3 +74,6 @@ struct counting_iterator {
private:
value_type value;
};


#endif
3 changes: 3 additions & 0 deletions FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/HostDevDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
# define __HOSTDEV__
# define __INLINE__ inline
#elif defined(USE_HIP)
# if defined(HIP_TARGET_NVIDIA)
# include "cuda_runtime.h"
# endif
# define __DEVICE__ __device__
# define __HOST__ __host__
# define __HOSTDEV__ __host__ __device__
Expand Down
18 changes: 18 additions & 0 deletions FastCaloSimAnalyzer/FastCaloGpu/FastCaloGpu/TestStdPar.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef FCS_TEST_STDPAR
#define FCS_TEST_STDPAR 1

class TestStdPar {

public:

void testAll(unsigned long);

void test_floatArray(unsigned long);
void test_vecInt(unsigned long);
void test_vecFloat(unsigned long);
void test_atomicAdd_int(unsigned long);
void test_atomicAdd_float(unsigned long);

};

#endif
54 changes: 42 additions & 12 deletions FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,27 +87,43 @@ endif()

# Sources
if(USE_STDPAR)
set(FastCaloGpu_Srcs GeoLoadGpu.cxx KernelWrapper_sp.cxx gpuQ.cu Rand4Hits.cu Rand4Hits_sp.cxx )
# set(FastCaloGpu_Srcs gpuQ.cu CaloGpuGeneral.cxx KernelWrapper.cu Rand4Hits.cu )
set(FastCaloGpu_Srcs GeoLoadGpu.cxx KernelWrapper_sp.cxx Rand4Hits_sp.cxx TestStdPar.cxx)
if ( ${STDPAR_TARGET} STREQUAL "gpu" )
set(FastCaloGpu_Srcs ${FastCaloGpu_Srcs} gpuQ.cu )
endif()
if ( RNDGEN_CPU )
set(FastCaloGpu_Srcs ${FastCaloGpu_Srcs} Rand4Hits_cpu.cxx )
else()
set(FastCaloGpu_Srcs ${FastCaloGpu_Srcs} Rand4Hits.cu )
endif()
elseif(USE_KOKKOS)
set(FastCaloGpu_Srcs GeoLoadGpu.cxx KernelWrapper_kk.cxx )
elseif(USE_ALPAKA)
set(FastCaloGpu_Srcs CaloGpuGeneral.cxx KernelWrapper_al.cxx Rand4Hits_al.cxx )
elseif(ENABLE_OMPGPU)
set(FastCaloGpu_Srcs KernelWrapper_omp.cxx)
elseif(USE_HIP)
include_directories( /opt/rocm/hip/include )
# Define ROCM_PATH if not defined
if (NOT DEFINED ENV{ROCM_PATH})
message(WARNING "Environment variable ROCM_PATH not set! Using default /opt/rocm/")
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
else()
set(ROCM_PATH $ENV{ROCM_PATH})
endif()
include_directories( ${ROCM_PATH}/hip/include )

if ( ${HIP_TARGET} STREQUAL "AMD" )
message(STATUS " Using AMD HIP backend")
add_compile_definitions(__HIP_PLATFORM_AMD__ HIP_PLATFORM_AMD)
set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx906;gfx908")
elseif( ${HIP_TARGET} STREQUAL "NVIDIA" )
message(STATUS " Using NVIDIA HIP backend")
add_compile_definitions(__HIP_PLATFORM_NVIDIA__ HIP_PLATFORM_NVIDIA)
find_package(CUDAToolkit REQUIRED)
set(CMAKE_CUDA_ARCHITECTURES "70;75;80;86")
set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx906;gfx908")
else()
message(FATAL_ERROR "unknown HIP_TARGET=${HIP_TARGET}. Must be either AMD or NVIDIA")
endif()
add_compile_definitions(__HIP_PLATFORM_HCC__ HIP_PLATFORM_HCC)


set(FastCaloGpu_Srcs GeoLoadGpu.cxx KernelWrapper_hip.cxx gpuQ_hip.cxx Rand4Hits_hip.cxx )

else()
Expand Down Expand Up @@ -144,10 +160,13 @@ if(USE_ALPAKA)
elseif(USE_HIP)
target_compile_definitions(${FastCaloGpu_LIB} PRIVATE ${FCS_CommonDefinitions})
target_include_directories(${FastCaloGpu_LIB} PRIVATE ../FastCaloGpu/ } )
target_include_directories(${FastCaloGpu_LIB} PRIVATE /opt/rocm/hiprand/include )
target_include_directories(${FastCaloGpu_LIB} PRIVATE /opt/rocm/rocrand/include )

target_link_libraries(${FastCaloGpu_LIB} PUBLIC /opt/rocm/lib/libhiprand.so)

if ( ${HIP_TARGET} STREQUAL "AMD" )
target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -DHIP_TARGET_AMD)
elseif( ${HIP_TARGET} STREQUAL "NVIDIA" )
target_compile_definitions(${FastCaloGpu_LIB} PUBLIC -DHIP_TARGET_NVIDIA)
target_link_libraries(${FastCaloGpu_LIB} PUBLIC CUDA::cudart)
endif()
else()
target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${NVTOOLSEXT_LIB} ${CURAND_LIB})
endif()
Expand Down Expand Up @@ -184,13 +203,24 @@ if(USE_STDPAR)

endif()

target_link_options(${FastCaloGpu_LIB} PRIVATE ${STDPAR_DIRECTIVE})
target_link_options(${FastCaloGpu_LIB} PRIVATE ${STDPAR_DIRECTIVE} -Xlinker -z noexecstack)

endif()

if(RNDGEN_CPU)
message(STATUS "Will generate random numbers on CPU")
target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DRNDGEN_CPU )
# TODO Link a portable RNG library
else()
if(USE_HIP)
if ( ${HIP_TARGET} STREQUAL "AMD" )
target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/hiprand/include )
target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/rocrand/include )
target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/libhiprand.so)
elseif( ${HIP_TARGET} STREQUAL "NVIDIA" )
target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY})
endif()
endif()
endif()

if(DUMP_HITCELLS)
Expand Down
4 changes: 2 additions & 2 deletions FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ void* CaloGpuGeneral::Rand4Hits_init( long long maxhits, unsigned short maxbin,
std::cout << "using alpaka\n";
#elif defined(USE_HIP)
std::cout << "using HIP on ";
#ifdef __HIP_PLATFORM_NVIDIA__
#ifdef HIP_TARGET_NVIDIA
std::cout << "NVIDIA\n";
#elif defined __HIP_PLATFORM_AMD__
#elif defined HIP_TARGET_AMD
std::cout << "AMD\n";
#else
std::cout << "UNKNOWN\n";
Expand Down
6 changes: 5 additions & 1 deletion FastCaloSimAnalyzer/FastCaloGpu/src/CaloGpuGeneral_sp.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
#include "Rand4Hits.h"
#include "Hit.h"
#include "CountingIterator.h"
#include "nvToolsExt.h"

// FIXME: Bug in nvhpc 24.X
#if defined ( _NVHPC_STDPAR_NONE )
#include "nvToolsExt.h"
#endif

#define DO_ATOMIC_TESTS 0

Expand Down
12 changes: 6 additions & 6 deletions FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,21 @@ void Rand4Hits::allocateGenMem(size_t num) {


Rand4Hits::~Rand4Hits() {

#ifdef USE_STDPAR
deallocate();
#else
delete ( m_rnd_cpu );
#endif

#ifdef USE_STDPAR
if (!m_useCPU) {
gpuQ( cudaFree( m_rand_ptr ) );
}
#else
gpuQ( cudaFree( m_rand_ptr ) );
#endif

if ( m_useCPU ) {
destroyCPUGen();
} else {
Expand All @@ -91,7 +91,7 @@ void Rand4Hits::create_gen( unsigned long long seed, size_t num, bool useCPU ) {
float* f{nullptr};

m_useCPU = useCPU;

if ( m_useCPU ) {
allocateGenMem( num );
createCPUGen( seed );
Expand All @@ -110,9 +110,9 @@ void Rand4Hits::create_gen( unsigned long long seed, size_t num, bool useCPU ) {
CURAND_CALL( curandGenerateUniform( *gen, f, num ) );
m_gen = (void*)gen;
}

m_rand_ptr = f;

std::cout << "R4H m_rand_ptr: " << m_rand_ptr << std::endl;

}
2 changes: 1 addition & 1 deletion FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_cpu.cxx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration
*/

#include "Rand4Hits.h"
#include <random>
#include <vector>
#include <algorithm>
Expand Down
48 changes: 43 additions & 5 deletions FastCaloSimAnalyzer/FastCaloGpu/src/Rand4Hits_hip.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,30 @@
#include "Rand4Hits.h"
#include "gpuQ.h"
#include <iostream>

#ifndef RNDGEN_CPU
#if defined (HIP_TARGET_NVIDIA)
#include <curand.h>
#else
#include <hiprand.h>
#endif
#endif

#include "Rand4Hits_cpu.cxx"

#define CURAND_CALL( x ) \
if ( ( x ) != HIPRAND_STATUS_SUCCESS ) { \
printf( "Error at %s:%d\n", __FILE__, __LINE__ ); \
exit( EXIT_FAILURE ); \
}
#if defined (HIP_TARGET_NVIDIA)
#define CURAND_CALL(x) \
if ((x) != CURAND_STATUS_SUCCESS) { \
printf("Error at %s:%d\n", __FILE__, __LINE__); \
exit(EXIT_FAILURE); \
}
#else
#define CURAND_CALL( x ) \
if ( ( x ) != HIPRAND_STATUS_SUCCESS ) { \
printf( "Error at %s:%d\n", __FILE__, __LINE__ ); \
exit( EXIT_FAILURE ); \
}
#endif

#ifndef USE_STDPAR
void Rand4Hits::allocate_simulation( long long /*maxhits*/, unsigned short /*maxbins*/, unsigned short maxhitct,
Expand Down Expand Up @@ -70,8 +85,15 @@ Rand4Hits::~Rand4Hits() {
if ( m_useCPU ) {
destroyCPUGen();
} else {
#ifndef RNDGEN_CPU
#if defined (HIP_TARGET_NVIDIA)
CURAND_CALL(curandDestroyGenerator(*((curandGenerator_t *)m_gen)));
delete (curandGenerator_t *)m_gen;
#else
CURAND_CALL( hiprandDestroyGenerator( *( (hiprandGenerator_t*)m_gen ) ) );
delete (hiprandGenerator_t*)m_gen;
#endif
#endif
}
};

Expand All @@ -82,7 +104,14 @@ void Rand4Hits::rd_regen() {
gpuQ( hipMemcpy( m_rand_ptr, m_rnd_cpu->data(), 3 * m_total_a_hits * sizeof( float ), hipMemcpyHostToDevice ) );
#endif
} else {
#ifndef RNDGEN_CPU
#if defined (HIP_TARGET_NVIDIA)
CURAND_CALL(curandGenerateUniform(*((curandGenerator_t *)m_gen), m_rand_ptr,
3 * m_total_a_hits));
#else
CURAND_CALL( hiprandGenerateUniform( *( (hiprandGenerator_t*)m_gen ), m_rand_ptr, 3 * m_total_a_hits ) );
#endif
#endif
}
};

Expand All @@ -103,12 +132,21 @@ void Rand4Hits::create_gen( unsigned long long seed, size_t num, bool useCPU ) {
gpuQ( hipMemcpy( f, m_rnd_cpu->data(), num * sizeof( float ), hipMemcpyHostToDevice ) );
#endif
} else {
#ifndef RNDGEN_CPU
gpuQ( hipMalloc( &f, num * sizeof( float ) ) );
#if defined (HIP_TARGET_NVIDIA)
curandGenerator_t *gen = new curandGenerator_t;
CURAND_CALL(curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(*gen, seed));
CURAND_CALL(curandGenerateUniform(*gen, f, num));
#else
hiprandGenerator_t* gen = new hiprandGenerator_t;
CURAND_CALL( hiprandCreateGenerator( gen, HIPRAND_RNG_PSEUDO_DEFAULT ) );
CURAND_CALL( hiprandSetPseudoRandomGeneratorSeed( *gen, seed ) );
CURAND_CALL( hiprandGenerateUniform( *gen, f, num ) );
#endif
m_gen = (void*)gen;
#endif
}

m_rand_ptr = f;
Expand Down
Loading

0 comments on commit 10c1a34

Please sign in to comment.