Skip to content

Commit

Permalink
CMakeLists.txt Improvements for CUDA (#1337)
Browse files Browse the repository at this point in the history
This PR raises the minimum required CMake version to `3.17` and replaces the
deprecated `find_package(CUDA)` module with
[FindCUDAToolkit](https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html),
bringing a number of improvements to the compilation process:
- CUDA Include and Library directories are now handled automatically by
`cmake`
- CUDA architecture handling is reworked: no more regex in
`CMakeLists.txt` and no more manual `-gencode` string generation in the Python code.
- CUDA source files are now included directly in the targets; CMake
handles proper compilation and linking of device code automatically.
- Similar modifications to `OpenMP` and `Threads` targets
  • Loading branch information
kylosus authored Jan 1, 2024
1 parent b93a4c9 commit 1393cb0
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 59 deletions.
86 changes: 38 additions & 48 deletions dace/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
cmake_minimum_required(VERSION 3.15)
cmake_minimum_required(VERSION 3.17)
project(dace_program)

# General options
Expand All @@ -9,6 +9,9 @@ set(DACE_FILES "" CACHE STRING "List of host code files relative to the root of
set(DACE_LIBS "" CACHE STRING "Extra libraries")
set(HLSLIB_PART_NAME "${DACE_XILINX_PART_NAME}")

# CUDA
set(DACE_CUDA_ARCHITECTURES_DEFAULT "" CACHE STRING "Default CUDA architectures in case native not found")

# FPGA specific
set(DACE_FPGA_AUTOBUILD_BITSTREAM OFF CACHE STRING "Automatically build bitstreams if they are not present.")

Expand Down Expand Up @@ -60,7 +63,7 @@ foreach(DACE_FILE ${DACE_FILES})
set(DACE_HIP_FILES ${DACE_HIP_FILES} ${DACE_FILE})
else()
set(DACE_ENABLE_CUDA ON)
set(DACE_CUDA_FILES ${DACE_CUDA_FILES} ${DACE_FILE})
set(DACE_CPP_FILES ${DACE_CPP_FILES} ${DACE_FILE})
endif()
elseif(${DACE_FILE_TARGET} STREQUAL "xilinx")
set(DACE_ENABLE_XILINX ON)
Expand Down Expand Up @@ -103,24 +106,42 @@ include_directories(${DACE_RUNTIME_DIR}/include)
# Global DaCe external dependencies
find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED COMPONENTS CXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

list(APPEND DACE_LIBS Threads::Threads)
list(APPEND DACE_LIBS OpenMP::OpenMP_CXX)

add_definitions(-DDACE_BINARY_DIR=\"${CMAKE_BINARY_DIR}\")
set(DACE_LIBS ${DACE_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${OpenMP_CXX_LIBRARIES})

if(DACE_ENABLE_MPI)
find_package(MPI REQUIRED)
include_directories(${MPI_CXX_INCLUDE_PATH})
set(DACE_LIBS ${DACE_LIBS} ${MPI_CXX_LIBRARIES})
list(APPEND DACE_LIBS MPI::MPI_CXX)
endif()

if(DACE_ENABLE_CUDA)
find_package(CUDA REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
include_directories(${CUDA_INCLUDE_DIRS})
if (MSVC_IDE)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
else()
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
set(CUDAToolkit_ROOT ${CUDA_TOOLKIT_ROOT_DIR})

find_package(CUDAToolkit REQUIRED)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

# CMake 3.24: set_property(TARGET tgt PROPERTY CUDA_ARCHITECTURES native)
if (NOT DEFINED LOCAL_CUDA_ARCHITECTURES)
execute_process(COMMAND "${CUDAToolkit_NVCC_EXECUTABLE}" "--run"
"${CMAKE_SOURCE_DIR}/tools/get_cuda_arch.cpp"
OUTPUT_VARIABLE _local_arch RESULT_VARIABLE _arch_res)

if(_arch_res EQUAL 0)
set(LOCAL_CUDA_ARCHITECTURES "${_local_arch}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "Local CUDA architectures detected: ${LOCAL_CUDA_ARCHITECTURES}")
else()
set(LOCAL_CUDA_ARCHITECTURES "${DACE_CUDA_ARCHITECTURES_DEFAULT}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "No local CUDA-capable GPUs found. Using default: ${DACE_CUDA_ARCHITECTURES_DEFAULT}")
endif()
endif()
set(DACE_LIBS ${DACE_LIBS} ${CUDA_LIBRARIES})

set(CMAKE_CUDA_ARCHITECTURES "${LOCAL_CUDA_ARCHITECTURES}")
enable_language(CUDA)
list(APPEND DACE_LIBS CUDA::cudart)
add_definitions(-DWITH_CUDA)

if (MSVC_IDE)
Expand Down Expand Up @@ -242,38 +263,6 @@ if (DACE_ENABLE_RTL AND DACE_ENABLE_XILINX)
include ("${DACE_RTLLIB_DIR}/cmake/rtl_target.cmake")
endif()

# Create CUDA object files
if(DACE_ENABLE_CUDA)
# Get local CUDA architectures
if (NOT DEFINED LOCAL_CUDA_ARCHITECTURES)
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin" "${CMAKE_CXX_COMPILER}" "--run"
"${CMAKE_SOURCE_DIR}/tools/get_cuda_arch.cpp"
OUTPUT_VARIABLE _arch_out RESULT_VARIABLE _arch_res
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

if(_arch_res EQUAL 0)
string(REGEX REPLACE "\n" ";" _arch_out "${_arch_out}")
list(GET _arch_out -1 _local_arch)
string(REGEX REPLACE " " ";" _local_arch "${_local_arch}")
set(LOCAL_CUDA_ARCHITECTURES "${_local_arch}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "Local CUDA architectures detected: ${LOCAL_CUDA_ARCHITECTURES}")
else()
set(LOCAL_CUDA_ARCHITECTURES "" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "No local CUDA-capable GPUs found")
endif()
endif()

# Add flags to compile for local CUDA architectures
foreach(var ${LOCAL_CUDA_ARCHITECTURES})
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${var},code=sm_${var})
endforeach()

cuda_include_directories(${DACE_RUNTIME_DIR}/include)
cuda_compile(DACE_CUDA_OBJECTS ${DACE_CUDA_FILES})
set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_CUDA_OBJECTS})
endif() # DACE_ENABLE_CUDA


# Create HIP object files
if(DACE_ENABLE_HIP)
# Get local AMD architectures
Expand Down Expand Up @@ -580,7 +569,7 @@ include("targets/mlir/mlir.cmake")

# Create DaCe library file
add_library(${DACE_PROGRAM_NAME} SHARED ${DACE_CPP_FILES} ${DACE_OBJECTS})
target_link_libraries(${DACE_PROGRAM_NAME} ${DACE_LIBS})
target_link_libraries(${DACE_PROGRAM_NAME} PUBLIC ${DACE_LIBS})

# Add additional required files
if(DACE_ENABLE_INTELFPGA)
Expand All @@ -599,6 +588,7 @@ if(DACE_ENABLE_INTELFPGA)
DEPENDS ${DACE_PROGRAM_NAME}_hardware.aocx)
endif()
endif()

if(DACE_ENABLE_XILINX)
if(DACE_XILINX_MODE STREQUAL "software_emulation" AND DACE_FPGA_AUTOBUILD_BITSTREAM)
add_custom_target(autobuild_bitstream ALL
Expand All @@ -619,7 +609,7 @@ endif()

# Create DaCe loader stub
add_library(dacestub_${DACE_PROGRAM_NAME} SHARED "${CMAKE_SOURCE_DIR}/tools/dacestub.cpp")
target_link_libraries(dacestub_${DACE_PROGRAM_NAME} ${CMAKE_THREAD_LIBS_INIT} ${OpenMP_CXX_LIBRARIES})
target_link_libraries(dacestub_${DACE_PROGRAM_NAME} Threads::Threads OpenMP::OpenMP_CXX ${CMAKE_DL_LIBS})

# Windows-specific fixes
if (MSVC_IDE)
Expand Down
8 changes: 4 additions & 4 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,11 @@ def cmake_options():
cuda_arch = Config.get('compiler', 'cuda', 'cuda_arch').split(',')
cuda_arch = [ca for ca in cuda_arch if ca is not None and len(ca) > 0]

flags = Config.get("compiler", "cuda", "args")
flags += ' ' + ' '.join('-gencode arch=compute_{arch},code=sm_{arch}'.format(arch=arch)
for arch in cuda_arch)
cuda_arch = ';'.join(cuda_arch)
options.append(f'-DDACE_CUDA_ARCHITECTURES_DEFAULT="{cuda_arch}"')

options.append("-DCUDA_NVCC_FLAGS=\"{}\"".format(flags))
flags = Config.get("compiler", "cuda", "args")
options.append("-DCMAKE_CUDA_FLAGS=\"{}\"".format(flags))

if backend == 'hip':
hip_arch = Config.get('compiler', 'cuda', 'hip_arch').split(',')
Expand Down
15 changes: 10 additions & 5 deletions dace/codegen/tools/get_cuda_arch.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
// Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#include <cuda_runtime.h>

#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>
#include <sstream>
#include <string>

int main(int argc, char **argv) {
int main() {
int count;
if (cudaGetDeviceCount(&count) != cudaSuccess) return 1;

Expand All @@ -22,10 +24,13 @@ int main(int argc, char **argv) {
architectures.insert(ss.str());
}

// Print out architectures
for (std::set<std::string>::iterator iter = architectures.begin();
iter != architectures.end(); ++iter)
std::cout << *iter << " ";
if (architectures.empty()) {
return 1;
}

std::copy(architectures.begin(), std::prev(architectures.end(), 1),
std::ostream_iterator<std::string>(std::cout, ";"));
std::cout << *architectures.rbegin();

return 0;
}
4 changes: 2 additions & 2 deletions dace/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ required:
type: str
title: nvcc Arguments
description: Compiler argument flags for CUDA
default: '-std=c++14 -Xcompiler -fPIC -O3 -Xcompiler -march=native --use_fast_math -Xcompiler -Wno-unused-parameter'
default_Windows: '-std=c++14 -O3 --use_fast_math'
default: '-Xcompiler -march=native --use_fast_math -Xcompiler -Wno-unused-parameter'
default_Windows: '-O3 --use_fast_math'

hip_args:
type: str
Expand Down

0 comments on commit 1393cb0

Please sign in to comment.