Skip to content

Commit

Permalink
Add GPU aware MPI support in cannon algorithm (#647)
Browse files Browse the repository at this point in the history
Add GPU aware MPI support in cannon algorithm with norms calculation in GPU
  • Loading branch information
gsitaram authored Jan 24, 2024
1 parent 14105ed commit 3bc658f
Show file tree
Hide file tree
Showing 29 changed files with 1,494 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/doc-generation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- "/etc/ssh/ssh_known_hosts:/etc/ssh/ssh_known_hosts:ro"

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-build-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:

steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Prepare
id: prep
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
image: ghcr.io/cp2k/dbcsr-build-env-ubuntu-22.04:develop

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: true

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
image: ghcr.io/cp2k/dbcsr-build-env-latest-gcc:develop

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/testing-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
container:
image: ghcr.io/cp2k/dbcsr-build-env-ubuntu-22.04:develop
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Run pre-commit
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
Expand All @@ -40,7 +40,7 @@ jobs:
mpi_suffix: mpich

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down Expand Up @@ -100,7 +100,7 @@ jobs:
use_openmp: [OPENMP=ON]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
use_smm: [SMM=libxsmm]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down Expand Up @@ -164,9 +164,10 @@ jobs:
strategy:
matrix:
use_openmp: [OPENMP=ON]
use_g2g: [G2G=ON, G2G=OFF]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand All @@ -176,10 +177,11 @@ jobs:
mkdir -p build
cd build
cmake -G Ninja \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_BUILD_TYPE=Release \
-DUSE_${{ matrix.use_openmp }} \
-DUSE_ACCEL=hip \
-DWITH_GPU=Mi100 \
-DWITH_GPU=Mi250 \
-DWITH_${{ matrix.use_g2g }} \
-DWITH_EXAMPLES=ON \
-DCMAKE_PREFIX_PATH=/opt/rocm \
..
Expand All @@ -194,7 +196,7 @@ jobs:
image: ghcr.io/cp2k/dbcsr-build-env-ubuntu-22.04:develop

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Download coverage data
uses: actions/download-artifact@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
mpi_suffix: mpich

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
Expand Down
32 changes: 20 additions & 12 deletions .pre-commit/check_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import re
import mmap
import sys
from os import path
import pathlib
from collections import defaultdict
from os import path, listdir
from contextlib import contextmanager

TYPES = {
Expand Down Expand Up @@ -41,14 +43,19 @@ def mmap_open(name, mode="r"):

def check_header(header_dir, files, verbose=False):
retval = 0
header_re = {}
header_len = {}

for headertype in TYPES:
with open(path.join(header_dir, headertype), "rb") as fhandle:
header_content = fhandle.read()
header_re[headertype] = re.compile(re.escape(header_content))
header_len[headertype] = len(header_content)
header_re = defaultdict(list)
header_len = defaultdict(list)

for headerfile in listdir(header_dir):
headertype = pathlib.Path(headerfile).stem
if headertype in TYPES:
with open(path.join(header_dir, headerfile), "rb") as fhandle:
header_content = fhandle.read()
header_re[headertype].append(re.compile(re.escape(header_content)))
header_len[headertype].append(len(header_content))
else:
print("no matching headerfile to file extensions")
sys.exit(1)

ext_map = {e: t for t, exts in TYPES.items() for e in exts}

Expand All @@ -62,9 +69,10 @@ def check_header(header_dir, files, verbose=False):

with mmap_open(fpath) as fmapped:
header_type = ext_map[fext]
match = header_re[header_type].search(
fmapped, 0, ALLOWED_LINES * MAX_LINE_LENGTH + header_len[header_type]
)
for h_re, h_len in zip(header_re[header_type], header_len[header_type]):
match = h_re.search(fmapped, 0, ALLOWED_LINES * MAX_LINE_LENGTH + h_len)
if match:
break

if not match:
print("✗ {} ... required header not found".format(fpath))
Expand Down
File renamed without changes.
9 changes: 9 additions & 0 deletions .pre-commit/headers/c_cpp.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* Copyright (C) 2022 Advanced Micro Devices, Inc. - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: GPL-2.0+ */
/*------------------------------------------------------------------------------------------------*/
File renamed without changes.
9 changes: 9 additions & 0 deletions .pre-commit/headers/fortran.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
!--------------------------------------------------------------------------------------------------!
! Copyright (C) by the DBCSR developers group - All rights reserved !
! Copyright (C) 2022 Advanced Micro Devices, Inc. - All rights reserved !
! This file is part of the DBCSR library. !
! !
! For information on the license, see the LICENSE file. !
! For further information please visit https://dbcsr.cp2k.org !
! SPDX-License-Identifier: GPL-2.0+ !
!--------------------------------------------------------------------------------------------------!
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ Christian Pousa <[email protected]>
Dorothea Golze <[email protected]>
Fawzi Mohamed <[email protected]>
Florian Schiffmann <[email protected]>
Gina Sitaraman <[email protected]>
Harald Forbert <[email protected]>
H. Bani-Hashemian <[email protected]>
Iain Bethune <[email protected]>
Ilia Sivkov <[email protected]>
Jan Wilhelm <[email protected]>
Joost VandeVondele <[email protected]>
Juerg Hutter <[email protected]>
Leopold Grinberg <[email protected]>
Lianheng Tong <[email protected]>
Marcella Mauri-Iannuzzi <[email protected]>
Matthias Krack <[email protected]>
Expand Down
19 changes: 19 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
cmake_minimum_required(VERSION 3.22)

set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE FORCE)

# include our cmake snippets
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

Expand Down Expand Up @@ -111,7 +113,13 @@ set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES}

option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF)
option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF)
option(WITH_G2G "Enable GPU aware MPI within CUDA/HIP backends" OFF)

if (WITH_G2G AND ((NOT USE_ACCEL) OR ((NOT USE_ACCEL MATCHES "cuda")
AND (NOT USE_ACCEL MATCHES "hip"))))
message(
FATAL_ERROR "GPU aware MPI can only be enabled for HIP/CUDA GPU backends")
endif ()
# =================================================================================================
# LANGUAGES AND TESTING
enable_language(Fortran)
Expand Down Expand Up @@ -236,6 +244,7 @@ if (USE_ACCEL MATCHES "cuda|hip")
endif ()

if (USE_ACCEL MATCHES "cuda")
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)

if (CUDAToolkit_VERSION LESS 5.5)
Expand All @@ -257,9 +266,15 @@ if (USE_ACCEL MATCHES "cuda")
message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS})
message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER})
message(STATUS "GPU profiling enabled: " ${WITH_CUDA_PROFILING})
message(STATUS "GPU aware MPI enabled: " ${WITH_G2G})
endif ()

if (USE_ACCEL MATCHES "hip")
if (NOT CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES OFF)
endif ()
enable_language(HIP)

# Make sure the GPU required is supported
list(FIND SUPPORTED_HIP_ARCHITECTURES ${WITH_GPU} GPU_SUPPORTED)
if (GPU_SUPPORTED EQUAL -1)
Expand Down Expand Up @@ -296,6 +311,7 @@ if (USE_ACCEL MATCHES "hip")
message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS})
message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER})
message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING})
message(STATUS "GPU aware MPI enabled: " ${WITH_G2G})

# =================================== BLAS on GPU backend
find_package(hipblas CONFIG REQUIRED HINTS ${ROCM_PATH})
Expand Down Expand Up @@ -339,3 +355,6 @@ endif ()
add_subdirectory(docs)

include(CustomTargets)

# Disable LTO
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE FORCE)
2 changes: 2 additions & 0 deletions docs/guide/2-user-guide/1-installation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ make
-DUSE_SMM=<blas|libxsmm>
-DUSE_ACCEL=<opencl|cuda|hip>
-DWITH_CUDA_PROFILING=<OFF|ON>
-DWITH_HIP_PROFILING=<OFF|ON>
-DWITH_G2G=<OFF|ON>
-DWITH_C_API=<ON|OFF>
-DWITH_EXAMPLES=<ON|OFF>
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100|Mi250>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ Assumed square matrix with 20x20 matrix with 5x5 blocks and a 2x2 processor grid
| `__CUDA_PROFILING` | To turn on Nvidia Tools Extensions. It requires to link `-lnvToolsExt` | Fortran, C, C++ |
| `__CUDA` | Enable CUDA acceleration | C, C++ |
| `__HIP` | Enable HIP acceleration | C, C++ |
| `__DBCSR_ACC_G2G` | Enable GPU Aware MPI in CUDA and HIP backends | Fortran, C, C++ |
32 changes: 32 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ set(DBCSR_HIP_AND_CUDA_SRCS
acc/libsmm_acc/libsmm_acc_benchmark.cpp
acc/libsmm_acc/libsmm_acc_init.cpp
acc/libsmm_acc/libsmm_acc.cpp
acc/cuda_hip/calculate_norms.cpp
acc/cuda_hip/acc_blas.cpp
acc/cuda_hip/acc_dev.cpp
acc/cuda_hip/acc_error.cpp
Expand All @@ -122,6 +123,18 @@ set(DBCSR_CUDA_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/cuda/acc_cuda.cpp

set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/hip/acc_hip.cpp)

if (USE_ACCEL MATCHES "hip")
set_source_files_properties(acc/cuda_hip/calculate_norms.cpp
PROPERTIES LANGUAGE HIP)
set_source_files_properties(acc/cuda_hip/calculate_norms.cpp
PROPERTIES COMPILE_FLAGS "-fPIE")
elseif (USE_ACCEL MATCHES "cuda")
set_source_files_properties(acc/cuda_hip/calculate_norms.cpp
PROPERTIES LANGUAGE CUDA)
set_source_files_properties(acc/cuda_hip/calculate_norms.cpp
PROPERTIES COMPILE_FLAGS "--x cu")
endif ()

set(DBCSR_OPENCL_SRCS
acc/opencl/smm/opencl_libsmm.c acc/opencl/acc_opencl.c
acc/opencl/acc_opencl_event.c acc/opencl/acc_opencl_mem.c
Expand Down Expand Up @@ -164,6 +177,13 @@ set_target_properties(
SOVERSION ${dbcsr_APIVERSION}
POSITION_INDEPENDENT_CODE ON)

if (USE_ACCEL MATCHES "hip")
set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES "${ACC_ARCH_NUMBER}")
elseif (USE_ACCEL MATCHES "cuda")
set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES
"${ACC_ARCH_NUMBER}")
endif ()

if (USE_SMM MATCHES "libxsmm")
target_compile_definitions(dbcsr PRIVATE __LIBXSMM)
target_link_directories(dbcsr PUBLIC ${LIBXSMM_LIBRARY_DIRS})
Expand Down Expand Up @@ -262,6 +282,18 @@ if (USE_ACCEL)
$<$<BOOL:${WITH_HIP_PROFILING}>:roctx64>
$<$<BOOL:${WITH_HIP_PROFILING}>:roctracer64>
$<$<STREQUAL:${USE_ACCEL},opencl>:OpenCL::OpenCL>)

if (WITH_G2G)
target_compile_definitions(
dbcsr
PRIVATE __DBCSR_ACC_G2G
$<$<STREQUAL:${USE_ACCEL},cuda>:__CUDA>
$<$<STREQUAL:${USE_ACCEL},cuda>:ARCH_NUMBER=${ACC_ARCH_NUMBER}>
$<$<STREQUAL:${USE_ACCEL},hip>:__HIP>
$<$<STREQUAL:${USE_ACCEL},hip>:ARCH_NUMBER=${ACC_ARCH_NUMBER}>
$<$<BOOL:${WITH_CUDA_PROFILING}>:__CUDA_PROFILING>
$<$<BOOL:${WITH_HIP_PROFILING}>:__HIP_PROFILING>)
endif ()
endif ()

# =================================================================================================
Expand Down
2 changes: 1 addition & 1 deletion src/acc/cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ $(ACCDIR)/dbcsr_acc_smm.a: $(OBJSMM)
%.o: %.cpp $(INCALL) $(MAKDIR)/Makefile
$(CXX) $(DFLAGS) $(CXXFLAGS) $(CFLAGS_XSMM) -c $< -o $@

$(DIRSMM)/calculate_norms.o: $(DIRSMM)/calculate_norms.cpp $(INCALL) $(MAKDIR)/Makefile
$(ACCDIR)/cuda_hip/calculate_norms.o: $(ACCDIR)/cuda_hip/calculate_norms.cpp $(INCALL) $(MAKDIR)/Makefile
$(NVCC) $(DFLAGS) -x cu -allow-unsupported-compiler \
--compiler-options="$(filter-out -pedantic,$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@

Expand Down
3 changes: 3 additions & 0 deletions src/acc/cuda_hip/acc_dev.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* Copyright (C) 2022 Advanced Micro Devices, Inc. - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
Expand All @@ -20,7 +21,9 @@
#include <math.h>

// for debug purpose
#if defined(__HIP_PLATFORM_NVCC__)
static const int verbose_print = 1;
#endif

/****************************************************************************/
extern "C" int c_dbcsr_acc_get_ndevices(int* n_devices) {
Expand Down
Loading

0 comments on commit 3bc658f

Please sign in to comment.