Skip to content

Commit

Permalink
Merge branch 'release-2.7.0-rc2'
Browse files (browse the repository at this point in the history)
  • Loading branch information
alazzaro committed Jun 28, 2024
2 parents ba75a56 + 96b2be4 commit 4e300bc
Show file tree
Hide file tree
Showing 78 changed files with 2,088 additions and 1,911 deletions.
File renamed without changes.
2 changes: 1 addition & 1 deletion .ci/daint.cscs.ch/ocl.build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if [ ! -d "${HOME}/libxsmm" ]; then
fi
cd "${HOME}/libxsmm"
git fetch
git checkout 05705477183444a82c8d9be8d7c2627efd6d67fa
git checkout 2fe2b1a7077ddfbc9ab3b3f7ba1f5a45d52549cb
make -j
cd ..

Expand Down
2 changes: 1 addition & 1 deletion .ci/daint.cscs.ch/ocl.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export OMP_PROC_BIND=TRUE # set thread affinity
# OMP_NUM_THREADS is set by cmake

# use default parameters (omit loading tuned parameters)
export OPENCL_LIBSMM_SMM_PARAMS=0
#export OPENCL_LIBSMM_SMM_PARAMS=0

# document the current environment
env |& tee -a "${STAGE_NAME}.out"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/testing-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
-DMPI_EXECUTABLE_SUFFIX=.${{ matrix.mpi_suffix }} \
-DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root --oversubscribe")" \
-DLCOV_ARGS="--test-name;${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-cpu" \
-DTEST_MPI_RANKS=auto \
..
- name: Build
Expand Down
8 changes: 2 additions & 6 deletions .github/workflows/testing-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ on:
- 'develop'
pull_request:

# Workaround issue in Xcode 14.1/2
env:
DEVELOPER_DIR: /Applications/Xcode_14.0.1.app/Contents/Developer

jobs:
build-and-test:
runs-on: macos-latest
Expand Down Expand Up @@ -45,15 +41,15 @@ jobs:
mkdir -p build
cd build
env \
CC=gcc-12 CXX=g++-12 FC=gfortran-12 \
CC=gcc-14 CXX=g++-14 FC=gfortran-14 \
cmake -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DUSE_${{ matrix.use_mpi }} \
-DUSE_${{ matrix.use_openmp }} \
-DUSE_${{ matrix.use_smm }} \
$([ "${{ matrix.blas_impl }}" = "openblas" ] && echo '-DCMAKE_PREFIX_PATH=/usr/local/opt/openblas') \
-DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root")" \
-DTEST_MPI_RANKS=1 \
-DTEST_MPI_RANKS=auto \
..
- name: Build
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fail_fast: false
minimum_pre_commit_version: 3.2.0
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.3.2'
rev: 'v0.4.10'
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
Expand All @@ -15,13 +15,13 @@ repos:
.cp2k/.*|
)$
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.4.2
hooks:
- id: black
name: Reformat Python files with the black code formatter
files: '^.*(/PACKAGE)|(\.py)$'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: check-ast
- id: check-yaml
Expand Down
8 changes: 8 additions & 0 deletions .pre-commit/headers/c_cpp.3
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: BSD-3-Clause */
/*------------------------------------------------------------------------------------------------*/
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ cmake_dependent_option(WITH_EXAMPLES "Build the examples" ON "USE_MPI" OFF
)# all examples require MPI

set(TEST_MPI_RANKS
"auto"
2
CACHE STRING "Number of MPI ranks for testing")
set(TEST_OMP_THREADS
2
Expand Down
4 changes: 2 additions & 2 deletions VERSION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
MAJOR = 2
MINOR = 7
PATCH = 0-rc1
PATCH = 0-rc2
# A specific DATE (YYYY-MM-DD) fixes an official release, otherwise
# it is considered Development version.
DATE = 2024-03-13
DATE = 2024-06-27


14 changes: 10 additions & 4 deletions cmake/CompilerConfiguration.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-form -std=f2008ts -fimplicit-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-parameter -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-dummy-argument -Werror=conversion -Werror=zerotrip -Werror=uninitialized -Wno-maybe-uninitialized -Werror=unused-parameter")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10)
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Werror=argument-mismatch") # gcc 10+ has this automatically
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-form -std=f2008ts -fimplicit-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-parameter -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-dummy-argument -Werror=conversion -Werror=zerotrip -Wno-maybe-uninitialized -Werror=unused-parameter")
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) # comparison against CXX version rather than GFortran version
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fallow-argument-mismatch") # required for 10+ (MPI wrap)
else ()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fallow-argument-mismatch") # requires for 10+ for the MPI wrap module
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Werror=argument-mismatch") # gcc 10+ has this automatically
endif ()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13) # comparison against CXX version rather than GFortran version
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Wno-error=uninitialized") # false positive (allocatable array)
endif ()
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -g -funroll-loops")
set(CMAKE_Fortran_FLAGS_COVERAGE "-O0 -g --coverage -fno-omit-frame-pointer -fcheck=all,no-array-temps -ffpe-trap=invalid,zero,overflow -fbacktrace -finit-real=snan -finit-integer=-42 -finit-derived -Werror=realloc-lhs -finline-matmul-limit=0 -Werror")
Expand Down Expand Up @@ -48,6 +51,9 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if ((NOT (USE_MPI)) OR (NOT ("${MPI_Fortran_LIBRARY_VERSION_STRING}" MATCHES "Open MPI")))
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=leak")
endif ()
if (USE_ACCEL MATCHES "hip" AND hip_VERSION GREATER_EQUAL 6.0.0) # Remove deprecated function error with ROCm v6+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations")
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops")
set(CMAKE_CXX_FLAGS_COVERAGE "-O0 -g --coverage")
Expand Down
4 changes: 2 additions & 2 deletions docs/guide/2-user-guide/1-installation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ make
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100|Mi250>
-DCMAKE_BUILD_TYPE=<Release|Debug|Coverage>
-DBUILD_TESTING=<ON|OFF>
-DTEST_MPI_RANKS=<auto,N>
-DTEST_OMP_THREADS=<2,N>
-DTEST_MPI_RANKS=<2|auto|N>
-DTEST_OMP_THREADS=<2|N>
```

When providing a build of LIBXSMM, make sure the `lib` directory is added to the `PKG_CONFIG_PATH` variable prior
Expand Down
7 changes: 7 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ foreach (dbcsr_program_src ${DBCSR_PROGRAM_SRCS_FTN})
get_filename_component(dbcsr_program_name ${dbcsr_program_src} NAME_WE)
add_executable(${dbcsr_program_name} ${dbcsr_program_src})
target_link_libraries(${dbcsr_program_name} dbcsr)
if (OpenMP_FOUND)
target_link_libraries(${dbcsr_program_name} OpenMP::OpenMP_Fortran)
endif ()

# with the Intel compiler CMake 3.12 seems to forget that the source is
# actually Fortran and needs to be told explicitly:
Expand All @@ -29,6 +32,10 @@ if (WITH_C_API)
set(dbcsr_program_name ${dbcsr_program_name}_cpp)
add_executable(${dbcsr_program_name} ${dbcsr_program_src})
target_link_libraries(${dbcsr_program_name} dbcsr_c MPI::MPI_CXX)
set_target_properties(${dbcsr_program_name} PROPERTIES LINKER_LANGUAGE CXX)
if (OpenMP_FOUND)
target_link_libraries(${dbcsr_program_name} OpenMP::OpenMP_CXX)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Cray")
# for recent Cray compiler versions CMake doesn't know
Expand Down
4 changes: 2 additions & 2 deletions examples/dbcsr_example_3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ int main(int argc, char* argv[]) {

for (int i = 0; i != mpi_size; ++i) {
if (mpi_rank == i) {
std::cout << "I'm processor " << mpi_rank << " over " << mpi_size << " proc" << ", (" << coord[0] << ", " << coord[1]
<< ") in the 2D grid" << std::endl;
std::cout << "I'm processor " << mpi_rank << " over " << mpi_size << " proc"
<< ", (" << coord[0] << ", " << coord[1] << ") in the 2D grid" << std::endl;
}
MPI_Barrier(MPI_COMM_WORLD);
}
Expand Down
4 changes: 4 additions & 0 deletions src/acc/acc_bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
# define INLINE
#endif

#if !defined(MAX_KERNEL_DIM)
# define MAX_KERNEL_DIM 80
#endif

#define INIT_MAT(ELEM_TYPE, SEED, MAT, M, N, SCALE) \
do { \
const double init_mat_seed1_ = (SCALE) * (SEED) + (SCALE); \
Expand Down
6 changes: 2 additions & 4 deletions src/acc/acc_bench_smm.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
STRIDE_C, INDEX_STRIDE, INDEX_BASE, BATCHSIZE) \
ACC_BENCH_USEOMP(libxsmm_gemm_batch) \
(IPREC, OPREC, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, STRIDE_A, B, LDB, STRIDE_B, BETA, C, LDC, STRIDE_C, INDEX_STRIDE, \
INDEX_BASE, BATCHSIZE, 0 /*batchcheck*/)
INDEX_BASE, BATCHSIZE)
# define PRINTF(...) \
do { \
const size_t print_buffer_size = sizeof(print_buffer) - print_offset; \
Expand Down Expand Up @@ -227,9 +227,7 @@ int main(int argc, char* argv[]) {
int ndevices = 0;
result = c_dbcsr_acc_get_ndevices(&ndevices);
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
#if defined(_DEBUG)
fprintf(stderr, "Activated device %i of %i (device%i).\n", device + 1, ndevices, device);
#endif
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
}
else {
if (0 >= ndevices) {
Expand Down
4 changes: 1 addition & 3 deletions src/acc/acc_bench_trans.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,7 @@ int main(int argc, char* argv[]) {
if (EXIT_SUCCESS == result) {
result = c_dbcsr_acc_get_ndevices(&ndevices);
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
#if defined(_DEBUG)
fprintf(stderr, "Activated device %i of %i (device%i).\n", device + 1, ndevices, device);
#endif
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
}
else {
if (0 >= ndevices) {
Expand Down
20 changes: 12 additions & 8 deletions src/acc/cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ OBJSMM := $(SRCSMM:.cpp=.o)

INCALL := $(INCACC) $(INCSMM)

LIBXSMMROOT := $(wildcard $(ACCDIR)/../../../libxsmm)
LIBXSMMROOT := $(wildcard $(ACCDIR)/../../../../../libxsmm)
ifeq (,$(LIBXSMMROOT))
LIBXSMMROOT := $(wildcard $(HOME)/libxsmm)
endif
Expand All @@ -33,6 +33,9 @@ INTEL ?= 0
GNU ?= 0
DEV ?= 0

# C++ baseline standard
CXXSTD ?= -std=c++14

# select from set of predefined triplet specifications
SPECID ?= 0
# limit shape in tests (zero or negative for unlimited)
Expand Down Expand Up @@ -118,7 +121,7 @@ else ifneq (0,$(GNU))
else
override AR := ar
endif
override LD_LIBRARY_DIRS := $(NULL)
#override LD_LIBRARY_DIRS := $(NULL)
else
CXX := g++
CC := gcc
Expand Down Expand Up @@ -211,7 +214,7 @@ LD_LIBSTUB_PATH := $(wildcard $(patsubst %,%/stubs,$(LD_LIBRARY_DIRS)))
LIBPATHS := $(foreach DIR,$(LD_LIBRARY_DIRS),$(if $(filter -L$(DIR),$(LDFLAGS)),$(NULL),-L$(DIR)))
LIBSTUBS := $(foreach DIR,$(LD_LIBSTUB_PATH),$(if $(filter -L$(DIR),$(LDFLAGS)),$(NULL),-L$(DIR)))
LDFLAGS += $(LIBPATHS) $(LIBSTUBS) -lcudart -lcublas -lnvrtc -lcuda
CXXFLAGS += -std=c++11 $(CFLAGS)
CXXFLAGS += $(CXXSTD) $(CFLAGS)

.PHONY: bench
bench: $(ACCDIR)/acc_bench_smm $(ACCDIR)/acc_bench_trans
Expand Down Expand Up @@ -296,15 +299,16 @@ libsmm: $(ACCDIR)/dbcsr_acc_smm.a
$(ACCDIR)/dbcsr_acc_smm.a: $(OBJSMM)
$(AR) -rs $@ $^

%.o: %.cu $(INCALL) $(MAKDIR)/Makefile
$(NVCC) $(DFLAGS) -allow-unsupported-compiler --compiler-options="$(CXXFLAGS) $(CFLAGS_XSMM)" -c $< -o $@

%.o: %.cpp $(INCALL) $(MAKDIR)/Makefile
$(CXX) $(DFLAGS) $(CXXFLAGS) $(CFLAGS_XSMM) -c $< -o $@

%.o: %.cu $(INCALL) $(MAKDIR)/Makefile
$(NVCC) $(DFLAGS) -allow-unsupported-compiler $(CXXSTD) \
--compiler-options="$(filter-out $(CXXSTD),$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@

$(ACCDIR)/cuda_hip/calculate_norms.o: $(ACCDIR)/cuda_hip/calculate_norms.cpp $(INCALL) $(MAKDIR)/Makefile
$(NVCC) $(DFLAGS) -x cu -allow-unsupported-compiler \
--compiler-options="$(filter-out -pedantic,$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@
$(NVCC) $(DFLAGS) -allow-unsupported-compiler $(CXXSTD) -x cu \
--compiler-options="$(filter-out $(CXXSTD) -pedantic,$(CXXFLAGS)) $(CFLAGS_XSMM)" -c $< -o $@

$(MAKDIR)/acc_bench_smm.o: $(ACCDIR)/acc_bench_smm.c $(MAKDIR)/Makefile
ifneq (0,$(LIBXSMM))
Expand Down
11 changes: 4 additions & 7 deletions src/acc/cuda_hip/acc_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@
#include <stdio.h>
#include <math.h>

// for debug purpose
#if defined(__HIP_PLATFORM_NVCC__)
static const int verbose_print = 1;
#endif

/****************************************************************************/
extern "C" int c_dbcsr_acc_get_ndevices(int* n_devices) {
ACC_API_CALL(GetDeviceCount, (n_devices));
Expand All @@ -49,9 +44,11 @@ extern "C" int c_dbcsr_acc_set_active_device(int device_id) {
// establish context
ACC_API_CALL(Free, (0));

#if defined(__HIP_PLATFORM_NVCC__)
if (verbose_print) {
#if defined(__CUDA) || defined(__HIP_PLATFORM_NVCC__)
static bool once = false;
if (!once) {
ACC_API_CALL(DeviceSetLimit, (ACC(LimitPrintfFifoSize), (size_t)1000000000));
once = true;
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/acc/cuda_hip/acc_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern "C" int c_dbcsr_acc_stream_create(void** stream_p, const char* name, int
cErr = ACC(StreamCreate)(acc_stream);
}

if (verbose_print) printf("StreamCreate : %p -> %p \n", *stream_p, *acc_stream);
if (verbose_print) printf("StreamCreate : %p -> %p \n", *stream_p, (const void*)*acc_stream);
if (acc_error_check(cErr)) return -1;
if (acc_error_check(ACC(GetLastError)())) return -1;

Expand Down
2 changes: 2 additions & 0 deletions src/acc/libsmm_acc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ The performance of the matrix-matrix multiplication kernels is highly dependent

## Contributing to libsmm_acc

We expect users to contribute to the library by providing new optimized kernels and support for new GPUs.

#### Autotuning procedure

Follow the [autotuning procedure](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/tune/README.md)
Expand Down
Loading

0 comments on commit 4e300bc

Please sign in to comment.