diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b8f82a6d71..1def9bcfb38 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,10 +25,18 @@ repos: - id: check-yaml - id: check-symlinks - id: trailing-whitespace + exclude: >- + (?x)^( + tools/vecLibFort/.*| + )$ - repo: https://github.com/pseewald/fprettify rev: v0.3.7 hooks: - id: fprettify + exclude: >- + (?x)^( + tools/vecLibFort/.*| + )$ - repo: https://github.com/cheshirekow/cmake-format-precommit rev: v0.6.13 hooks: @@ -64,3 +72,8 @@ repos: files: \.(c|cc|cxx|cpp|cl|frag|glsl|h|hpp|hxx|ih|ispc|ipp|java|js|m|mm|proto|textproto|vert)$ args: ['-i', '-fallback-style=none', '--style=file'] additional_dependencies: ['clang-format'] + exclude: >- + (?x)^( + tools/vecLibFort/.*| + )$ + diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a174910f21..d1dd70b41c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,6 +163,23 @@ endif () find_package(LAPACK REQUIRED) # needed for some of the integrated test routines, # also calls find_package(BLAS) +if (APPLE + AND (BLAS_LIBRARIES MATCHES "Accelerate" + OR BLAS_LIBRARIES MATCHES "vecLib" # automated search + OR BLA_VENDOR MATCHES "^(Apple|Accelerate)$" + OR BLA_VENDOR STREQUAL "NAS" # user override (FindBLAS vendors: Apple, NAS) + )) + message(CHECK_START "Looking for vecLibFort library") + find_library(VECLIBFORT_LIBRARY vecLibFort) + if (NOT VECLIBFORT_LIBRARY) + message(CHECK_FAIL "not found, building it") + add_subdirectory(tools/vecLibFort) + set(VECLIBFORT_LIBRARY vecLibFort) + else () + message(CHECK_PASS "found at ${VECLIBFORT_LIBRARY}") + endif () +endif () + # =================================== Python this module looks preferably for # version 3 of Python. If not found, version 2 is searched. 
In CMake 3.15, if a # python virtual environment is activated, it will search the virtual diff --git a/docs/guide/2-user-guide/1-installation/index.md b/docs/guide/2-user-guide/1-installation/index.md index fc06dbb2b86..315edf4438a 100644 --- a/docs/guide/2-user-guide/1-installation/index.md +++ b/docs/guide/2-user-guide/1-installation/index.md @@ -9,8 +9,12 @@ You need: * [CMake](https://cmake.org/) (3.22+) * GNU make or Ninja * Fortran compiler which supports at least Fortran 2008 (including the TS 29113 when using the C-bindings) -* BLAS+LAPACK implementation (reference, OpenBLAS and MKL have been tested. Note: DBCSR linked to OpenBLAS 0.3.6 gives wrong results on Power9 architectures.) -* Python version installed (2.7 or 3.6+ have been tested) +* BLAS+LAPACK implementation + * Reference BLAS/LAPACK, OpenBLAS and MKL have been tested and can be considered supported. + * On macOS, [vecLibFort](https://github.com/mcg1969/vecLibFort) is required to use Accelerate and/or vecLib. + The build system will automatically build a bundled version if not found on the system. + * DBCSR linked to OpenBLAS 0.3.6 gives wrong results on Power9 architectures. +* Python (version 3.6+ has been tested) Optional: diff --git a/docs/guide/3-developer-guide/3-programming/1-overview/index.md b/docs/guide/3-developer-guide/3-programming/1-overview/index.md index 087667bbce9..88d37a270f9 100644 --- a/docs/guide/3-developer-guide/3-programming/1-overview/index.md +++ b/docs/guide/3-developer-guide/3-programming/1-overview/index.md @@ -46,7 +46,6 @@ Assumed square matrix with 20x20 matrix with 5x5 blocks and a 2x2 processor grid | `__NO_STATM_ACCESS`, `__STATM_RESIDENT` or `__STATM_TOTAL` | Toggle memory usage reporting between resident memory and total memory. 
In particular, macOS users must use `-D__NO_STATM_ACCESS` | Fortran | | `__NO_ABORT` | Avoid calling abort, but STOP instead (useful for coverage testing, and to avoid core dumps on some systems) | Fortran | | `__LIBXSMM` | Enable [LIBXSMM](https://github.com/hfp/libxsmm/) link for optimized small matrix multiplications on CPU | Fortran | -| `__ACCELERATE` | Must be defined on macOS when Apple's Accelerate framework is used for BLAS and LAPACK (this is due to some interface incompatibilities between Accelerate and reference BLAS/LAPACK) | Fortran | | `NDEBUG` | Assertions are stripped ("compiled out"), `NDEBUG` is the ANSI-conforming symbol name (not `__NDEBUG`). Regular release builds may carry assertions for safety | Fortran, C, C++ | | `__CRAY_PM_ACCEL_ENERGY` or `__CRAY_PM_ENERGY` | Switch on collectin energy profiling on Cray systems | Fortran | | `__DBCSR_ACC` | Enable Accelerator compilation | Fortran, C, C++ | diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 42934e9b0fa..929504a604d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -176,8 +176,8 @@ if (APPLE) # fix /proc/self/statm can not be opened on macOS target_compile_definitions(dbcsr PRIVATE __NO_STATM_ACCESS) - if (BLAS_LIBRARIES MATCHES "Accelerate") - target_compile_definitions(dbcsr PRIVATE __ACCELERATE) + if (VECLIBFORT_LIBRARY) + target_link_libraries(dbcsr PRIVATE ${VECLIBFORT_LIBRARY}) endif () endif () diff --git a/src/mm/dbcsr_mm_common.F b/src/mm/dbcsr_mm_common.F index 937043f23e9..e12a4fbb455 100644 --- a/src/mm/dbcsr_mm_common.F +++ b/src/mm/dbcsr_mm_common.F @@ -579,11 +579,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & INTEGER :: blk, bp, bpe, row, col REAL(KIND=real_8), EXTERNAL :: DDOT -#if defined (__ACCELERATE) - REAL(KIND=real_8), EXTERNAL :: SDOT -#else REAL(KIND=real_4), EXTERNAL :: SDOT -#endif ! 
--------------------------------------------------------------------------- diff --git a/src/mm/dbcsr_mm_multrec.F b/src/mm/dbcsr_mm_multrec.F index d8d0420f782..35c77b938de 100644 --- a/src/mm/dbcsr_mm_multrec.F +++ b/src/mm/dbcsr_mm_multrec.F @@ -707,11 +707,7 @@ SUBROUTINE multrec_filtering_${nametype1}$ (filter_eps, nblks, rowi, coli, blkp, REAL(kind=real_8) :: nrm REAL(KIND=real_8), EXTERNAL :: DZNRM2, DDOT -#if defined (__ACCELERATE) - REAL(KIND=real_8), EXTERNAL :: SCNRM2, SDOT -#else REAL(KIND=real_4), EXTERNAL :: SCNRM2, SDOT -#endif REAL(kind=real_8) :: filter_eps_opt diff --git a/src/ops/dbcsr_operations.F b/src/ops/dbcsr_operations.F index 9f5bd5a1747..d16e5c1130c 100644 --- a/src/ops/dbcsr_operations.F +++ b/src/ops/dbcsr_operations.F @@ -1910,11 +1910,7 @@ SUBROUTINE dbcsr_filter_anytype(matrix, eps, method, & TYPE(dbcsr_iterator) :: iter REAL(KIND=real_8), EXTERNAL :: DZNRM2 -#if defined (__ACCELERATE) - REAL(KIND=real_8), EXTERNAL :: SCNRM2 -#else REAL(KIND=real_4), EXTERNAL :: SCNRM2 -#endif ! 
--------------------------------------------------------------------------- diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6be544b1fd1..2b050add41d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -95,23 +95,20 @@ set(dbcsr_unittest_common_SRCS dbcsr_test_add.F dbcsr_test_multiply.F) # instead of building a full-blown lib, it would be better to simply build an # OBJECT lib, but we would need cmake 3.12 to be able to specify # target_link_libraries on those to get the proper compile flags -add_library(dbcsr_unittest_common STATIC ${dbcsr_unittest_common_SRCS}) +add_library(dbcsr_unittest_common OBJECT ${dbcsr_unittest_common_SRCS}) +target_link_libraries(dbcsr_unittest_common PUBLIC dbcsr) target_link_libraries(dbcsr_unittest_common PUBLIC ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) if (OpenMP_FOUND) target_link_libraries(dbcsr_unittest_common PUBLIC OpenMP::OpenMP_Fortran) endif () -if (APPLE AND BLAS_LIBRARIES MATCHES "Accelerate") - target_compile_definitions(dbcsr_unittest_common PRIVATE __ACCELERATE) -endif () -target_link_libraries(dbcsr_unittest_common PUBLIC dbcsr) - # Compile Fortran tests foreach (dbcsr_test ${DBCSR_TESTS_FTN}) add_executable(${dbcsr_test} ${${dbcsr_test}_SRCS}) - target_link_libraries(${dbcsr_test} dbcsr_unittest_common) + target_link_libraries(${dbcsr_test} PRIVATE dbcsr_unittest_common) set_target_properties(${dbcsr_test} PROPERTIES LINKER_LANGUAGE Fortran) + # register unittest executable with CMake if (USE_MPI) separate_arguments(MPIEXEC_PREFLAGS) @@ -124,7 +121,6 @@ foreach (dbcsr_test ${DBCSR_TESTS_FTN}) add_test(NAME ${dbcsr_test} COMMAND ${dbcsr_test}) endif () if (OpenMP_FOUND) - target_link_libraries(${dbcsr_test} OpenMP::OpenMP_Fortran) set_tests_properties( ${dbcsr_test} PROPERTIES ENVIRONMENT OMP_NUM_THREADS=${TEST_OMP_THREADS}) endif () diff --git a/tests/dbcsr_test_add.F b/tests/dbcsr_test_add.F index 692f9c914d7..30fe02679d3 100644 --- a/tests/dbcsr_test_add.F +++ b/tests/dbcsr_test_add.F @@ 
-377,11 +377,7 @@ SUBROUTINE dbcsr_check_add(test_name, matrix_a, dense_a_dbcsr, dense_a, dense_b, LOGICAL :: valid REAL(real_4), ALLOCATABLE, DIMENSION(:) :: work_sp -#if defined (__ACCELERATE) - REAL(real_8), EXTERNAL :: clange, slamch, slange -#else REAL(real_4), EXTERNAL :: clange, slamch, slange -#endif REAL(real_8) :: a_norm_dbcsr, a_norm_in, a_norm_out, & b_norm, eps, residual REAL(real_8), ALLOCATABLE, DIMENSION(:) :: work diff --git a/tests/dbcsr_test_multiply.F b/tests/dbcsr_test_multiply.F index 96081a15272..d36474e3b9f 100644 --- a/tests/dbcsr_test_multiply.F +++ b/tests/dbcsr_test_multiply.F @@ -553,11 +553,7 @@ SUBROUTINE dbcsr_check_multiply(test_name, matrix_c, dense_c_dbcsr, dense_a, den LOGICAL :: valid REAL(real_4), ALLOCATABLE, DIMENSION(:) :: work_sp -#if defined (__ACCELERATE) - REAL(real_8), EXTERNAL :: clange, slamch, slange -#else REAL(real_4), EXTERNAL :: clange, slamch, slange -#endif REAL(real_8) :: a_norm, b_norm, c_norm_dbcsr, c_norm_in, & c_norm_out, eps, eps_norm, residual REAL(real_8), ALLOCATABLE, DIMENSION(:) :: work diff --git a/tools/vecLibFort/CMakeLists.txt b/tools/vecLibFort/CMakeLists.txt new file mode 100644 index 00000000000..9fa4f450c7d --- /dev/null +++ b/tools/vecLibFort/CMakeLists.txt @@ -0,0 +1,7 @@ +add_library(vecLibFort STATIC vecLibFort.c) + +install( + TARGETS vecLibFort + EXPORT DBCSRTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")