diff --git a/README.md b/README.md index cd25f3ab9..ef766dbc9 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,13 @@ advisable for NVIDIA and **mandatory for AMD** to provide the specific device architecture through `-DDPCPP_SYCL_ARCH=<arch>`, e.g., `<arch>` can be `sm_80` for NVIDIA or `gfx908` for AMD. +It is possible to use the `DEFAULT` target even for AMD and NVIDIA GPUs, but +defining `-DDPCPP_SYCL_TARGET` and `-DDPCPP_SYCL_ARCH` is mandatory. The rules +mentioned above also apply in this case. +Using `DEFAULT` as the target will speed up compilation at the expense of +runtime performance. Additionally, some operators will be disabled. +For full compatibility and best performance, set the `TUNING_TARGET` appropriately. + #### DPC++ Compiler Support As DPCPP SYCL compiler the project is fully compatible with `icpx` provided by @@ -487,7 +494,7 @@ Some of the supported options are: | `BLAS_ENABLE_TESTING` | `ON`/`OFF` | Set it to `OFF` to avoid building the tests (`ON` is the default value) | | `BLAS_ENABLE_BENCHMARK` | `ON`/`OFF` | Set it to `OFF` to avoid building the benchmarks (`ON` is the default value) | | `SYCL_COMPILER` | name | Used to determine which SYCL implementation to use. By default, the first implementation found is used. Supported values are: `dpcpp`, `adaptivecpp` and `computecpp`*(deprecated)*. | -| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT_CPU` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` | +| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). 
The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` | | `CMAKE_PREFIX_PATH` | path | List of paths to check when searching for dependencies | | `CMAKE_INSTALL_PREFIX` | path | Specify the install location, used when invoking `ninja install` | | `BUILD_SHARED_LIBS` | `ON`/`OFF` | Build as shared library (`ON` by default) | diff --git a/cmake/CmakeFunctionHelper.cmake b/cmake/CmakeFunctionHelper.cmake index fe60c42f7..553f7a5c7 100644 --- a/cmake/CmakeFunctionHelper.cmake +++ b/cmake/CmakeFunctionHelper.cmake @@ -98,11 +98,11 @@ function(set_target_compile_def in_target) elseif(${TUNING_TARGET} STREQUAL "NVIDIA_GPU") target_compile_definitions(${in_target} PUBLIC NVIDIA_GPU=1) else() - if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") - message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT_CPU instead.") - set(TUNING_TARGET "DEFAULT_CPU") + if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT") + message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT instead.") + set(TUNING_TARGET "DEFAULT") endif() - target_compile_definitions(${in_target} PUBLIC DEFAULT_CPU=1) + target_compile_definitions(${in_target} PUBLIC DEFAULT=1) endif() message(STATUS "Adding ${TUNING_TARGET} backend to target ${in_target}") #setting tall skinny support diff --git a/cmake/Modules/ConfigurePORTBLAS.cmake b/cmake/Modules/ConfigurePORTBLAS.cmake index a66eebfed..b05714e58 100644 --- a/cmake/Modules/ConfigurePORTBLAS.cmake +++ b/cmake/Modules/ConfigurePORTBLAS.cmake @@ -56,7 +56,7 @@ if(NAIVE_GEMM) endif() # the TUNING_TARGET variable defines the platform for which the sycl library is tuned -SET(TUNING_TARGET "DEFAULT_CPU" CACHE STRING "Default Platform 'DEFAULT_CPU'") +SET(TUNING_TARGET "DEFAULT" CACHE STRING "Default Platform 'DEFAULT'") message(STATUS "${TUNING_TARGET} is chosen as a tuning target") if(DEFINED TARGET) diff --git a/cmake/Modules/SYCL.cmake b/cmake/Modules/SYCL.cmake index a4efc0226..54246a2c5 100644 --- a/cmake/Modules/SYCL.cmake +++ 
b/cmake/Modules/SYCL.cmake @@ -97,7 +97,7 @@ elseif(is_adaptivecpp) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") get_target_property(SYCL_INCLUDE_DIRS AdaptiveCpp::acpp-rt INTERFACE_INCLUDE_DIRECTORIES) - set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT_CPU") + set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT") if((${BLAS_ENABLE_BENCHMARK}) AND (${TUNING_TARGET} IN_LIST HIP_BENCH_UNSUPPORTED_TARGETS)) message(STATUS "Benchmarks are not supported when targetting OpenCL/LevelZero backend devices. portBLAS Benchmarks are disabled.") diff --git a/doc/Gemm.md b/doc/Gemm.md index 653549212..07b50ae68 100644 --- a/doc/Gemm.md +++ b/doc/Gemm.md @@ -172,7 +172,7 @@ This cmake variable causes a corresponding define for the selected platform to b #elif defined POWER_VR #include "interface/blas3/backend/power_vr.hpp" #else -#include "interface/blas3/backend/default_cpu.hpp" +#include "interface/blas3/backend/default.hpp" #endif ``` @@ -307,7 +307,7 @@ The relevant parameters are: - Vector size, the number of elements to use in vectorized loads/stores. - Batch type, whether to use strided (most `GEMM` kernels) or the interleaved `GEMM` for batched calls. -For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default_cpu.hpp` : +For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default.hpp` : ```c++ template