From e7d7aa13d6ffbe11a2ceaa6213ac242af103c735 Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 17 Apr 2024 14:00:26 +0100 Subject: [PATCH 1/9] Add skipping broken test for default config on amd and nvidia --- test/unittest/CMakeLists.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index a77490ff3..fd73ca8be 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -101,6 +101,26 @@ if(is_dpcpp) ) endif() +if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") + if (${DPCPP_SYCL_TARGET} STREQUAL "nvptx64-nvidia-cuda") + set(DEFAULT_SKIP + ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp + ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp + ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp + ) + elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa") + set(DEFAULT_SKIP + ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp + ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp + ) + endif() +endif() + if(GEMM_TALL_SKINNY_SUPPORT) list(APPEND SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/blas3/blas3_gemm_tall_skinny_test.cpp) endif() @@ -115,6 +135,9 @@ foreach(blas_test ${SYCL_UNITTEST_SRCS}) if(is_adaptivecpp AND ${blas_test} IN_LIST ADAPTIVE_CPP_SKIP) continue() endif() + if(${blas_test} IN_LIST DEFAULT_SKIP) + continue() + endif() get_filename_component(test_exec ${blas_test} NAME_WE) add_executable(${test_exec} main.cpp ${blas_test}) if(is_computecpp) From 3cbda4538be11ccccf63faff2421f06ed58ac1d9 Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 17 Apr 2024 15:25:00 +0100 Subject: [PATCH 2/9] Add cmake warning for disabled tests. Using DEFAULT_CPU as TUNING_TARGET for AMD or NVIDIA GPUs let some tests fail. 
Disable the failing tests for now and output a proper message. Signed-off-by: nscipione --- test/unittest/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index fd73ca8be..fc6420458 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -111,6 +111,8 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp ) + message(WARNING "Targetting NVIDIA hardware with DEFAULT_CPU TUNING_TARGET. + Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.") elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa") set(DEFAULT_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp @@ -118,6 +120,8 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp ) + message(WARNING "Targetting AMD hardware with DEFAULT_CPU TUNING_TARGET. + Disabling tests for following operators: iamax, iamin, tbsv, tpsv.") endif() endif() From c407fbcedf5ab4ec16f7d5f5dbef992c84863e5c Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 17 Apr 2024 15:38:59 +0100 Subject: [PATCH 3/9] Changing TUNING_TARGET from DEFAULT_CPU to DEFAULT This tuning target is capable of compiling for every device if DPCPP_SYCL_TARGET and DPCPP_SYCL_ARCH are specified correctly. So this commit is a first step in changing this target's behaviour and increasing general understanding. 
--- README.md | 2 +- cmake/CmakeFunctionHelper.cmake | 8 ++++---- cmake/Modules/ConfigurePORTBLAS.cmake | 2 +- cmake/Modules/SYCL.cmake | 2 +- src/interface/blas1/backend/default_cpu.hpp | 4 ++-- src/interface/blas2/backend/default_cpu.hpp | 4 ++-- src/interface/blas3/backend/default_cpu.hpp | 4 ++-- src/interface/extension/backend/default_cpu.hpp | 4 ++-- test/unittest/CMakeLists.txt | 6 +++--- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index cd25f3ab9..1b9e7af81 100644 --- a/README.md +++ b/README.md @@ -487,7 +487,7 @@ Some of the supported options are: | `BLAS_ENABLE_TESTING` | `ON`/`OFF` | Set it to `OFF` to avoid building the tests (`ON` is the default value) | | `BLAS_ENABLE_BENCHMARK` | `ON`/`OFF` | Set it to `OFF` to avoid building the benchmarks (`ON` is the default value) | | `SYCL_COMPILER` | name | Used to determine which SYCL implementation to use. By default, the first implementation found is used. Supported values are: `dpcpp`, `adaptivecpp` and `computecpp`*(deprecated)*. | -| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT_CPU` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` | +| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). 
The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` | | `CMAKE_PREFIX_PATH` | path | List of paths to check when searching for dependencies | | `CMAKE_INSTALL_PREFIX` | path | Specify the install location, used when invoking `ninja install` | | `BUILD_SHARED_LIBS` | `ON`/`OFF` | Build as shared library (`ON` by default) | diff --git a/cmake/CmakeFunctionHelper.cmake b/cmake/CmakeFunctionHelper.cmake index fe60c42f7..553f7a5c7 100644 --- a/cmake/CmakeFunctionHelper.cmake +++ b/cmake/CmakeFunctionHelper.cmake @@ -98,11 +98,11 @@ function(set_target_compile_def in_target) elseif(${TUNING_TARGET} STREQUAL "NVIDIA_GPU") target_compile_definitions(${in_target} PUBLIC NVIDIA_GPU=1) else() - if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") - message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT_CPU instead.") - set(TUNING_TARGET "DEFAULT_CPU") + if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT") + message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT instead.") + set(TUNING_TARGET "DEFAULT") endif() - target_compile_definitions(${in_target} PUBLIC DEFAULT_CPU=1) + target_compile_definitions(${in_target} PUBLIC DEFAULT=1) endif() message(STATUS "Adding ${TUNING_TARGET} backend to target ${in_target}") #setting tall skinny support diff --git a/cmake/Modules/ConfigurePORTBLAS.cmake b/cmake/Modules/ConfigurePORTBLAS.cmake index a66eebfed..b05714e58 100644 --- a/cmake/Modules/ConfigurePORTBLAS.cmake +++ b/cmake/Modules/ConfigurePORTBLAS.cmake @@ -56,7 +56,7 @@ if(NAIVE_GEMM) endif() # the TUNING_TARGET variable defines the platform for which the sycl library is tuned -SET(TUNING_TARGET "DEFAULT_CPU" CACHE STRING "Default Platform 'DEFAULT_CPU'") +SET(TUNING_TARGET "DEFAULT" CACHE STRING "Default Platform 'DEFAULT'") message(STATUS "${TUNING_TARGET} is chosen as a tuning target") if(DEFINED TARGET) diff --git a/cmake/Modules/SYCL.cmake b/cmake/Modules/SYCL.cmake index a4efc0226..54246a2c5 100644 --- a/cmake/Modules/SYCL.cmake +++ 
b/cmake/Modules/SYCL.cmake @@ -97,7 +97,7 @@ elseif(is_adaptivecpp) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") get_target_property(SYCL_INCLUDE_DIRS AdaptiveCpp::acpp-rt INTERFACE_INCLUDE_DIRECTORIES) - set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT_CPU") + set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT") if((${BLAS_ENABLE_BENCHMARK}) AND (${TUNING_TARGET} IN_LIST HIP_BENCH_UNSUPPORTED_TARGETS)) message(STATUS "Benchmarks are not supported when targetting OpenCL/LevelZero backend devices. portBLAS Benchmarks are disabled.") diff --git a/src/interface/blas1/backend/default_cpu.hpp b/src/interface/blas1/backend/default_cpu.hpp index ac2fe764d..cdb22b71d 100644 --- a/src/interface/blas1/backend/default_cpu.hpp +++ b/src/interface/blas1/backend/default_cpu.hpp @@ -22,8 +22,8 @@ * @filename defaul_cpu.hpp * **************************************************************************/ -#ifndef PORTBLAS_ASUM_DEFAULT_CPU_BACKEND_HPP -#define PORTBLAS_ASUM_DEFAULT_CPU_BACKEND_HPP +#ifndef PORTBLAS_ASUM_DEFAULT_BACKEND_HPP +#define PORTBLAS_ASUM_DEFAULT_BACKEND_HPP #include "interface/blas1_interface.h" namespace blas { diff --git a/src/interface/blas2/backend/default_cpu.hpp b/src/interface/blas2/backend/default_cpu.hpp index 58ce7495c..37d38ba5b 100644 --- a/src/interface/blas2/backend/default_cpu.hpp +++ b/src/interface/blas2/backend/default_cpu.hpp @@ -22,8 +22,8 @@ * @filename default_cpu.hpp * **************************************************************************/ -#ifndef PORTBLAS_GEMV_DEFAULT_CPU_BACKEND_HPP -#define PORTBLAS_GEMV_DEFAULT_CPU_BACKEND_HPP +#ifndef PORTBLAS_GEMV_DEFAULT_BACKEND_HPP +#define PORTBLAS_GEMV_DEFAULT_BACKEND_HPP #include "interface/blas2_interface.h" namespace blas { diff --git a/src/interface/blas3/backend/default_cpu.hpp b/src/interface/blas3/backend/default_cpu.hpp index dbe475d1b..32756da9f 100644 --- a/src/interface/blas3/backend/default_cpu.hpp +++ 
b/src/interface/blas3/backend/default_cpu.hpp @@ -22,8 +22,8 @@ * @filename default_cpu.hpp * **************************************************************************/ -#ifndef PORTBLAS_GEMM_DEFAULT_CPU_BACKEND_HPP -#define PORTBLAS_GEMM_DEFAULT_CPU_BACKEND_HPP +#ifndef PORTBLAS_GEMM_DEFAULT_BACKEND_HPP +#define PORTBLAS_GEMM_DEFAULT_BACKEND_HPP #include "interface/gemm_launcher.h" namespace blas { diff --git a/src/interface/extension/backend/default_cpu.hpp b/src/interface/extension/backend/default_cpu.hpp index ba714e78c..7c73c0e24 100644 --- a/src/interface/extension/backend/default_cpu.hpp +++ b/src/interface/extension/backend/default_cpu.hpp @@ -22,8 +22,8 @@ * @filename default_cpu.hpp * **************************************************************************/ -#ifndef PORTBLAS_TRANSPOSE_DEFAULT_CPU_BACKEND_HPP -#define PORTBLAS_TRANSPOSE_DEFAULT_CPU_BACKEND_HPP +#ifndef PORTBLAS_TRANSPOSE_DEFAULT_BACKEND_HPP +#define PORTBLAS_TRANSPOSE_DEFAULT_BACKEND_HPP #include "interface/extension_interface.h" namespace blas { diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index fc6420458..14b773678 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -101,7 +101,7 @@ if(is_dpcpp) ) endif() -if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") +if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") if (${DPCPP_SYCL_TARGET} STREQUAL "nvptx64-nvidia-cuda") set(DEFAULT_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp @@ -111,7 +111,7 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp ) - message(WARNING "Targetting NVIDIA hardware with DEFAULT_CPU TUNING_TARGET. + message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET. 
Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.") elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa") set(DEFAULT_SKIP @@ -120,7 +120,7 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT_CPU") ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp ) - message(WARNING "Targetting AMD hardware with DEFAULT_CPU TUNING_TARGET. + message(WARNING "Targetting AMD hardware with DEFAULT TUNING_TARGET. Disabling tests for following operators: iamax, iamin, tbsv, tpsv.") endif() endif() From f68a94cb0094219af841015597e70df0b74af65b Mon Sep 17 00:00:00 2001 From: nscipione Date: Mon, 22 Apr 2024 15:02:13 +0100 Subject: [PATCH 4/9] Add trsv to skipped test for AMD GPUs Running tests on w6800 AMD GPU reveals another failing test. Skip this test for now. Signed-off-by: nscipione --- test/unittest/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 14b773678..c49012a02 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -106,9 +106,9 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") set(DEFAULT_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp - ${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp ) message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET. @@ -119,9 +119,10 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp + ${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp ) message(WARNING "Targetting AMD hardware with DEFAULT TUNING_TARGET. 
- Disabling tests for following operators: iamax, iamin, tbsv, tpsv.") + Disabling tests for following operators: iamax, iamin, tbsv, tpsv, trsv.") endif() endif() From fa31d27bc944e3ae0b33e6eac736f062f2137f8d Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 23 Apr 2024 09:58:00 +0100 Subject: [PATCH 5/9] Update README Add possibility of using DEFAULT as target. Explaining how and current operators situation. --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 1b9e7af81..465528dbe 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,14 @@ advisable for NVIDIA and **mandatory for AMD** to provide the specific device architecture through `-DDPCPP_SYCL_ARCH=`, e.g., `` can be `sm_80` for NVIDIA or `gfx908` for AMD. +It is possible to use the `DEFAULT` target even for AMD and NVIDIA GPUs, but +defining `-DDPCPP_SYCL_TARGET` and `-DDPCPP_SYCL_ARCH` is mandatory. The rules +mentioned above also apply in this case. +Using `DEFAULT` as the target will speedup compilation time at expense of +runtime. Moreover, currently, some operators don't work, and therefore, their +tests are disabled. For full compatibility and performance, set also `TUNING_TARGET` +to the appropriate one. 
+ #### DPC++ Compiler Support As DPCPP SYCL compiler the project is fully compatible with `icpx` provided by From 91bb5105539b085f25dffd891c294dc645aee3cd Mon Sep 17 00:00:00 2001 From: nscipione Date: Fri, 3 May 2024 17:06:46 +0200 Subject: [PATCH 6/9] Rename "default_cpu" to "default" The default target is not only compatible with CPUs, so rename files and references from "default_cpu" to "default" --- doc/Gemm.md | 4 ++-- src/interface/blas1/backend/backend.hpp | 2 +- src/interface/blas1/backend/{default_cpu.hpp => default.hpp} | 0 src/interface/blas2/backend/backend.hpp | 2 +- src/interface/blas2/backend/{default_cpu.hpp => default.hpp} | 2 +- src/interface/blas3/backend/backend.hpp | 2 +- src/interface/blas3/backend/{default_cpu.hpp => default.hpp} | 2 +- src/interface/extension/backend/backend.hpp | 2 +- .../extension/backend/{default_cpu.hpp => default.hpp} | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) rename src/interface/blas1/backend/{default_cpu.hpp => default.hpp} (100%) rename src/interface/blas2/backend/{default_cpu.hpp => default.hpp} (99%) rename src/interface/blas3/backend/{default_cpu.hpp => default.hpp} (99%) rename src/interface/extension/backend/{default_cpu.hpp => default.hpp} (99%) diff --git a/doc/Gemm.md b/doc/Gemm.md index 653549212..07b50ae68 100644 --- a/doc/Gemm.md +++ b/doc/Gemm.md @@ -172,7 +172,7 @@ This cmake variable causes a corresponding define for the selected platform to b #elif defined POWER_VR #include "interface/blas3/backend/power_vr.hpp" #else -#include "interface/blas3/backend/default_cpu.hpp" +#include "interface/blas3/backend/default.hpp" #endif ``` @@ -307,7 +307,7 @@ The relevant parameters are: - Vector size, the number of elements to use in vectorized loads/stores. - Batch type, whether to use strided (most `GEMM` kernels) or the interleaved `GEMM` for batched calls. 
-For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default_cpu.hpp` : +For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default.hpp` : ```c++ template Date: Mon, 6 May 2024 18:03:11 +0200 Subject: [PATCH 7/9] Restructure how to select tests to skip Set a unique variable to decide which tests are skipped according to compiler, tuning target and hardware Signed-off-by: nscipione --- test/unittest/CMakeLists.txt | 41 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index c49012a02..5bc2f5a80 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -73,18 +73,20 @@ set(SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/buffers/sycl_buffer_test.cpp ) -# Skip these tests for AdaptiveCpp for SPIRV/OpenCL targets -# that use SYCL 2020 features like group reduction or hang -# during execution (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309) -set(ADAPTIVE_CPP_SKIP - ${PORTBLAS_UNITTEST}/blas1/blas1_asum_test.cpp - ${PORTBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp - ${PORTBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp - ${PORTBLAS_UNITTEST}/blas1/blas1_dot_test.cpp - ${PORTBLAS_UNITTEST}/blas1/blas1_rot_test.cpp - # Hang during execution (without failing) - ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp -) +if(is_adaptivecpp) + # Skip these tests for AdaptiveCpp for SPIRV/OpenCL targets + # that use SYCL 2020 features like group reduction or hang + # during execution (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309) + set(TEST_TO_SKIP + ${PORTBLAS_UNITTEST}/blas1/blas1_asum_test.cpp + ${PORTBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp + ${PORTBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp + ${PORTBLAS_UNITTEST}/blas1/blas1_dot_test.cpp + 
${PORTBLAS_UNITTEST}/blas1/blas1_rot_test.cpp + # Hang during execution (without failing) + ${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp + ) +endif() if(${BLAS_ENABLE_EXTENSIONS}) list(APPEND SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/extension/transpose_test.cpp @@ -102,8 +104,10 @@ if(is_dpcpp) endif() if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") - if (${DPCPP_SYCL_TARGET} STREQUAL "nvptx64-nvidia-cuda") - set(DEFAULT_SKIP + string(REGEX MATCH "nvidia_gpu" is_nvidia ${DPCPP_SYCL_TARGET}) + string(REGEX MATCH "amd_gpu" is_amd ${DPCPP_SYCL_TARGET}) + if (${DPCPP_SYCL_TARGET} STREQUAL "nvptx64-nvidia-cuda" OR is_nvidia STREQUAL "nvidia_gpu") + set(TEST_TO_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp @@ -113,8 +117,8 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") ) message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET. Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.") - elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa") - set(DEFAULT_SKIP + elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa" OR is_amd STREQUAL "amd_gpu") + set(TEST_TO_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp @@ -137,10 +141,7 @@ set(HALF_DATA_OPS "blas1_axpy_test" ) foreach(blas_test ${SYCL_UNITTEST_SRCS}) - if(is_adaptivecpp AND ${blas_test} IN_LIST ADAPTIVE_CPP_SKIP) - continue() - endif() - if(${blas_test} IN_LIST DEFAULT_SKIP) + if(${blas_test} IN_LIST TEST_TO_SKIP) continue() endif() get_filename_component(test_exec ${blas_test} NAME_WE) From 24fb6f73af12c3d18c3621612f22f6570859665b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Scipione?= Date: Tue, 7 May 2024 11:04:24 +0200 Subject: [PATCH 8/9] Update README.md Co-authored-by: HJA Bird --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 
deletions(-) diff --git a/README.md b/README.md index 465528dbe..ef766dbc9 100644 --- a/README.md +++ b/README.md @@ -419,10 +419,9 @@ for NVIDIA or `gfx908` for AMD. It is possible to use the `DEFAULT` target even for AMD and NVIDIA GPUs, but defining `-DDPCPP_SYCL_TARGET` and `-DDPCPP_SYCL_ARCH` is mandatory. The rules mentioned above also apply in this case. -Using `DEFAULT` as the target will speedup compilation time at expense of -runtime. Moreover, currently, some operators don't work, and therefore, their -tests are disabled. For full compatibility and performance, set also `TUNING_TARGET` -to the appropriate one. +Using `DEFAULT` as the target will speedup compilation at the expense of +runtime performance. Additionally, some operators will be disabled. +For full compatibility and best performance, set the `TUNING_TARGET` appropriately. #### DPC++ Compiler Support From abb491e9dc344f6321d3f660445dc7a46c6492e6 Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 7 May 2024 12:04:16 +0200 Subject: [PATCH 9/9] Address PR comments --- test/unittest/CMakeLists.txt | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 5bc2f5a80..ba5ebbca3 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -77,7 +77,7 @@ if(is_adaptivecpp) # Skip these tests for AdaptiveCpp for SPIRV/OpenCL targets # that use SYCL 2020 features like group reduction or hang # during execution (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309) - set(TEST_TO_SKIP + set(TESTS_TO_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_asum_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp @@ -104,10 +104,8 @@ if(is_dpcpp) endif() if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") - string(REGEX MATCH "nvidia_gpu" is_nvidia ${DPCPP_SYCL_TARGET}) - string(REGEX MATCH "amd_gpu" is_amd ${DPCPP_SYCL_TARGET}) - if (${DPCPP_SYCL_TARGET} STREQUAL 
"nvptx64-nvidia-cuda" OR is_nvidia STREQUAL "nvidia_gpu") - set(TEST_TO_SKIP + if (${DPCPP_SYCL_TARGET} MATCHES "nvidia") + set(TESTS_TO_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp @@ -117,8 +115,8 @@ if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT") ) message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET. Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.") - elseif(${DPCPP_SYCL_TARGET} STREQUAL "amdgcn-amd-amdhsa" OR is_amd STREQUAL "amd_gpu") - set(TEST_TO_SKIP + elseif (${DPCPP_SYCL_TARGET} MATCHES "amd") + set(TESTS_TO_SKIP ${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp ${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp @@ -141,7 +139,7 @@ set(HALF_DATA_OPS "blas1_axpy_test" ) foreach(blas_test ${SYCL_UNITTEST_SRCS}) - if(${blas_test} IN_LIST TEST_TO_SKIP) + if(${blas_test} IN_LIST TESTS_TO_SKIP) continue() endif() get_filename_component(test_exec ${blas_test} NAME_WE)