From c06a6062ab27b072ba2ef0e4f5a1151882aa4085 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Thu, 8 Aug 2024 10:09:17 +0100 Subject: [PATCH 01/10] Add generic device; Initial support in portBLAS * oneMKL Interfaces currently only supports known targets: Intel CPU/GPU, AMD GPU, Nvidia GPU * This PR: * Enables a new generic target * Enables the generic target to use the portBLAS backend * Adds documentation --- CMakeLists.txt | 8 +++++ README.md | 11 ++++-- docs/building_the_project_with_dpcpp.rst | 36 ++++++++++++++++++++ include/oneapi/mkl/detail/backends_table.hpp | 8 ++++- include/oneapi/mkl/detail/get_device_id.hpp | 8 +++++ src/CMakeLists.txt | 1 + src/blas/backends/portblas/CMakeLists.txt | 9 ++++- src/config.hpp.in | 2 ++ 8 files changed, 79 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bd39f188..56ae8ba43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF) option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF) +# Generic devices +option(ENABLE_GENERIC_DEVICE "Enable generic devices. Requires the portBLAS backend." 
OFF) + set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler") set(HIP_TARGETS "" CACHE STRING "Target HIP architectures") @@ -123,6 +126,11 @@ if (ENABLE_PORTFFT_BACKEND message(FATAL_ERROR "ENABLE_PORTFFT_BACKEND cannot be enabled at the same time as other DFT backends.") endif() +if(ENABLE_GENERIC_DEVICE + AND NOT ENABLE_PORTBLAS_BACKEND) + message(FATAL_ERROR "ENABLE_GENERIC_DEVICE requires that the portBLAS backend is enabled.") +endif() + # Define required CXX compilers before project if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++") if(WIN32) diff --git a/README.md b/README.md index fdbd4b8e9..e609cd0d7 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org). portBLAS - x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU + x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU, Other SYCL devices (unsupported) portFFT @@ -172,7 +172,7 @@ Supported compilers include: - BLAS + BLAS x86 CPU Intel(R) oneMKL Intel DPC++
AdaptiveCpp @@ -221,6 +221,12 @@ Supported compilers include: Open DPC++ Dynamic, Static + + Other SYCL devices (unsupported) + portBLAS + Intel DPC++
Open DPC++ + Dynamic, Static + LAPACK x86 CPU @@ -405,6 +411,7 @@ Supported compilers include: - Intel(R) Data Center GPU Max Series - NVIDIA(R) A100 (Linux* only) - AMD(R) GPUs see [here](https://github.com/RadeonOpenCompute/ROCm#hardware-and-software-support) tested on AMD Vega 20 (gfx906) + - Other SYCL devices can be used, but are not supported --- ### Supported Operating Systems diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index e33a78046..c57988391 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -128,6 +128,9 @@ The most important supported build options are: * - ENABLE_PORTFFT_BACKEND - True, False - False + * - ENABLE_GENERIC_DEVICE + - True, False + - False * - BUILD_FUNCTIONAL_TESTS - True, False - True @@ -225,6 +228,23 @@ A few often-used architectures are listed below: For a host with ROCm installed, the device architecture can be retrieved via the ``rocminfo`` tool. The architecture will be displayed in the ``Name:`` row. +.. _build_for_other_SYCL_devices: + +Building for other SYCL devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SYCL enables portable heterogeneous computing on a wide range of accelerators. +Consequently, it is possible to use oneMKL Interfaces with accelerators not +anticipated by the oneMKL Interfaces team. This can be enabled using the +``-DENABLE_GENERIC_DEVICE=ON`` option. However, this is not a supported +configuration. + +For generic SYCL devices, only the portBLAS backend is enabled. The user must +set the appropriate ``-fsycl-targets`` for their device, and also any +``PORTBLAS_TUNING_TARGET`` required for performance. See +`Building for portBLAS`_. Extensive testing is strongly advised for these +unsupported configurations. + .. 
_build_for_portlibs_dpcpp: Pure SYCL backends: portBLAS and portFFT @@ -408,6 +428,22 @@ set, the backend libraries to enable the use of BLAS, LAPACK and RNG with MKLGPU and MKLCPU would also be enabled. The build of examples is disabled. Since functional testing was not disabled, tests would be built. +Build oneMKL for the BLAS domain on a generic SYCL device: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DENABLE_MKLCPU_BACKEND=False \ + -DENABLE_MKLGPU_BACKEND=False \ + -DENABLE_PORTBLAS_BACKEND=True \ + -DENABLE_GENERIC_DEVICE=True + +Note that this is not a supported configuration. This builds oneMKL Interfaces +with the portBLAS backend only, for a generic SYCL device supported by the +Open DPC++ project. + .. _project_cleanup: Project Cleanup diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index 8e68674cc..8594de13d 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -40,7 +40,7 @@ namespace oneapi { namespace mkl { -enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu }; +enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu, generic_device }; enum class domain : uint16_t { blas, dft, lapack, rng, sparse_blas }; static std::map>> libraries = { @@ -82,6 +82,12 @@ static std::map>> libraries = #endif #ifdef ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU LIB_NAME("blas_portblas"), +#endif + } }, + { device::generic_device, + { +#ifdef ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE + LIB_NAME("blas_portblas"), #endif } } } }, diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/mkl/detail/get_device_id.hpp index 88b235754..91b460110 100644 --- a/include/oneapi/mkl/detail/get_device_id.hpp +++ b/include/oneapi/mkl/detail/get_device_id.hpp @@ -59,11 +59,19 @@ inline oneapi::mkl::device get_device_id(sycl::queue &queue) { else if (vendor_id == AMD_ID) 
device_id = device::amdgpu; else { +#ifdef ENABLE_GENERIC_DEVICE + device_id = device::generic_device; +#else throw unsupported_device("", "", queue.get_device()); +#endif // ENABLE_GENERIC_DEVICE } } else { +#ifdef ENABLE_GENERIC_DEVICE + device_id = device::generic_device; +#else throw unsupported_device("", "", queue.get_device()); +#endif // ENABLE_GENERIC_DEVICE } return device_id; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0b632c1bd..918e0dc2b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(ENABLE_PORTBLAS_BACKEND_INTEL_CPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_INTEL_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_AMD_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU OFF CACHE INTERNAL "") +set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE OFF CACHE INTERNAL "") # store path to CMAKE_CURRENT_BINARY_DIR to use it later (makes FetchContent_Declare workable) set(ONEMKL_GENERATED_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt index 03fddbb38..abb8586fe 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -46,7 +46,14 @@ if(NUM_TARGETS EQUAL 0) list(LENGTH SYCL_TARGETS NUM_TARGETS) endif() -if(PORTBLAS_TUNING_TARGET) +if(ENABLE_GENERIC_DEVICE) + set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERAL "") + if(NOT PORTBLAS_TUNING_TARGET) + # If a generic device is specified, set the tuning target to default for best compatibility. + message(STATUS "Setting DEFAULT portBLAS tuning target for generic device.") + set(PORTBLAS_TUNING_TARGET "DEFAULT") + endif() +elseif (PORTBLAS_TUNING_TARGET) # Allow the user to manually enable a specific device type # for tuned portBLAS configurations and sets sycl-target. 
if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU") diff --git a/src/config.hpp.in b/src/config.hpp.in index 5698abf9b..e68b3b10e 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -32,12 +32,14 @@ #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_CPU #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_GPU #cmakedefine ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU +#cmakedefine ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE #cmakedefine ENABLE_PORTFFT_BACKEND #cmakedefine ENABLE_ROCBLAS_BACKEND #cmakedefine ENABLE_ROCFFT_BACKEND #cmakedefine ENABLE_ROCRAND_BACKEND #cmakedefine ENABLE_ROCSOLVER_BACKEND #cmakedefine BUILD_SHARED_LIBS +#cmakedefine ENABLE_GENERIC_DEVICE #cmakedefine REF_BLAS_LIBNAME "@REF_BLAS_LIBNAME@" #cmakedefine REF_CBLAS_LIBNAME "@REF_CBLAS_LIBNAME@" From 06c278c70684c17a640ffd766333285a8d4e9dd7 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Wed, 14 Aug 2024 16:22:14 +0100 Subject: [PATCH 02/10] CMake typo; Add -fno-sycl-instrument-device-code --- src/blas/backends/portblas/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt index abb8586fe..3612de5dd 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -47,7 +47,8 @@ if(NUM_TARGETS EQUAL 0) endif() if(ENABLE_GENERIC_DEVICE) - set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERAL "") + set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERNAL "") + target_compile_options(ONEMKL::SYCL::SYCL INTERFACE -fno-sycl-instrument-device-code) if(NOT PORTBLAS_TUNING_TARGET) # If a generic device is specified, set the tuning target to default for best compatibility. 
message(STATUS "Setting DEFAULT portBLAS tuning target for generic device.") From be9ee68efc85052812dfe6a44fa169124aad356e Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 4 Sep 2024 13:41:11 +0100 Subject: [PATCH 03/10] remove ENABLE_GENERIC_DEVICE option and add always generic_device to possible devices. This commit removes the ENABLE_GENERIC_DEVICE option and instead always adds generic_device to the backends_table. The check that throws the unsupported_device exception is moved to table_initializer; to keep the exception as informative as before, the function_tables operator[] had to be changed. This change allows portBLAS (and, possibly in the future, all "port" libraries) to be used with any supported device. --- CMakeLists.txt | 8 - docs/building_the_project_with_dpcpp.rst | 10 +- include/oneapi/mkl/detail/backends_table.hpp | 2 - include/oneapi/mkl/detail/get_device_id.hpp | 8 - src/CMakeLists.txt | 1 - src/blas/backends/portblas/CMakeLists.txt | 10 +- src/blas/blas_loader.cpp | 3102 +++++++++--------- src/config.hpp.in | 2 - src/dft/dft_loader.cpp | 8 +- src/include/function_table_initializer.hpp | 17 +- src/lapack/lapack_loader.cpp | 1542 ++++----- src/rng/rng_loader.cpp | 8 +- 12 files changed, 2418 insertions(+), 2300 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56ae8ba43..1bd39f188 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,9 +60,6 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF) option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF) -# Generic devices -option(ENABLE_GENERIC_DEVICE "Enable generic devices. Requires the portBLAS backend." 
OFF) - set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler") set(HIP_TARGETS "" CACHE STRING "Target HIP architectures") @@ -126,11 +123,6 @@ if (ENABLE_PORTFFT_BACKEND message(FATAL_ERROR "ENABLE_PORTFFT_BACKEND cannot be enabled at the same time as other DFT backends.") endif() -if(ENABLE_GENERIC_DEVICE - AND NOT ENABLE_PORTBLAS_BACKEND) - message(FATAL_ERROR "ENABLE_GENERIC_DEVICE requires that the portBLAS backend is enabled.") -endif() - # Define required CXX compilers before project if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++") if(WIN32) diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index c57988391..d85af5760 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -128,9 +128,6 @@ The most important supported build options are: * - ENABLE_PORTFFT_BACKEND - True, False - False - * - ENABLE_GENERIC_DEVICE - - True, False - - False * - BUILD_FUNCTIONAL_TESTS - True, False - True @@ -235,9 +232,7 @@ Building for other SYCL devices SYCL enables portable heterogeneous computing on a wide range of accelerators. Consequently, it is possible to use oneMKL Interfaces with accelerators not -anticipated by the oneMKL Interfaces team. This can be enabled using the -``-DENABLE_GENERIC_DEVICE=ON`` option. However, this is not a supported -configuration. +anticipated by the oneMKL Interfaces team. For generic SYCL devices, only the portBLAS backend is enabled. The user must set the appropriate ``-fsycl-targets`` for their device, and also any @@ -437,8 +432,7 @@ Build oneMKL for the BLAS domain on a generic SYCL device: -DCMAKE_C_COMPILER=clang \ -DENABLE_MKLCPU_BACKEND=False \ -DENABLE_MKLGPU_BACKEND=False \ - -DENABLE_PORTBLAS_BACKEND=True \ - -DENABLE_GENERIC_DEVICE=True + -DENABLE_PORTBLAS_BACKEND=True Note that this is not a supported configuration. 
This builds oneMKL Interfaces with the portBLAS backend only, for a generic SYCL device supported by the diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index 8594de13d..418f91131 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -86,9 +86,7 @@ static std::map>> libraries = } }, { device::generic_device, { -#ifdef ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE LIB_NAME("blas_portblas"), -#endif } } } }, { domain::dft, diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/mkl/detail/get_device_id.hpp index 91b460110..fbfe64219 100644 --- a/include/oneapi/mkl/detail/get_device_id.hpp +++ b/include/oneapi/mkl/detail/get_device_id.hpp @@ -59,19 +59,11 @@ inline oneapi::mkl::device get_device_id(sycl::queue &queue) { else if (vendor_id == AMD_ID) device_id = device::amdgpu; else { -#ifdef ENABLE_GENERIC_DEVICE device_id = device::generic_device; -#else - throw unsupported_device("", "", queue.get_device()); -#endif // ENABLE_GENERIC_DEVICE } } else { -#ifdef ENABLE_GENERIC_DEVICE device_id = device::generic_device; -#else - throw unsupported_device("", "", queue.get_device()); -#endif // ENABLE_GENERIC_DEVICE } return device_id; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 918e0dc2b..0b632c1bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,7 +29,6 @@ set(ENABLE_PORTBLAS_BACKEND_INTEL_CPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_INTEL_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_AMD_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU OFF CACHE INTERNAL "") -set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE OFF CACHE INTERNAL "") # store path to CMAKE_CURRENT_BINARY_DIR to use it later (makes FetchContent_Declare workable) set(ONEMKL_GENERATED_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt 
index 3612de5dd..8f981f88b 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -46,15 +46,7 @@ if(NUM_TARGETS EQUAL 0) list(LENGTH SYCL_TARGETS NUM_TARGETS) endif() -if(ENABLE_GENERIC_DEVICE) - set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERNAL "") - target_compile_options(ONEMKL::SYCL::SYCL INTERFACE -fno-sycl-instrument-device-code) - if(NOT PORTBLAS_TUNING_TARGET) - # If a generic device is specified, set the tuning target to default for best compatibility. - message(STATUS "Setting DEFAULT portBLAS tuning target for generic device.") - set(PORTBLAS_TUNING_TARGET "DEFAULT") - endif() -elseif (PORTBLAS_TUNING_TARGET) +if (PORTBLAS_TUNING_TARGET) # Allow the user to manually enable a specific device type # for tuned portBLAS configurations and sets sycl-target. if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU") diff --git a/src/blas/blas_loader.cpp b/src/blas/blas_loader.cpp index c1f1339c6..1c3cfcb71 100644 --- a/src/blas/blas_loader.cpp +++ b/src/blas/blas_loader.cpp @@ -35,413 +35,418 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_scasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_scasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dzasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dzasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_sasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_sasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue 
&queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dasum_sycl(queue, n, x, incx, result); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_saxpy_batch_strided_sycl(queue, n, alpha, x, incx, 
stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_daxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_caxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zaxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_saxpby_sycl(queue, n, 
alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_scopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_scopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_dcopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue 
&queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_ccopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_ccopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zcopy_sycl(queue, n, x, incx, y, incy); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_scopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_scopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_dcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_ccopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue 
}].column_major_ccopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue 
}].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_isamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_isamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_idamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_idamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_icamin_sycl(queue, n, x, incx, 
result); + function_tables[{ libkey, queue }].column_major_icamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_izamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_izamin_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_isamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_isamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_idamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_idamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_icamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_icamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_izamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_izamax_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_scnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_scnrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dznrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dznrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_snrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_snrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].column_major_dnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].column_major_dnrm2_sycl(queue, n, x, incx, result); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { - function_tables[libkey].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { - function_tables[libkey].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { - function_tables[libkey].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, 
std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { - function_tables[libkey].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].column_major_srotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_srotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].column_major_drotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_drotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].column_major_crotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_crotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].column_major_zrotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].column_major_zrotg_sycl(queue, a, b, c, s); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - function_tables[libkey].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { - 
function_tables[libkey].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, float y1, sycl::buffer ¶m) { - function_tables[libkey].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, double y1, sycl::buffer ¶m) { - function_tables[libkey].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_sscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_sscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_dscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_cscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_cscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_csscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_csscal_sycl(queue, n, 
alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_zscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_zscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_zdscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].column_major_zdscal_sycl(queue, n, alpha, x, incx); } void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); + function_tables[{ libkey, queue }].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, + result); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_sswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_dswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_cswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].column_major_zswap_sycl(queue, n, x, incx, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -449,8 +454,8 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose 
trans, std::int64_t m, @@ -458,40 +463,40 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void 
gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -499,9 +504,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_sgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -509,9 +514,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_dgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, 
sycl::queue &queue, transpose trans, std::int64_t m, @@ -520,9 +525,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_cgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -531,9 +536,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].column_major_zgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, - y, incy, stridey, batch_size); + function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, @@ -541,7 +546,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_sdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -550,7 +555,7 @@ void dgmm_batch(oneapi::mkl::device 
libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_ddgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -559,7 +564,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_cdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -568,430 +573,444 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].column_major_zdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, 
sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, + a, lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].column_major_zgeru_sycl(queue, m, 
n, alpha, x, incx, y, incy, + a, lda); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue 
}].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex 
beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, - incx, beta, y, incy); + function_tables[{ libkey, queue }].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - 
function_tables[libkey].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a); + function_tables[{ libkey, queue }].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, 
std::int64_t incy) { - function_tables[libkey].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, + a, lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void syr2(oneapi::mkl::device libkey, 
sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, - a, lda); + function_tables[{ libkey, queue }].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, + incx, y, incy, a, lda); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t 
incx) { - function_tables[libkey].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue }].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, - lda, x, incx); + function_tables[{ libkey, queue 
}].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, 
a, x, - incx); + function_tables[{ libkey, queue }].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - 
function_tables[libkey].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void 
trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, - x, incx); + function_tables[{ libkey, queue }].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_sgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, 
transpose transa, transpose transb, @@ -999,8 +1018,8 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1008,32 +1027,32 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_hgemm_sycl(queue, transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - 
function_tables[libkey].column_major_gemm_f16f16f32_sycl(queue, transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_gemm_bf16bf16f32_sycl(queue, transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1041,8 +1060,8 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1050,23 +1069,23 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, + 
alpha, a, lda, b, ldb, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cherk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zherk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1074,8 +1093,8 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cher2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1083,24 +1102,24 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, 
a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zher2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1108,8 +1127,8 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -1117,56 +1136,56 @@ void symm(oneapi::mkl::device libkey, sycl::queue 
&queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex 
alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, + alpha, a, lda, beta, c, ldc); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_ssyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_dsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1174,9 +1193,9 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) 
{ - function_tables[libkey].column_major_csyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1184,25 +1203,25 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_ssyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue 
}].column_major_dsyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1210,8 +1229,8 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_csyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -1219,72 +1238,72 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zsyr2k_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_strmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_strmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, 
sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_dtrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_dtrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ctrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ctrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ztrmm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ztrmm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_strsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_strsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue 
&queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_dtrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_dtrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ctrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ctrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_ztrsm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_ztrsm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1293,7 +1312,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_sgemm_batch_strided_sycl( + function_tables[{ libkey, 
queue }].column_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1304,7 +1323,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_dgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1315,7 +1334,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_cgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1326,7 +1345,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1337,7 +1356,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - 
function_tables[libkey].column_major_hgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1348,7 +1367,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_f16f16f32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1359,7 +1378,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_s8s8f32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1370,7 +1389,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_gemm_s8s8s32_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -1380,7 +1399,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, 
sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_strsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1390,7 +1409,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_dtrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1400,7 +1419,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_ctrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1410,7 +1429,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_ztrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1419,16 +1438,16 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
tra transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_sgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_sgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_dgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_dgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -1436,8 +1455,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_cgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_cgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -1445,8 +1464,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - 
function_tables[libkey].column_major_zgemmt_sycl(queue, upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].column_major_zgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1454,7 +1473,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_s8u8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1463,7 +1482,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_s8s8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1472,7 +1491,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_u8s8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1481,7 +1500,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, 
uint8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].column_major_gemm_u8u8s32_bias_sycl( + function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -1489,7 +1508,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_somatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1497,7 +1516,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_domatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1506,7 +1525,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_comatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -1515,38 +1534,38 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t 
stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].column_major_zomatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_simatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_dimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_cimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex 
alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].column_major_zimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -1555,7 +1574,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_somatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1566,7 +1585,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_domatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1577,7 +1596,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_comatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, 
batch_size); } @@ -1589,7 +1608,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].column_major_zomatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -1597,97 +1616,105 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex 
alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, + lda, b, ldb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> 
&b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, - b, ldb, strideb); + function_tables[{ libkey, queue }].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, + lda, stridea, b, ldb, strideb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, + lda, ldb); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_somatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].column_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_domatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1695,8 +1722,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].column_major_comatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -1704,8 +1731,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].column_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, 
queue }].column_major_zomatadd_sycl( + queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs @@ -1713,64 +1740,64 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_scasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_scasum_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dzasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dzasum_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_sasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - 
return function_tables[libkey].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, + y, incy, dependencies); } sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_saxpy_batch_group_usm_sycl( 
queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1778,7 +1805,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 double *alpha, const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1787,7 +1814,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1796,7 +1823,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1804,7 +1831,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_usm_sycl( queue, n, 
alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1812,7 +1839,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1821,7 +1848,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1830,73 +1857,73 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_saxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_saxpby_usm_sycl( + queue, n, alpha, x, 
incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_daxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_daxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_caxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_caxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zaxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zaxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_scopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, 
std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_dcopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_ccopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zcopy_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1904,7 +1931,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1912,7 +1939,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1920,7 +1947,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -1928,7 +1955,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_scopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1936,7 +1963,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dcopy_batch_strided_usm_sycl( + return 
function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1944,7 +1971,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ccopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -1952,303 +1979,307 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue 
}].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dsdot_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_cdotc_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdotc_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_cdotu_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t 
n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdotu_usm_sycl( + queue, n, x, incx, y, incy, result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_isamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_isamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_idamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_idamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_icamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_icamin_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_izamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_izamin_usm_sycl(queue, n, x, incx, + result, dependencies); } 
sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_isamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_isamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_idamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_idamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_icamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_icamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_izamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_izamax_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_scnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_scnrm2_usm_sycl(queue, n, x, incx, + result, 
dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dznrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dznrm2_usm_sycl(queue, n, x, incx, + result, dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_snrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_snrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue 
}].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].column_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_csrot_usm_sycl(queue, n, x, incx, y, + incy, c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdrot_usm_sycl(queue, n, x, incx, y, + incy, c, s, dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, float *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_srotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, double *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_drotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, float *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_crotg_usm_sycl(queue, a, b, c, s, + 
dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, double *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].column_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].column_major_zrotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_srotm_usm_sycl( + queue, n, x, incx, y, incy, param, dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_drotm_usm_sycl( + queue, n, x, incx, y, incy, param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, const std::vector &dependencies) { - return function_tables[libkey].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, 
+ param, dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device 
libkey, sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, - result, dependencies); + return function_tables[{ libkey, queue }].column_major_sdsdot_usm_sycl( + queue, n, sb, x, incx, y, incy, result, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_sswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_dswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_cswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_cswap_usm_sycl(queue, n, x, 
incx, y, + incy, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].column_major_zswap_usm_sycl(queue, n, x, incx, y, + incy, dependencies); } sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2256,7 +2287,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2265,7 +2296,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); 
} @@ -2274,7 +2305,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2282,16 +2313,16 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_sgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_dgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -2299,8 +2330,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return 
function_tables[libkey].column_major_cgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_cgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -2308,8 +2339,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, @@ -2318,7 +2349,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, float beta, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2329,7 +2360,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, double beta, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, 
stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2341,7 +2372,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2353,7 +2384,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -2363,7 +2394,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2373,7 +2404,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return 
function_tables[libkey].column_major_dgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2384,7 +2415,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **x, std::int64_t *incx, std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2396,7 +2427,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -2406,7 +2437,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const float *x, std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2416,7 +2447,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, 
const double *x, std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2427,7 +2458,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2438,7 +2469,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -2448,7 +2479,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, 
dependencies); } @@ -2457,7 +2488,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ddgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -2466,7 +2497,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -2475,55 +2506,55 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_sger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return 
function_tables[{ libkey, queue }].column_major_sger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return 
function_tables[libkey].column_major_cgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2531,7 +2562,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2540,7 +2571,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2549,7 +2580,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, 
std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chemv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2558,7 +2589,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhemv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2566,32 +2597,32 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower float alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_cher_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return 
function_tables[libkey].column_major_cher2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_cher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_zher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2599,8 +2630,8 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_chpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -2608,45 +2639,45 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpmv_usm_sycl( + queue, 
upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_chpr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_chpr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_chpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhpr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_zhpr2_usm_sycl( + queue, upper_lower, n, alpha, 
x, incx, y, incy, a, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2654,57 +2685,57 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsbmv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].column_major_sspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *a, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue 
}].column_major_dspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_sspr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, dependencies); + return function_tables[{ libkey, queue }].column_major_dspr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_sspr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_sspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, const std::vector &dependencies) { - return function_tables[libkey].column_major_dspr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, dependencies); + return function_tables[{ libkey, queue }].column_major_dspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } 
sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssymv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -2712,224 +2743,224 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsymv_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_ssyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, - a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dsyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, 
float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_ssyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr2_usm_sycl(queue, upper_lower, n, alpha, x, - incx, y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].column_major_dsyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, 
k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztbmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtbsv_usm_sycl(queue, 
upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztbsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, 
const std::vector &dependencies) { - return function_tables[libkey].column_major_dtpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztpmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_stpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_stpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector 
&dependencies) { - return function_tables[libkey].column_major_dtpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztpsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_strmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_strmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, 
std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrmv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_strsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsv_usm_sycl(queue, upper_lower, trans, unit_diag, - n, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, 
ldc, dependencies); } @@ -2937,7 +2968,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2946,7 +2977,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2955,7 +2986,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2964,7 +2995,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, 
ldb, beta, c, ldc, dependencies); } @@ -2972,7 +3003,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2980,7 +3011,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_bf16bf16f32_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2989,7 +3020,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_chemm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -2998,7 +3029,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zhemm_usm_sycl( + return 
function_tables[{ libkey, queue }].column_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3006,7 +3037,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cherk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3014,7 +3045,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zherk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3023,7 +3054,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cher2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3032,7 +3063,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zher2k_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3040,7 +3071,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3048,7 +3079,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3057,7 +3088,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csymm_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3066,7 +3097,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsymm_usm_sycl( + 
return function_tables[{ libkey, queue }].column_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3074,7 +3105,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3082,7 +3113,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3091,7 +3122,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } @@ -3100,7 +3131,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyrk_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, 
dependencies); } @@ -3109,7 +3140,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const float **a, std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3119,7 +3150,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const double **a, std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3130,7 +3161,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3141,7 +3172,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -3151,7 +3182,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::int64_t lda, std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3161,7 +3192,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe const double *a, std::int64_t lda, std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3172,7 +3203,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3183,7 +3214,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector 
&dependencies) { - return function_tables[libkey].column_major_zsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3192,7 +3223,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_ssyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3200,7 +3231,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_dsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3209,7 +3240,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_csyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3218,7 +3249,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, 
std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -3226,18 +3257,18 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_strmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_strmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_dtrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3245,9 +3276,9 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, 
m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3255,27 +3286,27 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrmm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_strsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue 
}].column_major_dtrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3283,9 +3314,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ctrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -3293,9 +3324,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_usm_sycl(queue, left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_ztrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, @@ -3303,7 +3334,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3313,7 +3344,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3324,7 +3355,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3335,7 +3366,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3345,7 +3376,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, float 
*alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_strsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3355,7 +3386,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dtrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3366,7 +3397,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ctrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3377,7 +3408,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_ztrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, 
alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -3388,7 +3419,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3399,7 +3430,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3410,7 +3441,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **b, std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3422,7 +3453,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_batch_group_usm_sycl( + 
return function_tables[{ libkey, queue }].column_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3433,7 +3464,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3444,7 +3475,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3455,7 +3486,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3466,7 +3497,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float 
*beta, std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -3477,7 +3508,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3488,7 +3519,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3500,7 +3531,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_usm_sycl( queue, 
transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3512,7 +3543,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3524,7 +3555,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_hgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3535,7 +3566,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_f16f16f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3546,7 +3577,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, 
float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3557,7 +3588,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -3566,18 +3597,18 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_sgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_sgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - 
return function_tables[libkey].column_major_dgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_dgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -3586,9 +3617,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_cgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_cgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -3597,9 +3628,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zgemmt_usm_sycl(queue, upper_lower, transa, transb, - n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + return function_tables[{ libkey, queue }].column_major_zgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -3608,7 +3639,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector 
&dependencies) { - return function_tables[libkey].column_major_gemm_s8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3619,7 +3650,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_s8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3630,7 +3661,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_u8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3641,7 +3672,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].column_major_gemm_u8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -3651,7 +3682,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device 
libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3660,7 +3691,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3669,7 +3700,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3678,7 +3709,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, 
a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -3686,7 +3717,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3694,7 +3725,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3703,7 +3734,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3712,7 +3743,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -3722,7 +3753,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3733,7 +3764,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3745,7 +3776,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3757,7 +3788,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t 
stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -3765,39 +3796,39 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_somatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_domatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_comatcopy_usm_sycl( + queue, trans, m, n, alpha, a, 
lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_usm_sycl(queue, trans, m, n, alpha, a, - lda, b, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_zomatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -3805,7 +3836,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -3814,7 +3845,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, 
b, ldb, strideb, dependencies); } @@ -3823,45 +3854,45 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_simatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_dimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_cimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, 
sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, dependencies); + return function_tables[{ libkey, queue }].column_major_zimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3869,7 +3900,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3878,7 +3909,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ 
-3887,7 +3918,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatadd_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -3896,7 +3927,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_somatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3905,7 +3936,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_domatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3914,7 +3945,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_comatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ 
-3923,7 +3954,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_zomatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -3931,7 +3962,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_simatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3939,7 +3970,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_dimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3948,7 +3979,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_cimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue 
}].column_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3957,7 +3988,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].column_major_zimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -3973,413 +4004,418 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_scasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_scasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dzasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dzasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_sasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_sasum_sycl(queue, n, x, incx, result); } void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dasum_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dasum_sycl(queue, n, x, incx, result); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - 
function_tables[libkey].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_saxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - 
function_tables[libkey].row_major_daxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_caxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zaxpy_batch_strided_sycl(queue, n, alpha, x, incx, stridex, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_sycl( + queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue 
}].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); + function_tables[{ libkey, queue }].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, + incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_scopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_scopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_dcopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_ccopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_ccopy_sycl(queue, n, x, incx, y, incy); } void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - 
function_tables[libkey].row_major_zcopy_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zcopy_sycl(queue, n, x, incx, y, incy); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_scopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_scopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_dcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_ccopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zcopy_batch_strided_sycl(queue, n, x, incx, stridex, y, incy, - stridey, batch_size); + function_tables[{ libkey, 
queue }].row_major_zcopy_batch_strided_sycl( + queue, n, x, incx, stridex, y, incy, stridey, batch_size); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); } void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, 
sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &result) { - function_tables[libkey].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_isamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_isamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_idamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_idamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_icamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_icamin_sycl(queue, n, x, incx, result); } void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_izamin_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_izamin_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - 
function_tables[libkey].row_major_isamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_isamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_idamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_idamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_icamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_icamax_sycl(queue, n, x, incx, result); } void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_izamax_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_izamax_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_scnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_scnrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dznrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dznrm2_sycl(queue, n, x, incx, result); } void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_snrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_snrm2_sycl(queue, n, x, incx, result); } void 
nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { - function_tables[libkey].row_major_dnrm2_sycl(queue, n, x, incx, result); + function_tables[{ libkey, queue }].row_major_dnrm2_sycl(queue, n, x, incx, result); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { - function_tables[libkey].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { - function_tables[libkey].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { - function_tables[libkey].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { - function_tables[libkey].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); + function_tables[{ libkey, queue }].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].row_major_srotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_srotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, 
sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { - function_tables[libkey].row_major_drotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_drotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].row_major_crotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_crotg_sycl(queue, a, b, c, s); } void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, sycl::buffer, 1> &b, sycl::buffer &c, sycl::buffer, 1> &s) { - function_tables[libkey].row_major_zrotg_sycl(queue, a, b, c, s); + function_tables[{ libkey, queue }].row_major_zrotg_sycl(queue, a, b, c, s); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &param) { - function_tables[libkey].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); } void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &param) { - function_tables[libkey].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); + function_tables[{ libkey, queue }].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, float y1, sycl::buffer &param) { - function_tables[libkey].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, sycl::buffer &x1, double y1, sycl::buffer &param) { -
function_tables[libkey].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); + function_tables[{ libkey, queue }].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_sscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_sscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_dscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_cscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_cscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_csscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_csscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_zscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_zscal_sycl(queue, n, alpha, x, incx); } void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_zdscal_sycl(queue, n, alpha, x, incx); + function_tables[{ libkey, queue }].row_major_zdscal_sycl(queue, n, alpha, x, incx); } void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, 
float sb, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { - function_tables[libkey].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); + function_tables[{ libkey, queue }].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, + result); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_sswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_dswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_cswap_sycl(queue, n, x, incx, y, incy); } void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zswap_sycl(queue, n, x, incx, y, incy); + function_tables[{ libkey, queue }].row_major_zswap_sycl(queue, n, x, incx, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue 
}].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4387,8 +4423,8 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4396,40 +4432,40 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std:: sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer 
&y, std::int64_t incy) { - function_tables[libkey].row_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, - y, incy); + function_tables[{ libkey, queue }].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4437,9 +4473,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t 
stridex, float beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_sgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4447,9 +4483,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t stridea, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_dgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4458,9 +4494,9 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_cgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -4469,9 +4505,9 @@ void 
gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - function_tables[libkey].row_major_zgemv_batch_strided_sycl(queue, trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, - incy, stridey, batch_size); + function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_sycl( + queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, @@ -4479,7 +4515,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_sdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4488,7 +4524,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &x, std::int64_t incx, std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_ddgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4497,7 +4533,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_cdgmm_batch_strided_sycl( + function_tables[{ libkey, queue 
}].row_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } @@ -4506,426 +4542,444 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - function_tables[libkey].row_major_zdgmm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex 
alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); + function_tables[{ libkey, queue }].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, + lda); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - 
function_tables[libkey].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } 
void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a, std::int64_t lda) { - function_tables[libkey].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - function_tables[libkey].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> 
&a) { - function_tables[libkey].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, sycl::buffer, 1> &a) { - function_tables[libkey].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, 
std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, - incy); + function_tables[{ libkey, queue }].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { - function_tables[libkey].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); + function_tables[{ libkey, queue }].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, + a); } void 
spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { - function_tables[libkey].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); + function_tables[{ libkey, queue }].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - function_tables[libkey].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, - beta, y, incy); + function_tables[{ libkey, queue }].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, 
incx, a, lda); + function_tables[{ libkey, queue }].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); + function_tables[{ libkey, queue }].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, + lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, std::int64_t lda) { - function_tables[libkey].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, - lda); + function_tables[{ libkey, queue }].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, 
std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ 
libkey, queue }].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, - x, incx); + function_tables[{ libkey, queue }].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t 
incx) { - function_tables[libkey].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, sycl::buffer, 
1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, - incx); + function_tables[{ libkey, queue }].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void 
trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - function_tables[libkey].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - function_tables[libkey].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, - incx); + function_tables[{ libkey, queue }].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, + a, lda, x, incx); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - 
function_tables[libkey].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -4933,8 +4987,8 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -4942,32 +4996,32 @@ void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, tran sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue 
&queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_gemm_f16f16f32_sycl(queue, transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_gemm_bf16bf16f32_sycl(queue, transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_sycl( + queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -4975,8 +5029,8 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, 
alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -4984,23 +5038,23 @@ void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5008,8 +5062,8 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, float beta, 
sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5017,24 +5071,24 @@ void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, double beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, 
uplo upper_lower, @@ -5042,8 +5096,8 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -5051,56 +5105,56 @@ void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, - lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsyrk_sycl(queue, upper_lower, trans, 
n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, - beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_ssyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - 
function_tables[libkey].row_major_dsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5108,9 +5162,9 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_csyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5118,25 +5172,25 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zsyrk_batch_strided_sycl(queue, upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size); + function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t 
ldc) { - function_tables[libkey].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5144,8 +5198,8 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -5153,72 +5207,72 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, - ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void trmm(oneapi::mkl::device libkey, 
sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_strmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_strmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ztrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ztrmm_sycl(queue, left_right, upper_lower, 
trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_strsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_strsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, - m, n, alpha, a, lda, b, ldb); + 
function_tables[{ libkey, queue }].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5227,7 +5281,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_sgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5238,7 +5292,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_dgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5249,7 +5303,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_cgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5260,7 +5314,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5271,7 +5325,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_hgemm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5282,7 +5336,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_f16f16f32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5293,7 +5347,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_s8s8f32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5304,7 +5358,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa sycl::buffer &b, std::int64_t ldb, 
std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_gemm_s8s8s32_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } @@ -5314,7 +5368,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_strsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5324,7 +5378,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_dtrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5334,7 +5388,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_ctrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5344,7 +5398,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, 
std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_ztrsm_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5353,16 +5407,16 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_sgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_sgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_dgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_dgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -5370,8 +5424,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_cgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); 
+ function_tables[{ libkey, queue }].row_major_cgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, @@ -5379,8 +5433,8 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, tra sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zgemmt_sycl(queue, upper_lower, transa, transb, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + function_tables[{ libkey, queue }].row_major_zgemmt_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5388,7 +5442,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_s8u8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5397,7 +5451,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_s8s8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5406,7 +5460,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, 
std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_u8s8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5415,7 +5469,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { - function_tables[libkey].row_major_gemm_u8u8s32_bias_sycl( + function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } @@ -5423,7 +5477,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_somatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5431,7 +5485,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_domatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5440,7 +5494,7 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t 
ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_comatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } @@ -5449,38 +5503,38 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - function_tables[libkey].row_major_zomatcopy_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_simatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_dimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t 
batch_size) { - function_tables[libkey].row_major_cimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - function_tables[libkey].row_major_zimatcopy_batch_strided_sycl(queue, trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -5489,7 +5543,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_somatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5500,7 +5554,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_domatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5511,7 +5565,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra std::complex 
beta, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_comatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5523,7 +5577,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - function_tables[libkey].row_major_zomatadd_batch_strided_sycl( + function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } @@ -5531,97 +5585,105 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose tra void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - function_tables[libkey].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, 
sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - function_tables[libkey].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); + function_tables[{ libkey, queue }].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, 
strideb); + function_tables[{ libkey, queue }].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t strideb) { - function_tables[libkey].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, - ldb, strideb); + function_tables[{ libkey, queue }].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - function_tables[libkey].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb) { - 
function_tables[libkey].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); + function_tables[{ libkey, queue }].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, + ldb); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { - function_tables[libkey].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5629,8 +5691,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, @@ -5638,8 +5700,8 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, 
transpose transa, t sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, sycl::buffer, 1> &c, std::int64_t ldc) { - function_tables[libkey].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, - beta, b, ldb, c, ldc); + function_tables[{ libkey, queue }].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } // USM APIs @@ -5647,64 +5709,64 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, t sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_scasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_scasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dzasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dzasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dasum_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, 
queue }].row_major_dasum_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, + incy, dependencies); } sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, 
std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5712,7 +5774,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 double *alpha, const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5721,7 +5783,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5730,7 +5792,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5738,7 +5800,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_saxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5746,7 +5808,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5755,7 +5817,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5764,73 +5826,73 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_saxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_saxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_daxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_daxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_caxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_caxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zaxpby_usm_sycl(queue, n, alpha, x, incx, beta, y, - incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zaxpby_usm_sycl( + queue, n, alpha, x, incx, beta, y, incy, dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, 
queue }].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5838,7 +5900,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double **x, std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, 
const std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5846,7 +5908,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5854,7 +5916,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex **x, std::int64_t *incx, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } @@ -5862,7 +5924,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const float *x, std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_scopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5870,7 +5932,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const double *x, std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const 
std::vector &dependencies) { - return function_tables[libkey].row_major_dcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5878,7 +5940,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ccopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } @@ -5886,299 +5948,307 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int6 const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_ddot_usm_sycl(queue, 
n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, const float *y, std::int64_t incy, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event 
dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, + result, dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_isamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_isamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_idamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_idamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_icamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_icamin_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_izamin_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_izamin_usm_sycl(queue, n, x, 
incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_isamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_isamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_idamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_idamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_icamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_icamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, std::int64_t *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_izamax_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_izamax_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, + 
dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_snrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_snrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, + dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, + s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue 
}].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, + s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float c, float s, const std::vector &dependencies) { - return function_tables[libkey].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double c, double s, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, + c, s, dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, float *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_srotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, double *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_drotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, float *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_crotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event 
rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, std::complex *b, double *c, std::complex *s, const std::vector &dependencies) { - return function_tables[libkey].row_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); + return function_tables[{ libkey, queue }].row_major_zrotg_usm_sycl(queue, a, b, c, s, + dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, float *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, + param, dependencies); } sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, double *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, const std::vector &dependencies) { - return function_tables[libkey].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, - dependencies); + return function_tables[{ libkey, queue }].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, + param, dependencies); } sycl::event 
scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, 
std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, - dependencies); + return function_tables[{ libkey, queue }].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, + dependencies); } sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *result, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, result, - dependencies); + return function_tables[{ libkey, queue }].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, + incy, result, dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, 
std::int64_t n, std::complex *x, std::int64_t incx, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, - dependencies); + return function_tables[{ libkey, queue }].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, + dependencies); } sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6186,7 +6256,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6195,7 +6265,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6204,7 +6274,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const 
std::complex *a, std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgbmv_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } @@ -6212,16 +6282,16 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_sgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -6229,8 +6299,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return 
function_tables[{ libkey, queue }].row_major_cgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, @@ -6238,8 +6308,8 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_usm_sycl(queue, trans, m, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zgemv_usm_sycl( + queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, @@ -6248,7 +6318,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, float beta, float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6259,7 +6329,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stridex, double beta, double *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6271,7 +6341,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, 
transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6283,7 +6353,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } @@ -6293,7 +6363,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6303,7 +6373,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, 
group_size, dependencies); } @@ -6314,7 +6384,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex **x, std::int64_t *incx, std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6326,7 +6396,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemv_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } @@ -6336,7 +6406,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const float *x, std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6346,7 +6416,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t stridea, const double *x, std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ddgmm_batch_strided_usm_sycl( 
+ return function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6357,7 +6427,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6368,7 +6438,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t incx, std::int64_t stridex, std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdgmm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } @@ -6378,7 +6448,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6387,7 +6457,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { 
- return function_tables[libkey].row_major_ddgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6396,7 +6466,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } @@ -6405,55 +6475,55 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, const std::complex **x, std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zdgmm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_sger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_sger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, 
double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dger_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dger_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgerc_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zgerc_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgeru_usm_sycl(queue, m, n, alpha, x, incx, y, incy, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zgeru_usm_sycl( + queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6461,8 +6531,8 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6470,8 +6540,8 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6479,8 +6549,8 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex 
*y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chemv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chemv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6488,40 +6558,40 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhemv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhemv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zher_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo 
upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_cher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_zher2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6529,8 +6599,8 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_chpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, @@ -6538,328 +6608,328 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *x, std::int64_t incx, std::complex beta, std::complex *y, std::int64_t incy, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_zhpmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_zhpmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const std::complex *x, std::int64_t incx, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_chpr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_chpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex *x, std::int64_t incx, const std::complex *y, std::int64_t incy, std::complex *a, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_zhpr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_zhpr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_ssbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsbmv_usm_sycl(queue, upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dsbmv_usm_sycl( + queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_sspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
std::int64_t n, double alpha, const double *a, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspmv_usm_sycl(queue, upper_lower, n, alpha, a, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dspmv_usm_sycl( + queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, double *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - dependencies); + return function_tables[{ libkey, queue }].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, + x, incx, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, const std::vector &dependencies) { - return function_tables[libkey].row_major_sspr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_sspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double 
*a, const std::vector &dependencies) { - return function_tables[libkey].row_major_dspr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, dependencies); + return function_tables[{ libkey, queue }].row_major_dspr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssymv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_ssymv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsymv_usm_sycl(queue, upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy, dependencies); + return function_tables[{ libkey, queue }].row_major_dsymv_usm_sycl( + queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, 
std::int64_t incx, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, - lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyr_usm_sycl( + queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyr2_usm_sycl(queue, upper_lower, n, alpha, x, incx, - y, incy, a, lda, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyr2_usm_sycl( + queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztbmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztbmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stbsv_usm_sycl( + 
queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztbsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - k, a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztbsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stpmv_usm_sycl(queue, upper_lower, 
trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_stpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztpmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztpmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_stpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, 
dependencies); + return function_tables[{ libkey, queue }].row_major_stpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztpsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztpsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_strmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + 
return function_tables[{ libkey, queue }].row_major_strmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrmv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztrmv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsv_usm_sycl(queue, 
upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_strsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_dtrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ctrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, std::complex *x, std::int64_t incx, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsv_usm_sycl(queue, upper_lower, trans, unit_diag, n, - a, lda, x, incx, dependencies); + return function_tables[{ libkey, queue }].row_major_ztrsv_usm_sycl( + queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t 
ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6867,7 +6937,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6876,7 +6946,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6885,7 +6955,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6894,7 +6964,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, 
sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6902,7 +6972,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6910,7 +6980,7 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_bf16bf16f32_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6919,7 +6989,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_chemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6928,7 +6998,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex 
*a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zhemm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6936,16 +7006,16 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cherk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_cherk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zherk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_zherk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -6953,7 +7023,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cher2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cher2k_usm_sycl( queue, 
upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6962,7 +7032,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zher2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6970,7 +7040,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6978,7 +7048,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6987,7 +7057,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_csymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csymm_usm_sycl( 
queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -6996,7 +7066,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsymm_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7004,16 +7074,16 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_ssyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_dsyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -7021,8 +7091,8 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector 
&dependencies) { - return function_tables[libkey].row_major_csyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_csyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, @@ -7030,8 +7100,8 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lowe const std::complex *a, std::int64_t lda, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_usm_sycl(queue, upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc, dependencies); + return function_tables[{ libkey, queue }].row_major_zsyrk_usm_sycl( + queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, @@ -7039,7 +7109,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const float **a, std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7049,7 +7119,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp const double **a, std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, 
alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7060,7 +7130,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7071,7 +7141,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upp std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } @@ -7081,7 +7151,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::int64_t lda, std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7091,7 +7161,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe const double *a, std::int64_t lda, std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dsyrk_batch_strided_usm_sycl( + 
return function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7102,7 +7172,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7113,7 +7183,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo uppe std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyrk_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7122,7 +7192,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_ssyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7130,7 +7200,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector 
&dependencies) { - return function_tables[libkey].row_major_dsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7139,7 +7209,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_csyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7148,7 +7218,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *a, std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zsyr2k_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } @@ -7156,18 +7226,18 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_strmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_strmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double 
alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_dtrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7175,9 +7245,9 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ctrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7185,27 +7255,27 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrmm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ztrmm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const 
std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_strsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_dtrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7213,9 +7283,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ctrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, @@ -7223,9 +7293,9 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_ztrsm_usm_sycl(queue, left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb, - dependencies); + return function_tables[{ libkey, queue }].row_major_ztrsm_usm_sycl( + queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, @@ -7233,7 +7303,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7243,7 +7313,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7254,7 +7324,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_usm_sycl( 
queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7265,7 +7335,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7275,7 +7345,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_strsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7285,7 +7355,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dtrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7296,7 +7366,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t 
*group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ctrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7307,7 +7377,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *lef std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_ztrsm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } @@ -7318,7 +7388,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7329,7 +7399,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7340,7 +7410,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex 
**b, std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7352,7 +7422,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::complex *beta, std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7363,7 +7433,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7374,7 +7444,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, 
group_size, dependencies); } @@ -7385,7 +7455,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8f32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7396,7 +7466,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } @@ -7407,7 +7477,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7418,7 +7488,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - 
return function_tables[libkey].row_major_dgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7430,7 +7500,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7442,7 +7512,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7454,7 +7524,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_hgemm_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7465,7 +7535,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, 
sycl::queue &queue, transpose const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_f16f16f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7476,7 +7546,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8f32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7487,7 +7557,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } @@ -7496,18 +7566,18 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, 
const std::vector &dependencies) { - return function_tables[libkey].row_major_sgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_sgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_dgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_dgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -7516,9 +7586,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_cgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_cgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, @@ -7527,9 +7597,9 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_low const std::complex *b, std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_zgemmt_usm_sycl(queue, upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + return function_tables[{ libkey, queue }].row_major_zgemmt_usm_sycl( + queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); } sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, @@ -7538,7 +7608,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7549,7 +7619,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_s8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7560,7 +7630,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_u8s8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, 
beta, c, ldc, co, dependencies); } @@ -7571,7 +7641,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, const std::vector &dependencies) { - return function_tables[libkey].row_major_gemm_u8u8s32_bias_usm_sycl( + return function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } @@ -7581,7 +7651,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7590,7 +7660,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7599,7 +7669,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, 
queue }].row_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7608,7 +7678,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } @@ -7616,7 +7686,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7624,7 +7694,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7633,7 +7703,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return 
function_tables[libkey].row_major_cimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7642,7 +7712,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } @@ -7652,7 +7722,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7663,7 +7733,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7675,7 +7745,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t 
stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7687,7 +7757,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatadd_batch_strided_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } @@ -7695,39 +7765,39 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transp sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_somatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue 
}].row_major_domatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_comatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_usm_sycl(queue, trans, m, n, alpha, a, lda, - b, ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_zomatcopy_usm_sycl( + queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7735,7 +7805,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy2_usm_sycl( + return function_tables[{ libkey, queue 
}].row_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7744,7 +7814,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } @@ -7753,45 +7823,45 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose const std::complex *a, std::int64_t lda, std::int64_t stridea, std::complex *b, std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy2_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_simatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_dimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } 
sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_cimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_cimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_usm_sycl(queue, trans, m, n, alpha, ab, lda, - ldb, dependencies); + return function_tables[{ libkey, queue }].row_major_zimatcopy_usm_sycl( + queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7799,7 +7869,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, 
dependencies); } @@ -7808,7 +7878,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7817,7 +7887,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose tr const std::complex *a, std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, std::complex *c, std::int64_t ldc, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatadd_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } @@ -7826,7 +7896,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_somatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7835,7 +7905,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_domatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7844,7 +7914,7 @@ 
sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_comatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7853,7 +7923,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans const std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_zomatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } @@ -7861,7 +7931,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_simatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7869,7 +7939,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::int64_t *m, std::int64_t *n, double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_dimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_group_usm_sycl( queue, trans, 
m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7878,7 +7948,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_cimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -7887,7 +7957,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, trans std::complex **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, const std::vector &dependencies) { - return function_tables[libkey].row_major_zimatcopy_batch_group_usm_sycl( + return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } diff --git a/src/config.hpp.in b/src/config.hpp.in index e68b3b10e..5698abf9b 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -32,14 +32,12 @@ #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_CPU #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_GPU #cmakedefine ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU -#cmakedefine ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE #cmakedefine ENABLE_PORTFFT_BACKEND #cmakedefine ENABLE_ROCBLAS_BACKEND #cmakedefine ENABLE_ROCFFT_BACKEND #cmakedefine ENABLE_ROCRAND_BACKEND #cmakedefine ENABLE_ROCSOLVER_BACKEND #cmakedefine BUILD_SHARED_LIBS -#cmakedefine ENABLE_GENERIC_DEVICE #cmakedefine REF_BLAS_LIBNAME "@REF_BLAS_LIBNAME@" #cmakedefine REF_CBLAS_LIBNAME "@REF_CBLAS_LIBNAME@" diff --git a/src/dft/dft_loader.cpp b/src/dft/dft_loader.cpp index b0c421fb0..55a280388 100644 --- a/src/dft/dft_loader.cpp +++ b/src/dft/dft_loader.cpp @@ -34,28 +34,28 @@ template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& 
sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_fz(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_fz(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_dz(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_dz(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_fr(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_fr(desc, sycl_queue); } template <> commit_impl* create_commit( const descriptor& desc, sycl::queue& sycl_queue) { auto libkey = get_device_id(sycl_queue); - return function_tables[libkey].create_commit_sycl_dr(desc, sycl_queue); + return function_tables[{ libkey, sycl_queue }].create_commit_sycl_dr(desc, sycl_queue); } template diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 24b2ffb86..953226c3e 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -59,11 +59,11 @@ class table_initializer { using dlhandle = std::unique_ptr; public: - function_table_t &operator[](oneapi::mkl::device key) { - auto lib = tables.find(key); + function_table_t &operator[](std::pair device_queue_pair) { + auto lib = tables.find(device_queue_pair.first); if (lib != tables.end()) return lib->second; - return add_table(key); + return add_table(device_queue_pair.first, device_queue_pair.second); } private: @@ -90,7 +90,7 @@ class table_initializer { } #endif - function_table_t &add_table(oneapi::mkl::device key) { + function_table_t &add_table(oneapi::mkl::device key, sycl::queue &q) { dlhandle handle; 
// check all available libraries for the key(device) for (const char *libname : libraries[domain_id][key]) { @@ -99,8 +99,13 @@ class table_initializer { break; } if (!handle) { - std::cerr << ERROR_MSG << '\n'; - throw mkl::backend_not_found(); + if (key == oneapi::mkl::device::generic_device) { + throw mkl::unsupported_device("", "", q.get_device()); + } + else { + std::cerr << ERROR_MSG << '\n'; + throw mkl::backend_not_found(); + } } auto t = reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); diff --git a/src/lapack/lapack_loader.cpp b/src/lapack/lapack_loader.cpp index 43fe349d1..f26e5f5ad 100644 --- a/src/lapack/lapack_loader.cpp +++ b/src/lapack/lapack_loader.cpp @@ -35,162 +35,178 @@ void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std:: sycl::buffer &e, sycl::buffer> &tauq, sycl::buffer> &taup, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - 
scratchpad_size); + function_tables[{ libkey, queue }].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tauq, sycl::buffer> &taup, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> 
&tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, 
sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, 
std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - 
scratchpad_size); + function_tables[{ libkey, queue }].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, 
sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -198,8 +214,8 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd j sycl::buffer> &u, std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -207,341 +223,363 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd j sycl::buffer> &u, std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, + ldvt, scratchpad, scratchpad_size); } void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + 
function_tables[{ libkey, queue }].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + 
function_tables[{ libkey, queue }].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, 
std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue 
}].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer 
&tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t 
scratchpad_size) { - function_tables[libkey].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, 
ldb, scratchpad, + scratchpad_size); } void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size); } void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); } void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void sygvd(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, + scratchpad, scratchpad_size); } void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, 
sycl::buffer &ipiv, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - 
function_tables[libkey].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); } void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, 
std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); } void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, + scratchpad_size); } void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, sycl::buffer> &tau, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungtr_sycl(queue, uplo, n, a, lda, tau, 
scratchpad, + scratchpad_size); } void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -549,8 +587,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -558,8 +596,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -567,8 +605,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, 
oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, @@ -576,8 +614,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, + scratchpad, scratchpad_size); } void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -585,8 +623,8 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -594,196 +632,199 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side sid sycl::buffer> &tau, sycl::buffer> &c, std::int64_t ldc, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, 
float *d, float *e, std::complex *tauq, std::complex *taup, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tauq, double *taup, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tauq, float *taup, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgebrd_usm_sycl( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, std::complex *tauq, std::complex *taup, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgebrd_usm_sycl(queue, m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgebrd_usm_sycl( + 
queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgerqf_usm_sycl(queue, m, n, a, lda, tau, 
scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); } sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); 
} sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event 
getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, 
std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].zgetrs_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrs_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, float *s, std::complex *u, std::int64_t ldu, std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgesvd_usm_sycl(queue, 
jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, @@ -791,407 +832,422 @@ sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::j std::int64_t ldu, std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, - ldvt, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, float *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cheevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cheevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, double *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zheevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zheevd_usm_sycl( + queue, jobz, uplo, n, a, lda, 
w, scratchpad, scratchpad_size, dependencies); } sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, float *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chegvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chegvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, double *w, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhegvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhegvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, float *d, float *e, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chetrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chetrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, 
std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhetrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhetrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].chetrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].chetrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zhetrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zhetrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgbr(oneapi::mkl::device libkey, 
sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } 
sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, 
std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, - 
scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, 
std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *w, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsyevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsyevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *w, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssyevd_usm_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssyevd_usm_sycl( + queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsygvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsygvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, 
scratchpad, scratchpad_size, dependencies); } sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssygvd_usm_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssygvd_usm_sycl( + queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsytrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsytrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssytrd_usm_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssytrd_usm_sycl( + queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ssytrf_usm_sycl(queue, uplo, 
n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ssytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dsytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dsytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].csytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].csytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zsytrf_usm_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zsytrf_usm_sycl( + queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex 
*scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, std::complex *b, std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, - ldb, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungbr_usm_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungbr_usm_sycl( + queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue 
&queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungqr_usm_sycl(queue, m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungqr_usm_sycl( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungtr_usm_sycl(queue, uplo, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungtr_usm_sycl( + queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return 
function_tables[{ libkey, queue }].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue 
}].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, @@ -1199,8 +1255,9 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::s std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, @@ -1208,70 +1265,71 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::s std::complex *tau, std::complex *c, std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, - ldc, scratchpad, scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, + lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ 
libkey, queue }].sgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgeqrf_batch_sycl(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgeqrf_batch_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, 
std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetri_batch_sycl(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetri_batch_sycl( + queue, n, a, lda, stride_a, ipiv, 
stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, @@ -1279,9 +1337,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, @@ -1289,9 +1347,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, @@ -1299,9 +1357,9 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - 
function_tables[libkey].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, @@ -1309,149 +1367,153 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::tr std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + function_tables[{ libkey, queue }].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - 
function_tables[libkey].dgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zgetrf_batch_sycl(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zgetrf_batch_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].sorgqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].sorgqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, 
scratchpad, scratchpad_size); } void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dorgqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dorgqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + 
function_tables[{ libkey, queue }].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t 
n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, + ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].cungqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].cungqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer> &tau, std::int64_t stride_tau, std::int64_t batch_size, 
sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - function_tables[libkey].zungqr_batch_sycl(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size); + function_tables[{ libkey, queue }].zungqr_batch_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, float *tau, std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, double *tau, std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1459,9 +1521,9 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - 
return function_tables[libkey].cgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1469,27 +1531,27 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgeqrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector 
&dependencies) { - return function_tables[libkey].dgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1497,9 +1559,9 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, @@ -1507,45 +1569,45 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_batch_usm_sycl(queue, m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrf_batch_usm_sycl( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, 
std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].zgetri_batch_usm_sycl(queue, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetri_batch_usm_sycl( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, @@ -1553,7 +1615,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t stride_ipiv, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].sgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1564,7 +1626,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].dgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1575,7 +1637,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].cgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, 
dependencies); } @@ -1586,7 +1648,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrs_batch_usm_sycl( + return function_tables[{ libkey, queue }].zgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } @@ -1595,31 +1657,31 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t stride_a, float *tau, std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sorgqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, std::int64_t stride_a, double *tau, std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dorgqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, std::int64_t 
batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].spotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].dpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, @@ -1627,7 +1689,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].cpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, @@ -1635,7 +1697,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrf_batch_usm_sycl( + return function_tables[{ libkey, queue }].zpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo 
uplo, @@ -1643,18 +1705,18 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, @@ -1662,9 +1724,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue 
}].cpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::complex *a, @@ -1672,9 +1734,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_batch_usm_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, - ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_batch_usm_sycl( + queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size, dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, @@ -1682,9 +1744,9 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cungqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, @@ -1692,27 +1754,27 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector 
&dependencies) { - return function_tables[libkey].zungqr_batch_usm_sycl(queue, m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zungqr_batch_usm_sycl( + queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, std::int64_t *lda, float **tau, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, std::int64_t *lda, double **tau, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, @@ -1720,9 +1782,9 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return 
function_tables[libkey].cgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, @@ -1730,79 +1792,79 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgeqrf_group_usm_sycl(queue, m, n, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgeqrf_group_usm_sycl( + queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrf_group_usm_sycl(queue, m, n, a, lda, 
ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrf_group_usm_sycl(queue, m, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgetrf_group_usm_sycl( + queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue 
}].sgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetri_group_usm_sycl(queue, n, a, lda, ipiv, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zgetri_group_usm_sycl( + queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, 
sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1810,9 +1872,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].sgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1820,9 +1882,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1831,9 +1893,9 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return 
function_tables[{ libkey, queue }].cgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, @@ -1842,79 +1904,79 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zgetrs_group_usm_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, - ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zgetrs_group_usm_sycl( + queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, std::int64_t *lda, float **tau, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].sorgqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].sorgqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, std::int64_t *lda, double **tau, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dorgqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + 
return function_tables[{ libkey, queue }].dorgqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].spotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].dpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].cpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } 
sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::complex **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrf_group_usm_sycl(queue, uplo, n, a, lda, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zpotrf_group_usm_sycl( + queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].spotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].spotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].dpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].dpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event 
potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::complex **a, @@ -1922,9 +1984,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].cpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::complex **a, @@ -1932,9 +1994,9 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi:: std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zpotrs_group_usm_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + return function_tables[{ libkey, queue }].zpotrs_group_usm_sycl( + queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex **a, @@ -1942,9 +2004,9 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].cungqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - 
dependencies); + return function_tables[{ libkey, queue }].cungqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex **a, @@ -1952,92 +2014,92 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, const std::vector &dependencies) { - return function_tables[libkey].zungqr_group_usm_sycl(queue, m, n, k, a, lda, tau, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies); + return function_tables[{ libkey, queue }].zungqr_group_usm_sycl( + queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, + dependencies); } template <> std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return 
function_tables[libkey].zgebrd_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgerqf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); + 
return function_tables[{ libkey, queue }].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].sgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].sgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].dgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].dgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2046,8 +2108,8 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].cgesvd_scratchpad_size_sycl(queue, 
jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].cgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2056,64 +2118,66 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - return function_tables[libkey].zgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, lda, ldu, - ldvt); + return function_tables[{ libkey, queue }].zgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n, + lda, ldu, ldvt); } template <> std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].sgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].dgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].cgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgetrf_scratchpad_size_sycl(queue, m, n, lda); + return function_tables[{ libkey, queue }].zgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t 
getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].sgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].dgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].cgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zgetri_scratchpad_size_sycl(queue, n, lda); + return function_tables[{ libkey, queue }].zgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t 
getrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2121,7 +2185,8 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].cgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].cgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2129,21 +2194,24 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].zgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, + lda, ldb); } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2151,8 +2219,8 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].chegvd_scratchpad_size_sycl(queue, itype, jobz, 
uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].chegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, @@ -2160,270 +2228,278 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zhegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].zhegvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t 
orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); 
+ return function_tables[{ libkey, queue }].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].sormrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].dormrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].sormqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].dormqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue 
&queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].sormtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].sormtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].dormtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].dormtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, 
std::int64_t lda) { - return function_tables[libkey].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); + return function_tables[{ libkey, queue }].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, + ldb); } template <> std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return 
function_tables[libkey].spotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].spotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - 
return function_tables[libkey].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); + return function_tables[{ libkey, queue }].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, + lda); } template <> std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].ssygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, - ldb); + return function_tables[{ libkey, queue }].ssygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, 
- ldb); + return function_tables[{ libkey, queue }].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, + n, lda, ldb); } template <> std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].dtrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].dtrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2432,8 +2508,8 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) 
{ - return function_tables[libkey].ctrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].ctrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, @@ -2442,8 +2518,8 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return function_tables[libkey].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, - lda, ldb); + return function_tables[{ libkey, queue }].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, + n, nrhs, lda, ldb); } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2451,7 +2527,8 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].cungbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].cungbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2459,33 +2536,34 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].zungbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); + return function_tables[{ libkey, queue }].zungbr_scratchpad_size_sycl(queue, vect, m, n, k, + lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t 
ungqr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return function_tables[libkey].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); + return function_tables[{ libkey, queue }].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return function_tables[libkey].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); + return function_tables[{ libkey, queue }].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, @@ -2494,8 +2572,8 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libk std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].cunmrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, @@ -2504,8 +2582,8 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device lib std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, @@ 
-2514,8 +2592,8 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libk std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].cunmqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2524,8 +2602,8 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device lib std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, - ldc); + return function_tables[{ libkey, queue }].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n, + k, lda, ldc); } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2534,8 +2612,8 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libk oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].cunmtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].cunmtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, @@ -2544,68 +2622,68 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device lib oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - return function_tables[libkey].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, - ldc); + return function_tables[{ libkey, queue }].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans, + m, n, lda, ldc); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, 
std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].sgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].sgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].dgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].dgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].cgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].cgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].zgetrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].zgetrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return 
function_tables[libkey].sgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].sgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].dgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].dgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].cgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].cgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - return function_tables[libkey].zgetri_batch_scratchpad_size_sycl(queue, n, lda, stride_a, - stride_ipiv, batch_size); + return function_tables[{ libkey, queue }].zgetri_batch_scratchpad_size_sycl( + queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2614,7 +2692,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return 
function_tables[libkey].sgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].sgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2624,7 +2702,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].dgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].dgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2632,7 +2710,7 @@ std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].cgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].cgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2640,7 +2718,7 @@ std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].zgetrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].zgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> @@ -2648,60 +2726,60 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t 
batch_size) { - return function_tables[libkey].sgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].sgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].dgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].dgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].cgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].cgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].zgeqrf_batch_scratchpad_size_sycl(queue, m, n, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].zgeqrf_batch_scratchpad_size_sycl( + queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return 
function_tables[libkey].spotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].spotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].dpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].dpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].cpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].cpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - return function_tables[libkey].zpotrf_batch_scratchpad_size_sycl(queue, uplo, n, lda, stride_a, - batch_size); + return function_tables[{ libkey, queue }].zpotrf_batch_scratchpad_size_sycl( + queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2709,7 +2787,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].spotrs_batch_scratchpad_size_sycl( + return 
function_tables[{ libkey, queue }].spotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2718,7 +2796,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].dpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].dpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2726,7 +2804,7 @@ std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].cpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].cpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2734,7 +2812,7 @@ std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - return function_tables[libkey].zpotrs_batch_scratchpad_size_sycl( + return function_tables[{ libkey, queue }].zpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> @@ -2742,46 +2820,46 @@ std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].sorgqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return 
function_tables[{ libkey, queue }].sorgqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].dorgqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].dorgqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].cungqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].cungqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - return function_tables[libkey].zungqr_batch_scratchpad_size_sycl(queue, m, n, k, lda, stride_a, - stride_tau, batch_size); + return function_tables[{ libkey, queue }].zungqr_batch_scratchpad_size_sycl( + queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return 
function_tables[{ libkey, queue }].sgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2789,8 +2867,8 @@ std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2798,24 +2876,24 @@ std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgetrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].sgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } 
template <> std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2823,8 +2901,8 @@ std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2832,8 +2910,8 @@ std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetri_group_scratchpad_size_sycl(queue, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgetri_group_scratchpad_size_sycl( + queue, n, lda, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2841,8 +2919,8 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].sgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t 
getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2850,40 +2928,40 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].dgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].cgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgetrs_group_scratchpad_size_sycl(queue, trans, n, nrhs, lda, - ldb, group_count, group_sizes); + return function_tables[{ libkey, queue }].zgetrs_group_scratchpad_size_sycl( + queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue 
}].sgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].dgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2891,8 +2969,8 @@ std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::devic std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].cgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, @@ -2900,54 +2978,54 @@ std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::devi std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zgeqrf_group_scratchpad_size_sycl(queue, m, n, lda, group_count, - group_sizes); + return function_tables[{ libkey, queue }].zgeqrf_group_scratchpad_size_sycl( + queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].sorgqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].sorgqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, 
group_sizes); } template <> std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dorgqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dorgqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].spotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].spotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, 
oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zpotrf_group_scratchpad_size_sycl(queue, uplo, n, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zpotrf_group_scratchpad_size_sycl( + queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2955,8 +3033,8 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].spotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].spotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, @@ -2964,38 +3042,38 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].dpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].dpotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cpotrs_group_scratchpad_size_sycl( + queue, 
uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zpotrs_group_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zpotrs_group_scratchpad_size_sycl( + queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].cungqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].cungqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { - return function_tables[libkey].zungqr_group_scratchpad_size_sycl(queue, m, n, k, lda, - group_count, group_sizes); + return function_tables[{ libkey, queue }].zungqr_group_scratchpad_size_sycl( + queue, m, n, k, lda, group_count, group_sizes); } } //namespace detail diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index 1734287ee..68e3a5ba5 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -31,21 +31,21 @@ static oneapi::mkl::detail::table_initializer engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue, std::uint64_t seed) { - return function_tables[libkey].create_philox4x32x10_sycl(queue, seed); + return function_tables[{ libkey, queue 
}].create_philox4x32x10_sycl(queue, seed); } engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue, std::initializer_list seed) { - return function_tables[libkey].create_philox4x32x10_ex_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_philox4x32x10_ex_sycl(queue, seed); } engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue, std::uint32_t seed) { - return function_tables[libkey].create_mrg32k3a_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_mrg32k3a_sycl(queue, seed); } engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue, std::initializer_list seed) { - return function_tables[libkey].create_mrg32k3a_ex_sycl(queue, seed); + return function_tables[{ libkey, queue }].create_mrg32k3a_ex_sycl(queue, seed); } } // namespace detail From d21ac7bc4f2f8d26bcd009fb924e9c2724bca791 Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 4 Sep 2024 13:48:09 +0100 Subject: [PATCH 04/10] Enable test for OpenCL GPUs except for Intel's one This patch enables the possibility to run tests with generic_device for devices that have an OpenCL backend. 
--- tests/unit_tests/main_test.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index bac3f8c83..39763e39d 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -101,15 +101,17 @@ int main(int argc, char** argv) { auto plat_devs = plat.get_devices(); for (auto dev : plat_devs) { try { - /* Do not test for OpenCL backend on GPU */ - if (dev.is_gpu() && plat.get_info().find( - "OpenCL") != std::string::npos) + unsigned int vendor_id = + static_cast(dev.get_info()); + /* Do not test for OpenCL backend on Intel GPU */ + if (dev.is_gpu() && + plat.get_info().find("OpenCL") != + std::string::npos && + vendor_id == INTEL_ID) continue; if (unique_devices.find(dev.get_info()) == unique_devices.end()) { unique_devices.insert(dev.get_info()); - unsigned int vendor_id = static_cast( - dev.get_info()); #if !defined(ENABLE_MKLCPU_BACKEND) && !defined(ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ !defined(ENABLE_PORTFFT_BACKEND) if (dev.is_cpu()) From ee6569cf27333411b3d9975f0f4c8fd45a8300e5 Mon Sep 17 00:00:00 2001 From: nscipione Date: Thu, 5 Sep 2024 09:34:06 +0100 Subject: [PATCH 05/10] Add pragma to guard generic device usage and exception --- include/oneapi/mkl/detail/backends_table.hpp | 2 ++ src/include/function_table_initializer.hpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index 418f91131..b385b21b0 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -86,7 +86,9 @@ static std::map>> libraries = } }, { device::generic_device, { +#ifdef ENABLE_PORTBLAS_BACKEND LIB_NAME("blas_portblas"), +#endif } } } }, { domain::dft, diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 953226c3e..880035759 100644 --- 
a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -99,13 +99,17 @@ class table_initializer { break; } if (!handle) { +#ifndef ENABLE_PORTBLAS_BACKEND if (key == oneapi::mkl::device::generic_device) { throw mkl::unsupported_device("", "", q.get_device()); } else { +#endif std::cerr << ERROR_MSG << '\n'; throw mkl::backend_not_found(); +#ifndef ENABLE_PORTBLAS_BACKEND } +#endif } auto t = reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); From 922a654b913a70bde6e84dc618e798fce9bc8cc8 Mon Sep 17 00:00:00 2001 From: nscipione Date: Thu, 5 Sep 2024 09:58:42 +0100 Subject: [PATCH 06/10] Fix typo --- src/blas/backends/portblas/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt index 8f981f88b..03fddbb38 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -46,7 +46,7 @@ if(NUM_TARGETS EQUAL 0) list(LENGTH SYCL_TARGETS NUM_TARGETS) endif() -if (PORTBLAS_TUNING_TARGET) +if(PORTBLAS_TUNING_TARGET) # Allow the user to manually enable a specific device type # for tuned portBLAS configurations and sets sycl-target. 
if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU") From 3466d6f87bc5e0f720dd9a538cd491c5439fe893 Mon Sep 17 00:00:00 2001 From: nscipione Date: Fri, 6 Sep 2024 09:52:25 +0100 Subject: [PATCH 07/10] Move generic_device support pragma Moved pragma and simplified if-statement to increase code readability --- src/include/function_table_initializer.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 880035759..afa831750 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -67,6 +67,12 @@ class table_initializer { } private: +#ifdef ENABLE_PORTBLAS_BACKEND + static constexpr bool is_generic_device_supported = true; +#else + static constexpr bool is_generic_device_supported = false; +#endif + #ifdef _WIN64 // Create a string with last error message std::string GetLastErrorStdStr() { @@ -99,17 +105,14 @@ class table_initializer { break; } if (!handle) { -#ifndef ENABLE_PORTBLAS_BACKEND - if (key == oneapi::mkl::device::generic_device) { + if constexpr (!is_generic_device_supported && + key == oneapi::mkl::device::generic_device) { throw mkl::unsupported_device("", "", q.get_device()); } else { -#endif std::cerr << ERROR_MSG << '\n'; throw mkl::backend_not_found(); -#ifndef ENABLE_PORTBLAS_BACKEND } -#endif } auto t = reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); From 5f743b279a4c6300d0a21c1c55d8f7fb82bd6492 Mon Sep 17 00:00:00 2001 From: nscipione Date: Fri, 6 Sep 2024 10:14:37 +0100 Subject: [PATCH 08/10] Fix broken if-statement --- src/include/function_table_initializer.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index afa831750..881a5205f 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -105,8 +105,7 
@@ class table_initializer { break; } if (!handle) { - if constexpr (!is_generic_device_supported && - key == oneapi::mkl::device::generic_device) { + if (!is_generic_device_supported && key == oneapi::mkl::device::generic_device) { throw mkl::unsupported_device("", "", q.get_device()); } else { From 29e94fcb56ce84e7f732bd0f8a63a3762a307611 Mon Sep 17 00:00:00 2001 From: nscipione Date: Thu, 12 Sep 2024 09:52:02 +0100 Subject: [PATCH 09/10] Update vector of device for testing Previously, if some specific backends were enabled, the test suite always added a CPU device to the list of devices to run tests on, even if another if-condition had already added it. This behaviour caused a link-time issue if a CPU device was not available. This commit removes that unconditional addition and adds the missing preprocessor check to the other device selection, fixing the linking issue. --- tests/unit_tests/main_test.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index 39763e39d..7a20e0ed8 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -113,7 +113,7 @@ int main(int argc, char** argv) { unique_devices.end()) { unique_devices.insert(dev.get_info()); #if !defined(ENABLE_MKLCPU_BACKEND) && !defined(ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ - !defined(ENABLE_PORTFFT_BACKEND) + !defined(ENABLE_PORTFFT_BACKEND) && !defined(ENABLE_NETLIB_BACKEND) if (dev.is_cpu()) continue; #endif @@ -153,14 +153,6 @@ int main(int argc, char** argv) { #endif } -#if defined(ENABLE_MKLCPU_BACKEND) || defined(ENABLE_NETLIB_BACKEND) || \ - defined(ENABLE_PORTBLAS_BACKEND_INTEL_CPU) -#ifdef __HIPSYCL__ - local_devices.push_back(sycl::device(sycl::cpu_selector())); -#else - local_devices.push_back(sycl::device(sycl::cpu_selector_v)); -#endif -#endif #define GET_NAME(d) (d).template get_info() for (auto& local_dev : local_devices) { // Test only unique devices From 70f88357a7f5b062c5e1267c520214da577e7cdf Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Nicol=C3=B2=20Scipione?= Date: Thu, 19 Sep 2024 17:37:13 +0200 Subject: [PATCH 10/10] Update docs/building_the_project_with_dpcpp.rst Co-authored-by: Maria Kraynyuk --- docs/building_the_project_with_dpcpp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index d85af5760..2fea9395f 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -434,7 +434,7 @@ Build oneMKL for the BLAS domain on a generic SYCL device: -DENABLE_MKLGPU_BACKEND=False \ -DENABLE_PORTBLAS_BACKEND=True -Note that this is not a supported configuration. This builds oneMKL Interfaces +Note that this is not a tested configuration. This builds oneMKL Interfaces with the portBLAS backend only, for a generic SYCL device supported by the Open DPC++ project.