diff --git a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/CMakeLists.txt index d33bfaf57a..64ed528359 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/CMakeLists.txt @@ -1,6 +1,5 @@ include_directories(common) -add_example_with_mkl(dgemm_target_variant_dispatch_c) add_example_with_mkl(dgemm_dispatch_c) add_example_with_mkl(dgemm_example_01) add_example_with_mkl(dgemm_example_02) diff --git a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_dispatch_c.cpp b/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_dispatch_c.cpp index 1e9ab2ea0f..d197c32fb8 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_dispatch_c.cpp +++ b/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_dispatch_c.cpp @@ -63,7 +63,7 @@ int main() #pragma omp target data map(to: A[0:m*k], B[0:k*n]) map(tofrom: C[0:m*n]) { - #pragma omp target variant dispatch use_device_ptr(A, B, C) + #pragma omp dispatch cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n); } diff --git a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_target_variant_dispatch_c.cpp b/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_target_variant_dispatch_c.cpp deleted file mode 100644 index d803bd7ab9..0000000000 --- a/Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/dgemm_target_variant_dispatch_c.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//============================================================== -// Copyright © 2022 Intel Corporation -// -// SPDX-License-Identifier: MIT -// ============================================================= -// clang-format off -// Snippet begin -#include -#include -#include -#include -#include "mkl.h" -#include "mkl_omp_offload.h" - -#define min(x,y) (((x) < (y)) ? (x) : (y)) -#define EPSILON 0.0001 - -int main() -{ - double *A, *B, *C, *C_fl; - int64_t m, n, k; - double alpha, beta; - double sum; - int64_t i, j, q; - int fail; - - printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n" - " Intel oneMKL function dgemm, where A, B, and C are matrices and \n" - " alpha and beta are double precision scalars\n\n"); - - m = 2000, k = 200, n = 1000; - printf (" Initializing data for matrix multiplication C=A*B for matrix \n" - " A(%li x %li) and matrix B(%li x %li)\n\n", m, k, k, n); - alpha = 1.0; beta = 0.0; - - printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n" - " performance \n\n"); - A = (double *)mkl_malloc( m * k * sizeof( double ), 64 ); - B = (double *)mkl_malloc( k * n * sizeof( double ), 64 ); - C = (double *)mkl_malloc( m * n * sizeof( double ), 64 ); - C_fl = (double *)mkl_malloc( m*n*sizeof( double ), 64 ); - - if (A == NULL || B == NULL || C == NULL || C_fl == NULL) { - printf( "\n ERROR: Cannot allocate memory for matrices. Exiting... \n\n"); - return 1; - } - - printf (" Intializing matrices \n\n"); - for (i = 0; i < (m*k); i++) { - A[i] = (double)(i+1); - } - - for (i = 0; i < (k*n); i++) { - B[i] = (double)(-i-1); - } - - for (i = 0; i < (m*n); i++) { - C[i] = 0.0; - C_fl[i] = 0.0; - } - - printf (" Computing matrix product using Intel oneMKL dgemm function via CBLAS interface \n\n"); - - #pragma omp target data map(to: A[0:m*k], B[0:k*n]) map(tofrom: C[0:m*n]) - { - #pragma omp target variant dispatch use_device_ptr(A, B, C) - { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, - m, n, k, alpha, A, k, B, n, beta, C, n); - } - } - - printf ("\n Top left corner of matrix C: \n"); - for (i=0; i EPSILON) { - fail = 1; - break; - } - } - - if (fail) - printf ("\n **** FAIL **** \n"); - else - printf ("\n **** PASS **** \n"); - - printf ("\n Deallocating memory \n\n"); - mkl_free(A); - mkl_free(B); - mkl_free(C); - - return fail; -} -// Snippet end