From 6c3a6bc818c394f1953a790c7be4ed30c09c1307 Mon Sep 17 00:00:00 2001
From: Henri Menke <henri@henrimenke.de>
Date: Fri, 29 Sep 2023 16:16:49 +0200
Subject: [PATCH] fix: BLAS batch strided needs sufficiently new MKL

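The batch-strided GEMM routines (e.g. dgemm_batch_strided) first appear in the
headers of Intel MKL 2020.2, i.e. INTEL_MKL_VERSION 20200002.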

Unfortunately, this is not mentioned in any changelog:
https://www.intel.com/content/www/us/en/developer/articles/release-notes/intel-math-kernel-library-release-notes-and-new-features.html

$ curl -sSfL https://apt.repos.intel.com/mkl/dists/all/main/binary-all/Packages |
    awk '/^Filename/ && /intel-mkl-common-c-2020/ { print $2 }' |
    xargs -I{} curl -sSfLO 'https://apt.repos.intel.com/mkl/{}'
$ for pkg in intel-mkl-common-c-2020*.deb; { ar x "$pkg" data.tar.xz && tar xJf data.tar.xz; }
$ grep -r dgemm_batch_strided ./opt/intel/compilers_and_libraries_*/linux/mkl/include/mkl_blas.h
./opt/intel/compilers_and_libraries_2020.2.254/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
./opt/intel/compilers_and_libraries_2020.3.279/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
./opt/intel/compilers_and_libraries_2020.4.304/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
---
 c++/nda/blas/interface/cxx_interface.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/c++/nda/blas/interface/cxx_interface.cpp b/c++/nda/blas/interface/cxx_interface.cpp
index 35ec65203..2201ca5ff 100644
--- a/c++/nda/blas/interface/cxx_interface.cpp
+++ b/c++/nda/blas/interface/cxx_interface.cpp
@@ -135,7 +135,7 @@ namespace nda::blas::f77 {
 
   void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, int strideA, const double *B, int LDB,
                           int strideB, double beta, double *C, int LDC, int strideC, int batch_count) {
-#ifdef NDA_USE_MKL
+#if defined(NDA_USE_MKL) && INTEL_MKL_VERSION >= 20200002
     dgemm_batch_strided(&op_a, &op_b, &M, &N, &K, &alpha, A, &LDA, &strideA, B, &LDB, &strideB, &beta, C, &LDC, &strideC, &batch_count);
 #else
     for (int i = 0; i < batch_count; ++i) gemm(op_a, op_b, M, N, K, alpha, A + i * strideA, LDA, B + i * strideB, LDB, beta, C + i * strideC, LDC);
@@ -143,7 +143,7 @@ namespace nda::blas::f77 {
   }
   void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, dcomplex alpha, const dcomplex *A, int LDA, int strideA, const dcomplex *B,
                           int LDB, int strideB, dcomplex beta, dcomplex *C, int LDC, int strideC, int batch_count) {
-#ifdef NDA_USE_MKL
+#if defined(NDA_USE_MKL) && INTEL_MKL_VERSION >= 20200002
     zgemm_batch_strided(&op_a, &op_b, &M, &N, &K, mklcplx(&alpha), mklcplx(A), &LDA, &strideA, mklcplx(B), &LDB, &strideB, mklcplx(&beta), mklcplx(C),
                         &LDC, &strideC, &batch_count);
 #else