Skip to content

Commit

Permalink
fix: BLAS batch strided needs sufficiently new MKL
Browse files Browse the repository at this point in the history
The batch strided GEMM routines (e.g. dgemm_batch_strided) were first introduced in Intel MKL 2020.2.

Unfortunately, this is not mentioned in any changelog:
https://www.intel.com/content/www/us/en/developer/articles/release-notes/intel-math-kernel-library-release-notes-and-new-features.html

$ curl -sSfL https://apt.repos.intel.com/mkl/dists/all/main/binary-all/Packages |
    awk '/^Filename/ && /intel-mkl-common-c-2020/ { print $2 }' |
    xargs -I{} curl -sSfLO 'https://apt.repos.intel.com/mkl/{}'
$ for pkg in intel-mkl-common-c-2020*.deb; { ar x "$pkg" data.tar.xz && tar xJf data.tar.xz }
$ grep -r dgemm_batch_strided ./opt/intel/compilers_and_libraries_*/linux/mkl/include/mkl_blas.h
./opt/intel/compilers_and_libraries_2020.2.254/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
./opt/intel/compilers_and_libraries_2020.3.279/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
./opt/intel/compilers_and_libraries_2020.4.304/linux/mkl/include/mkl_blas.h:void dgemm_batch_strided(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
  • Loading branch information
hmenke committed Sep 29, 2023
1 parent 7c58e87 commit ecf1b30
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions c++/nda/blas/interface/cxx_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,15 @@ namespace nda::blas::f77 {

// Strided-batch GEMM for double precision.
//
// Runs `batch_count` GEMM operations. For batch index i the operands are located at
// A + i * strideA, B + i * strideB and C + i * strideC; op_a/op_b, M/N/K, alpha/beta and
// LDA/LDB/LDC are forwarded unchanged to every operation.
//
// When MKL is enabled and new enough, the whole batch is handed to MKL's
// dgemm_batch_strided in one call; otherwise it falls back to one gemm call per batch entry.
void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, int strideA, const double *B, int LDB,
int strideB, double beta, double *C, int LDC, int strideC, int batch_count) {
// NOTE(review): this guard appears to be the removed (pre-change) side of the rendered diff;
// the line that follows is its replacement. Only one of the two exists in the real file.
#ifdef NDA_USE_MKL
// The version check is needed because dgemm_batch_strided first appears in the MKL 2020.2
// headers (INTEL_MKL_VERSION 20200002); older MKL releases must take the fallback path.
#if defined(NDA_USE_MKL) && INTEL_MKL_VERSION >= 20200002
// The Fortran-style interface takes every scalar argument by address.
dgemm_batch_strided(&op_a, &op_b, &M, &N, &K, &alpha, A, &LDA, &strideA, B, &LDB, &strideB, &beta, C, &LDC, &strideC, &batch_count);
#else
// Fallback: issue the batch entries one at a time, offsetting each pointer by i * stride.
for (int i = 0; i < batch_count; ++i) gemm(op_a, op_b, M, N, K, alpha, A + i * strideA, LDA, B + i * strideB, LDB, beta, C + i * strideC, LDC);
#endif
}
void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, dcomplex alpha, const dcomplex *A, int LDA, int strideA, const dcomplex *B,
int LDB, int strideB, dcomplex beta, dcomplex *C, int LDC, int strideC, int batch_count) {
#ifdef NDA_USE_MKL
#if defined(NDA_USE_MKL) && INTEL_MKL_VERSION >= 20200002
zgemm_batch_strided(&op_a, &op_b, &M, &N, &K, mklcplx(&alpha), mklcplx(A), &LDA, &strideA, mklcplx(B), &LDB, &strideB, mklcplx(&beta), mklcplx(C),
&LDC, &strideC, &batch_count);
#else
Expand Down

0 comments on commit ecf1b30

Please sign in to comment.