Skip to content

Commit

Permalink
[MKL][BLAS] Add omatcopy2 interface (#404)
Browse files Browse the repository at this point in the history
  • Loading branch information
s-Nick authored Dec 1, 2023
1 parent d56a0f1 commit 7cb0264
Show file tree
Hide file tree
Showing 29 changed files with 1,800 additions and 2 deletions.
76 changes: 76 additions & 0 deletions include/oneapi/mkl/blas.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,40 @@ static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m,
detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb);
}

// Buffer-API overload of omatcopy2 for float data.
// Resolves the device id for `queue` and hands every argument, unchanged and in the same
// order, to detail::omatcopy2 (declared elsewhere).
// NOTE(review): the name suggests an out-of-place scaled matrix copy/transposition with
// extra strides (stridea/strideb) — confirm the exact semantics against the oneMKL spec.
static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n,
                             float alpha, sycl::buffer<float, 1> &a, std::int64_t lda,
                             std::int64_t stridea, sycl::buffer<float, 1> &b, std::int64_t ldb,
                             std::int64_t strideb) {
    const auto device_id = get_device_id(queue);
    detail::omatcopy2(device_id, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb);
}

// Buffer-API overload of omatcopy2 for double data.
// Resolves the device id for `queue` and hands every argument, unchanged and in the same
// order, to detail::omatcopy2 (declared elsewhere).
// NOTE(review): presumably an out-of-place scaled copy/transposition with extra strides —
// confirm the exact semantics against the oneMKL spec.
static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n,
                             double alpha, sycl::buffer<double, 1> &a, std::int64_t lda,
                             std::int64_t stridea, sycl::buffer<double, 1> &b, std::int64_t ldb,
                             std::int64_t strideb) {
    const auto device_id = get_device_id(queue);
    detail::omatcopy2(device_id, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb);
}

// Buffer-API overload of omatcopy2 for std::complex<float> data.
// Resolves the device id for `queue` and hands every argument, unchanged and in the same
// order, to detail::omatcopy2 (declared elsewhere).
// NOTE(review): presumably an out-of-place scaled copy/transposition with extra strides —
// confirm the exact semantics against the oneMKL spec.
static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n,
                             std::complex<float> alpha, sycl::buffer<std::complex<float>, 1> &a,
                             std::int64_t lda, std::int64_t stridea,
                             sycl::buffer<std::complex<float>, 1> &b, std::int64_t ldb,
                             std::int64_t strideb) {
    const auto device_id = get_device_id(queue);
    detail::omatcopy2(device_id, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb);
}

// Buffer-API overload of omatcopy2 for std::complex<double> data.
// Resolves the device id for `queue` and hands every argument, unchanged and in the same
// order, to detail::omatcopy2 (declared elsewhere).
// NOTE(review): presumably an out-of-place scaled copy/transposition with extra strides —
// confirm the exact semantics against the oneMKL spec.
static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n,
                             std::complex<double> alpha, sycl::buffer<std::complex<double>, 1> &a,
                             std::int64_t lda, std::int64_t stridea,
                             sycl::buffer<std::complex<double>, 1> &b, std::int64_t ldb,
                             std::int64_t strideb) {
    const auto device_id = get_device_id(queue);
    detail::omatcopy2(device_id, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb);
}

static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n,
float alpha, sycl::buffer<float, 1> &ab, std::int64_t lda,
std::int64_t ldb) {
Expand Down Expand Up @@ -4056,6 +4090,48 @@ static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int
return done;
}

// USM (pointer) overload of omatcopy2 for float data.
// Forwards all arguments, plus the device id obtained from get_device_id(queue), to
// detail::omatcopy2 and returns the event that call produces.
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m,
                                    std::int64_t n, float alpha, const float *a, std::int64_t lda,
                                    std::int64_t stridea, float *b, std::int64_t ldb,
                                    std::int64_t strideb,
                                    const std::vector<sycl::event> &dependencies = {}) {
    return detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha,
                             a, lda, stridea,
                             b, ldb, strideb,
                             dependencies);
}

// USM (pointer) overload of omatcopy2 for double data.
// Forwards all arguments, plus the device id obtained from get_device_id(queue), to
// detail::omatcopy2 and returns the event that call produces.
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m,
                                    std::int64_t n, double alpha, const double *a, std::int64_t lda,
                                    std::int64_t stridea, double *b, std::int64_t ldb,
                                    std::int64_t strideb,
                                    const std::vector<sycl::event> &dependencies = {}) {
    return detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha,
                             a, lda, stridea,
                             b, ldb, strideb,
                             dependencies);
}

// USM (pointer) overload of omatcopy2 for std::complex<float> data.
// Forwards all arguments, plus the device id obtained from get_device_id(queue), to
// detail::omatcopy2 and returns the event that call produces.
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m,
                                    std::int64_t n, std::complex<float> alpha,
                                    const std::complex<float> *a, std::int64_t lda,
                                    std::int64_t stridea, std::complex<float> *b, std::int64_t ldb,
                                    std::int64_t strideb,
                                    const std::vector<sycl::event> &dependencies = {}) {
    return detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha,
                             a, lda, stridea,
                             b, ldb, strideb,
                             dependencies);
}

// USM (pointer) overload of omatcopy2 for std::complex<double> data.
// Forwards all arguments, plus the device id obtained from get_device_id(queue), to
// detail::omatcopy2 and returns the event that call produces.
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m,
                                    std::int64_t n, std::complex<double> alpha,
                                    const std::complex<double> *a, std::int64_t lda,
                                    std::int64_t stridea, std::complex<double> *b, std::int64_t ldb,
                                    std::int64_t strideb,
                                    const std::vector<sycl::event> &dependencies = {}) {
    return detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha,
                             a, lda, stridea,
                             b, ldb, strideb,
                             dependencies);
}

static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m,
std::int64_t n, float alpha, float *ab, std::int64_t lda,
std::int64_t ldb,
Expand Down
48 changes: 48 additions & 0 deletions include/oneapi/mkl/blas/detail/blas_ct_backends.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,28 @@ static inline void omatcopy(backend_selector<backend::BACKEND> selector, transpo
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
sycl::buffer<std::complex<double>, 1> &b, std::int64_t ldb);

// Declaration only: float overload of omatcopy2 on sycl::buffer operands, taking a
// backend_selector<backend::BACKEND> as its first parameter. Returns nothing.
// NOTE(review): BACKEND is presumably substituted per backend by the including header — confirm.
static inline void omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1> &a,
std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1> &b,
std::int64_t ldb, std::int64_t strideb);

// Declaration only: double overload of omatcopy2 on sycl::buffer operands, taking a
// backend_selector<backend::BACKEND> as its first parameter. Returns nothing.
static inline void omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, double alpha,
sycl::buffer<double, 1> &a, std::int64_t lda, std::int64_t stridea,
sycl::buffer<double, 1> &b, std::int64_t ldb, std::int64_t strideb);

// Declaration only: std::complex<float> overload of omatcopy2 on sycl::buffer operands,
// taking a backend_selector<backend::BACKEND> as its first parameter. Returns nothing.
static inline void omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
sycl::buffer<std::complex<float>, 1> &a, std::int64_t lda,
std::int64_t stridea, sycl::buffer<std::complex<float>, 1> &b,
std::int64_t ldb, std::int64_t strideb);

// Declaration only: std::complex<double> overload of omatcopy2 on sycl::buffer operands,
// taking a backend_selector<backend::BACKEND> as its first parameter. Returns nothing.
static inline void omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
std::int64_t stridea, sycl::buffer<std::complex<double>, 1> &b,
std::int64_t ldb, std::int64_t strideb);

static inline void imatcopy(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1> &ab,
std::int64_t lda, std::int64_t ldb);
Expand Down Expand Up @@ -2736,6 +2758,32 @@ static inline sycl::event omatcopy(backend_selector<backend::BACKEND> selector,
std::complex<double> *b, std::int64_t ldb,
const std::vector<sycl::event> &dependencies = {});

// Declaration only: float overload of omatcopy2 on raw pointers (const input `a`, writable
// output `b`), taking a backend_selector<backend::BACKEND> first. Returns a sycl::event;
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha, const float *a,
std::int64_t lda, std::int64_t stridea, float *b,
std::int64_t ldb, std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});

// Declaration only: double overload of omatcopy2 on raw pointers (const input `a`, writable
// output `b`), taking a backend_selector<backend::BACKEND> first. Returns a sycl::event;
// `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, double alpha, const double *a,
std::int64_t lda, std::int64_t stridea, double *b,
std::int64_t ldb, std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});

// Declaration only: std::complex<float> overload of omatcopy2 on raw pointers (const input
// `a`, writable output `b`), taking a backend_selector<backend::BACKEND> first. Returns a
// sycl::event; `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
const std::complex<float> *a, std::int64_t lda,
std::int64_t stridea, std::complex<float> *b, std::int64_t ldb,
std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});

// Declaration only: std::complex<double> overload of omatcopy2 on raw pointers (const input
// `a`, writable output `b`), taking a backend_selector<backend::BACKEND> first. Returns a
// sycl::event; `dependencies` defaults to an empty list.
static inline sycl::event omatcopy2(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
const std::complex<double> *a, std::int64_t lda,
std::int64_t stridea, std::complex<double> *b, std::int64_t ldb,
std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event imatcopy(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha, float *ab,
std::int64_t lda, std::int64_t ldb,
Expand Down
42 changes: 42 additions & 0 deletions include/oneapi/mkl/blas/detail/blas_loader.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,25 @@ ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, tran
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
sycl::buffer<std::complex<double>, 1> &b, std::int64_t ldb);

// Exported declaration: float overload of omatcopy2 on sycl::buffer operands. Takes a
// device key (`libkey`) followed by the queue and the matrix arguments. Returns nothing.
// NOTE(review): `libkey` presumably selects the backend library to dispatch to — confirm.
ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1> &a,
std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1> &b,
std::int64_t ldb, std::int64_t strideb);
// Exported declaration: double overload of omatcopy2 on sycl::buffer operands. Takes a
// device key (`libkey`) followed by the queue and the matrix arguments. Returns nothing.
ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, double alpha,
sycl::buffer<double, 1> &a, std::int64_t lda, std::int64_t stridea,
sycl::buffer<double, 1> &b, std::int64_t ldb, std::int64_t strideb);
// Exported declaration: std::complex<float> overload of omatcopy2 on sycl::buffer operands.
// Takes a device key (`libkey`) followed by the queue and the matrix arguments.
// Returns nothing.
ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
sycl::buffer<std::complex<float>, 1> &a, std::int64_t lda,
std::int64_t stridea, sycl::buffer<std::complex<float>, 1> &b,
std::int64_t ldb, std::int64_t strideb);
// Exported declaration: std::complex<double> overload of omatcopy2 on sycl::buffer operands.
// Takes a device key (`libkey`) followed by the queue and the matrix arguments.
// Returns nothing.
ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
std::int64_t stridea, sycl::buffer<std::complex<double>, 1> &b,
std::int64_t ldb, std::int64_t strideb);

ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1> &ab,
std::int64_t lda, std::int64_t ldb);
Expand Down Expand Up @@ -2494,6 +2513,29 @@ ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queu
std::complex<double> *b, std::int64_t ldb,
const std::vector<sycl::event> &dependencies = {});

// Exported declaration: float overload of omatcopy2 on raw pointers (const input `a`,
// writable output `b`). Takes a device key (`libkey`) and the queue first, returns a
// sycl::event, and defaults `dependencies` to an empty list.
ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, float alpha, const float *a,
std::int64_t lda, std::int64_t stridea, float *b,
std::int64_t ldb, std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});
// Exported declaration: double overload of omatcopy2 on raw pointers (const input `a`,
// writable output `b`). Takes a device key (`libkey`) and the queue first, returns a
// sycl::event, and defaults `dependencies` to an empty list.
ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, double alpha, const double *a,
std::int64_t lda, std::int64_t stridea, double *b,
std::int64_t ldb, std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});
// Exported declaration: std::complex<float> overload of omatcopy2 on raw pointers (const
// input `a`, writable output `b`). Takes a device key (`libkey`) and the queue first,
// returns a sycl::event, and defaults `dependencies` to an empty list.
ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
const std::complex<float> *a, std::int64_t lda,
std::int64_t stridea, std::complex<float> *b, std::int64_t ldb,
std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});
// Exported declaration: std::complex<double> overload of omatcopy2 on raw pointers (const
// input `a`, writable output `b`). Takes a device key (`libkey`) and the queue first,
// returns a sycl::event, and defaults `dependencies` to an empty list.
ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
const std::complex<double> *a, std::int64_t lda,
std::int64_t stridea, std::complex<double> *b, std::int64_t ldb,
std::int64_t strideb,
const std::vector<sycl::event> &dependencies = {});

ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans,
std::int64_t m, std::int64_t n, float alpha, float *ab,
std::int64_t lda, std::int64_t ldb,
Expand Down
70 changes: 70 additions & 0 deletions include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,38 @@ void omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::
ldb);
}

// cuBLAS compile-time-selector overload of omatcopy2 for float sycl::buffer operands.
// Unwraps the queue held by `selector` and forwards the remaining arguments unchanged to
// oneapi::mkl::blas::cublas::MAJOR::omatcopy2.
// NOTE(review): MAJOR is presumably a macro naming the layout namespace — confirm.
void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1> &a, std::int64_t lda,
               std::int64_t stridea, sycl::buffer<float, 1> &b, std::int64_t ldb,
               std::int64_t strideb) {
    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                a, lda, stridea,
                                                b, ldb, strideb);
}

// cuBLAS compile-time-selector overload of omatcopy2 for double sycl::buffer operands.
// Unwraps the queue held by `selector` and forwards the remaining arguments unchanged to
// oneapi::mkl::blas::cublas::MAJOR::omatcopy2.
void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1> &a, std::int64_t lda,
               std::int64_t stridea, sycl::buffer<double, 1> &b, std::int64_t ldb,
               std::int64_t strideb) {
    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                a, lda, stridea,
                                                b, ldb, strideb);
}

// cuBLAS compile-time-selector overload of omatcopy2 for std::complex<float> sycl::buffer
// operands. Unwraps the queue held by `selector` and forwards the remaining arguments
// unchanged to oneapi::mkl::blas::cublas::MAJOR::omatcopy2.
void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1> &a,
               std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1> &b,
               std::int64_t ldb, std::int64_t strideb) {
    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                a, lda, stridea,
                                                b, ldb, strideb);
}

// cuBLAS compile-time-selector overload of omatcopy2 for std::complex<double> sycl::buffer
// operands. Unwraps the queue held by `selector` and forwards the remaining arguments
// unchanged to oneapi::mkl::blas::cublas::MAJOR::omatcopy2.
void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1> &a,
               std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1> &b,
               std::int64_t ldb, std::int64_t strideb) {
    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                a, lda, stridea,
                                                b, ldb, strideb);
}

void imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
std::int64_t n, float alpha, sycl::buffer<float, 1> &ab, std::int64_t lda,
std::int64_t ldb) {
Expand Down Expand Up @@ -4039,6 +4071,44 @@ sycl::event omatcopy(backend_selector<backend::cublas> selector, transpose trans
return done;
}

// cuBLAS compile-time-selector overload of omatcopy2 for float raw-pointer operands.
// Unwraps the queue held by `selector`, forwards the remaining arguments unchanged to
// oneapi::mkl::blas::cublas::MAJOR::omatcopy2, and returns the event that call produces.
sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float *a, std::int64_t lda,
                      std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb,
                      const std::vector<sycl::event> &dependencies) {
    return oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                       a, lda, stridea,
                                                       b, ldb, strideb,
                                                       dependencies);
}

// cuBLAS compile-time-selector overload of omatcopy2 for double raw-pointer operands.
// Unwraps the queue held by `selector`, forwards the remaining arguments unchanged to
// oneapi::mkl::blas::cublas::MAJOR::omatcopy2, and returns the event that call produces.
sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double *a, std::int64_t lda,
                      std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb,
                      const std::vector<sycl::event> &dependencies) {
    return oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                       a, lda, stridea,
                                                       b, ldb, strideb,
                                                       dependencies);
}

// cuBLAS compile-time-selector overload of omatcopy2 for std::complex<float> raw-pointer
// operands. Unwraps the queue held by `selector`, forwards the remaining arguments unchanged
// to oneapi::mkl::blas::cublas::MAJOR::omatcopy2, and returns the event that call produces.
sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, std::complex<float> alpha, const std::complex<float> *a,
                      std::int64_t lda, std::int64_t stridea, std::complex<float> *b,
                      std::int64_t ldb, std::int64_t strideb,
                      const std::vector<sycl::event> &dependencies) {
    return oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                       a, lda, stridea,
                                                       b, ldb, strideb,
                                                       dependencies);
}

// cuBLAS compile-time-selector overload of omatcopy2 for std::complex<double> raw-pointer
// operands. Unwraps the queue held by `selector`, forwards the remaining arguments unchanged
// to oneapi::mkl::blas::cublas::MAJOR::omatcopy2, and returns the event that call produces.
sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, std::complex<double> alpha, const std::complex<double> *a,
                      std::int64_t lda, std::int64_t stridea, std::complex<double> *b,
                      std::int64_t ldb, std::int64_t strideb,
                      const std::vector<sycl::event> &dependencies) {
    return oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha,
                                                       a, lda, stridea,
                                                       b, ldb, strideb,
                                                       dependencies);
}

sycl::event imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb,
const std::vector<sycl::event> &dependencies) {
Expand Down
Loading

0 comments on commit 7cb0264

Please sign in to comment.