Skip to content

Commit

Permalink
Merge pull request #2197 from ndellingwood/master-release-4.3.01
Browse files Browse the repository at this point in the history
Release 4.3.01
  • Loading branch information
ndellingwood authored May 8, 2024
2 parents 1b0a15f + 3939ace commit d1a91b8
Show file tree
Hide file tree
Showing 31 changed files with 560 additions and 187 deletions.
16 changes: 11 additions & 5 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ jobs:
- name: Install Dependencies
run: |
brew install doxygen
python3 -m pip install sphinx -v "sphinx==6.2.1"
python3 -m pip install breathe
python3 -m pip install sphinx-rtd-theme
python3 -m venv .venv
. .venv/bin/activate
pip install sphinx -v "sphinx==6.2.1"
pip install breathe
pip install sphinx-rtd-theme
sphinx-build --version
doxygen --version
Expand Down Expand Up @@ -52,8 +54,10 @@ jobs:
working-directory: kokkos/build
run: make -j2 install

# sphinx needs to be available at configure time for the target to be generated
- name: configure_kokkos_kernels
run: |
. .venv/bin/activate
mkdir -p kokkos-kernels/{build,install}
cd kokkos-kernels/build
cmake \
Expand Down Expand Up @@ -81,5 +85,7 @@ jobs:
fi
- name: build_kokkos_kernels_sphinx
working-directory: kokkos-kernels/build
run: make Sphinx
run: |
. .venv/bin/activate
cd kokkos-kernels/build
make Sphinx
2 changes: 1 addition & 1 deletion .github/workflows/osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: kokkos/kokkos
ref: ${{ github.base_ref }}
ref: 4.3.00
path: kokkos

- name: configure_kokkos
Expand Down
18 changes: 15 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Change Log

## [4.3.01](https://github.com/kokkos/kokkos-kernels/tree/4.3.01)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.3.00...4.3.01)

### Bug Fixes:
- sparse: block spiluk fixes [\#2172](https://github.com/kokkos/kokkos-kernels/pull/2172)
- magma: tpl interaction fixes [\#2176](https://github.com/kokkos/kokkos-kernels/pull/2176), [\#2178](https://github.com/kokkos/kokkos-kernels/pull/2178), [\#2181](https://github.com/kokkos/kokkos-kernels/pull/2181)
- trsv: Add early return if numRows == 0 in trsv to avoid integer divide-by-zero error [\#2180](https://github.com/kokkos/kokkos-kernels/pull/2180)
- blas tpl: resolve potential duplicate symbol [\#2183](https://github.com/kokkos/kokkos-kernels/pull/2183)
- spmv: performance fix, add back special path for rank-2 x/y with 1 column [\#2164](https://github.com/kokkos/kokkos-kernels/pull/2164), [\#2168](https://github.com/kokkos/kokkos-kernels/pull/2168)
- BsrMatrix: Fix HostMirror typedef [\#2196](https://github.com/kokkos/kokkos-kernels/pull/2196)
- GA: Fix macOS docs build [\#2190](https://github.com/kokkos/kokkos-kernels/pull/2190)

## [4.3.00](https://github.com/kokkos/kokkos-kernels/tree/4.3.00) (2024-03-19)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.2.01...4.3.00)

Expand Down Expand Up @@ -639,7 +651,7 @@
## [3.6.00](https://github.com/kokkos/kokkos-kernels/tree/3.6.00) (2022-02-18)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.5.00...3.6.00)

### Features:
### Features:

#### Batched Sparse Linear algebra
- Kokkos Kernels is adding a new component to the library: batched sparse linear algebra.
Expand Down Expand Up @@ -673,7 +685,7 @@
- SpMV: adding support for rocSPARSE TPL [\#1221](https://github.com/kokkos/kokkos-kernels/pull/1221)

#### Additional new features
- bhalf: Unit test Batched GEMM [\#1251](https://github.com/kokkos/kokkos-kernels/pull/1251)
- bhalf: Unit test Batched GEMM [\#1251](https://github.com/kokkos/kokkos-kernels/pull/1251)
- and demonstrate GMRES example convergence with bhalf_t (https://github.com/kokkos/kokkos-kernels/pull/1300)
- Stream interface: adding stream support in GEMV and GEMM [\#1131](https://github.com/kokkos/kokkos-kernels/pull/1131)
- Improve double buffering batched gemm performance [\#1217](https://github.com/kokkos/kokkos-kernels/pull/1217)
Expand Down Expand Up @@ -962,7 +974,7 @@
## [3.1.01](https://github.com/kokkos/kokkos-kernels/tree/3.1.01) (2020-05-04)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.1.00...3.1.01)

** Fixed bugs:**
** Fixed bugs:**

- KokkosBatched QR PR breaking nightly tests [\#691](https://github.com/kokkos/kokkos-kernels/issues/691)

Expand Down
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SET(KOKKOSKERNELS_TOP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})

SET(KokkosKernels_VERSION_MAJOR 4)
SET(KokkosKernels_VERSION_MINOR 3)
SET(KokkosKernels_VERSION_PATCH 0)
SET(KokkosKernels_VERSION_PATCH 1)
SET(KokkosKernels_VERSION "${KokkosKernels_VERSION_MAJOR}.${KokkosKernels_VERSION_MINOR}.${KokkosKernels_VERSION_PATCH}")

#Set variables for config file
Expand Down Expand Up @@ -127,13 +127,13 @@ ELSE()
IF (NOT KOKKOSKERNELS_HAS_TRILINOS AND NOT KOKKOSKERNELS_HAS_PARENT)
# This is a standalone build
FIND_PACKAGE(Kokkos REQUIRED)
IF((${Kokkos_VERSION} VERSION_GREATER_EQUAL "4.1.0") AND (${Kokkos_VERSION} VERSION_LESS_EQUAL "4.3.0"))
IF((${Kokkos_VERSION} VERSION_GREATER_EQUAL "4.1.0") AND (${Kokkos_VERSION} VERSION_LESS_EQUAL "4.3.1"))
MESSAGE(STATUS "Found Kokkos version ${Kokkos_VERSION} at ${Kokkos_DIR}")
IF((${Kokkos_VERSION} VERSION_GREATER "4.3.99"))
MESSAGE(WARNING "Configuring with Kokkos ${Kokkos_VERSION} which is newer than the expected develop branch - version check may need update")
ENDIF()
ELSE()
MESSAGE(FATAL_ERROR "Kokkos Kernels ${KokkosKernels_VERSION} requires Kokkos_VERSION 4.1.0, 4.2.0, 4.2.1 or 4.3.0")
MESSAGE(FATAL_ERROR "Kokkos Kernels ${KokkosKernels_VERSION} requires Kokkos_VERSION 4.1.0, 4.2.0, 4.2.1, 4.3.0, or 4.3.1")
ENDIF()
ENDIF()

Expand Down
46 changes: 44 additions & 2 deletions batched/dense/impl/KokkosBatched_Trsm_Team_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,48 @@ struct TeamTrsm<MemberType, Side::Right, Uplo::Upper, Trans::NoTranspose,
}
};

///
/// R/L/NT
///
/// B := (alpha*B) inv(tril(A))
/// A(n x n), B(m x n)

template <typename MemberType, typename ArgDiag>
struct TeamTrsm<MemberType, Side::Right, Uplo::Lower, Trans::NoTranspose,
                ArgDiag, Algo::Trsm::Unblocked> {
  /// Team-level entry point for the Right / Lower / NoTranspose case using the
  /// unblocked algorithm.
  ///
  /// The call is forwarded to the left/upper internal kernel. Both A and B are
  /// handed over with their extents and strides swapped, i.e. the internal
  /// kernel sees them as their transposes.
  ///
  /// \param member  team handle, forwarded unchanged
  /// \param alpha   scalar, forwarded unchanged
  /// \param A       matrix view; only its data pointer and strides are read
  /// \param B       matrix view; its extents, strides and data pointer are
  ///                forwarded to the internal kernel
  /// \return        the value returned by the internal kernel
  template <typename ScalarType, typename AViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
                                           const ScalarType alpha,
                                           const AViewType &A,
                                           const BViewType &B) {
    using InternalSolver = TeamTrsmInternalLeftUpper<Algo::Trsm::Unblocked>;

    // Extents of B in swapped order (second extent first).
    const auto b_extent_1 = B.extent(1);
    const auto b_extent_0 = B.extent(0);

    // Strides of A in swapped order.
    const auto a_stride_1 = A.stride_1();
    const auto a_stride_0 = A.stride_0();

    // Strides of B in swapped order.
    const auto b_stride_1 = B.stride_1();
    const auto b_stride_0 = B.stride_0();

    return InternalSolver::invoke(member, ArgDiag::use_unit_diag, b_extent_1,
                                  b_extent_0, alpha, A.data(), a_stride_1,
                                  a_stride_0, B.data(), b_stride_1,
                                  b_stride_0);
  }
};

template <typename MemberType, typename ArgDiag>
struct TeamTrsm<MemberType, Side::Right, Uplo::Lower, Trans::NoTranspose,
                ArgDiag, Algo::Trsm::Blocked> {
  /// Team-level entry point for the Right / Lower / NoTranspose case using the
  /// blocked algorithm.
  ///
  /// The call is forwarded to the left/upper internal kernel. Both A and B are
  /// handed over with their extents and strides swapped, i.e. the internal
  /// kernel sees them as their transposes.
  ///
  /// \param member  team handle, forwarded unchanged
  /// \param alpha   scalar, forwarded unchanged
  /// \param A       matrix view; only its data pointer and strides are read
  /// \param B       matrix view; its extents, strides and data pointer are
  ///                forwarded to the internal kernel
  /// \return        the value returned by the internal kernel
  template <typename ScalarType, typename AViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
                                           const ScalarType alpha,
                                           const AViewType &A,
                                           const BViewType &B) {
    using InternalSolver = TeamTrsmInternalLeftUpper<Algo::Trsm::Blocked>;

    // Extents of B in swapped order (second extent first).
    const auto b_extent_1 = B.extent(1);
    const auto b_extent_0 = B.extent(0);

    // Strides of A in swapped order.
    const auto a_stride_1 = A.stride_1();
    const auto a_stride_0 = A.stride_0();

    // Strides of B in swapped order.
    const auto b_stride_1 = B.stride_1();
    const auto b_stride_0 = B.stride_0();

    return InternalSolver::invoke(member, ArgDiag::use_unit_diag, b_extent_1,
                                  b_extent_0, alpha, A.data(), a_stride_1,
                                  a_stride_0, B.data(), b_stride_1,
                                  b_stride_0);
  }
};

///
/// R/U/T
///
/// B := (alpha*B) inv(triu(A)^T)
/// A(n x n), B(m x n)

template <typename MemberType, typename ArgDiag>
struct TeamTrsm<MemberType, Side::Right, Uplo::Upper, Trans::Transpose, ArgDiag,
Algo::Trsm::Unblocked> {
Expand All @@ -107,7 +149,7 @@ struct TeamTrsm<MemberType, Side::Right, Uplo::Upper, Trans::Transpose, ArgDiag,
const ScalarType alpha,
const AViewType &A,
const BViewType &B) {
return TeamTrsmInternalLeftLower<Algo::Trsm::Unblocked>::invoke(
return TeamTrsmInternalLeftUpper<Algo::Trsm::Unblocked>::invoke(
member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha,
A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_1(),
B.stride_0());
Expand All @@ -122,7 +164,7 @@ struct TeamTrsm<MemberType, Side::Right, Uplo::Upper, Trans::Transpose, ArgDiag,
const ScalarType alpha,
const AViewType &A,
const BViewType &B) {
return TeamTrsmInternalLeftLower<Algo::Trsm::Blocked>::invoke(
return TeamTrsmInternalLeftUpper<Algo::Trsm::Blocked>::invoke(
member, ArgDiag::use_unit_diag, B.extent(1), B.extent(0), alpha,
A.data(), A.stride_0(), A.stride_1(), B.data(), B.stride_1(),
B.stride_0());
Expand Down
6 changes: 5 additions & 1 deletion batched/dense/unit_test/Test_Batched_BatchedGemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,11 @@ void impl_test_batched_gemm(const int N, const int matAdim1, const int matAdim2,
ASSERT_EQ(batchedGemmHandleCublas.vecLen, 0);
#endif

#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)
// FIXME: temporary workaround to run this magma test only if cublas is not
// enabled; the design of the BatchedGemmHandle currently does not allow
// simultaneous testing in this way. See issue #2177.
#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && \
!defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS)
magma_queue_t magma_queue;
BatchedGemmHandle batchedGemmHandleMagma(magma_queue, GemmTplAlgos::MAGMA,
0, 0);
Expand Down
4 changes: 2 additions & 2 deletions blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_blas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ namespace Impl {
ETI_SPEC_AVAIL>::syr2(space, trans, uplo, alpha, X, Y, A); \
} else { \
if (A_is_ll) { \
HostBlas<std::complex<double>>::zher2( \
HostBlas<std::complex<double>>::her2( \
uplo[0], N, alpha, \
reinterpret_cast<const std::complex<double>*>(X.data()), one, \
reinterpret_cast<const std::complex<double>*>(Y.data()), one, \
Expand Down Expand Up @@ -220,7 +220,7 @@ namespace Impl {
ETI_SPEC_AVAIL>::syr2(space, trans, uplo, alpha, X, Y, A); \
} else { \
if (A_is_ll) { \
HostBlas<std::complex<float>>::cher2( \
HostBlas<std::complex<float>>::her2( \
uplo[0], N, alpha, \
reinterpret_cast<const std::complex<float>*>(X.data()), one, \
reinterpret_cast<const std::complex<float>*>(Y.data()), one, \
Expand Down
4 changes: 2 additions & 2 deletions blas/tpls/KokkosBlas2_syr_tpl_spec_decl_blas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ namespace Impl {
space, trans, uplo, alpha, X, A); \
} else { \
if (A_is_ll) { \
HostBlas<std::complex<double>>::zher<double>( \
HostBlas<std::complex<double>>::her<double>( \
uplo[0], N, alpha.real(), \
reinterpret_cast<const std::complex<double>*>(X.data()), one, \
reinterpret_cast<std::complex<double>*>(A.data()), LDA); \
Expand Down Expand Up @@ -188,7 +188,7 @@ namespace Impl {
space, trans, uplo, alpha, X, A); \
} else { \
if (A_is_ll && (alpha.imag() == 0.)) { \
HostBlas<std::complex<float>>::cher<float>( \
HostBlas<std::complex<float>>::her<float>( \
uplo[0], N, alpha.real(), \
reinterpret_cast<const std::complex<float>*>(X.data()), one, \
reinterpret_cast<std::complex<float>*>(A.data()), LDA); \
Expand Down
1 change: 1 addition & 0 deletions blas/tpls/KokkosBlas_Cuda_tpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
#include <Kokkos_Core.hpp>
#include <KokkosKernels_config.h>
#include <KokkosBlas_Cuda_tpl.hpp>
#include <KokkosBlas_Magma_tpl.hpp>
22 changes: 0 additions & 22 deletions blas/tpls/KokkosBlas_Cuda_tpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,26 +39,4 @@ CudaBlasSingleton& CudaBlasSingleton::singleton() {
} // namespace KokkosBlas
#endif // defined (KOKKOSKERNELS_ENABLE_TPL_CUBLAS)

#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)
#include <KokkosBlas_tpl_spec.hpp>

namespace KokkosBlas {
namespace Impl {

MagmaSingleton::MagmaSingleton() {
magma_int_t stat = magma_init();
if (stat != MAGMA_SUCCESS) Kokkos::abort("MAGMA initialization failed\n");

Kokkos::push_finalize_hook([&]() { magma_finalize(); });
}

MagmaSingleton& MagmaSingleton::singleton() {
static MagmaSingleton s;
return s;
}

} // namespace Impl
} // namespace KokkosBlas
#endif // defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)

#endif // KOKKOSBLAS_CUDA_TPL_HPP_
24 changes: 13 additions & 11 deletions blas/tpls/KokkosBlas_Host_tpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,10 @@ void F77_BLAS_MANGLE(dsyr, DSYR)(const char*, KK_INT*, const double*,

void F77_BLAS_MANGLE(cher, CHER)(const char*, KK_INT*, const float*,
const std::complex<float>*, KK_INT*,
std::complex<float>*, KK_INT*);
/* */ std::complex<float>*, KK_INT*);
void F77_BLAS_MANGLE(zher, ZHER)(const char*, KK_INT*, const double*,
const std::complex<double>*, KK_INT*,
std::complex<double>*, KK_INT*);
/* */ std::complex<double>*, KK_INT*);

///
/// Syr2
Expand All @@ -322,12 +322,12 @@ void F77_BLAS_MANGLE(cher2, CHER2)(const char*, KK_INT*,
const std::complex<float>*,
const std::complex<float>*, KK_INT*,
const std::complex<float>*, KK_INT*,
std::complex<float>*, KK_INT*);
/* */ std::complex<float>*, KK_INT*);
void F77_BLAS_MANGLE(zher2, ZHER2)(const char*, KK_INT*,
const std::complex<double>*,
const std::complex<double>*, KK_INT*,
const std::complex<double>*, KK_INT*,
std::complex<double>*, KK_INT*);
/* */ std::complex<double>*, KK_INT*);

///
/// Trsv
Expand Down Expand Up @@ -901,14 +901,14 @@ void HostBlas<std::complex<float> >::gerc(
}
template <>
template <>
void HostBlas<std::complex<float> >::cher<float>(
void HostBlas<std::complex<float> >::her<float>(
const char uplo, KK_INT n, const float alpha, const std::complex<float>* x,
KK_INT incx, std::complex<float>* a, KK_INT lda) {
F77_FUNC_CHER(&uplo, &n, &alpha, (const std::complex<float>*)x, &incx,
(std::complex<float>*)a, &lda);
}
template <>
void HostBlas<std::complex<float> >::cher2(
void HostBlas<std::complex<float> >::her2(
const char uplo, KK_INT n, const std::complex<float> alpha,
const std::complex<float>* x, KK_INT incx, const std::complex<float>* y,
KK_INT incy, std::complex<float>* a, KK_INT lda) {
Expand Down Expand Up @@ -1069,15 +1069,17 @@ void HostBlas<std::complex<double> >::gerc(
}
template <>
template <>
void HostBlas<std::complex<double> >::zher<double>(
const char uplo, KK_INT n, const double alpha,
const std::complex<double>* x, KK_INT incx, std::complex<double>* a,
KK_INT lda) {
void HostBlas<std::complex<double> >::her<double>(const char uplo, KK_INT n,
const double alpha,
const std::complex<double>* x,
KK_INT incx,
std::complex<double>* a,
KK_INT lda) {
F77_FUNC_ZHER(&uplo, &n, &alpha, (const std::complex<double>*)x, &incx,
(std::complex<double>*)a, &lda);
}
template <>
void HostBlas<std::complex<double> >::zher2(
void HostBlas<std::complex<double> >::her2(
const char uplo, KK_INT n, const std::complex<double> alpha,
const std::complex<double>* x, KK_INT incx, const std::complex<double>* y,
KK_INT incy, std::complex<double>* a, KK_INT lda) {
Expand Down
15 changes: 4 additions & 11 deletions blas/tpls/KokkosBlas_Host_tpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,11 @@ struct HostBlas {
KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda);

template <typename tAlpha>
static void cher(const char uplo, KK_INT n, const tAlpha alpha, const T *x,
KK_INT incx, T *a, KK_INT lda);

template <typename tAlpha>
static void zher(const char uplo, KK_INT n, const tAlpha alpha, const T *x,
KK_INT incx, T *a, KK_INT lda);

static void cher2(const char uplo, KK_INT n, const T alpha, const T *x,
KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda);
static void her(const char uplo, KK_INT n, const tAlpha alpha, const T *x,
KK_INT incx, T *a, KK_INT lda);

static void zher2(const char uplo, KK_INT n, const T alpha, const T *x,
KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda);
static void her2(const char uplo, KK_INT n, const T alpha, const T *x,
KK_INT incx, const T *y, KK_INT incy, T *a, KK_INT lda);

static void trsv(const char uplo, const char transa, const char diag,
KK_INT m, const T *a, KK_INT lda,
Expand Down
41 changes: 41 additions & 0 deletions blas/tpls/KokkosBlas_Magma_tpl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOSBLAS_MAGMA_TPL_HPP_
#define KOKKOSBLAS_MAGMA_TPL_HPP_

#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)
#include <KokkosBlas_magma.hpp>

namespace KokkosBlas {
namespace Impl {

// Initializes MAGMA and registers its shutdown with Kokkos.
// Aborts through Kokkos::abort when magma_init() does not report
// MAGMA_SUCCESS; otherwise queues magma_finalize() as a Kokkos finalize hook.
MagmaSingleton::MagmaSingleton() {
  const magma_int_t init_status = magma_init();
  if (init_status != MAGMA_SUCCESS) {
    Kokkos::abort("MAGMA initialization failed\n");
  }

  // The hook needs no state from this object, so the lambda captures nothing.
  Kokkos::push_finalize_hook([]() { magma_finalize(); });
}

// Returns the single MagmaSingleton instance, held as a function-local static
// so it is constructed on the first call.
MagmaSingleton& MagmaSingleton::singleton() {
  static MagmaSingleton instance;
  return instance;
}

} // namespace Impl
} // namespace KokkosBlas
#endif // defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)

#endif // KOKKOSBLAS_MAGMA_TPL_HPP_
Loading

0 comments on commit d1a91b8

Please sign in to comment.