Commit 981b618
Renaming template parameter and variable, avoid memory check if using buffer only

Signed-off-by: nscipione <[email protected]>
s-Nick committed Mar 21, 2024
1 parent 4531e3d commit 981b618
Showing 3 changed files with 52 additions and 31 deletions.
9 changes: 5 additions & 4 deletions include/operations/blas1_trees.h
@@ -208,7 +208,8 @@ struct AssignReduction {
  * function below.
  *
  */
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
 struct WGAtomicReduction {
   using value_t = typename lhs_t::value_t;
   using index_t = typename rhs_t::index_t;
@@ -304,11 +305,11 @@ inline AssignReduction<operator_t, lhs_t, rhs_t> make_assign_reduction(
       lhs_, rhs_, local_num_thread_, global_num_thread_);
 }
 
-template <typename operator_t, bool managed_mem = false, typename lhs_t,
+template <typename operator_t, bool usmManagedMem = false, typename lhs_t,
           typename rhs_t>
-inline WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>
+inline WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>
 make_wg_atomic_reduction(lhs_t &lhs_, rhs_t &rhs_) {
-  return WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>(lhs_, rhs_);
+  return WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>(lhs_, rhs_);
 }
 
 template <bool is_max, bool is_step0, typename lhs_t, typename rhs_t>
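Aside: because `usmManagedMem` is a non-type template parameter (defaulting to `false` in `make_wg_atomic_reduction`), the managed-memory handling is resolved at compile time and buffer-only call sites pay nothing for it. A minimal sketch of that `if constexpr` dispatch pattern, using placeholder types and logic rather than portBLAS code:

```cpp
#include <iostream>

// Sketch of compile-time dispatch on a bool template parameter, the same
// mechanism the renamed usmManagedMem flag uses in WGAtomicReduction.
// The struct, method, and both branch bodies are illustrative placeholders.
template <bool usmManagedMem>
struct reduction_sketch {
  static void finish(float partial, float* result) {
    if constexpr (!usmManagedMem) {
      // Device-memory path: in the real kernel this is a device-scope
      // atomic accumulation into the result.
      *result += partial;
    } else {
      // Managed (malloc_shared) path: the real kernel avoids the atomic
      // here; this stand-in just writes the partial value.
      *result = partial;
    }
  }
};

int main() {
  float out = 0.0f;
  reduction_sketch</*usmManagedMem=*/false>::finish(3.0f, &out);
  std::cout << out << '\n';  // prints 3
}
```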
30 changes: 21 additions & 9 deletions src/interface/blas1/backend/amd_gpu.hpp
@@ -43,13 +43,17 @@ typename sb_handle_t::event_t _asum(
  *address space causing a big performance degradation, but making the kernel
  *behaves correctly also with managed memory (aka malloc_shared allocation).
  **/
-  bool managed_mem{false};
+#ifdef SB_ENABLE_USM
+  bool usm_managed_mem{false};
   if constexpr (std::is_pointer_v<decltype(_rs)>) {
-    managed_mem =
+    usm_managed_mem =
         sycl::usm::alloc::shared ==
         sycl::get_pointer_type(_rs, sb_handle.get_queue().get_context());
   }
-  if (managed_mem) {
+#else
+  constexpr bool usm_managed_mem{false};
+#endif
+  if (usm_managed_mem) {
     if (_N < (1 << 18)) {
       constexpr index_t localSize = 1024;
       const index_t number_WG = (_N + localSize - 1) / localSize;
@@ -133,13 +137,17 @@ typename sb_handle_t::event_t _nrm2(
 /**
  * Read comment in _asum above.
  **/
-  bool managed_mem{false};
+#ifdef SB_ENABLE_USM
+  bool usm_managed_mem{false};
   if constexpr (std::is_pointer_v<decltype(_rs)>) {
-    managed_mem =
+    usm_managed_mem =
         sycl::usm::alloc::shared ==
         sycl::get_pointer_type(_rs, sb_handle.get_queue().get_context());
   }
-  if (managed_mem) {
+#else
+  constexpr bool usm_managed_mem{false};
+#endif
+  if (usm_managed_mem) {
     if (_N < (1 << 18)) {
       constexpr index_t localSize = 1024;
       const index_t number_WG = (_N + localSize - 1) / localSize;
@@ -179,13 +187,17 @@ typename sb_handle_t::event_t _dot(
 /**
  * Read comment in _asum above.
  **/
-  bool managed_mem{false};
+#ifdef SB_ENABLE_USM
+  bool usm_managed_mem{false};
   if constexpr (std::is_pointer_v<decltype(_rs)>) {
-    managed_mem =
+    usm_managed_mem =
         sycl::usm::alloc::shared ==
         sycl::get_pointer_type(_rs, sb_handle.get_queue().get_context());
   }
-  if (managed_mem) {
+#else
+  constexpr bool usm_managed_mem{false};
+#endif
+  if (usm_managed_mem) {
     if (_N < (1 << 18)) {
       constexpr index_t localSize = 1024;
       const index_t number_WG = (_N + localSize - 1) / localSize;
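All three entry points now compile the pointer-type query only when `SB_ENABLE_USM` is defined; in a buffer-only build `usm_managed_mem` collapses to `constexpr false` and the managed-memory branch is discarded at compile time, which is the "avoid memory check if using buffer only" half of this commit. A self-contained sketch of the detection idiom, assuming a SYCL 2020 implementation (the helper name `is_usm_shared` is made up for illustration):

```cpp
#include <sycl/sycl.hpp>

#include <iostream>

// Returns true when ptr was allocated with sycl::malloc_shared, i.e. it is
// USM managed memory. SB_ENABLE_USM mirrors the portBLAS build option; in a
// buffer-only build the check compiles down to a constant false.
template <typename T>
bool is_usm_shared(const T* ptr, const sycl::queue& q) {
#ifdef SB_ENABLE_USM
  return sycl::get_pointer_type(ptr, q.get_context()) ==
         sycl::usm::alloc::shared;
#else
  (void)ptr;
  (void)q;
  return false;
#endif
}

int main() {
  sycl::queue q;
  float* shared_p = sycl::malloc_shared<float>(16, q);
  float* device_p = sycl::malloc_device<float>(16, q);
  std::cout << is_usm_shared(shared_p, q) << ' '    // 1 (with SB_ENABLE_USM)
            << is_usm_shared(device_p, q) << '\n';  // 0
  sycl::free(shared_p, q);
  sycl::free(device_p, q);
}
```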
44 changes: 26 additions & 18 deletions src/operations/blas1/WGAtomicReduction.hpp
@@ -35,29 +35,34 @@ namespace blas {
  * and atomics operation to combine the results.
  *
  * */
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
-WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::WGAtomicReduction(
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
+WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::WGAtomicReduction(
     lhs_t& _l, rhs_t& _r)
     : lhs_(_l), rhs_(_r){};
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
 PORTBLAS_INLINE
-    typename WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::index_t
-    WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::get_size() const {
+    typename WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::index_t
+    WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::get_size()
+        const {
   return rhs_.get_size();
 }
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
 PORTBLAS_INLINE bool
-WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::valid_thread(
+WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::valid_thread(
     cl::sycl::nd_item<1> ndItem) const {
   return true;
 }
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
 PORTBLAS_INLINE
-    typename WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::value_t
-    WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::eval(
+    typename WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::value_t
+    WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::eval(
     cl::sycl::nd_item<1> ndItem) {
   auto atomic_res =
       cl::sycl::atomic_ref<value_t, cl::sycl::memory_order::relaxed,
@@ -85,11 +90,12 @@ PORTBLAS_INLINE
   return {};
 }
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
 template <typename sharedT>
 PORTBLAS_INLINE
-    typename WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::value_t
-    WGAtomicReduction<operator_t, managed_mem, lhs_t, rhs_t>::eval(
+    typename WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::value_t
+    WGAtomicReduction<operator_t, usmManagedMem, lhs_t, rhs_t>::eval(
         sharedT scratch, cl::sycl::nd_item<1> ndItem) {
   const auto size = get_size();
   const int lid = static_cast<int>(ndItem.get_global_linear_id());
@@ -120,7 +126,7 @@ PORTBLAS_INLINE
                              cl::sycl::plus<value_t>());
   }
   if (ndItem.get_local_id()[0] == 0) {
-    if constexpr (!managed_mem) {
+    if constexpr (!usmManagedMem) {
       auto atomic_res =
           cl::sycl::atomic_ref<value_t, cl::sycl::memory_order::relaxed,
                                cl::sycl::memory_scope::device,
@@ -140,15 +146,17 @@ PORTBLAS_INLINE
   return {};
 }
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
-PORTBLAS_INLINE void WGAtomicReduction<operator_t, managed_mem, lhs_t,
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
+PORTBLAS_INLINE void WGAtomicReduction<operator_t, usmManagedMem, lhs_t,
                                        rhs_t>::bind(cl::sycl::handler& h) {
   lhs_.bind(h);
   rhs_.bind(h);
 }
 
-template <typename operator_t, bool managed_mem, typename lhs_t, typename rhs_t>
-PORTBLAS_INLINE void WGAtomicReduction<operator_t, managed_mem, lhs_t,
+template <typename operator_t, bool usmManagedMem, typename lhs_t,
+          typename rhs_t>
+PORTBLAS_INLINE void WGAtomicReduction<operator_t, usmManagedMem, lhs_t,
                                        rhs_t>::adjust_access_displacement() {
   lhs_.adjust_access_displacement();
   rhs_.adjust_access_displacement();
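For reference, the `if constexpr (!usmManagedMem)` branch guarded above is the usual one-atomic-per-work-group finish: each work-group reduces its chunk locally, then work-item 0 folds the group's partial sum into the single result through a device-scope `atomic_ref`. A simplified standalone kernel in the same spirit, assuming SYCL 2020 (sizes and names are illustrative; this is not the library's kernel):

```cpp
#include <sycl/sycl.hpp>

#include <iostream>
#include <vector>

int main() {
  constexpr size_t n = 1 << 16;  // elements to sum
  constexpr size_t local = 256;  // work-group size
  sycl::queue q;
  std::vector<float> host(n, 1.0f);
  sycl::buffer<float> in(host.data(), sycl::range<1>(n));
  sycl::buffer<float> out(sycl::range<1>(1));

  // Zero the accumulator before the reduction kernel runs.
  q.submit([&](sycl::handler& h) {
    sycl::accessor o(out, h, sycl::write_only);
    h.single_task([=] { o[0] = 0.0f; });
  });

  q.submit([&](sycl::handler& h) {
    sycl::accessor i(in, h, sycl::read_only);
    sycl::accessor o(out, h, sycl::read_write);
    h.parallel_for(
        sycl::nd_range<1>{sycl::range<1>(n), sycl::range<1>(local)},
        [=](sycl::nd_item<1> it) {
          // Group-local reduction of one element per work-item.
          float partial = sycl::reduce_over_group(
              it.get_group(), i[it.get_global_id(0)], sycl::plus<float>());
          // One device-scope atomic per work-group, as in the
          // non-managed-memory path of WGAtomicReduction::eval.
          if (it.get_local_id(0) == 0) {
            sycl::atomic_ref<float, sycl::memory_order::relaxed,
                             sycl::memory_scope::device,
                             sycl::access::address_space::global_space>
                acc(o[0]);
            acc.fetch_add(partial);
          }
        });
  });

  sycl::host_accessor result(out, sycl::read_only);
  std::cout << result[0] << '\n';  // 65536
}
```

On managed (malloc_shared) allocations the same `atomic_ref` may be lowered to a slower, generic address space, the situation described in the `_asum` comment, which is why the kernel takes a different completion path when `usmManagedMem` is true.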
