Skip to content

Commit

Permalink
Merge branch 'master' into kmp5/experimental/upkeep_btas
Browse files Browse the repository at this point in the history
  • Loading branch information
kmp5VT committed Apr 2, 2024
2 parents 479001c + 0cc5e31 commit 666d490
Show file tree
Hide file tree
Showing 27 changed files with 662 additions and 309 deletions.
4 changes: 2 additions & 2 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
- Boost.Container: header-only
- Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing*
- Boost.Range: header-only, *only used for unit testing*
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 85eea7796651de1bcb4781b0081a352b32bf91d5 . If a usable BTAS installation is not found, TiledArray will download and compile
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 4e8f5233aa7881dccdfcc37ce07128833926d3c2 . If a usable BTAS installation is not found, TiledArray will download and compile
BTAS from source. *This is the recommended way to compile BTAS for all users*.
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 39de6cb4d262b3df1f67b3c04a37a935564ca657 .
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 8788aea9758bfe6479cc23d39e6c77b7528009db .
Only the MADworld runtime and the BLAS/LAPACK C API components of MADNESS are used by TiledArray.
If a usable MADNESS installation is not found, TiledArray will download and compile
MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
Expand Down
57 changes: 30 additions & 27 deletions external/boost.cmake
Original file line number Diff line number Diff line change
@@ -1,32 +1,35 @@
# Boost can be discovered by every (sub)package but only the top package can build it ...
# if we are the top package need to include the list of Boost components to be built
if("${CMAKE_PROJECT_NAME}" STREQUAL "${PROJECT_NAME}")
set(required_components
headers # TA, BTAS
algorithm # TA
container # TA, BTAS
iterator # TA, BTAS
random # TA, BTAS
tuple # TA
# Boost can be discovered by every (sub)package but only the top package can *build* it ...
# in either case must declare the components used by TA
set(required_components
headers
algorithm
container
iterator
random
tuple
)
if (BUILD_TESTING)
list(APPEND required_components
test
)
if (DEFINED Boost_REQUIRED_COMPONENTS)
list(APPEND Boost_REQUIRED_COMPONENTS
${required_components})
list(REMOVE_DUPLICATES Boost_REQUIRED_COMPONENTS)
else()
set(Boost_REQUIRED_COMPONENTS "${required_components}" CACHE STRING "Components of Boost to discovered or built")
endif()
set(optional_components
serialization # BTAS
endif()
if (DEFINED Boost_REQUIRED_COMPONENTS)
list(APPEND Boost_REQUIRED_COMPONENTS
${required_components})
list(REMOVE_DUPLICATES Boost_REQUIRED_COMPONENTS)
else()
set(Boost_REQUIRED_COMPONENTS "${required_components}" CACHE STRING "Components of Boost to discovered or built")
endif()
set(optional_components
serialization # BTAS
)
if (DEFINED Boost_OPTIONAL_COMPONENTS)
list(APPEND Boost_OPTIONAL_COMPONENTS
${optional_components}
)
if (DEFINED Boost_OPTIONAL_COMPONENTS)
list(APPEND Boost_OPTIONAL_COMPONENTS
${optional_components}
)
list(REMOVE_DUPLICATES Boost_OPTIONAL_COMPONENTS)
else()
set(Boost_OPTIONAL_COMPONENTS "${optional_components}" CACHE STRING "Optional components of Boost to discovered or built")
endif()
list(REMOVE_DUPLICATES Boost_OPTIONAL_COMPONENTS)
else()
set(Boost_OPTIONAL_COMPONENTS "${optional_components}" CACHE STRING "Optional components of Boost to discovered or built")
endif()

if (NOT DEFINED Boost_FETCH_IF_MISSING)
Expand Down
8 changes: 4 additions & 4 deletions external/versions.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# for each dependency track both current and previous id (the variable for the latter must contain PREVIOUS)
# to be able to auto-update them

set(TA_TRACKED_VGCMAKEKIT_TAG 9b541a5f3708a58dce59d00f3c47ac030ef4d8b4)
set(TA_TRACKED_VGCMAKEKIT_TAG d5c0a6f9ff6dc97cbb5132912733e1eb1cf73f1e)

# N.B. may need to update INSTALL.md manually with the CUDA-specific version
set(TA_TRACKED_EIGEN_VERSION 3.3.5)
Expand All @@ -11,13 +11,13 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7)
set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626)
set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496)

set(TA_TRACKED_MADNESS_TAG 39de6cb4d262b3df1f67b3c04a37a935564ca657)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 51c2728d664c096d0ea39d3b9cbf2895d8d99439)
set(TA_TRACKED_MADNESS_TAG 8788aea9758bfe6479cc23d39e6c77b7528009db)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 87715d98a244bff5cbff0bd2c644a8a00d882989)
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)

set(TA_TRACKED_BTAS_TAG ba34ccbb5b181f6ceebf34712dd07714cd80a4ba)
set(TA_TRACKED_BTAS_PREVIOUS_TAG bf0c376d5cdd6f668174b2a4c67b19634d1c0da7)
set(TA_TRACKED_BTAS_PREVIOUS_TAG b7b2ea7513b087e35c6f1b26184a3904ac1e6b14)

set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece)
set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83)
Expand Down
27 changes: 17 additions & 10 deletions src/TiledArray/block_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class BlockRange : public Range {
upper[d] = upper_bound_d;
// Check input dimensions
TA_ASSERT(lower[d] >= range.lobound(d));
TA_ASSERT(lower[d] < upper[d]);
TA_ASSERT(lower[d] <= upper[d]);
TA_ASSERT(upper[d] <= range.upbound(d));
extent[d] = upper[d] - lower[d];
TA_ASSERT(extent[d] ==
Expand Down Expand Up @@ -132,7 +132,7 @@ class BlockRange : public Range {
upper[d] = upper_bound_d;
// Check input dimensions
TA_ASSERT(lower[d] >= range.lobound(d));
TA_ASSERT(lower[d] < upper[d]);
TA_ASSERT(lower[d] <= upper[d]);
TA_ASSERT(upper[d] <= range.upbound(d));
extent[d] = upper[d] - lower[d];
TA_ASSERT(extent[d] ==
Expand Down Expand Up @@ -177,9 +177,10 @@ class BlockRange : public Range {
/// \param range the host Range
/// \param lower_bound A sequence of lower bounds for each dimension
/// \param upper_bound A sequence of upper bounds for each dimension
/// \note Zero-extent blocks along any mode are possible, i.e. `lower_bound[d] == upper_bound[d]` is supported
/// \throw TiledArray::Exception When the size of \p lower_bound is not
/// equal to that of \p upper_bound.
/// \throw TiledArray::Exception When `lower_bound[i] >= upper_bound[i]`
/// \throw TiledArray::Exception When `lower_bound[i] > upper_bound[i]`
// clang-format on
template <typename Index1, typename Index2,
typename = std::enable_if_t<detail::is_integral_range_v<Index1> &&
Expand All @@ -204,9 +205,10 @@ class BlockRange : public Range {
/// \param range the host Range
/// \param lower_bound An initializer list of lower bounds for each dimension
/// \param upper_bound An initializer list of upper bounds for each dimension
/// \note Zero-extent blocks along any mode are possible, i.e. `lower_bound[d] == upper_bound[d]` is supported
/// \throw TiledArray::Exception When the size of \p lower_bound is not
/// equal to that of \p upper_bound.
/// \throw TiledArray::Exception When `lower_bound[i] >= upper_bound[i]`
/// \throw TiledArray::Exception When `lower_bound[i] > upper_bound[i]`
// clang-format on
template <typename Index1, typename Index2,
typename = std::enable_if_t<std::is_integral_v<Index1> &&
Expand Down Expand Up @@ -247,7 +249,8 @@ class BlockRange : public Range {
/// \endcode
/// \tparam PairRange Type representing a range of generalized pairs (see TiledArray::detail::is_gpair_v )
/// \param bounds A range of {lower,upper} bounds for each dimension
/// \throw TiledArray::Exception When `bounds[i].lower>=bounds[i].upper` for any \c i .
/// \note Zero-extent blocks along any mode are possible, i.e. `bounds[d].lower == bounds[d].upper` is supported
/// \throw TiledArray::Exception When `bounds[i].lower>bounds[i].upper` for any \c i .
// clang-format on
template <typename PairRange,
typename = std::enable_if_t<detail::is_gpair_range_v<PairRange>>>
Expand All @@ -264,8 +267,9 @@ class BlockRange : public Range {
/// BlockRange br0(r, {std::make_pair(0,4), std::pair{1,6}, std::pair(2,8)});
/// \endcode
/// \tparam GPair a generalized pair of integral types
/// \param bound A range of {lower,upper} bounds for each dimension
/// \throw TiledArray::Exception When `bound[i].lower>=bound[i].upper` for any \c i .
/// \param bounds A range of {lower,upper} bounds for each dimension
/// \note Zero-extent blocks along any mode are possible, i.e. `bounds[d].lower == bounds[d].upper` is supported
/// \throw TiledArray::Exception When `bounds[i].lower>bounds[i].upper` for any \c i .
// clang-format on
template <typename GPair>
BlockRange(const Range& range, const std::initializer_list<GPair>& bounds,
Expand All @@ -290,8 +294,9 @@ class BlockRange : public Range {
/// BlockRange br0(r, {{0,4}, {1,6}, {2,8}});
/// \endcode
/// \tparam Index An integral type
/// \param bound A range of {lower,upper} bounds for each dimension
/// \throw TiledArray::Exception When `bound[i].lower>=bound[i].upper` for any \c i .
/// \param bounds A range of {lower,upper} bounds for each dimension
/// \note Zero-extent blocks along any mode are possible, i.e. `bounds[d].lower == bounds[d].upper` is supported
/// \throw TiledArray::Exception When `bounds[i].lower>bounds[i].upper` for any \c i .
// clang-format on
template <typename Index,
typename = std::enable_if_t<std::is_integral_v<Index>>>
Expand Down Expand Up @@ -354,6 +359,8 @@ class BlockRange : public Range {
/// \return The ordinal index in the
/// \throw TiledArray::Exception When \c index is not included in this range
ordinal_type ordinal(ordinal_type ord) const {
// ordinals are useless for zero-volume ranges
TA_ASSERT(volume() != 0);
// Check that ord is contained by this range.
TA_ASSERT(Range::includes_ordinal(ord));

Expand Down Expand Up @@ -414,7 +421,7 @@ class BlockRange : public Range {
template <typename Archive>
void serialize(Archive& ar) const {
Range::serialize(ar);
ar& block_offset_;
ar & block_offset_;
}
}; // BlockRange

Expand Down
26 changes: 16 additions & 10 deletions src/TiledArray/device/btas.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ ::btas::Tensor<T, Range, Storage> gemm(
gemm_helper.compute_matrix_sizes(m, n, k, left.range(), right.range());

// Get the leading dimension for left and right matrices.
const integer lda =
(gemm_helper.left_op() == TiledArray::math::blas::Op::NoTrans ? k : m);
const integer ldb =
(gemm_helper.right_op() == TiledArray::math::blas::Op::NoTrans ? n : k);
const integer lda = std::max(
integer{1},
(gemm_helper.left_op() == TiledArray::math::blas::Op::NoTrans ? k : m));
const integer ldb = std::max(
integer{1},
(gemm_helper.right_op() == TiledArray::math::blas::Op::NoTrans ? n : k));

T factor_t = T(factor);
T zero(0);
Expand Down Expand Up @@ -112,10 +114,11 @@ ::btas::Tensor<T, Range, Storage> gemm(

static_assert(::btas::boxrange_iteration_order<Range>::value ==
::btas::boxrange_iteration_order<Range>::row_major);
const integer ldc = std::max(integer{1}, n);
blas::gemm(blas::Layout::ColMajor, gemm_helper.right_op(),
gemm_helper.left_op(), n, m, k, factor_t,
device_data(right.storage()), ldb, device_data(left.storage()),
lda, zero, device_data(result.storage()), n, queue);
lda, zero, device_data(result.storage()), ldc, queue);

device::sync_madness_task_with(stream);
}
Expand Down Expand Up @@ -185,10 +188,12 @@ void gemm(::btas::Tensor<T, Range, Storage> &result,
gemm_helper.compute_matrix_sizes(m, n, k, left.range(), right.range());

// Get the leading dimension for left and right matrices.
const integer lda =
(gemm_helper.left_op() == TiledArray::math::blas::Op::NoTrans ? k : m);
const integer ldb =
(gemm_helper.right_op() == TiledArray::math::blas::Op::NoTrans ? n : k);
const integer lda = std::max(
integer{1},
(gemm_helper.left_op() == TiledArray::math::blas::Op::NoTrans ? k : m));
const integer ldb = std::max(
integer{1},
(gemm_helper.right_op() == TiledArray::math::blas::Op::NoTrans ? n : k));

auto &queue = blasqueue_for(result.range());
const auto stream = device::Stream(queue.device(), queue.stream());
Expand All @@ -207,10 +212,11 @@ void gemm(::btas::Tensor<T, Range, Storage> &result,

static_assert(::btas::boxrange_iteration_order<Range>::value ==
::btas::boxrange_iteration_order<Range>::row_major);
const integer ldc = std::max(integer{1}, n);
blas::gemm(blas::Layout::ColMajor, gemm_helper.right_op(),
gemm_helper.left_op(), n, m, k, factor_t,
device_data(right.storage()), ldb, device_data(left.storage()),
lda, one, device_data(result.storage()), n, queue);
lda, one, device_data(result.storage()), ldc, queue);
device::sync_madness_task_with(stream);
}
}
Expand Down
Loading

0 comments on commit 666d490

Please sign in to comment.