Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into gaudel/feature/tot_…
Browse files Browse the repository at this point in the history
…inner_tensor_ops

# Conflicts:
#	src/TiledArray/tensor/tensor.h
  • Loading branch information
bimalgaudel committed Apr 5, 2024
2 parents c9d16a9 + e26379a commit 227fd04
Show file tree
Hide file tree
Showing 50 changed files with 1,038 additions and 551 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ add_feature_info(TA_RANGEV3 TA_RANGEV3 "Range-V3 ranges library")
option(TA_TTG "Enable search/build of TTG library" OFF)
add_feature_info(TA_TTG TA_TTG "TTG library")

option(IntelMKL_FAIR_DISPATCH "Enable fair dispatch in Intel MKL" OFF)
add_feature_info(IntelMKL_FAIR_DISPATCH IntelMKL_FAIR_DISPATCH "Use of fair dispatch in Intel MKL")

# Enable shared library support options
redefaultable_option(TA_ASSUMES_ASLR_DISABLED "TiledArray assumes the Address Space Layout Randomization (ASLR) to be disabled" OFF)
add_feature_info(ASSUMES_ASLR_DISABLED TA_ASSUMES_ASLR_DISABLED
Expand Down
1 change: 1 addition & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ support may be added.
* `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile host memory allocations used by TA::Tensor. This causes the use of Umpire for host memory allocation. This also enables additional tracing facilities provided by Umpire; these can be controlled via [environment variable `UMPIRE_LOG_LEVEL`](https://umpire.readthedocs.io/en/develop/sphinx/features/logging_and_replay.html), but note that the default is to log Umpire info into a file rather than stdout.
* `TA_TENSOR_MEM_TRACE` -- Set to `ON` to *trace* host memory allocations used by TA::Tensor. This turns on support for tracking memory used by `Tensor` objects; such tracking must be enabled programmatically. This can greatly increase memory consumption by the application and is only intended for expert developers troubleshooting memory use by TiledArray.
* `TA_UT_CTEST_TIMEOUT` -- The value (in seconds) of the timeout to use for running the TA unit tests via CTest when building the `check`/`check-tiledarray` targets. The default timeout is 1500s.
* `IntelMKL_FAIR_DISPATCH` -- If you want to use the Intel MKL library on non-Intel (e.g., AMD) CPUs, set to `ON` to use fair kernel dispatch. [Default=OFF].

# Build TiledArray

Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ endif()
# Add Subdirectories
add_subdirectory (cc)
add_subdirectory (device)
add_subdirectory (dgemm)
add_subdirectory (gemm)
add_subdirectory (demo)
add_subdirectory (scalapack)
add_subdirectory (fock)
Expand Down
168 changes: 0 additions & 168 deletions examples/dgemm/ta_dense_new_tile.cpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# Create example executable

foreach(_exec ta_blas ta_eigen ta_band ta_dense ta_sparse ta_dense_nonuniform
ta_dense_asymm ta_sparse_grow ta_dense_new_tile
ta_dense_asymm ta_sparse_grow
ta_cc_abcd)

# Add executable
Expand Down
4 changes: 2 additions & 2 deletions examples/dgemm/README → examples/gemm/README
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ Applications usage:

ta_band matrix_size block_size band_width [repetitions]

blas matrix_size [repetitions]
ta_blas matrix_size [repetitions]

eigen matrix_size [repetitions]
ta_eigen matrix_size [repetitions]

Argument definitions:

Expand Down
File renamed without changes.
File renamed without changes.
32 changes: 14 additions & 18 deletions examples/dgemm/ta_band.cpp → examples/gemm/ta_band.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*
*/

#include <TiledArray/util/time.h>
#include <tiledarray.h>
#include <iostream>

Expand Down Expand Up @@ -104,38 +105,33 @@ int main(int argc, char** argv) {
for (; j < j_end; ++j, ++ij) shape_tensor[ij] = 1.0;
}

TiledArray::SparseShape<float> shape(shape_tensor, trange);
TiledArray::SparseShape<float> shape(
shape_tensor, trange, /* per_element_norms_already = */ true);

// Construct and initialize arrays
TiledArray::TSpArrayD a(world, trange, shape);
TiledArray::TSpArrayD b(world, trange, shape);
TiledArray::TSpArrayD c(world, trange);
TiledArray::TSpArrayD c;
a.fill(1.0);
b.fill(1.0);

// Start clock
world.gop.fence();
const double wall_time_start = madness::wall_time();

// Do matrix multiplication
world.gop.fence();
for (int i = 0; i < repeat; ++i) {
c("m,n") = a("m,k") * b("k,n");
world.gop.fence();
TA_RECORD_DURATION(c("m,n") = a("m,k") * b("k,n"); world.gop.fence();)
if (world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n";
}

// Stop clock
const double wall_time_stop = madness::wall_time();

// Print results
const long flop = 2.0 * c("m,n").sum().get();
const auto gflops_per_call = 2.0 * c("m,n").sum().get() / 1.e9;
if (world.rank() == 0) {
std::cout << "Average wall time = "
<< (wall_time_stop - wall_time_start) / double(repeat)
<< "\nAverage GFLOPS = "
<< double(repeat) * double(flop) /
(wall_time_stop - wall_time_start) / 1.0e9
<< "\n";
auto durations = TiledArray::duration_statistics();
std::cout << "Average wall time = " << durations.mean
<< " s\nAverage GFLOPS = "
<< gflops_per_call * durations.mean_reciprocal
<< "\nMedian wall time = " << durations.median
<< " s\nMedian GFLOPS = "
<< gflops_per_call / durations.median << "\n";
}

} catch (TiledArray::Exception& e) {
Expand Down
31 changes: 13 additions & 18 deletions examples/dgemm/ta_blas.cpp → examples/gemm/ta_blas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
*
*/

#include <TiledArray/util/time.h>
#include <tiledarray.h>
#include <iostream>

int main(int argc, char** argv) {
// Get command line arguments
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " matrix_size [repetitions]\n";
std::cout << "Usage: " << argv[0] << " matrix_size [repetitions = 5]\n";
return 0;
}
const long matrix_size = atol(argv[1]);
Expand Down Expand Up @@ -66,31 +67,25 @@ int main(int argc, char** argv) {
const integer m = matrix_size, n = matrix_size, k = matrix_size;
const integer lda = matrix_size, ldb = matrix_size, ldc = matrix_size;

// Start clock
const double wall_time_start = madness::wall_time();

// Do matrix multiplcation
// Note: If TiledArray has not been configured with blas, this will be an
// eigen call.
// Do matrix multiplication
for (int i = 0; i < repeat; ++i) {
gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
TA_RECORD_DURATION(
gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc));
}

// Stop clock
const double wall_time_stop = madness::wall_time();
auto durations = TiledArray::duration_statistics();

// Cleanup memory
free(a);
free(b);
free(c);

std::cout << "Average wall time = "
<< (wall_time_stop - wall_time_start) / double(repeat)
<< "\nAverage GFLOPS = "
<< double(repeat) * 2.0 *
double(matrix_size * matrix_size * matrix_size) /
(wall_time_stop - wall_time_start) / 1.0e9
<< "\n";
const auto gflops_per_call =
2.0 * double(matrix_size * matrix_size * matrix_size) / 1.0e9;
std::cout << "Average wall time = " << durations.mean << "\nAverage GFLOPS = "
<< gflops_per_call * durations.mean_reciprocal
<< "\nMedian wall time = " << durations.median
<< "\nMedian GFLOPS = " << gflops_per_call / durations.median
<< std::endl;

return 0;
}
Loading

0 comments on commit 227fd04

Please sign in to comment.