Skip to content

Commit

Permalink
Merge pull request #39 from GPUEngineering/f/min-and-max
Browse files Browse the repository at this point in the history
Min/max methods
  • Loading branch information
ruairimoran authored Aug 2, 2024
2 parents 39d3524 + fb831fb commit 8749677
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [orin, a40]
runner: [a40, orin]
steps:
- name: checkout code
uses: actions/checkout@v4
Expand Down
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


<!-- ---------------------
v1.1.0
--------------------- -->
## v1.1.0 - 03-08-2024

### Added

- Implementation and test of methods `.maxAbs()` and `.minAbs()` for any tensor.

<!-- ---------------------
v1.0.0
--------------------- -->
Expand All @@ -21,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Using a function `numBlocks` instead of the macro `DIM2BLOCKS`
- Using `TEMPLATE_WITH_TYPE_T` and `TEMPLATE_CONSTRAINT_REQUIRES_FPX` for the code to run on both C++17 and C++20


<!-- ---------------------
v0.1.0
--------------------- -->
Expand Down
58 changes: 55 additions & 3 deletions include/tensor.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,19 @@ public:
*/
T sumAbs() const;

/**
* Maximum absolute value over all elements of the tensor,
* max(|x_i|) for all i (the infinity norm of the vectorised data).
* @return the maximum absolute value, in the tensor's data type
*/
T maxAbs() const;

/**
* Minimum absolute value over all elements of the tensor,
* min(|x_i|) for all i.
* @return the minimum absolute value, in the tensor's data type
*/
T minAbs() const;

/**
* Solves for the least squares solution of A \ b.
* A is this tensor and b is the provided tensor.
Expand Down Expand Up @@ -405,7 +418,7 @@ public:

DTensor &operator=(const DTensor &other);

T operator()(size_t i, size_t j = 0, size_t k = 0);
T operator()(size_t i, size_t j = 0, size_t k = 0) const;

DTensor &operator*=(T scalar);

Expand Down Expand Up @@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
return the_norm;
}


template<>
inline float DTensor<float>::sumAbs() const {
float sumAbsAllElements;
Expand All @@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
return sumAbsAllElements;
}

/**
 * Maximum absolute value of all elements (infinity norm of the vectorised
 * data), computed on the device with cuBLAS amax.
 * @return max(|x_i|) over all elements; 0 for an empty tensor
 */
template<>
inline float DTensor<float>::maxAbs() const {
    // NOTE(review): cuBLAS takes an int element count; tensors with more than
    // INT_MAX elements would overflow — confirm upstream size limits.
    const size_t n = m_numRows * m_numCols * m_numMats;
    // Empty tensor guard: cuBLAS sets the result index to 0 when n == 0,
    // and m_d_data + idx - 1 would then read out of bounds.
    if (n == 0) return 0.0f;
    int idx;  // 1-based (Fortran-style) index of the element with largest |value|
    gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), static_cast<int>(n), m_d_data, 1,
                           &idx));
    float hostDst;
    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
    // amax selects by |x| but the copied element keeps its sign; strip it.
    return std::fabs(hostDst);
}

/**
 * Maximum absolute value of all elements (infinity norm of the vectorised
 * data), computed on the device with cuBLAS amax.
 * @return max(|x_i|) over all elements; 0 for an empty tensor
 */
template<>
inline double DTensor<double>::maxAbs() const {
    // NOTE(review): cuBLAS takes an int element count; tensors with more than
    // INT_MAX elements would overflow — confirm upstream size limits.
    const size_t n = m_numRows * m_numCols * m_numMats;
    // Empty tensor guard: cuBLAS sets the result index to 0 when n == 0,
    // and m_d_data + idx - 1 would then read out of bounds.
    if (n == 0) return 0.0;
    int idx;  // 1-based (Fortran-style) index of the element with largest |value|
    gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), static_cast<int>(n), m_d_data, 1,
                           &idx));
    double hostDst;
    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
    // amax selects by |x| but the copied element keeps its sign; strip it.
    return std::fabs(hostDst);
}

/**
 * Minimum absolute value of all elements, computed on the device with
 * cuBLAS amin.
 * @return min(|x_i|) over all elements; 0 for an empty tensor
 */
template<>
inline float DTensor<float>::minAbs() const {
    // NOTE(review): cuBLAS takes an int element count; tensors with more than
    // INT_MAX elements would overflow — confirm upstream size limits.
    const size_t n = m_numRows * m_numCols * m_numMats;
    // Empty tensor guard: cuBLAS sets the result index to 0 when n == 0,
    // and m_d_data + idx - 1 would then read out of bounds.
    if (n == 0) return 0.0f;
    int idx;  // 1-based (Fortran-style) index of the element with smallest |value|
    gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), static_cast<int>(n), m_d_data, 1,
                           &idx));
    float hostDst;
    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
    // amin selects by |x| but the copied element keeps its sign; strip it.
    return std::fabs(hostDst);
}

/**
 * Minimum absolute value of all elements, computed on the device with
 * cuBLAS amin.
 * @return min(|x_i|) over all elements; 0 for an empty tensor
 */
template<>
inline double DTensor<double>::minAbs() const {
    // NOTE(review): cuBLAS takes an int element count; tensors with more than
    // INT_MAX elements would overflow — confirm upstream size limits.
    const size_t n = m_numRows * m_numCols * m_numMats;
    // Empty tensor guard: cuBLAS sets the result index to 0 when n == 0,
    // and m_d_data + idx - 1 would then read out of bounds.
    if (n == 0) return 0.0;
    int idx;  // 1-based (Fortran-style) index of the element with smallest |value|
    gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), static_cast<int>(n), m_d_data, 1,
                           &idx));
    double hostDst;
    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
    // amin selects by |x| but the copied element keeps its sign; strip it.
    return std::fabs(hostDst);
}

template<typename T>
inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
if (size <= 0) return false;
Expand Down Expand Up @@ -772,7 +824,7 @@ inline DTensor<double> &DTensor<double>::operator-=(const DTensor<double> &rhs)
}

template<typename T>
inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
T hostDst;
size_t offset = i + m_numRows * (j + m_numCols * k);
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));
Expand Down
36 changes: 35 additions & 1 deletion test/testTensor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,45 @@ void tensorSumAbs() {
EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB
}

TEST_F(TensorTest, tensorNormFtensorSumAbs) {
TEST_F(TensorTest, tensorSumAbs) {
tensorSumAbs<float>();
tensorSumAbs<double>();
}

/* ---------------------------------------
* Tensor: max of absolute of all elements
* --------------------------------------- */

TEMPLATE_WITH_TYPE_T
void tensorMax() {
    // The 2x3x4 fixture's largest |element| is 27 (known fixture value).
    std::vector<T> hostData = TENSOR_DATA_234AMB;
    DTensor<T> tensor(hostData, 2, 3, 4);
    EXPECT_EQ(27, tensor.maxAbs());
}

/* Runs the max-abs check for both supported element types. */
TEST_F(TensorTest, tensorMax) {
tensorMax<float>();
tensorMax<double>();
}

/* ---------------------------------------
* Tensor: min of absolute of all elements
* --------------------------------------- */

TEMPLATE_WITH_TYPE_T
void tensorMin() {
    // The 2x3x4 fixture contains a zero element, so min |x_i| is 0.
    std::vector<T> hostData = TENSOR_DATA_234AMB;
    DTensor<T> tensor(hostData, 2, 3, 4);
    EXPECT_EQ(0, tensor.minAbs());
}

/* Runs the min-abs check for both supported element types. */
TEST_F(TensorTest, tensorMin) {
tensorMin<float>();
tensorMin<double>();
}

/* ---------------------------------------
* Tensor operator() to access element
* e.g., t(2, 3, 4)
Expand Down

0 comments on commit 8749677

Please sign in to comment.