From 67fc871536dae40a3f364f6b549859f6eb11f758 Mon Sep 17 00:00:00 2001
From: Ruairi Moran
Date: Fri, 2 Aug 2024 18:48:03 +0100
Subject: [PATCH 1/3] added min and max methods and tests, updated changelog

---
 CHANGELOG.md       | 10 +++++++-
 include/tensor.cuh | 58 +++++++++++++++++++++++++++++++++++++++++++---
 test/testTensor.cu | 36 +++++++++++++++++++++++++++-
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88d0a63..3096100 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,15 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## v1.1.0 - 03-08-2024
+
+### Added
+
+- Implementation and test of methods `.max()` and `.min()` for any tensor.
+
@@ -21,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Using a function `numBlocks` instead of the macro `DIM2BLOCKS`
 - Using `TEMPLATE_WITH_TYPE_T` and `TEMPLATE_CONSTRAINT_REQUIRES_FPX` for the code to run on both C++17 and C++20
-
diff --git a/include/tensor.cuh b/include/tensor.cuh
index fb50e4a..a2698cb 100644
--- a/include/tensor.cuh
+++ b/include/tensor.cuh
@@ -377,6 +377,19 @@ public:
      */
     T sumAbs() const;
 
+    /**
+     * Maximum absolute value of all elements.
+     * Equivalent to the inf-norm, max(|x_i|) over all i.
+     * @return the maximum absolute value (same data type as the tensor)
+     */
+    T max() const;
+
+    /**
+     * Minimum absolute value of all elements, min(|x_i|) over all i.
+     * @return the minimum absolute value (same data type as the tensor)
+     */
+    T min() const;
+
     /**
      * Solves for the least squares solution of A \ b.
      * A is this tensor and b is the provided tensor.
@@ -405,7 +418,7 @@ public:
 
     DTensor &operator=(const DTensor &other);
 
-    T operator()(size_t i, size_t j = 0, size_t k = 0);
+    T operator()(size_t i, size_t j = 0, size_t k = 0) const;
 
     DTensor &operator*=(T scalar);
 
@@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
     return the_norm;
 }
 
-
 template<>
 inline float DTensor<float>::sumAbs() const {
     float sumAbsAllElements;
@@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
     return sumAbsAllElements;
 }
 
+template<>
+inline float DTensor<float>::max() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::max() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline float DTensor<float>::min() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::min() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
 template<typename T>
 inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
     if (size <= 0) return false;
@@ -772,7 +824,7 @@ inline DTensor<T> &DTensor<T>::operator-=(const DTensor<T> &rhs)
 }
 
 template<typename T>
-inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
+inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
     T hostDst;
     size_t offset = i + m_numRows * (j + m_numCols * k);
     gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));
diff --git a/test/testTensor.cu b/test/testTensor.cu
index 3d1a7b4..df81014 100644
--- a/test/testTensor.cu
+++ b/test/testTensor.cu
@@ -352,11 +352,45 @@ void tensorSumAbs() {
     EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB
 }
 
-TEST_F(TensorTest, tensorNormFtensorSumAbs) {
+TEST_F(TensorTest, tensorSumAbs) {
     tensorSumAbs<float>();
     tensorSumAbs<double>();
 }
 
+/* ---------------------------------------
+ * Tensor: max of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMax() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.max();
+    EXPECT_EQ(27, m);
+}
+
+TEST_F(TensorTest, tensorMax) {
+    tensorMax<float>();
+    tensorMax<double>();
+}
+
+/* ---------------------------------------
+ * Tensor: min of all elements
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T
+void tensorMin() {
+    std::vector<T> data = TENSOR_DATA_234AMB;
+    DTensor<T> tenz(data, 2, 3, 4);
+    T m = tenz.min();
+    EXPECT_EQ(0, m);
+}
+
+TEST_F(TensorTest, tensorMin) {
+    tensorMin<float>();
+    tensorMin<double>();
+}
+
 /* ---------------------------------------
  * Tensor operator() to access element
  * e.g., t(2, 3, 4)

From 6c370171a4b5716e57f73215f60337c7298580b9 Mon Sep 17 00:00:00 2001
From: Ruairi Moran
Date: Fri, 2 Aug 2024 19:14:49 +0100
Subject: [PATCH 2/3] rename

---
 CHANGELOG.md       |  2 +-
 include/tensor.cuh | 16 ++++++++--------
 test/testTensor.cu |  8 ++++----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3096100..2ba5842 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Implementation and test of methods `.max()` and `.min()` for any tensor.
+- Implementation and test of methods `.maxAbs()` and `.minAbs()` for any tensor.
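
For illustration, a minimal usage sketch of the new methods under the names introduced by this rename, `.maxAbs()` and `.minAbs()`. The `DTensor<T>` host-data constructor is assumed from testTensor.cu above; the sample values and the `main` wrapper are hypothetical, not part of the patches:

    #include <vector>
    #include "tensor.cuh"

    int main() {
        // Build a 2 x 3 x 1 tensor from host data (constructor as used in testTensor.cu).
        std::vector<float> data = {-4.f, 1.f, 0.5f, -2.f, 3.f, -1.f};
        DTensor<float> tenz(data, 2, 3, 1);
        float hi = tenz.maxAbs();  // 4.0f: largest |x_i| over all elements (the inf-norm)
        float lo = tenz.minAbs();  // 0.5f: smallest |x_i| over all elements
        return (hi == 4.f && lo == 0.5f) ? 0 : 1;
    }

Both methods reduce over every element of the tensor (all rows, columns, and matrices), since the underlying cuBLAS amax/amin routines are called on the flat device buffer; note that those routines return a 1-based index, hence the `idx - 1` offset in the implementations above.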