Realize 'rhs' to an Eigen::Vector if it gets used multiple times.

This avoids repeated evaluation of template expressions in the multiple methods. Rather, 'rhs' is evaluated once and stored in a buffer, to be re-used multiple times in each method. For single-use 'rhs', this doesn't matter and the template expression can be forwarded to an Eigen operation.
LTLA · Jul 3, 2024 · 55404f4 · 55404f4
1 parent e340bc3
commit 55404f4
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 47 deletions.
diff --git a/include/irlba/parallel.hpp b/include/irlba/parallel.hpp
@@ -37,11 +37,13 @@ namespace irlba {
  * Should support a read-only `[]` operator.
  * @tparam PointerArray_ Array class containing integer values for the pointers to the row/column boundaries.
  * Should support a read-only `[]` operator.
+ * @tparam EigenVector_ A floating-point `Eigen::Vector` class.
  */
 template<
     class ValueArray_ = std::vector<double>, 
     class IndexArray_ = std::vector<int>, 
-    class PointerArray_ = std::vector<size_t> 
+    class PointerArray_ = std::vector<size_t>,
+    class EigenVector_ = Eigen::VectorXd
 >
 class ParallelSparseMatrix {
 public:
@@ -240,8 +242,7 @@ class ParallelSparseMatrix {
     }
 
 private:
-    template<class Right_, class EigenVector_>
-    void indirect_multiply(const Right_& rhs, EigenVector_& output) const {
+    void indirect_multiply(const EigenVector_& rhs, EigenVector_& output) const {
         output.setZero();
 
         if (my_nthreads == 1) {
@@ -285,8 +286,7 @@ class ParallelSparseMatrix {
         return;
     }
 
-    template<class Right_, class EigenVector_>
-    void direct_multiply(const Right_& rhs, EigenVector_& output) const {
+    void direct_multiply(const EigenVector_& rhs, EigenVector_& output) const {
         if (my_nthreads == 1) {
             for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                 output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
@@ -318,8 +318,8 @@ class ParallelSparseMatrix {
         return;
     }
 
-    template<typename Scalar_, class Right_>
-    Scalar_ column_dot_product(size_t c, const Right_& rhs) const {
+    template<typename Scalar_>
+    Scalar_ column_dot_product(size_t c, const EigenVector_& rhs) const {
         PointerType primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
         Scalar_ dot = 0;
         for (PointerType s = primary_start; s < primary_end; ++s) {
@@ -333,34 +333,40 @@ class ParallelSparseMatrix {
      */
     // All MockMatrix interface methods, we can ignore this.
 public:
-    typedef bool Workspace;
+    struct Workspace {
+        EigenVector_ buffer;
+    };
 
-    bool workspace() const {
-        return false;
+    Workspace workspace() const {
+        return Workspace();
     }
 
-    typedef bool AdjointWorkspace;
+    struct AdjointWorkspace {
+        EigenVector_ buffer;
+    };
 
-    bool adjoint_workspace() const {
-        return false;
+    AdjointWorkspace adjoint_workspace() const {
+        return AdjointWorkspace();
     }
 
 public:
-    template<class Right_, class EigenVector_>
-    void multiply(const Right_& rhs, [[maybe_unused]] Workspace& work, EigenVector_& output) const {
+    template<class Right_>
+    void multiply(const Right_& rhs, Workspace& work, EigenVector_& output) const {
+        const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
         if (my_column_major) {
-            indirect_multiply(rhs, output);
+            indirect_multiply(realized_rhs, output);
         } else {
-            direct_multiply(rhs, output);
+            direct_multiply(realized_rhs, output);
         }
     }
 
-    template<class Right_, class EigenVector_>
-    void adjoint_multiply(const Right_& rhs, [[maybe_unused]] AdjointWorkspace& work, EigenVector_& output) const {
+    template<class Right_>
+    void adjoint_multiply(const Right_& rhs, AdjointWorkspace& work, EigenVector_& output) const {
+        const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
         if (my_column_major) {
-            direct_multiply(rhs, output);
+            direct_multiply(realized_rhs, output);
         } else {
-            indirect_multiply(rhs, output);
+            indirect_multiply(realized_rhs, output);
         }
     }
 

diff --git a/include/irlba/utils.hpp b/include/irlba/utils.hpp
@@ -42,6 +42,16 @@ void fill_with_random_normals(Matrix_& mat, Eigen::Index column, Engine_& eng) {
     fill_with_random_normals(proxy, eng);
 }
 
+template<class Right_, class EigenVector_>
+const EigenVector_& realize_rhs(const Right_& rhs, EigenVector_& buffer) {
+    if constexpr(std::is_same<Right_, EigenVector_>::value) {
+        return rhs;
+    } else {
+        buffer.noalias() = rhs;
+        return buffer;
+    }
+}
+
 }
 
 }

diff --git a/include/irlba/wrappers.hpp b/include/irlba/wrappers.hpp
@@ -220,23 +220,32 @@ class Centered {
     Eigen::Index cols() const { return my_matrix.cols(); }
 
 public:
-    typedef WrappedWorkspace<Matrix_> Workspace;
+    struct Workspace {
+        Workspace(WrappedWorkspace<Matrix_> i) : inner(std::move(i)) {}
+        WrappedWorkspace<Matrix_> inner;
+        EigenVector_ buffer;
+    };
 
     Workspace workspace() const {
-        return wrapped_workspace(my_matrix);
+        return Workspace(wrapped_workspace(my_matrix));
     }
 
-    typedef WrappedAdjointWorkspace<Matrix_> AdjointWorkspace;
+    struct AdjointWorkspace {
+        AdjointWorkspace(WrappedAdjointWorkspace<Matrix_> i) : inner(std::move(i)) {}
+        WrappedAdjointWorkspace<Matrix_> inner;
+        EigenVector_ buffer;
+    };
 
     AdjointWorkspace adjoint_workspace() const {
-        return wrapped_adjoint_workspace(my_matrix);
+        return AdjointWorkspace(wrapped_adjoint_workspace(my_matrix));
     }
 
 public:
     template<class Right_>
     void multiply(const Right_& rhs, Workspace& work, EigenVector_& out) const {
-        wrapped_multiply(my_matrix, rhs, work, out);
-        auto beta = rhs.dot(my_center);
+        const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
+        wrapped_multiply(my_matrix, realized_rhs, work.inner, out);
+        auto beta = realized_rhs.dot(my_center);
         for (auto& o : out) {
             o -= beta;
         }
@@ -245,8 +254,9 @@ class Centered {
 
     template<class Right_>
     void adjoint_multiply(const Right_& rhs, AdjointWorkspace& work, EigenVector_& out) const {
-        wrapped_adjoint_multiply(my_matrix, rhs, work, out);
-        auto beta = rhs.sum();
+        const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
+        wrapped_adjoint_multiply(my_matrix, realized_rhs, work.inner, out);
+        auto beta = realized_rhs.sum();
         out -= beta * (my_center);
         return;
     }

diff --git a/tests/src/wrappers.cpp b/tests/src/wrappers.cpp
@@ -36,9 +36,6 @@ TEST_F(WrapperCenteringTest, Multiply) {
     Eigen::VectorXd expected = realized * C;
 
     auto wrk = centered.workspace();
-    bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-    EXPECT_TRUE(is_placeholder); // just inherits the child.
-
     Eigen::VectorXd output(20);
     centered.multiply(C, wrk, output);
     expect_equal_matrix(expected, output);
@@ -47,6 +44,15 @@ TEST_F(WrapperCenteringTest, Multiply) {
     output.setZero();
     irlba::wrapped_multiply(centered, C, wrk, output);
     expect_equal_matrix(expected, output);
+
+    // Check that it works with expression inputs.
+    auto C2 = C * 2;
+    bool simple = std::is_same<decltype(C2), Eigen::VectorXd>::value;
+    EXPECT_FALSE(simple);
+    output.setZero();
+    centered.multiply(C2, wrk, output);
+    expected.noalias() = realized * C2;
+    expect_equal_matrix(expected, output);
 }
 
 TEST_F(WrapperCenteringTest, AdjointMultiply) {
@@ -57,9 +63,6 @@ TEST_F(WrapperCenteringTest, AdjointMultiply) {
     Eigen::VectorXd expected = realized.adjoint() * C;
 
     auto wrk = centered.adjoint_workspace();
-    bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-    EXPECT_TRUE(is_placeholder); // just inherits the child.
-
     Eigen::VectorXd output(10);
     centered.adjoint_multiply(C, wrk, output);
     expect_equal_matrix(expected, output);
@@ -68,6 +71,15 @@ TEST_F(WrapperCenteringTest, AdjointMultiply) {
     output.setZero();
     irlba::wrapped_adjoint_multiply(centered, C, wrk, output);
     expect_equal_matrix(expected, output);
+
+    // Check that it works with expression inputs.
+    auto C2 = C * 2;
+    bool simple = std::is_same<decltype(C2), Eigen::VectorXd>::value;
+    EXPECT_FALSE(simple);
+    output.setZero();
+    centered.adjoint_multiply(C2, wrk, output);
+    expected.noalias() = realized.adjoint() * C2;
+    expect_equal_matrix(expected, output);
 }
 
 class WrapperScalingTest : public ::testing::TestWithParam<bool> {
@@ -132,9 +144,6 @@ TEST_P(WrapperScalingTest, Multiply) {
         Eigen::VectorXd expected = realized * C;
 
         auto wrk = scaled.workspace();
-        bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-        EXPECT_FALSE(is_placeholder); // defines its own workspace. 
-
         Eigen::VectorXd output(20);
         scaled.multiply(C, wrk, output);
         expect_equal_matrix(expected, output);
@@ -143,6 +152,15 @@ TEST_P(WrapperScalingTest, Multiply) {
         output.setZero();
         irlba::wrapped_multiply(scaled, C, wrk, output);
         expect_equal_matrix(expected, output);
+
+        // Check that it works with expression inputs.
+        auto C2 = C * 2;
+        bool simple = std::is_same<decltype(C2), Eigen::VectorXd>::value;
+        EXPECT_FALSE(simple);
+        output.setZero();
+        scaled.multiply(C2, wrk, output);
+        expected.noalias() = realized * C2;
+        expect_equal_matrix(expected, output);
     }
 
     {
@@ -152,9 +170,6 @@ TEST_P(WrapperScalingTest, Multiply) {
         Eigen::VectorXd expected = realized * C;
 
         auto wrk = scaled.workspace();
-        bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-        EXPECT_TRUE(is_placeholder); // inherits the child.
-
         Eigen::VectorXd output(20);
         scaled.multiply(C, wrk, output);
         expect_equal_matrix(expected, output);
@@ -176,9 +191,6 @@ TEST_P(WrapperScalingTest, AdjointMultiply) {
         Eigen::VectorXd expected = realized.adjoint() * C;
 
         auto wrk = scaled.adjoint_workspace();
-        bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-        EXPECT_TRUE(is_placeholder); // just inherits the child.
-
         Eigen::VectorXd output(10);
         scaled.adjoint_multiply(C, wrk, output);
         expect_equal_matrix(expected, output);
@@ -187,6 +199,15 @@ TEST_P(WrapperScalingTest, AdjointMultiply) {
         output.setZero();
         irlba::wrapped_adjoint_multiply(scaled, C, wrk, output);
         expect_equal_matrix(expected, output);
+
+        // Check that it works with expression inputs.
+        auto C2 = C * 2;
+        bool simple = std::is_same<decltype(C2), Eigen::VectorXd>::value;
+        EXPECT_FALSE(simple);
+        output.setZero();
+        scaled.adjoint_multiply(C2, wrk, output);
+        expected.noalias() = realized.adjoint() * C2;
+        expect_equal_matrix(expected, output);
     }
 
     {
@@ -196,9 +217,6 @@ TEST_P(WrapperScalingTest, AdjointMultiply) {
         Eigen::VectorXd expected = realized.adjoint() * C;
 
         auto wrk = scaled.adjoint_workspace();
-        bool is_placeholder = std::is_same<decltype(wrk), bool>::value;
-        EXPECT_FALSE(is_placeholder); // defines its own workspace.
-
         Eigen::VectorXd output(10);
         scaled.adjoint_multiply(C, wrk, output);
         expect_equal_matrix(expected, output);