Skip to content

Commit

Permalink
Add new generic matrix operators
Browse files Browse the repository at this point in the history
Our current approach for e.g. matrix multiplication reads very
naturally, e.g. `A = B*C` models $A = BC$, but this has a problem on GPU
code. Indeed, if these matrices have size $N \times N$, then we first
concretize an $N \times N$ matrix which we then have to copy
element-by-element into $A$. This means that we need to keep that many
registers live, which is quite large for e.g. $7 \times 7$ free
matrices (which thus occupy 49 registers).

This problem can be alleviated by implementing optimized operators. More
precisely, this PR implements the following new methods:

* `set_product(A, B, C)` computes $A = BC$ without intermediate values.
* `set_product_left_transpose(A, B, C)` computes $A = B^TC$ without
  intermediate values.
* `set_product_right_transpose(A, B, C)` computes $A = BC^T` without
  intermediate values.
* `set_inplace_product_right(A, B)` computes $A = AB$ in place.
* `set_inplace_product_left(A, B)` computes $A = BA$ in place.
* `set_inplace_product_right_transpose(A, B)` computes $A = AB^T$ in
  place.
* `set_inplace_product_left_transpose(A, B)` computes $A = B^TA$ in
  place.
  • Loading branch information
stephenswat committed Nov 19, 2024
1 parent ec14244 commit 376d545
Show file tree
Hide file tree
Showing 2 changed files with 803 additions and 0 deletions.
153 changes: 153 additions & 0 deletions math/cmath/include/algebra/math/impl/cmath_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,159 @@ struct actor {

return inverse_actor_t()(m);
}

// Set matrix C to the product AB
template <size_type M, size_type N, size_type O>
ALGEBRA_HOST_DEVICE inline void set_product(
matrix_type<M, O> &C, const matrix_type<M, N> &A,
const matrix_type<N, O> &B) const {

for (size_type i = 0; i < M; ++i) {
for (size_type j = 0; j < O; ++j) {
scalar_t T = 0.f;

for (size_type k = 0; k < N; ++k) {
T += element_getter()(A, i, k) * element_getter()(B, k, j);
}

element_getter()(C, i, j) = T;
}
}
}

// Set matrix C to the product A^TB
template <size_type M, size_type N, size_type O>
ALGEBRA_HOST_DEVICE inline void set_product_left_transpose(
matrix_type<M, O> &C, const matrix_type<N, M> &A,
const matrix_type<N, O> &B) const {

for (size_type i = 0; i < M; ++i) {
for (size_type j = 0; j < O; ++j) {
scalar_t T = 0.f;

for (size_type k = 0; k < N; ++k) {
T += element_getter()(A, k, i) * element_getter()(B, k, j);
}

element_getter()(C, i, j) = T;
}
}
}

// Set matrix C to the product AB^T
template <size_type M, size_type N, size_type O>
ALGEBRA_HOST_DEVICE inline void set_product_right_transpose(
matrix_type<M, O> &C, const matrix_type<M, N> &A,
const matrix_type<O, N> &B) const {

for (size_type i = 0; i < M; ++i) {
for (size_type j = 0; j < O; ++j) {
scalar_t T = 0.f;

for (size_type k = 0; k < N; ++k) {
T += element_getter()(A, i, k) * element_getter()(B, j, k);
}

element_getter()(C, i, j) = T;
}
}
}

// Set matrix A to the product AB in place
template <size_type M>
ALGEBRA_HOST_DEVICE inline void set_inplace_product_right(
matrix_type<M, M> &A, const matrix_type<M, M> &B) const {

for (size_type i = 0; i < M; ++i) {
matrix_type<1, M> Q;

for (size_type j = 0; j < M; ++j) {
element_getter()(Q, 0, j) = element_getter()(A, i, j);
}

for (size_type j = 0; j < M; ++j) {
scalar_t T = 0.f;

for (size_type k = 0; k < M; ++k) {
T += element_getter()(Q, 0, k) * element_getter()(B, k, j);
}

element_getter()(A, i, j) = T;
}
}
}

// Set matrix A to the product BA in place
template <size_type M>
ALGEBRA_HOST_DEVICE inline void set_inplace_product_left(
matrix_type<M, M> &A, const matrix_type<M, M> &B) const {

for (size_type j = 0; j < M; ++j) {
matrix_type<1, M> Q;

for (size_type i = 0; i < M; ++i) {
element_getter()(Q, 0, i) = element_getter()(A, i, j);
}

for (size_type i = 0; i < M; ++i) {
scalar_t T = 0.f;

for (size_type k = 0; k < M; ++k) {
T += element_getter()(B, i, k) * element_getter()(Q, 0, k);
}

element_getter()(A, i, j) = T;
}
}
}

// Set matrix A to the product AB^T in place
template <size_type M>
ALGEBRA_HOST_DEVICE inline void set_inplace_product_right_transpose(
matrix_type<M, M> &A, const matrix_type<M, M> &B) const {

for (size_type i = 0; i < M; ++i) {
matrix_type<1, M> Q;

for (size_type j = 0; j < M; ++j) {
element_getter()(Q, 0, j) = element_getter()(A, i, j);
}

for (size_type j = 0; j < M; ++j) {
scalar_t T = 0.f;

for (size_type k = 0; k < M; ++k) {
T += element_getter()(Q, 0, k) * element_getter()(B, j, k);
}

element_getter()(A, i, j) = T;
}
}
}

// Set matrix A to the product B^TA in place
template <size_type M>
ALGEBRA_HOST_DEVICE inline void set_inplace_product_left_transpose(
matrix_type<M, M> &A, const matrix_type<M, M> &B) const {

for (size_type j = 0; j < M; ++j) {
matrix_type<1, M> Q;

for (size_type i = 0; i < M; ++i) {
element_getter()(Q, 0, i) = element_getter()(A, i, j);
}

for (size_type i = 0; i < M; ++i) {
scalar_t T = 0.f;

for (size_type k = 0; k < M; ++k) {
T += element_getter()(B, k, i) * element_getter()(Q, 0, k);
}

element_getter()(A, i, j) = T;
}
}
}
};

namespace determinant {
Expand Down
Loading

0 comments on commit 376d545

Please sign in to comment.