From 2872d1f5b811133a20103831df9e45306e611746 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Tue, 1 Oct 2024 10:14:17 -0700 Subject: [PATCH 01/26] init-commit, initial `TrigSkOp` structure --- RandBLAS/trig_skops.hh | 318 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 RandBLAS/trig_skops.hh diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh new file mode 100644 index 0000000..896a64f --- /dev/null +++ b/RandBLAS/trig_skops.hh @@ -0,0 +1,318 @@ +#include "RandBLAS/base.hh" +#include "RandBLAS/exceptions.hh" +#include "RandBLAS/random_gen.hh" +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MAX(a, b) (((a) < (b)) ? (b) : (a)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +namespace RandBLAS { + + // ============================================================================= + /// WARNING: None of the following functions or overloads thereof are part of the + /// public API + /// + void generateRademacherVector_r123(std::vector &buff, uint32_t key_seed, uint32_t ctr_seed, int n) { + typedef r123::Philox2x32 RNG; + // std::vector rademacherVector(n); + + // Use OpenMP to parallelize the Rademacher vector generation + #pragma omp parallel + { + // Each thread has its own RNG instance + RNG rng; + + // You can use any seed value for the key + RNG::key_type k = {{key_seed + omp_get_thread_num()}}; // Unique key for each thread + + // Thread-local counter + RNG::ctr_type c; + + // Parallel for loop + #pragma omp for + for (int i = 0; i < n; ++i) { + // Set the counter for each random number (unique per thread) + c[0] = ctr_seed + i; // Ensure the counter is unique across threads + + // Generate a 2x32-bit random number using the Philox generator + RNG::ctr_type r = rng(c, k); + + // Convert the random number into a float in [0, 1) using u01fixedpt + float randValue = r123::u01fixedpt(r.v[0]); + + // Convert the float into a Rademacher entry (-1 or 1) + buff[i] = randValue < 0.5 ? -1 : 1; + } + } + } + + std::vector generateRademacherVector_parallel(int64_t n) { + std::vector rademacherVec(n); + + #pragma omp parallel + { + // Each thread gets its own random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::bernoulli_distribution bernoulli(0.5); + + #pragma omp for + for (int64_t i = 0; i < n; ++i) { + rademacherVec[i] = bernoulli(gen) ? 1 : -1; + } + } + + return rademacherVec; + } + + template + void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A) { + std::vector diag = generateRademacherVector_parallel(cols); + + for(int col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col * rows], 1); + } + } + + template + void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector &diag) { + //NOTE: Only considers sketching from the left + ColMajor format as of now + for(int col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col * rows], 1); + } + } + + // `applyDiagonalRademacher`should have a similar API as `sketch_general` + // Will be called inside of `lskget` + // B <- alpha * diag(Rademacher) * op(A) + beta * B + // `alpha`, `beta` aren't needed and shape(A) == shape(B) + // lda == ldb + template + void applyDiagonalRademacher( + blas::Layout layout, + blas::Op opA, // taken from `sketch_general/lskget` + int64_t n, // everything is the same size as `op(A)`: m-by-n + int64_t m, + std::vector &diag, + const T* A, // The data matrix that won't be modified + // int64_t lda, + T* B // The destination matrix + // int64_t ldb + ) { + // B <- alpha * diag(Rademacher) * op(A) + beta * B + // `alpha`, `beta` aren't needed + // && shape(A) == shape(B) && lda == ldb && shape({A | B}) = m-by-n + //NOTE: Use `layout` and `opA` for working with RowMajor data + int64_t lda = m; + int64_t ldb = m; + + //NOTE: Should the copy be made inside of `applyDiagonalRademacher` or + // should it be inside of `lskget`? + lapack::lacpy(lapack::MatrixType::General, m, n, A, lda, B, ldb); + + applyDiagonalRademacher(m, n, B, diag); + } + + + // `permuteRowsToTop` should just take in `B` as an input + // `B` will be passed in by the user to `sketch_general/lskget` and `permuteRowsToTop` will take in + // the `B` that has been modified by `applyRademacherDiagonal` + // Will also be called inside of `lskget` + template + void permuteRowsToTop(int rows, int cols, std::vector selectedRows, T* B, int ldb) { + //NOTE: There should be a similar `permuteColsToLeft` + int top = 0; // Keeps track of the topmost unselected row + + // Compute `r` internally + // int64_t r = 100; + // std::vector selectedRows = generateRademacherVector_parallel(r); + + for (int selected : selectedRows) { + if (selected != top) { + // Use BLAS swap to swap the entire rows + // Swapping row 'selected' with row 'top' + blas::swap(cols, &B[selected], ldb, &B[top], ldb); + } + top++; + } + } + + enum class TrigDistName: char { + Fourier = 'F', + + // --------------------------------------------------------------------------- + + Hadamard = 'H' + }; + + struct TrigDist { + const int64_t n_rows; + const int64_t n_cols; + + int64_t dim_short; + int64_t dim_long; + + const TrigDistName family; + + TrigDist( + int64_t n_rows, + int64_t n_cols, + TrigDistName tn = TrigDistName::Hadamard + ) : n_rows(n_rows), n_cols(n_cols), family(tn) { + dim_short = MIN(n_rows, n_cols); + dim_long = MAX(n_rows, n_cols); +} +}; + + template + struct TrigSkOp { + using generator = RNG; + using state_type = RNGState; + using buffer_type = T; + + const int64_t n_rows; + const int64_t n_cols; + int64_t dim_short; + int64_t dim_long; + // int64_t n_sampled; + + const TrigDist dist; + + + const RNGState seed_state; + + RNGState next_state; + + const blas::Layout layout; + const bool sketchFromLeft = true; + bool known_filled = false; + + T* DiagonalRademacher = nullptr; + T* SampledRows = nullptr; + + TrigSkOp( + TrigDist dist, + RNGState const &state, + bool known_filled = true + ) : n_rows(dist.n_rows), n_cols(dist.n_cols), dist(dist), seed_state(state), known_filled(known_filled), dim_short(dist.dim_short), dim_long(dist.dim_long){ + // Memory for Rademacher diagonal gets allocated here + // Number of rows/cols to be sampled gets decided here + // i.e. `n_sampled` gets set + if(sketchFromLeft) + DiagonalRademacher = new T[n_rows]; + else + DiagonalRademacher = new T[n_cols]; + + SampledRows = new sint_t[dim_short]; + + //TODO: Logic to compute the number of samples that we require `r` + // this can be shown to depend on the maximal row norm of the data matrix + //NOTE: Do not have access to this in the data-oblivious regime --- how + // do we get access? +}; + + TrigSkOp( + TrigDistName family, + int64_t n_rows, + int64_t n_cols, + uint32_t key + ) : n_rows(n_rows), n_cols(n_cols), dist(TrigDist{n_rows, n_cols, family}), seed_state() {}; + + ~TrigSkOp(); +}; + +// Populates the Rademacher diagonal entries and decides the rows/cols to +// be sampled at the end +template +RandBLAS::RNGState fill_trig( + TrigSkOp &Tr +) { + /** + * Will do the work of filling in the diagonal Rademacher entries + * and selecting the rows/cols to be sampled + */ + + auto [ctr, key] = Tr.seed_state; + + // Fill in the Rademacher diagonal + if(Tr.sketchFromLeft) + generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_rows); + else + generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_cols); + + //NOTE: Select the rows/cols to be sampled --- use the `repeated_fisher_yates` function + int64_t r = Tr.dim_short; + int64_t d = Tr.dim_long; + + // Output containers + std::vector idxs_major(r); // Stores the sampled integers + std::vector idxs_minor(r); // Not needed in your case + std::vector vals(r); // Not needed in your case, but required by the function + + RandBLAS::repeated_fisher_yates( + Tr.seed_state, + r, // Number of samples (vec_nnz) + d, // Total number of elements (dim_major) + 1, // Single sample round (dim_minor) + Tr.SampledRows, // Holds the required output + idxs_minor.data(), // Placeholder + vals.data() // Placeholder + ); +} +} + + +namespace RandBLAS::trig { + +// Performs the actual application of the fast trigonometric transform +// Only called after calling `fill_trig` +template +inline void lskget( + blas::Layout layout, + blas::Op opS, + blas::Op opA, + int64_t d, // B is d-by-n + int64_t n, // op(A) is m-by-n + int64_t m, // op(S) is d-by-m + T alpha, + TrigSkOp &Tr, + int64_t ro_s, + int64_t co_s, + const T* A, // data-matrix + int64_t lda, + T beta, + T* B, // output matrix + int64_t ldb +) { + if (!Tr.known_filled) + fill_trig(Tr); + + // Applying the diagonal transformation + applyDiagonalRademacher(layout, opA, n, m, Tr.DiagonalRademacher, A, B); + // applyDiagonalRademacher(m, n, A, Tr.DiagonalRademacher); + + //TODO: Apply the Hadamard transform + + //... and finally permute the rows + permuteRowsToTop(m, n, Tr.SampledRows, B, ldb); +} +} From 7103a00cd8ba90baabaeace2b6fe68513acb0f4c Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Tue, 1 Oct 2024 10:44:42 -0700 Subject: [PATCH 02/26] Cleaning --- RandBLAS/trig_skops.hh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 896a64f..696798f 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -37,7 +37,6 @@ namespace RandBLAS { // Each thread has its own RNG instance RNG rng; - // You can use any seed value for the key RNG::key_type k = {{key_seed + omp_get_thread_num()}}; // Unique key for each thread // Thread-local counter @@ -263,10 +262,8 @@ RandBLAS::RNGState fill_trig( int64_t r = Tr.dim_short; int64_t d = Tr.dim_long; - // Output containers - std::vector idxs_major(r); // Stores the sampled integers - std::vector idxs_minor(r); // Not needed in your case - std::vector vals(r); // Not needed in your case, but required by the function + std::vector idxs_minor(r); // Placeholder + std::vector vals(r); // Placeholder RandBLAS::repeated_fisher_yates( Tr.seed_state, From 5c1572dde9a75a3c7fb60539f05cfdd7fe473039 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Mon, 7 Oct 2024 13:50:30 -0700 Subject: [PATCH 03/26] Commiting old changes for documenting --- RandBLAS/trig_skops.hh | 56 ++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 696798f..5b6a747 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -27,7 +27,9 @@ namespace RandBLAS { /// WARNING: None of the following functions or overloads thereof are part of the /// public API /// - void generateRademacherVector_r123(std::vector &buff, uint32_t key_seed, uint32_t ctr_seed, int n) { + template + // void generateRademacherVector_r123(std::vector &buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { + void generateRademacherVector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { typedef r123::Philox2x32 RNG; // std::vector rademacherVector(n); @@ -90,8 +92,9 @@ namespace RandBLAS { } } - template - void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector &diag) { + template + // void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector &diag) { + void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, sint_t* diag) { //NOTE: Only considers sketching from the left + ColMajor format as of now for(int col=0; col < cols; col++) { if(diag[col] > 0) @@ -105,13 +108,14 @@ namespace RandBLAS { // B <- alpha * diag(Rademacher) * op(A) + beta * B // `alpha`, `beta` aren't needed and shape(A) == shape(B) // lda == ldb - template + template void applyDiagonalRademacher( blas::Layout layout, blas::Op opA, // taken from `sketch_general/lskget` int64_t n, // everything is the same size as `op(A)`: m-by-n int64_t m, - std::vector &diag, + // std::vector &diag, + sint_t* diag, const T* A, // The data matrix that won't be modified // int64_t lda, T* B // The destination matrix @@ -182,12 +186,15 @@ namespace RandBLAS { } }; - template + template struct TrigSkOp { using generator = RNG; using state_type = RNGState; using buffer_type = T; + //TODO: Where should the logic for deciding the size of `H` to use go? + // Since that will be accompanied by padding of (DA) maybe it should + // go inside of `lskget`? const int64_t n_rows; const int64_t n_cols; int64_t dim_short; @@ -198,28 +205,28 @@ namespace RandBLAS { const RNGState seed_state; - RNGState next_state; const blas::Layout layout; const bool sketchFromLeft = true; bool known_filled = false; - T* DiagonalRademacher = nullptr; - T* SampledRows = nullptr; + sint_t* DiagonalRademacher = nullptr; + sint_t* SampledRows = nullptr; TrigSkOp( TrigDist dist, RNGState const &state, - bool known_filled = true - ) : n_rows(dist.n_rows), n_cols(dist.n_cols), dist(dist), seed_state(state), known_filled(known_filled), dim_short(dist.dim_short), dim_long(dist.dim_long){ + blas::Layout layout, + bool known_filled = false + ) : n_rows(dist.n_rows), n_cols(dist.n_cols), dist(dist), seed_state(state), known_filled(known_filled), dim_short(dist.dim_short), dim_long(dist.dim_long), layout(layout){ // Memory for Rademacher diagonal gets allocated here // Number of rows/cols to be sampled gets decided here // i.e. `n_sampled` gets set if(sketchFromLeft) - DiagonalRademacher = new T[n_rows]; + DiagonalRademacher = new sint_t[n_rows]; else - DiagonalRademacher = new T[n_cols]; + DiagonalRademacher = new sint_t[n_cols]; SampledRows = new sint_t[dim_short]; @@ -236,27 +243,32 @@ namespace RandBLAS { uint32_t key ) : n_rows(n_rows), n_cols(n_cols), dist(TrigDist{n_rows, n_cols, family}), seed_state() {}; + //TODO: Write a proper deconstructor + //Free up DiagonalRademacher && SampledRows ~TrigSkOp(); }; -// Populates the Rademacher diagonal entries and decides the rows/cols to -// be sampled at the end +template +TrigSkOp::~TrigSkOp() { + delete [] this->DiagonalRademacher; + delete [] this->SampledRows; +}; + template RandBLAS::RNGState fill_trig( - TrigSkOp &Tr + TrigSkOp &Tr ) { /** * Will do the work of filling in the diagonal Rademacher entries * and selecting the rows/cols to be sampled */ - auto [ctr, key] = Tr.seed_state; // Fill in the Rademacher diagonal if(Tr.sketchFromLeft) - generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_rows); + generateRademacherVector_r123(Tr.DiagonalRademacher, key[0], ctr[0], Tr.n_rows); else - generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_cols); + generateRademacherVector_r123(Tr.DiagonalRademacher, key[0], ctr[0], Tr.n_cols); //NOTE: Select the rows/cols to be sampled --- use the `repeated_fisher_yates` function int64_t r = Tr.dim_short; @@ -265,7 +277,7 @@ RandBLAS::RNGState fill_trig( std::vector idxs_minor(r); // Placeholder std::vector vals(r); // Placeholder - RandBLAS::repeated_fisher_yates( + Tr.next_state = RandBLAS::repeated_fisher_yates( Tr.seed_state, r, // Number of samples (vec_nnz) d, // Total number of elements (dim_major) @@ -274,6 +286,8 @@ RandBLAS::RNGState fill_trig( idxs_minor.data(), // Placeholder vals.data() // Placeholder ); + Tr.known_filled = true; + return Tr.next_state; } } @@ -310,6 +324,6 @@ inline void lskget( //TODO: Apply the Hadamard transform //... and finally permute the rows - permuteRowsToTop(m, n, Tr.SampledRows, B, ldb); + // permuteRowsToTop(m, n, Tr.SampledRows, B, ldb); } } From a60490942f6d48da6026e0c7b0fc61451bb6108b Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 9 Oct 2024 00:18:23 -0700 Subject: [PATCH 04/26] `A <- (\Pi D) A`, for `ColMajor` A --- RandBLAS/trig_skops.hh | 104 +++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 5b6a747..345604e 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -94,13 +94,26 @@ namespace RandBLAS { template // void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector &diag) { - void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, sint_t* diag) { - //NOTE: Only considers sketching from the left + ColMajor format as of now - for(int col=0; col < cols; col++) { - if(diag[col] > 0) - continue; - blas::scal(rows, diag[col], &A[col * rows], 1); - } + void applyDiagonalRademacher( + blas::Layout layout, + int64_t rows, + int64_t cols, + T* A, + sint_t* diag + ) + { + //TODO: Only considers sketching from the left + ColMajor format as of now + if(layout == blas::Layout::ColMajor) { + // In a `ColMajor` setting we parallelize over columns + for(int col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col * rows], 1); + } + } + else { + // In a `RowMajor` setting we vectorize over rows + } } // `applyDiagonalRademacher`should have a similar API as `sketch_general` @@ -140,20 +153,27 @@ namespace RandBLAS { // `B` will be passed in by the user to `sketch_general/lskget` and `permuteRowsToTop` will take in // the `B` that has been modified by `applyRademacherDiagonal` // Will also be called inside of `lskget` - template - void permuteRowsToTop(int rows, int cols, std::vector selectedRows, T* B, int ldb) { + template + void permuteRowsToTop( + blas::Layout layout, + int64_t rows, + int64_t cols, + sint_t* selectedRows, + int64_t d, // size of `selectedRows` + T* A + ) { //NOTE: There should be a similar `permuteColsToLeft` int top = 0; // Keeps track of the topmost unselected row - // Compute `r` internally - // int64_t r = 100; - // std::vector selectedRows = generateRademacherVector_parallel(r); + int64_t lda = rows; + if(layout == blas::Layout::RowMajor) + lda = cols; - for (int selected : selectedRows) { - if (selected != top) { + for (int i=0; i < d; i++) { + if (selectedRows[i] != top) { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' - blas::swap(cols, &B[selected], ldb, &B[top], ldb); + blas::swap(cols, &A[selectedRows[i]], lda, &A[top], lda); } top++; } @@ -326,4 +346,58 @@ inline void lskget( //... and finally permute the rows // permuteRowsToTop(m, n, Tr.SampledRows, B, ldb); } + + +/* + * These functions apply an in-place, SRHT-like transform to the input matrix + * i.e. A <- (\Pi H D)A OR A <- A(D H \Pi) (which is equivalent to A <- A(\Pi H D)^{-1}) + * layout: Layout of the input matrix (`ColMajor/RowMajor`) + * A: (m x n), input dimensions of `A` + * d: The number of rows/columns that will be permuted by the action of $\Pi$ + */ +template +inline void lmiget( + blas::Layout layout, + RandBLAS::RNGState random_state, + int64_t m, // `A` is `(m x n)` + int64_t n, + int64_t d, // `d` is the number of rows that have to be permuted by `\Pi` + T* A // data-matrix +) +{ + // Size of the Rademacher entries = |A_cols| + //TODO: Change `diag` to float/doubles (same data type as the matrix) + sint_t* diag = new sint_t[n]; + sint_t* selected_rows = new sint_t[d]; + + auto [ctr, key] = random_state; + + //Step 1: Scale with `D` + //Populating `diag` + generateRademacherVector_r123(diag, key[0], ctr[0], n); + applyDiagonalRademacher(layout, m, n, A, diag); + + //Step 2: Apply the Hadamard transform + + //Step 3: Permute the rows + std::vector idxs_minor(d); // Placeholder + std::vector vals(d); // Placeholder + + // Populating `selected_rows` + //TODO: Do I return this at some point? + RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( + random_state, + d, // Number of samples (vec_nnz) + m, // Total number of elements (dim_major) + 1, // Single sample round (dim_minor) + selected_rows, // Holds the required output + idxs_minor.data(), // Placeholder + vals.data() // Placeholder + ); + + permuteRowsToTop(layout, m, n, selected_rows, d, A); + + free(diag); + free(selected_rows); +} } From 1caaffeae443255a6aa0a9bf817131ccade60c3b Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 9 Oct 2024 11:04:18 -0700 Subject: [PATCH 05/26] `A <- (\Pi H D) A` for `A: (mxn)` and `ColMajor` --- RandBLAS/trig_skops.hh | 44 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 345604e..e548a3b 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -179,6 +179,45 @@ namespace RandBLAS { } } + void fht_left_col_major(double *buf, int log_n, int num_rows, int num_cols) { + // No Padding of the columns in this implementation, + // the #rows must exactly be a power of 2 + // Padding would be straight-forward to address + int n = 1 << log_n; + std::cout << n << std::endl; + + // Apply FHT to each column independently + for (int col = 0; col < num_cols; ++col) { + // Pointer to the beginning of the current column in the Column-Major order + double* col_buf = buf + col * num_rows; + + // Apply the original FHT on this column + for (int i = 0; i < log_n; ++i) { + int s1 = 1 << i; + int s2 = s1 << 1; + for (int j = 0; j < n; j += s2) { + for (int k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_rows; + bool b2 = j + k + s1 < num_rows; + double u = b1 ? col_buf[j + k] : 0; + double v = b2 ? col_buf[j + k + s1] : 0; + if(b1 && b2) { + col_buf[j + k] = u + v; + col_buf[j + k + s1] = u - v; + } + else if(!b2 && b1) { + col_buf[j + k] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + enum class TrigDistName: char { Fourier = 'F', @@ -375,9 +414,10 @@ inline void lmiget( //Step 1: Scale with `D` //Populating `diag` generateRademacherVector_r123(diag, key[0], ctr[0], n); - applyDiagonalRademacher(layout, m, n, A, diag); + // applyDiagonalRademacher(layout, m, n, A, diag); //Step 2: Apply the Hadamard transform + fht_left_col_major(A, std::log2(MAX(m, n)), m, n); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder @@ -395,7 +435,7 @@ inline void lmiget( vals.data() // Placeholder ); - permuteRowsToTop(layout, m, n, selected_rows, d, A); + // permuteRowsToTop(layout, m, n, selected_rows, d, A); free(diag); free(selected_rows); From 30c32f313e9c026a0edb37c335408bb846ddaea3 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 9 Oct 2024 13:45:03 -0700 Subject: [PATCH 06/26] {L|R}`RowMajor`, {R}`RowMajor`, implicit padding --- RandBLAS/trig_skops.hh | 91 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index e548a3b..7c29d27 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -180,11 +180,7 @@ namespace RandBLAS { } void fht_left_col_major(double *buf, int log_n, int num_rows, int num_cols) { - // No Padding of the columns in this implementation, - // the #rows must exactly be a power of 2 - // Padding would be straight-forward to address int n = 1 << log_n; - std::cout << n << std::endl; // Apply FHT to each column independently for (int col = 0; col < num_cols; ++col) { @@ -218,6 +214,88 @@ namespace RandBLAS { } } + void fht_left_row_major(double *buf, int log_n, int num_rows, int num_cols) { + int n = 1 << log_n; + + // Apply FHT to each column independently + for (int col = 0; col < num_cols; ++col) { + // Apply the original FHT on this column + for (int i = 0; i < log_n; ++i) { + int s1 = 1 << i; + int s2 = s1 << 1; + for (int j = 0; j < n; j += s2) { + for (int k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_rows; + bool b2 = j + k + s1 < num_rows; + double u = b1 ? buf[(j + k) * num_cols + col] : 0; + double v = b2 ? buf[(j + k + s1) * num_cols + col] : 0; + if(b1 && b2) { + buf[(j + k) * num_cols + col] = u + v; + buf[(j + k + s1) * num_cols + col] = u - v; + } + else if(!b2 && b1) { + buf[(j + k) * num_cols + col] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + void fht_right_row_major(double *buf, int log_n, int num_rows, int num_cols) { + int n = 1 << log_n; + + // Apply FHT to each row independently + for (int row = 0; row < num_rows; ++row) { + // Pointer to the beginning of the current row in RowMajor order + double* row_buf = buf + row * num_cols; + + // Apply the original FHT on this row + for (int i = 0; i < log_n; ++i) { + int s1 = 1 << i; + int s2 = s1 << 1; + for (int j = 0; j < n; j += s2) { + for (int k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_cols; + bool b2 = j + k + s1 < num_cols; + double u = b1 ? row_buf[j + k] : 0; + double v = b2 ? row_buf[j + k + s1] : 0; + if(b1 && b2) { + row_buf[j + k] = u + v; + row_buf[j + k + s1] = u - v; + } + else if(!b2 && b1) { + row_buf[j + k] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + void fht_right_col_major(double *buf, int log_n, int num_rows, int num_cols) { + //TODO: + } + + void fht_dispatch( + blas::Layout layout, + double* buff, + int64_t log_n, + int64_t num_rows, + int64_t num_cols + ) + { + //TODO: + } + enum class TrigDistName: char { Fourier = 'F', @@ -417,7 +495,10 @@ inline void lmiget( // applyDiagonalRademacher(layout, m, n, A, diag); //Step 2: Apply the Hadamard transform - fht_left_col_major(A, std::log2(MAX(m, n)), m, n); + //TODO: Clean via `fht_dispatch` + // fht_left_col_major(A, std::log2(MAX(m, n)), m, n); + // fht_right_row_major(A, std::log2(MAX(m, n)), m, n); + fht_left_row_major(A, std::log2(MAX(m, n)), m, n); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder From e6013094aa9758d9ce527a8d5c3e2f44ab7cde00 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 9 Oct 2024 18:03:46 -0700 Subject: [PATCH 07/26] All variations of $H_n$ implemented --- RandBLAS/trig_skops.hh | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 7c29d27..56c113d 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -282,7 +282,35 @@ namespace RandBLAS { } void fht_right_col_major(double *buf, int log_n, int num_rows, int num_cols) { - //TODO: + int n = 1 << log_n; + + // Apply FHT to each row independently + for (int row= 0; row < num_rows; ++row) { + // Apply the original FHT on this column + for (int i = 0; i < log_n; ++i) { + int s1 = 1 << i; + int s2 = s1 << 1; + for (int j = 0; j < n; j += s2) { + for (int k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_cols; + bool b2 = j + k + s1 < num_cols; + double u = b1 ? buf[(j + k) * num_rows + row] : 0; + double v = b2 ? buf[(j + k + s1) * num_rows + row] : 0; + if(b1 && b2) { + buf[(j + k) * num_rows + row] = u + v; + buf[(j + k + s1) * num_rows + row] = u - v; + } + else if(!b2 && b1) { + buf[(j + k) * num_rows + row] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } } void fht_dispatch( @@ -498,7 +526,8 @@ inline void lmiget( //TODO: Clean via `fht_dispatch` // fht_left_col_major(A, std::log2(MAX(m, n)), m, n); // fht_right_row_major(A, std::log2(MAX(m, n)), m, n); - fht_left_row_major(A, std::log2(MAX(m, n)), m, n); + // fht_left_row_major(A, std::log2(MAX(m, n)), m, n); + fht_right_col_major(A, std::log2(MAX(m, n)), m, n); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder From 7b8b13457ac5ae89b53ea4560719b85ecca7f572 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 9 Oct 2024 18:47:35 -0700 Subject: [PATCH 08/26] Cleaned code with `fht_dispatch` --- RandBLAS/trig_skops.hh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 56c113d..3466a36 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -314,6 +314,7 @@ namespace RandBLAS { } void fht_dispatch( + bool left, blas::Layout layout, double* buff, int64_t log_n, @@ -322,6 +323,14 @@ namespace RandBLAS { ) { //TODO: + if(left && layout == blas::Layout::ColMajor) + fht_left_col_major(buff, log_n, num_rows, num_cols); + else if(left && layout == blas::Layout::RowMajor) + fht_left_row_major(buff, log_n, num_rows, num_cols); + else if(!left && layout == blas::Layout::ColMajor) + fht_right_col_major(buff, log_n, num_rows, num_cols); + else + fht_right_row_major(buff, log_n, num_rows, num_cols); } enum class TrigDistName: char { @@ -523,11 +532,7 @@ inline void lmiget( // applyDiagonalRademacher(layout, m, n, A, diag); //Step 2: Apply the Hadamard transform - //TODO: Clean via `fht_dispatch` - // fht_left_col_major(A, std::log2(MAX(m, n)), m, n); - // fht_right_row_major(A, std::log2(MAX(m, n)), m, n); - // fht_left_row_major(A, std::log2(MAX(m, n)), m, n); - fht_right_col_major(A, std::log2(MAX(m, n)), m, n); + fht_dispatch(true, layout, A, std::log2(MAX(m, n)), m, n); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder From 5f72995ffc65d18ef3bb3d3fe885878d3ad29601 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 10 Oct 2024 09:40:47 -0700 Subject: [PATCH 09/26] Generalizing `applyDiagonalRademacher` --- RandBLAS/trig_skops.hh | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 3466a36..5fd62d9 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -93,8 +93,8 @@ namespace RandBLAS { } template - // void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector &diag) { void applyDiagonalRademacher( + bool left, blas::Layout layout, int64_t rows, int64_t cols, @@ -102,17 +102,33 @@ namespace RandBLAS { sint_t* diag ) { - //TODO: Only considers sketching from the left + ColMajor format as of now - if(layout == blas::Layout::ColMajor) { - // In a `ColMajor` setting we parallelize over columns + if(left && layout == blas::Layout::ColMajor) { for(int col=0; col < cols; col++) { if(diag[col] > 0) continue; blas::scal(rows, diag[col], &A[col * rows], 1); } } + else if(left && layout == blas::Layout::RowMajor) { + for(int col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col], cols); + } + } + else if(!left && layout == blas::Layout::ColMajor) { + for(int row = 0; row < rows; row++) { + if(diag[row] > 0) + continue; + blas::scal(cols, diag[row], &A[row], rows); + } + } else { - // In a `RowMajor` setting we vectorize over rows + for(int row = 0; row < rows; row++) { + if(diag[row] > 0) + continue; + blas::scal(cols, diag[row], &A[row * cols], 1); + } } } @@ -155,6 +171,7 @@ namespace RandBLAS { // Will also be called inside of `lskget` template void permuteRowsToTop( + bool left, blas::Layout layout, int64_t rows, int64_t cols, @@ -162,7 +179,8 @@ namespace RandBLAS { int64_t d, // size of `selectedRows` T* A ) { - //NOTE: There should be a similar `permuteColsToLeft` + //NOTE: There should be a similar `permuteColsToLeft` for sketching + //from the right int top = 0; // Keeps track of the topmost unselected row int64_t lda = rows; @@ -528,8 +546,8 @@ inline void lmiget( //Step 1: Scale with `D` //Populating `diag` - generateRademacherVector_r123(diag, key[0], ctr[0], n); - // applyDiagonalRademacher(layout, m, n, A, diag); + generateRademacherVector_r123(diag, key[3], ctr[0], n); + applyDiagonalRademacher(true, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform fht_dispatch(true, layout, A, std::log2(MAX(m, n)), m, n); From 3bd236100eb4aeb4831e5334597234c711c161cb Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 10 Oct 2024 10:01:41 -0700 Subject: [PATCH 10/26] Generalized `permuteRowsToTop` --- RandBLAS/trig_skops.hh | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 5fd62d9..f0a4909 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -171,7 +171,6 @@ namespace RandBLAS { // Will also be called inside of `lskget` template void permuteRowsToTop( - bool left, blas::Layout layout, int64_t rows, int64_t cols, @@ -179,21 +178,28 @@ namespace RandBLAS { int64_t d, // size of `selectedRows` T* A ) { - //NOTE: There should be a similar `permuteColsToLeft` for sketching + //TODO: There should be a similar `permuteColsToLeft` for sketching //from the right - int top = 0; // Keeps track of the topmost unselected row - - int64_t lda = rows; - if(layout == blas::Layout::RowMajor) - lda = cols; - - for (int i=0; i < d; i++) { - if (selectedRows[i] != top) { - // Use BLAS swap to swap the entire rows - // Swapping row 'selected' with row 'top' - blas::swap(cols, &A[selectedRows[i]], lda, &A[top], lda); + int64_t top = 0; // Keeps track of the topmost unselected row + + if(layout == blas::Layout::ColMajor) { + for (int64_t i=0; i < d; i++) { + if (selectedRows[i] != top) { + // Use BLAS swap to swap the entire rows + // Swapping row 'selected' with row 'top' + blas::swap(cols, &A[selectedRows[i]], rows, &A[top], rows); + } + top++; + } + } + else { + // For `RowMajor` ordering + for (int64_t i=0; i < d; i++) { + if (selectedRows[i] != top) { + blas::swap(cols, &A[cols * selectedRows[i]], 1, &A[cols * top], 1); + } + top++; } - top++; } } @@ -568,7 +574,7 @@ inline void lmiget( vals.data() // Placeholder ); - // permuteRowsToTop(layout, m, n, selected_rows, d, A); + permuteRowsToTop(layout, m, n, selected_rows, d, A); free(diag); free(selected_rows); From eec181366441fc5f7cb63359cfebcd01e2e88ef5 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 10 Oct 2024 10:17:08 -0700 Subject: [PATCH 11/26] Added `permuteColsToLeft` && `rmiget` --- RandBLAS/trig_skops.hh | 83 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index f0a4909..ec6c8a3 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -203,6 +203,38 @@ namespace RandBLAS { } } + template + void permuteColsToLeft( + blas::Layout layout, + int64_t rows, + int64_t cols, + sint_t* selectedCols, + int64_t d, // size of `selectedRows` + T* A + ) { + int64_t left = 0; // Keeps track of the topmost unselected column + + if(layout == blas::Layout::ColMajor) { + for (int64_t i=0; i < d; i++) { + if (selectedCols[i] != left) { + // Use BLAS::swap to swap entire columns at once + // Swapping col 'selected' with col 'top' + blas::swap(rows, &A[rows * selectedCols[i]], 1, &A[rows * left], 1); + } + left++; + } + } + else { + // For `RowMajor` ordering + for (int64_t i=0; i < d; i++) { + if (selectedCols[i] != left) { + blas::swap(rows, &A[selectedCols[i]], cols, &A[left], cols); + } + left++; + } + } + } + void fht_left_col_major(double *buf, int log_n, int num_rows, int num_cols) { int n = 1 << log_n; @@ -346,7 +378,6 @@ namespace RandBLAS { int64_t num_cols ) { - //TODO: if(left && layout == blas::Layout::ColMajor) fht_left_col_major(buff, log_n, num_rows, num_cols); else if(left && layout == blas::Layout::RowMajor) @@ -552,7 +583,7 @@ inline void lmiget( //Step 1: Scale with `D` //Populating `diag` - generateRademacherVector_r123(diag, key[3], ctr[0], n); + generateRademacherVector_r123(diag, key[0], ctr[0], n); applyDiagonalRademacher(true, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform @@ -579,4 +610,52 @@ inline void lmiget( free(diag); free(selected_rows); } + + +template +inline void rmiget( + blas::Layout layout, + RandBLAS::RNGState random_state, + int64_t m, // `A` is `(m x n)` + int64_t n, + int64_t d, // `d` is the number of cols that have to be permuted by `\Pi` + T* A // data-matrix +) +{ + // Size of the Rademacher entries = |A_cols| + //TODO: Change `diag` to float/doubles (same data type as the matrix) + sint_t* diag = new sint_t[m]; + sint_t* selected_cols = new sint_t[d]; + + auto [ctr, key] = random_state; + + //Step 1: Scale with `D` + //Populating `diag` + generateRademacherVector_r123(diag, key[0], ctr[0], n); + applyDiagonalRademacher(false, layout, m, n, A, diag); + + //Step 2: Apply the Hadamard transform + fht_dispatch(false, layout, A, std::log2(MAX(m, n)), m, n); + + //Step 3: Permute the rows + std::vector idxs_minor(d); // Placeholder + std::vector vals(d); // Placeholder + + // Populating `selected_rows` + //TODO: Do I return this at some point? + RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( + random_state, + d, // Number of samples (vec_nnz) + m, // Total number of elements (dim_major) + 1, // Single sample round (dim_minor) + selected_cols, // Holds the required output + idxs_minor.data(), // Placeholder + vals.data() // Placeholder + ); + + permuteColsToLeft(layout, m, n, selected_cols, d, A); + + free(diag); + free(selected_cols); +} } From 10470bb8f88d400f70f6db7bb431c6ffcd11be4a Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 10 Oct 2024 15:23:04 -0700 Subject: [PATCH 12/26] Removed old `TrigSkOp` code --- RandBLAS/trig_skops.hh | 172 +---------------------------------------- 1 file changed, 2 insertions(+), 170 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index ec6c8a3..6461360 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -28,10 +28,8 @@ namespace RandBLAS { /// public API /// template - // void generateRademacherVector_r123(std::vector &buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { void generateRademacherVector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { typedef r123::Philox2x32 RNG; - // std::vector rademacherVector(n); // Use OpenMP to parallelize the Rademacher vector generation #pragma omp parallel @@ -387,176 +385,10 @@ namespace RandBLAS { else fht_right_row_major(buff, log_n, num_rows, num_cols); } - - enum class TrigDistName: char { - Fourier = 'F', - - // --------------------------------------------------------------------------- - - Hadamard = 'H' - }; - - struct TrigDist { - const int64_t n_rows; - const int64_t n_cols; - - int64_t dim_short; - int64_t dim_long; - - const TrigDistName family; - - TrigDist( - int64_t n_rows, - int64_t n_cols, - TrigDistName tn = TrigDistName::Hadamard - ) : n_rows(n_rows), n_cols(n_cols), family(tn) { - dim_short = MIN(n_rows, n_cols); - dim_long = MAX(n_rows, n_cols); -} -}; - - template - struct TrigSkOp { - using generator = RNG; - using state_type = RNGState; - using buffer_type = T; - - //TODO: Where should the logic for deciding the size of `H` to use go? - // Since that will be accompanied by padding of (DA) maybe it should - // go inside of `lskget`? - const int64_t n_rows; - const int64_t n_cols; - int64_t dim_short; - int64_t dim_long; - // int64_t n_sampled; - - const TrigDist dist; - - - const RNGState seed_state; - RNGState next_state; - - const blas::Layout layout; - const bool sketchFromLeft = true; - bool known_filled = false; - - sint_t* DiagonalRademacher = nullptr; - sint_t* SampledRows = nullptr; - - TrigSkOp( - TrigDist dist, - RNGState const &state, - blas::Layout layout, - bool known_filled = false - ) : n_rows(dist.n_rows), n_cols(dist.n_cols), dist(dist), seed_state(state), known_filled(known_filled), dim_short(dist.dim_short), dim_long(dist.dim_long), layout(layout){ - // Memory for Rademacher diagonal gets allocated here - // Number of rows/cols to be sampled gets decided here - // i.e. `n_sampled` gets set - if(sketchFromLeft) - DiagonalRademacher = new sint_t[n_rows]; - else - DiagonalRademacher = new sint_t[n_cols]; - - SampledRows = new sint_t[dim_short]; - - //TODO: Logic to compute the number of samples that we require `r` - // this can be shown to depend on the maximal row norm of the data matrix - //NOTE: Do not have access to this in the data-oblivious regime --- how - // do we get access? -}; - - TrigSkOp( - TrigDistName family, - int64_t n_rows, - int64_t n_cols, - uint32_t key - ) : n_rows(n_rows), n_cols(n_cols), dist(TrigDist{n_rows, n_cols, family}), seed_state() {}; - - //TODO: Write a proper deconstructor - //Free up DiagonalRademacher && SampledRows - ~TrigSkOp(); -}; - -template -TrigSkOp::~TrigSkOp() { - delete [] this->DiagonalRademacher; - delete [] this->SampledRows; -}; - -template -RandBLAS::RNGState fill_trig( - TrigSkOp &Tr -) { - /** - * Will do the work of filling in the diagonal Rademacher entries - * and selecting the rows/cols to be sampled - */ - auto [ctr, key] = Tr.seed_state; - - // Fill in the Rademacher diagonal - if(Tr.sketchFromLeft) - generateRademacherVector_r123(Tr.DiagonalRademacher, key[0], ctr[0], Tr.n_rows); - else - generateRademacherVector_r123(Tr.DiagonalRademacher, key[0], ctr[0], Tr.n_cols); - - //NOTE: Select the rows/cols to be sampled --- use the `repeated_fisher_yates` function - int64_t r = Tr.dim_short; - int64_t d = Tr.dim_long; - - std::vector idxs_minor(r); // Placeholder - std::vector vals(r); // Placeholder - - Tr.next_state = RandBLAS::repeated_fisher_yates( - Tr.seed_state, - r, // Number of samples (vec_nnz) - d, // Total number of elements (dim_major) - 1, // Single sample round (dim_minor) - Tr.SampledRows, // Holds the required output - idxs_minor.data(), // Placeholder - vals.data() // Placeholder - ); - Tr.known_filled = true; - return Tr.next_state; -} } namespace RandBLAS::trig { - -// Performs the actual application of the fast trigonometric transform -// Only called after calling `fill_trig` -template -inline void lskget( - blas::Layout layout, - blas::Op opS, - blas::Op opA, - int64_t d, // B is d-by-n - int64_t n, // op(A) is m-by-n - int64_t m, // op(S) is d-by-m - T alpha, - TrigSkOp &Tr, - int64_t ro_s, - int64_t co_s, - const T* A, // data-matrix - int64_t lda, - T beta, - T* B, // output matrix - int64_t ldb -) { - if (!Tr.known_filled) - fill_trig(Tr); - - // Applying the diagonal transformation - applyDiagonalRademacher(layout, opA, n, m, Tr.DiagonalRademacher, A, B); - // applyDiagonalRademacher(m, n, A, Tr.DiagonalRademacher); - - //TODO: Apply the Hadamard transform - - //... and finally permute the rows - // permuteRowsToTop(m, n, Tr.SampledRows, B, ldb); -} - - /* * These functions apply an in-place, SRHT-like transform to the input matrix * i.e. A <- (\Pi H D)A OR A <- A(D H \Pi) (which is equivalent to A <- A(\Pi H D)^{-1}) @@ -593,7 +425,7 @@ inline void lmiget( std::vector idxs_minor(d); // Placeholder std::vector vals(d); // Placeholder - // Populating `selected_rows` + // Populating `selected_rows` //TODO: Do I return this at some point? RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( random_state, @@ -641,7 +473,7 @@ inline void rmiget( std::vector idxs_minor(d); // Placeholder std::vector vals(d); // Placeholder - // Populating `selected_rows` + // Populating `selected_rows` //TODO: Do I return this at some point? RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( random_state, From b2aabf8ffd48624ae67cc9726634f1fb9f8b2616 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Sat, 12 Oct 2024 15:03:38 -0700 Subject: [PATCH 13/26] First review --- RandBLAS/trig_skops.hh | 126 +++++++++++------------------------------ 1 file changed, 34 insertions(+), 92 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 6461360..a07af5c 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -6,14 +6,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include @@ -27,69 +25,52 @@ namespace RandBLAS { /// WARNING: None of the following functions or overloads thereof are part of the /// public API /// - template - void generateRademacherVector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { - typedef r123::Philox2x32 RNG; - - // Use OpenMP to parallelize the Rademacher vector generation - #pragma omp parallel - { - // Each thread has its own RNG instance - RNG rng; - RNG::key_type k = {{key_seed + omp_get_thread_num()}}; // Unique key for each thread + // Generates a vector of Rademacher entries using the Random123 library + template + void generateRademacherVector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { + RNG rng; - // Thread-local counter - RNG::ctr_type c; + typename RNG::ctr_type c; + typename RNG::key_type key = {{key_seed}}; - // Parallel for loop - #pragma omp for - for (int i = 0; i < n; ++i) { - // Set the counter for each random number (unique per thread) - c[0] = ctr_seed + i; // Ensure the counter is unique across threads + // Sequential loop for generating Rademacher entries + for (int i = 0; i < n; ++i) { + // Set the counter for each random number + c[0] = ctr_seed + i; // Ensure each counter is unique - // Generate a 2x32-bit random number using the Philox generator - RNG::ctr_type r = rng(c, k); + // Generate a 2x32-bit random number using the Philox generator + typename RNG::ctr_type r = rng(c, key); - // Convert the random number into a float in [0, 1) using u01fixedpt - float randValue = r123::u01fixedpt(r.v[0]); + // Convert the random number into a float in [0, 1) using u01fixedpt + float randValue = r123::u01fixedpt(r.v[0]); - // Convert the float into a Rademacher entry (-1 or 1) - buff[i] = randValue < 0.5 ? -1 : 1; - } + // Convert the float into a Rademacher entry (-1 or 1) + buff[i] = randValue < 0.5 ? -1 : 1; } } - std::vector generateRademacherVector_parallel(int64_t n) { - std::vector rademacherVec(n); + template + RandBLAS::RNGState generateRademacherVector_r123(sint_t* buff, int64_t n, RandBLAS::RNGState seed_state) { + RNG rng; + auto [ctr, key] = seed_state; - #pragma omp parallel - { - // Each thread gets its own random number generator - std::random_device rd; - std::mt19937 gen(rd()); - std::bernoulli_distribution bernoulli(0.5); + for (int64_t i = 0; i < n; ++i) { + typename RNG::ctr_type r = rng(ctr, key); - #pragma omp for - for (int64_t i = 0; i < n; ++i) { - rademacherVec[i] = bernoulli(gen) ? 1 : -1; - } - } + float randValue = r123::u01fixedpt(r.v[0]); - return rademacherVec; - } + buff[i] = randValue < 0.5 ? -1 : 1; - template - void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A) { - std::vector diag = generateRademacherVector_parallel(cols); + ctr.incr(); + } - for(int col=0; col < cols; col++) { - if(diag[col] > 0) - continue; - blas::scal(rows, diag[col], &A[col * rows], 1); - } + // Return the updated RNGState (with the incremented counter) + return RandBLAS::RNGState {ctr, key}; } + // Catch-all method for applying the diagonal Rademacher + // entries in-place to an input matrix, `A` template void applyDiagonalRademacher( bool left, @@ -98,8 +79,9 @@ namespace RandBLAS { int64_t cols, T* A, sint_t* diag - ) - { + ) { + //TODO: Investigate better schemes for performing the scaling + //TODO: Move to `RandBLAS/util.hh` if(left && layout == blas::Layout::ColMajor) { for(int col=0; col < cols; col++) { if(diag[col] > 0) @@ -130,43 +112,6 @@ namespace RandBLAS { } } - // `applyDiagonalRademacher`should have a similar API as `sketch_general` - // Will be called inside of `lskget` - // B <- alpha * diag(Rademacher) * op(A) + beta * B - // `alpha`, `beta` aren't needed and shape(A) == shape(B) - // lda == ldb - template - void applyDiagonalRademacher( - blas::Layout layout, - blas::Op opA, // taken from `sketch_general/lskget` - int64_t n, // everything is the same size as `op(A)`: m-by-n - int64_t m, - // std::vector &diag, - sint_t* diag, - const T* A, // The data matrix that won't be modified - // int64_t lda, - T* B // The destination matrix - // int64_t ldb - ) { - // B <- alpha * diag(Rademacher) * op(A) + beta * B - // `alpha`, `beta` aren't needed - // && shape(A) == shape(B) && lda == ldb && shape({A | B}) = m-by-n - //NOTE: Use `layout` and `opA` for working with RowMajor data - int64_t lda = m; - int64_t ldb = m; - - //NOTE: Should the copy be made inside of `applyDiagonalRademacher` or - // should it be inside of `lskget`? - lapack::lacpy(lapack::MatrixType::General, m, n, A, lda, B, ldb); - - applyDiagonalRademacher(m, n, B, diag); - } - - - // `permuteRowsToTop` should just take in `B` as an input - // `B` will be passed in by the user to `sketch_general/lskget` and `permuteRowsToTop` will take in - // the `B` that has been modified by `applyRademacherDiagonal` - // Will also be called inside of `lskget` template void permuteRowsToTop( blas::Layout layout, @@ -176,8 +121,6 @@ namespace RandBLAS { int64_t d, // size of `selectedRows` T* A ) { - //TODO: There should be a similar `permuteColsToLeft` for sketching - //from the right int64_t top = 0; // Keeps track of the topmost unselected row if(layout == blas::Layout::ColMajor) { @@ -404,8 +347,7 @@ inline void lmiget( int64_t n, int64_t d, // `d` is the number of rows that have to be permuted by `\Pi` T* A // data-matrix -) -{ +) { // Size of the Rademacher entries = |A_cols| //TODO: Change `diag` to float/doubles (same data type as the matrix) sint_t* diag = new sint_t[n]; From bc33741fe17f8dfa00353889386a708eaeb0be80 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Sat, 12 Oct 2024 17:10:12 -0700 Subject: [PATCH 14/26] Some stylistic cleanups --- RandBLAS/trig_skops.hh | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index a07af5c..8b83cee 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -28,7 +28,7 @@ namespace RandBLAS { // Generates a vector of Rademacher entries using the Random123 library template - void generateRademacherVector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { + void generate_rademacher_vector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { RNG rng; typename RNG::ctr_type c; @@ -43,24 +43,24 @@ namespace RandBLAS { typename RNG::ctr_type r = rng(c, key); // Convert the random number into a float in [0, 1) using u01fixedpt - float randValue = r123::u01fixedpt(r.v[0]); + float rand_value = r123::u01fixedpt(r.v[0]); // Convert the float into a Rademacher entry (-1 or 1) - buff[i] = randValue < 0.5 ? -1 : 1; + buff[i] = rand_value < 0.5 ? -1 : 1; } } template - RandBLAS::RNGState generateRademacherVector_r123(sint_t* buff, int64_t n, RandBLAS::RNGState seed_state) { + RandBLAS::RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RandBLAS::RNGState seed_state) { RNG rng; auto [ctr, key] = seed_state; for (int64_t i = 0; i < n; ++i) { typename RNG::ctr_type r = rng(ctr, key); - float randValue = r123::u01fixedpt(r.v[0]); + float rand_value = r123::u01fixedpt(r.v[0]); - buff[i] = randValue < 0.5 ? -1 : 1; + buff[i] = rand_value < 0.5 ? -1 : 1; ctr.incr(); } @@ -72,7 +72,7 @@ namespace RandBLAS { // Catch-all method for applying the diagonal Rademacher // entries in-place to an input matrix, `A` template - void applyDiagonalRademacher( + void apply_diagonal_rademacher( bool left, blas::Layout layout, int64_t rows, @@ -117,18 +117,18 @@ namespace RandBLAS { blas::Layout layout, int64_t rows, int64_t cols, - sint_t* selectedRows, - int64_t d, // size of `selectedRows` + sint_t* selected_rows, + int64_t d, // size of `selected_rows` T* A ) { int64_t top = 0; // Keeps track of the topmost unselected row if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { - if (selectedRows[i] != top) { + if (selected_rows[i] != top) { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' - blas::swap(cols, &A[selectedRows[i]], rows, &A[top], rows); + blas::swap(cols, &A[selected_rows[i]], rows, &A[top], rows); } top++; } @@ -136,8 +136,8 @@ namespace RandBLAS { else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { - if (selectedRows[i] != top) { - blas::swap(cols, &A[cols * selectedRows[i]], 1, &A[cols * top], 1); + if (selected_rows[i] != top) { + blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); } top++; } @@ -149,7 +149,7 @@ namespace RandBLAS { blas::Layout layout, int64_t rows, int64_t cols, - sint_t* selectedCols, + sint_t* selected_cols, int64_t d, // size of `selectedRows` T* A ) { @@ -157,10 +157,10 @@ namespace RandBLAS { if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { - if (selectedCols[i] != left) { + if (selected_cols[i] != left) { // Use BLAS::swap to swap entire columns at once // Swapping col 'selected' with col 'top' - blas::swap(rows, &A[rows * selectedCols[i]], 1, &A[rows * left], 1); + blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); } left++; } @@ -168,8 +168,8 @@ namespace RandBLAS { else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { - if (selectedCols[i] != left) { - blas::swap(rows, &A[selectedCols[i]], cols, &A[left], cols); + if (selected_cols[i] != left) { + blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } left++; } @@ -357,8 +357,8 @@ inline void lmiget( //Step 1: Scale with `D` //Populating `diag` - generateRademacherVector_r123(diag, key[0], ctr[0], n); - applyDiagonalRademacher(true, layout, m, n, A, diag); + generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + apply_diagonal_rademacher(true, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform fht_dispatch(true, layout, A, std::log2(MAX(m, n)), m, n); @@ -405,8 +405,8 @@ inline void rmiget( //Step 1: Scale with `D` //Populating `diag` - generateRademacherVector_r123(diag, key[0], ctr[0], n); - applyDiagonalRademacher(false, layout, m, n, A, diag); + generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + apply_diagonal_rademacher(false, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform fht_dispatch(false, layout, A, std::log2(MAX(m, n)), m, n); From 348bf050ee5ca1c39bfd4bce09922781f9260bda Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Sun, 13 Oct 2024 10:24:24 -0700 Subject: [PATCH 15/26] Templating numeric precision in FHT implementation --- RandBLAS/trig_skops.hh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 8b83cee..83fd565 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -176,7 +176,8 @@ namespace RandBLAS { } } - void fht_left_col_major(double *buf, int log_n, int num_rows, int num_cols) { + template + void fht_left_col_major(T *buf, int log_n, int num_rows, int num_cols) { int n = 1 << log_n; // Apply FHT to each column independently @@ -211,7 +212,8 @@ namespace RandBLAS { } } - void fht_left_row_major(double *buf, int log_n, int num_rows, int num_cols) { + template + void fht_left_row_major(T *buf, int log_n, int num_rows, int num_cols) { int n = 1 << log_n; // Apply FHT to each column independently @@ -243,7 +245,8 @@ namespace RandBLAS { } } - void fht_right_row_major(double *buf, int log_n, int num_rows, int num_cols) { + template + void fht_right_row_major(T *buf, int log_n, int num_rows, int num_cols) { int n = 1 << log_n; // Apply FHT to each row independently @@ -278,7 +281,8 @@ namespace RandBLAS { } } - void fht_right_col_major(double *buf, int log_n, int num_rows, int num_cols) { + template + void fht_right_col_major(T *buf, int log_n, int num_rows, int num_cols) { int n = 1 << log_n; // Apply FHT to each row independently @@ -310,10 +314,11 @@ namespace RandBLAS { } } + template void fht_dispatch( bool left, blas::Layout layout, - double* buff, + T* buff, int64_t log_n, int64_t num_rows, int64_t num_cols From f13086b26f4925a9aa761a98e37debb62721be5e Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Sun, 13 Oct 2024 13:48:02 -0700 Subject: [PATCH 16/26] Corrected ommited templating + `int->int64_t` --- RandBLAS/trig_skops.hh | 94 +++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 83fd565..4447205 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -35,7 +35,7 @@ namespace RandBLAS { typename RNG::key_type key = {{key_seed}}; // Sequential loop for generating Rademacher entries - for (int i = 0; i < n; ++i) { + for (int64_t i = 0; i < n; ++i) { // Set the counter for each random number c[0] = ctr_seed + i; // Ensure each counter is unique @@ -83,28 +83,28 @@ namespace RandBLAS { //TODO: Investigate better schemes for performing the scaling //TODO: Move to `RandBLAS/util.hh` if(left && layout == blas::Layout::ColMajor) { - for(int col=0; col < cols; col++) { + for(int64_t col=0; col < cols; col++) { if(diag[col] > 0) continue; blas::scal(rows, diag[col], &A[col * rows], 1); } } else if(left && layout == blas::Layout::RowMajor) { - for(int col=0; col < cols; col++) { + for(int64_t col=0; col < cols; col++) { if(diag[col] > 0) continue; blas::scal(rows, diag[col], &A[col], cols); } } else if(!left && layout == blas::Layout::ColMajor) { - for(int row = 0; row < rows; row++) { + for(int64_t row = 0; row < rows; row++) { if(diag[row] > 0) continue; blas::scal(cols, diag[row], &A[row], rows); } } else { - for(int row = 0; row < rows; row++) { + for(int64_t row = 0; row < rows; row++) { if(diag[row] > 0) continue; blas::scal(cols, diag[row], &A[row * cols], 1); @@ -177,26 +177,26 @@ namespace RandBLAS { } template - void fht_left_col_major(T *buf, int log_n, int num_rows, int num_cols) { - int n = 1 << log_n; + void fht_left_col_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; // Apply FHT to each column independently - for (int col = 0; col < num_cols; ++col) { + for (int64_t col = 0; col < num_cols; ++col) { // Pointer to the beginning of the current column in the Column-Major order - double* col_buf = buf + col * num_rows; + T* col_buf = buf + col * num_rows; // Apply the original FHT on this column - for (int i = 0; i < log_n; ++i) { - int s1 = 1 << i; - int s2 = s1 << 1; - for (int j = 0; j < n; j += s2) { - for (int k = 0; k < s1; ++k) { + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros bool b1 = j + k < num_rows; bool b2 = j + k + s1 < num_rows; - double u = b1 ? col_buf[j + k] : 0; - double v = b2 ? col_buf[j + k + s1] : 0; + T u = b1 ? col_buf[j + k] : 0; + T v = b2 ? col_buf[j + k + s1] : 0; if(b1 && b2) { col_buf[j + k] = u + v; col_buf[j + k + s1] = u - v; @@ -213,23 +213,23 @@ namespace RandBLAS { } template - void fht_left_row_major(T *buf, int log_n, int num_rows, int num_cols) { - int n = 1 << log_n; + void fht_left_row_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; // Apply FHT to each column independently - for (int col = 0; col < num_cols; ++col) { + for (int64_t col = 0; col < num_cols; ++col) { // Apply the original FHT on this column - for (int i = 0; i < log_n; ++i) { - int s1 = 1 << i; - int s2 = s1 << 1; - for (int j = 0; j < n; j += s2) { - for (int k = 0; k < s1; ++k) { + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros bool b1 = j + k < num_rows; bool b2 = j + k + s1 < num_rows; - double u = b1 ? buf[(j + k) * num_cols + col] : 0; - double v = b2 ? buf[(j + k + s1) * num_cols + col] : 0; + T u = b1 ? buf[(j + k) * num_cols + col] : 0; + T v = b2 ? buf[(j + k + s1) * num_cols + col] : 0; if(b1 && b2) { buf[(j + k) * num_cols + col] = u + v; buf[(j + k + s1) * num_cols + col] = u - v; @@ -246,26 +246,26 @@ namespace RandBLAS { } template - void fht_right_row_major(T *buf, int log_n, int num_rows, int num_cols) { - int n = 1 << log_n; + void fht_right_row_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; // Apply FHT to each row independently - for (int row = 0; row < num_rows; ++row) { + for (int64_t row = 0; row < num_rows; ++row) { // Pointer to the beginning of the current row in RowMajor order - double* row_buf = buf + row * num_cols; + T * row_buf = buf + row * num_cols; // Apply the original FHT on this row - for (int i = 0; i < log_n; ++i) { - int s1 = 1 << i; - int s2 = s1 << 1; - for (int j = 0; j < n; j += s2) { - for (int k = 0; k < s1; ++k) { + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros bool b1 = j + k < num_cols; bool b2 = j + k + s1 < num_cols; - double u = b1 ? row_buf[j + k] : 0; - double v = b2 ? row_buf[j + k + s1] : 0; + T u = b1 ? row_buf[j + k] : 0; + T v = b2 ? row_buf[j + k + s1] : 0; if(b1 && b2) { row_buf[j + k] = u + v; row_buf[j + k + s1] = u - v; @@ -282,23 +282,23 @@ namespace RandBLAS { } template - void fht_right_col_major(T *buf, int log_n, int num_rows, int num_cols) { - int n = 1 << log_n; + void fht_right_col_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; // Apply FHT to each row independently - for (int row= 0; row < num_rows; ++row) { + for (int64_t row= 0; row < num_rows; ++row) { // Apply the original FHT on this column - for (int i = 0; i < log_n; ++i) { - int s1 = 1 << i; - int s2 = s1 << 1; - for (int j = 0; j < n; j += s2) { - for (int k = 0; k < s1; ++k) { + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros bool b1 = j + k < num_cols; bool b2 = j + k + s1 < num_cols; - double u = b1 ? buf[(j + k) * num_rows + row] : 0; - double v = b2 ? buf[(j + k + s1) * num_rows + row] : 0; + T u = b1 ? buf[(j + k) * num_rows + row] : 0; + T v = b2 ? buf[(j + k + s1) * num_rows + row] : 0; if(b1 && b2) { buf[(j + k) * num_rows + row] = u + v; buf[(j + k + s1) * num_rows + row] = u - v; From 884f96e60c3bfb9984452dc499decb5130d9308b Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 16 Oct 2024 11:37:32 -0700 Subject: [PATCH 17/26] First version of tests, VERY UNSTABLE --- RandBLAS/trig_skops.hh | 19 +- test/test_matmul_cores/test_trig.cc | 354 ++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+), 6 deletions(-) create mode 100644 test/test_matmul_cores/test_trig.cc diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 4447205..6fba319 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -125,21 +124,25 @@ namespace RandBLAS { if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_rows[i] == top, + "The list of provided indices should be unique"); if (selected_rows[i] != top) { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' - blas::swap(cols, &A[selected_rows[i]], rows, &A[top], rows); + blas::swap(cols, &A[top], rows, &A[selected_rows[i]], rows); + // top = selected_rows[i]; } - top++; } } else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_rows[i] == top, + "The list of provided indices should be unique"); if (selected_rows[i] != top) { blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); + // top = selected_rows[i]; } - top++; } } } @@ -157,21 +160,25 @@ namespace RandBLAS { if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_cols[i] == left, + "The list of provided indices should be unique"); if (selected_cols[i] != left) { // Use BLAS::swap to swap entire columns at once // Swapping col 'selected' with col 'top' blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); } - left++; + // left++; } } else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_cols[i] == left, + "The list of provided indices should be unique"); if (selected_cols[i] != left) { blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } - left++; + // left++; } } } diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc new file mode 100644 index 0000000..841de18 --- /dev/null +++ b/test/test_matmul_cores/test_trig.cc @@ -0,0 +1,354 @@ +// Copyright, 2024. See LICENSE for copyright holder information. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// (1) Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// (2) Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// (3) Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// #include "test/test_matmul_cores/linop_common.hh" +#include "RandBLAS.hh" +#include "RandBLAS/base.hh" +#include "RandBLAS/trig_skops.hh" +#include + +#include +#include +#include +#include + +using RandBLAS::trig::lmiget; +using RandBLAS::trig::rmiget; +using RandBLAS::generate_rademacher_vector_r123; +using RandBLAS::apply_diagonal_rademacher; +using RandBLAS::permuteRowsToTop; +using RandBLAS::permuteColsToLeft; +using RandBLAS::fht_dispatch; +using Eigen::MatrixXd; + + +class TestLMIGET : public::testing::Test +{ + protected: + virtual void SetUp(){}; + + virtual void TearDown(){}; + + inline static std::vector keys {0, 42}; + + // Helper function for explicitly generating a Hadamard matrix + // (Note that ColMajor and RowMajor storage is identical for `H`) + static std::vector generate_hadamard(int64_t log_n) { + int64_t size = 1 << log_n; // size = 2^n + std::vector> H(size, std::vector(size, 1)); // Initialize H_1 + + // Sylvester's construction: recursively build the matrix + for (int n = 1; n <= log_n; ++n) { + double curr_size = 1 << n; // Current size of the matrix is 2^n + for (int i = 0; i < curr_size / 2; ++i) { + for (int j = 0; j < curr_size / 2; ++j) { + // Fill the bottom-left and bottom-right quadrants + H[i + curr_size / 2][j] = H[i][j]; // Copy the top-left quadrant to bottom-left + H[i][j + curr_size / 2] = H[i][j]; // Copy the top-left quadrant to top-right + H[i + curr_size / 2][j + curr_size / 2] = -H[i][j]; // Fill bottom-right with negative values + } + } + } + + // Flatten into a vector in ColMajor order + std::vector H_flat(size * size); + + for (int col = 0; col < size; ++col) { + for (int row = 0; row < size; ++row) { + H_flat[col * size + row] = H[row][col]; + } + } + + return H_flat; + } + + enum class transforms {diag_scale, hadamard, permute}; + + // Tests to verify correctness of each of the transforms + template + static void correctness( + uint32_t seed, + transforms transform, + int64_t m, // Generated data matrix, `A` is of size `(m x n)` + int64_t n, + int64_t d, // The permutation matrix permutes `d` of the final rows/cols + bool left, + blas::Layout layout, + double epsilon=1e-5 + ) { + // Grabbing a random matrix + Eigen::Matrix A(m, n); + A.setRandom(); + + // Deep copy + MatrixXd B = A; + + switch (transform) { + case transforms::permute: { + // Simply compares against Eigen::PermutationMatrix + Eigen::PermutationMatrix perm(5); + + // Define the indices of the permutation manually + // Eigen::VectorXi indices = left ? Eigen::VectorXi(m) : Eigen::VectorXi(n); + + std::vector V = left ? std::vector(m) : std::vector(n); + + int cnt = 0; + // int cnt = 0; + for(int i = 0; i < V.size(); i++) { + if(i == 0) + V[i] = V.size() - 1; + else if(i == V.size() - 1) + V[i] = 0; + else + V[i] = cnt; + cnt++; + } + + Eigen::VectorXi indices = Eigen::Map(V.data(), V.size()); + + // Set the indices in the permutation matrix + perm.indices() = indices; + + // std::reverse(V.begin(), V.end()); + sint_t* v = new sint_t; + *v = V.size() - 1; + if(left) + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A.data()); + + std::cout << "orig" << std::endl << B << std::endl; + std::cout << "eigen_permute" << std::endl << perm * B << std::endl; + std::cout << "I permute" << std::endl << A << std::endl; + // Or just do A.isApprox(B) + double norm_permute = 0.0; + if(left) + norm_permute = (A - perm * B).norm(); + else { + norm_permute = (A - B * perm).norm(); + std::cout << norm_permute << std::endl; + } + + // Or do A.isApprox(H * B) + randblas_require(norm_permute < epsilon); + + break; + } + case transforms::hadamard: { + // Here, simply check against explicit application of the Hadamard matrix + RandBLAS::fht_dispatch(left, layout, A.data(), std::log2(m), m, n); + std::vector H_vec = generate_hadamard(std::log2(m)); + //TODO: Should have a check here to enforce that `m` is a power of 2 (since + // my `generate_hadamard` function does not take care to pad an input matrix) + MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(m))), int(std::pow(2, std::log2(m)))); + + double norm_hadamard = (A - H * B).norm(); + randblas_require(norm_hadamard < epsilon); + + break; + } + case transforms::diag_scale: { + // Scales all rows/cols by -1 and checks if A == -A + std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A.data(), buff.data()); + + double norm_diag = (A + B).norm(); + randblas_require(norm_diag < epsilon); + + break; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////// +// +// +// Checking correctness of each of the transforms +// +// +//////////////////////////////////////////////////////////////////////// + +TEST_F(TestLMIGET, test_diag_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + 0, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_diag_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + 0, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_diag_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + 0, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_diag_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + 0, + false, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_permute_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + 0, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_permute_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + 0, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_permute_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + 0, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_permute_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + 0, + false, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 128, + 100, + 0, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 100, + 128, + 0, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 128, + 100, + 0, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 100, + 128, + 0, + false, + blas::Layout::RowMajor + ); +} From 018acd83df2b61a5a8a98050488bed403232500c Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 16 Oct 2024 17:05:31 -0700 Subject: [PATCH 18/26] Hadamard && Diag tests working --- RandBLAS/trig_skops.hh | 10 ++- test/test_matmul_cores/test_trig.cc | 107 +++++++++++++++++++++------- 2 files changed, 84 insertions(+), 33 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 6fba319..d0c7f31 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -122,10 +122,11 @@ namespace RandBLAS { ) { int64_t top = 0; // Keeps track of the topmost unselected row + //TODO: discuss precise semantics of `selected_rows` in this function if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { randblas_error_if_msg(selected_rows[i] == top, - "The list of provided indices should be unique"); + "The list of provided indices should be unique"); if (selected_rows[i] != top) { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' @@ -138,7 +139,8 @@ namespace RandBLAS { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { randblas_error_if_msg(selected_rows[i] == top, - "The list of provided indices should be unique"); + "The list of provided indices should be unique"); + std::cout << "see here" << selected_rows[i] << std::endl; if (selected_rows[i] != top) { blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); // top = selected_rows[i]; @@ -160,8 +162,6 @@ namespace RandBLAS { if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { - randblas_error_if_msg(selected_cols[i] == left, - "The list of provided indices should be unique"); if (selected_cols[i] != left) { // Use BLAS::swap to swap entire columns at once // Swapping col 'selected' with col 'top' @@ -173,8 +173,6 @@ namespace RandBLAS { else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { - randblas_error_if_msg(selected_cols[i] == left, - "The list of provided indices should be unique"); if (selected_cols[i] != left) { blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 841de18..321b33c 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -27,7 +27,6 @@ // POSSIBILITY OF SUCH DAMAGE. // -// #include "test/test_matmul_cores/linop_common.hh" #include "RandBLAS.hh" #include "RandBLAS/base.hh" #include "RandBLAS/trig_skops.hh" @@ -36,6 +35,7 @@ #include #include #include +#include #include using RandBLAS::trig::lmiget; @@ -88,6 +88,25 @@ class TestLMIGET : public::testing::Test return H_flat; } + static std::vector generate_random_vector(int size, double lower_bound, double upper_bound) { + // Create a random device and seed the random number generator + std::random_device rd; + std::mt19937 gen(rd()); + + // Define the distribution range for the random doubles + std::uniform_real_distribution<> dist(lower_bound, upper_bound); + + // Create a vector of the specified size + std::vector random_vector(size); + + // Generate random doubles and fill the vector + for (int i = 0; i < size; ++i) { + random_vector[i] = dist(gen); + } + + return random_vector; + } + enum class transforms {diag_scale, hadamard, permute}; // Tests to verify correctness of each of the transforms @@ -103,24 +122,25 @@ class TestLMIGET : public::testing::Test double epsilon=1e-5 ) { // Grabbing a random matrix - Eigen::Matrix A(m, n); - A.setRandom(); + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + Eigen::Map> A_row(A_vec.data(), m, n); // Deep copy - MatrixXd B = A; + MatrixXd B; + if(layout == blas::Layout::RowMajor) + B = A_row; + else + B = A_col; switch (transform) { case transforms::permute: { // Simply compares against Eigen::PermutationMatrix Eigen::PermutationMatrix perm(5); - // Define the indices of the permutation manually - // Eigen::VectorXi indices = left ? Eigen::VectorXi(m) : Eigen::VectorXi(n); - std::vector V = left ? std::vector(m) : std::vector(n); int cnt = 0; - // int cnt = 0; for(int i = 0; i < V.size(); i++) { if(i == 0) V[i] = V.size() - 1; @@ -136,24 +156,34 @@ class TestLMIGET : public::testing::Test // Set the indices in the permutation matrix perm.indices() = indices; - // std::reverse(V.begin(), V.end()); sint_t* v = new sint_t; *v = V.size() - 1; - if(left) - RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A.data()); - else - RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A.data()); + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_col.data()); + } - std::cout << "orig" << std::endl << B << std::endl; - std::cout << "eigen_permute" << std::endl << perm * B << std::endl; - std::cout << "I permute" << std::endl << A << std::endl; // Or just do A.isApprox(B) double norm_permute = 0.0; - if(left) - norm_permute = (A - perm * B).norm(); + if(left) { + if(layout == blas::Layout::RowMajor) + norm_permute = (A_row - perm * B).norm(); + else + norm_permute = (A_col - perm * B).norm(); + } else { - norm_permute = (A - B * perm).norm(); - std::cout << norm_permute << std::endl; + if(layout == blas::Layout::RowMajor) + norm_permute = (A_row - B * perm).norm(); + else + norm_permute = (A_row - B * perm).norm(); } // Or do A.isApprox(H * B) @@ -163,13 +193,29 @@ class TestLMIGET : public::testing::Test } case transforms::hadamard: { // Here, simply check against explicit application of the Hadamard matrix - RandBLAS::fht_dispatch(left, layout, A.data(), std::log2(m), m, n); - std::vector H_vec = generate_hadamard(std::log2(m)); - //TODO: Should have a check here to enforce that `m` is a power of 2 (since + int ld = (left) ? m : n; + if(layout == blas::Layout::ColMajor) + RandBLAS::fht_dispatch(left, layout, A_col.data(), std::log2(ld), m, n); + else + RandBLAS::fht_dispatch(left, layout, A_row.data(), std::log2(ld), m, n); + + std::vector H_vec = generate_hadamard(std::log2(ld)); + //TODO: Should have a check here to enforce that `m` and `n` are powers of 2 (since // my `generate_hadamard` function does not take care to pad an input matrix) - MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(m))), int(std::pow(2, std::log2(m)))); + MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(ld))), int(std::pow(2, std::log2(ld)))); + + double norm_hadamard = 0.0; + if(left) + if(layout == blas::Layout::RowMajor) + norm_hadamard = (A_row - H * B).norm(); + else + norm_hadamard = (A_col - H * B).norm(); + else + if(layout == blas::Layout::RowMajor) + norm_hadamard = (A_row - B * H).norm(); + else + norm_hadamard = (A_col - B * H).norm(); - double norm_hadamard = (A - H * B).norm(); randblas_require(norm_hadamard < epsilon); break; @@ -178,9 +224,16 @@ class TestLMIGET : public::testing::Test // Scales all rows/cols by -1 and checks if A == -A std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A.data(), buff.data()); + double norm_diag = 0.0; + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + double norm_diag = (A_row + B).norm(); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + double norm_diag = (A_col + B).norm(); + } - double norm_diag = (A + B).norm(); randblas_require(norm_diag < epsilon); break; From 0cfdcf1007732b658c67a97feefbd73d8e2216d3 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 17 Oct 2024 02:31:49 -0700 Subject: [PATCH 19/26] Final cleanups, all tests working --- RandBLAS/trig_skops.hh | 5 --- test/test_matmul_cores/test_trig.cc | 50 ++++++++++++----------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index d0c7f31..804fe89 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -131,7 +131,6 @@ namespace RandBLAS { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' blas::swap(cols, &A[top], rows, &A[selected_rows[i]], rows); - // top = selected_rows[i]; } } } @@ -140,10 +139,8 @@ namespace RandBLAS { for (int64_t i=0; i < d; i++) { randblas_error_if_msg(selected_rows[i] == top, "The list of provided indices should be unique"); - std::cout << "see here" << selected_rows[i] << std::endl; if (selected_rows[i] != top) { blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); - // top = selected_rows[i]; } } } @@ -167,7 +164,6 @@ namespace RandBLAS { // Swapping col 'selected' with col 'top' blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); } - // left++; } } else { @@ -176,7 +172,6 @@ namespace RandBLAS { if (selected_cols[i] != left) { blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } - // left++; } } } diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 321b33c..fde958c 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -116,7 +116,6 @@ class TestLMIGET : public::testing::Test transforms transform, int64_t m, // Generated data matrix, `A` is of size `(m x n)` int64_t n, - int64_t d, // The permutation matrix permutes `d` of the final rows/cols bool left, blas::Layout layout, double epsilon=1e-5 @@ -129,9 +128,9 @@ class TestLMIGET : public::testing::Test // Deep copy MatrixXd B; if(layout == blas::Layout::RowMajor) - B = A_row; + B = A_row; else - B = A_col; + B = A_col; switch (transform) { case transforms::permute: { @@ -141,13 +140,14 @@ class TestLMIGET : public::testing::Test std::vector V = left ? std::vector(m) : std::vector(n); int cnt = 0; + // int cnt = 0; for(int i = 0; i < V.size(); i++) { if(i == 0) - V[i] = V.size() - 1; + V[i] = V.size() - 1; else if(i == V.size() - 1) - V[i] = 0; + V[i] = 0; else - V[i] = cnt; + V[i] = cnt; cnt++; } @@ -158,32 +158,33 @@ class TestLMIGET : public::testing::Test sint_t* v = new sint_t; *v = V.size() - 1; + if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_row.data()); + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_col.data()); + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_row.data()); + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_col.data()); + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_col.data()); } // Or just do A.isApprox(B) double norm_permute = 0.0; if(left) { if(layout == blas::Layout::RowMajor) - norm_permute = (A_row - perm * B).norm(); + norm_permute = (A_row - perm * B).norm(); else - norm_permute = (A_col - perm * B).norm(); + norm_permute = (A_col - perm * B).norm(); } else { if(layout == blas::Layout::RowMajor) norm_permute = (A_row - B * perm).norm(); else - norm_permute = (A_row - B * perm).norm(); + norm_permute = (A_col - B * perm).norm(); } // Or do A.isApprox(H * B) @@ -193,11 +194,12 @@ class TestLMIGET : public::testing::Test } case transforms::hadamard: { // Here, simply check against explicit application of the Hadamard matrix + int ld = (left) ? m : n; if(layout == blas::Layout::ColMajor) - RandBLAS::fht_dispatch(left, layout, A_col.data(), std::log2(ld), m, n); + RandBLAS::fht_dispatch(left, layout, A_col.data(), std::log2(ld), m, n); else - RandBLAS::fht_dispatch(left, layout, A_row.data(), std::log2(ld), m, n); + RandBLAS::fht_dispatch(left, layout, A_row.data(), std::log2(ld), m, n); std::vector H_vec = generate_hadamard(std::log2(ld)); //TODO: Should have a check here to enforce that `m` and `n` are powers of 2 (since @@ -205,16 +207,18 @@ class TestLMIGET : public::testing::Test MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(ld))), int(std::pow(2, std::log2(ld)))); double norm_hadamard = 0.0; - if(left) + if(left) { if(layout == blas::Layout::RowMajor) norm_hadamard = (A_row - H * B).norm(); else norm_hadamard = (A_col - H * B).norm(); - else + } + else { if(layout == blas::Layout::RowMajor) norm_hadamard = (A_row - B * H).norm(); else norm_hadamard = (A_col - B * H).norm(); + } randblas_require(norm_hadamard < epsilon); @@ -257,7 +261,6 @@ TEST_F(TestLMIGET, test_diag_left_colmajor) { transforms::diag_scale, 100, 100, - 0, true, blas::Layout::ColMajor ); @@ -270,7 +273,6 @@ TEST_F(TestLMIGET, test_diag_right_colmajor) { transforms::diag_scale, 100, 100, - 0, false, blas::Layout::ColMajor ); @@ -283,7 +285,6 @@ TEST_F(TestLMIGET, test_diag_left_rowmajor) { transforms::diag_scale, 100, 100, - 0, true, blas::Layout::RowMajor ); @@ -296,7 +297,6 @@ TEST_F(TestLMIGET, test_diag_right_rowmajor) { transforms::diag_scale, 100, 100, - 0, false, blas::Layout::RowMajor ); @@ -309,7 +309,6 @@ TEST_F(TestLMIGET, test_permute_left_colmajor) { transforms::permute, 100, 100, - 0, true, blas::Layout::ColMajor ); @@ -322,7 +321,6 @@ TEST_F(TestLMIGET, test_permute_right_colmajor) { transforms::permute, 100, 100, - 0, false, blas::Layout::ColMajor ); @@ -335,7 +333,6 @@ TEST_F(TestLMIGET, test_permute_left_rowmajor) { transforms::permute, 100, 100, - 0, true, blas::Layout::RowMajor ); @@ -348,7 +345,6 @@ TEST_F(TestLMIGET, test_permute_right_rowmajor) { transforms::permute, 100, 100, - 0, false, blas::Layout::RowMajor ); @@ -361,7 +357,6 @@ TEST_F(TestLMIGET, test_hadamard_left_colmajor) { transforms::hadamard, 128, 100, - 0, true, blas::Layout::ColMajor ); @@ -374,7 +369,6 @@ TEST_F(TestLMIGET, test_hadamard_right_colmajor) { transforms::hadamard, 100, 128, - 0, false, blas::Layout::ColMajor ); @@ -387,7 +381,6 @@ TEST_F(TestLMIGET, test_hadamard_left_rowmajor) { transforms::hadamard, 128, 100, - 0, true, blas::Layout::RowMajor ); @@ -400,7 +393,6 @@ TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { transforms::hadamard, 100, 128, - 0, false, blas::Layout::RowMajor ); From 591522f0b3b6b34921bef27466b34841086331d5 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 17 Oct 2024 06:21:30 -0700 Subject: [PATCH 20/26] Added in invSRHT tests --- test/test_matmul_cores/test_trig.cc | 163 +++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 2 deletions(-) diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index fde958c..01b8913 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -231,11 +231,11 @@ class TestLMIGET : public::testing::Test double norm_diag = 0.0; if(layout == blas::Layout::RowMajor) { RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); - double norm_diag = (A_row + B).norm(); + norm_diag = (A_row + B).norm(); } else { RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); - double norm_diag = (A_col + B).norm(); + norm_diag = (A_col + B).norm(); } randblas_require(norm_diag < epsilon); @@ -244,6 +244,117 @@ class TestLMIGET : public::testing::Test } } } + + template + static void inverse_transform( + uint32_t seed, + int64_t m, // Generated data matrix, `A` is of size `(m x n)` + int64_t n, + int64_t d, + bool left, + blas::Layout layout, + double epsilon=1e-5 + ) { + // Grabbing a random matrix + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + // Eigen::Matrix A_col(m, n); + Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + // A_col.setRandom(); + Eigen::Map> A_row(A_vec.data(), m, n); + + // Deep copy + MatrixXd B; + if(layout == blas::Layout::RowMajor) + B = A_row; + else + B = A_col; + + //// Performing \Pi H D + // Step 1: setup the diagonal scaling + std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + } + + // Step 2: apply the hadamard transform + int ld = (left) ? m : n; + if(layout == blas::Layout::ColMajor){ + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + } + else { + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + } + + // Step 3: Permuting + std::vector indices(d); + + std::iota(indices.begin(), indices.end(), 1); + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + } + + //// Performing D H \Pi + + //Step 1: Un-permute + std::reverse(indices.begin(), indices.end()); + + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + } + + // Step-2: Apply H^{-1} + if(layout == blas::Layout::ColMajor) { + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + A_col = A_col * 1/std::pow(2, int(std::log2(ld))); + } + else { + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + A_row = A_row * 1/std::pow(2, int(std::log2(ld))); + } + + //Step-3: Inverting `D` + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + } + + double norm_inverse = 0.0; + + if(layout == blas::Layout::RowMajor) { + norm_inverse = (A_row - B).norm(); + } + else { + norm_inverse = (A_col - B).norm(); + } + + randblas_require(norm_inverse < epsilon); + + + } }; //////////////////////////////////////////////////////////////////////// @@ -397,3 +508,51 @@ TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { blas::Layout::RowMajor ); } + +TEST_F(TestLMIGET, test_inverse_left_colmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 128, + 128, + 25, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_right_colmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 100, + 128, + 25, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_left_rowmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 128, + 128, + 25, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_right_rowmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 100, + 128, + 25, + false, + blas::Layout::RowMajor + ); +} From 8b04df1e9aa00fd576c115e1dc72f3c3aefe70b8 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Thu, 17 Oct 2024 06:28:51 -0700 Subject: [PATCH 21/26] Cleaning up comments and stylistic changes --- test/test_matmul_cores/test_trig.cc | 48 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 01b8913..3283764 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -250,43 +250,41 @@ class TestLMIGET : public::testing::Test uint32_t seed, int64_t m, // Generated data matrix, `A` is of size `(m x n)` int64_t n, - int64_t d, + int64_t d, // #rows/cols that will be permuted bool left, blas::Layout layout, double epsilon=1e-5 ) { // Grabbing a random matrix std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); - // Eigen::Matrix A_col(m, n); Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); - // A_col.setRandom(); Eigen::Map> A_row(A_vec.data(), m, n); // Deep copy MatrixXd B; if(layout == blas::Layout::RowMajor) - B = A_row; + B = A_row; else - B = A_col; + B = A_col; //// Performing \Pi H D // Step 1: setup the diagonal scaling std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); } else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); } // Step 2: apply the hadamard transform int ld = (left) ? m : n; if(layout == blas::Layout::ColMajor){ - RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); } else { - RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); } // Step 3: Permuting @@ -295,15 +293,15 @@ class TestLMIGET : public::testing::Test std::iota(indices.begin(), indices.end(), 1); if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); } //// Performing D H \Pi @@ -313,42 +311,42 @@ class TestLMIGET : public::testing::Test if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); } // Step-2: Apply H^{-1} if(layout == blas::Layout::ColMajor) { - RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); - A_col = A_col * 1/std::pow(2, int(std::log2(ld))); + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + A_col = A_col * 1/std::pow(2, int(std::log2(ld))); } else { - RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); - A_row = A_row * 1/std::pow(2, int(std::log2(ld))); + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + A_row = A_row * 1/std::pow(2, int(std::log2(ld))); } //Step-3: Inverting `D` if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); } else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); } double norm_inverse = 0.0; if(layout == blas::Layout::RowMajor) { - norm_inverse = (A_row - B).norm(); + norm_inverse = (A_row - B).norm(); } else { - norm_inverse = (A_col - B).norm(); + norm_inverse = (A_col - B).norm(); } randblas_require(norm_inverse < epsilon); From 6b7101c9b644fad11627ad73392c5360b4abe0d1 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Wed, 23 Oct 2024 10:32:00 -0700 Subject: [PATCH 22/26] Review --- RandBLAS/trig_skops.hh | 97 ++++++++++--------------- test/test_matmul_cores/test_trig.cc | 106 +++++++++++++++------------- 2 files changed, 97 insertions(+), 106 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 804fe89..22f8311 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -27,30 +27,7 @@ namespace RandBLAS { // Generates a vector of Rademacher entries using the Random123 library template - void generate_rademacher_vector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { - RNG rng; - - typename RNG::ctr_type c; - typename RNG::key_type key = {{key_seed}}; - - // Sequential loop for generating Rademacher entries - for (int64_t i = 0; i < n; ++i) { - // Set the counter for each random number - c[0] = ctr_seed + i; // Ensure each counter is unique - - // Generate a 2x32-bit random number using the Philox generator - typename RNG::ctr_type r = rng(c, key); - - // Convert the random number into a float in [0, 1) using u01fixedpt - float rand_value = r123::u01fixedpt(r.v[0]); - - // Convert the float into a Rademacher entry (-1 or 1) - buff[i] = rand_value < 0.5 ? -1 : 1; - } - } - - template - RandBLAS::RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RandBLAS::RNGState seed_state) { + RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RNGState seed_state) { RNG rng; auto [ctr, key] = seed_state; @@ -65,14 +42,14 @@ namespace RandBLAS { } // Return the updated RNGState (with the incremented counter) - return RandBLAS::RNGState {ctr, key}; + return RNGState {ctr, key}; } // Catch-all method for applying the diagonal Rademacher // entries in-place to an input matrix, `A` template void apply_diagonal_rademacher( - bool left, + bool left, // Pre-multiplying? blas::Layout layout, int64_t rows, int64_t cols, @@ -82,37 +59,37 @@ namespace RandBLAS { //TODO: Investigate better schemes for performing the scaling //TODO: Move to `RandBLAS/util.hh` if(left && layout == blas::Layout::ColMajor) { - for(int64_t col=0; col < cols; col++) { - if(diag[col] > 0) + for(int64_t row = 0; row < rows; row++) { + if(diag[row] > 0) continue; - blas::scal(rows, diag[col], &A[col * rows], 1); + blas::scal(cols, diag[row], &A[row], rows); } } else if(left && layout == blas::Layout::RowMajor) { - for(int64_t col=0; col < cols; col++) { - if(diag[col] > 0) + for(int64_t row = 0; row < rows; row++) { + if(diag[row] > 0) continue; - blas::scal(rows, diag[col], &A[col], cols); + blas::scal(cols, diag[row], &A[row * cols], 1); } } else if(!left && layout == blas::Layout::ColMajor) { - for(int64_t row = 0; row < rows; row++) { - if(diag[row] > 0) + for(int64_t col=0; col < cols; col++) { + if(diag[col] > 0) continue; - blas::scal(cols, diag[row], &A[row], rows); + blas::scal(rows, diag[col], &A[col * rows], 1); } } else { - for(int64_t row = 0; row < rows; row++) { - if(diag[row] > 0) + for(int64_t col=0; col < cols; col++) { + if(diag[col] > 0) continue; - blas::scal(cols, diag[row], &A[row * cols], 1); + blas::scal(rows, diag[col], &A[col], cols); } } } template - void permuteRowsToTop( + void permute_rows_to_top( blas::Layout layout, int64_t rows, int64_t cols, @@ -147,7 +124,7 @@ namespace RandBLAS { } template - void permuteColsToLeft( + void permute_cols_to_left( blas::Layout layout, int64_t rows, int64_t cols, @@ -316,22 +293,22 @@ namespace RandBLAS { template void fht_dispatch( - bool left, + bool left, // Pre-multiplying? blas::Layout layout, - T* buff, - int64_t log_n, int64_t num_rows, - int64_t num_cols + int64_t num_cols, + int64_t log_n, + T* A ) { if(left && layout == blas::Layout::ColMajor) - fht_left_col_major(buff, log_n, num_rows, num_cols); + fht_left_col_major(A, log_n, num_rows, num_cols); else if(left && layout == blas::Layout::RowMajor) - fht_left_row_major(buff, log_n, num_rows, num_cols); + fht_left_row_major(A, log_n, num_rows, num_cols); else if(!left && layout == blas::Layout::ColMajor) - fht_right_col_major(buff, log_n, num_rows, num_cols); + fht_right_col_major(A, log_n, num_rows, num_cols); else - fht_right_row_major(buff, log_n, num_rows, num_cols); + fht_right_row_major(A, log_n, num_rows, num_cols); } } @@ -345,9 +322,9 @@ namespace RandBLAS::trig { * d: The number of rows/columns that will be permuted by the action of $\Pi$ */ template -inline void lmiget( +inline RandBLAS::RNGState lmiget( blas::Layout layout, - RandBLAS::RNGState random_state, + const RandBLAS::RNGState &random_state, int64_t m, // `A` is `(m x n)` int64_t n, int64_t d, // `d` is the number of rows that have to be permuted by `\Pi` @@ -362,11 +339,11 @@ inline void lmiget( //Step 1: Scale with `D` //Populating `diag` - generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + generate_rademacher_vector_r123(diag, n, random_state); apply_diagonal_rademacher(true, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform - fht_dispatch(true, layout, A, std::log2(MAX(m, n)), m, n); + fht_dispatch(true, layout, m, n, std::log2(MAX(m, n)), A); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder @@ -384,17 +361,19 @@ inline void lmiget( vals.data() // Placeholder ); - permuteRowsToTop(layout, m, n, selected_rows, d, A); + permute_rows_to_top(layout, m, n, selected_rows, d, A); free(diag); free(selected_rows); + + return next_state; } template -inline void rmiget( +inline RandBLAS::RNGState rmiget( blas::Layout layout, - RandBLAS::RNGState random_state, + const RandBLAS::RNGState &random_state, int64_t m, // `A` is `(m x n)` int64_t n, int64_t d, // `d` is the number of cols that have to be permuted by `\Pi` @@ -410,11 +389,11 @@ inline void rmiget( //Step 1: Scale with `D` //Populating `diag` - generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + generate_rademacher_vector_r123(diag, n, random_state); apply_diagonal_rademacher(false, layout, m, n, A, diag); //Step 2: Apply the Hadamard transform - fht_dispatch(false, layout, A, std::log2(MAX(m, n)), m, n); + fht_dispatch(false, layout, m, n, std::log2(MAX(m, n)), A); //Step 3: Permute the rows std::vector idxs_minor(d); // Placeholder @@ -432,9 +411,11 @@ inline void rmiget( vals.data() // Placeholder ); - permuteColsToLeft(layout, m, n, selected_cols, d, A); + permute_cols_to_left(layout, m, n, selected_cols, d, A); free(diag); free(selected_cols); + + return next_state; } } diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 3283764..d5dd9f1 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -42,8 +42,6 @@ using RandBLAS::trig::lmiget; using RandBLAS::trig::rmiget; using RandBLAS::generate_rademacher_vector_r123; using RandBLAS::apply_diagonal_rademacher; -using RandBLAS::permuteRowsToTop; -using RandBLAS::permuteColsToLeft; using RandBLAS::fht_dispatch; using Eigen::MatrixXd; @@ -59,13 +57,14 @@ class TestLMIGET : public::testing::Test // Helper function for explicitly generating a Hadamard matrix // (Note that ColMajor and RowMajor storage is identical for `H`) - static std::vector generate_hadamard(int64_t log_n) { + template + static std::vector generate_hadamard(int64_t log_n) { int64_t size = 1 << log_n; // size = 2^n - std::vector> H(size, std::vector(size, 1)); // Initialize H_1 + std::vector> H(size, std::vector(size, 1)); // Initialize H_1 // Sylvester's construction: recursively build the matrix for (int n = 1; n <= log_n; ++n) { - double curr_size = 1 << n; // Current size of the matrix is 2^n + T curr_size = 1 << n; // Current size of the matrix is 2^n for (int i = 0; i < curr_size / 2; ++i) { for (int j = 0; j < curr_size / 2; ++j) { // Fill the bottom-left and bottom-right quadrants @@ -77,7 +76,7 @@ class TestLMIGET : public::testing::Test } // Flatten into a vector in ColMajor order - std::vector H_flat(size * size); + std::vector H_flat(size * size); for (int col = 0; col < size; ++col) { for (int row = 0; row < size; ++row) { @@ -88,16 +87,17 @@ class TestLMIGET : public::testing::Test return H_flat; } - static std::vector generate_random_vector(int size, double lower_bound, double upper_bound) { + template + static std::vector generate_random_vector(int size, T lower_bound, T upper_bound) { // Create a random device and seed the random number generator std::random_device rd; std::mt19937 gen(rd()); - // Define the distribution range for the random doubles + // Define the distribution range for the random Ts std::uniform_real_distribution<> dist(lower_bound, upper_bound); // Create a vector of the specified size - std::vector random_vector(size); + std::vector random_vector(size); // Generate random doubles and fill the vector for (int i = 0; i < size; ++i) { @@ -118,10 +118,10 @@ class TestLMIGET : public::testing::Test int64_t n, bool left, blas::Layout layout, - double epsilon=1e-5 + T epsilon=1e-5 ) { // Grabbing a random matrix - std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); Eigen::Map> A_row(A_vec.data(), m, n); @@ -161,19 +161,19 @@ class TestLMIGET : public::testing::Test if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_row.data()); + RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_col.data()); + RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_row.data()); + RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_col.data()); + RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_col.data()); } // Or just do A.isApprox(B) - double norm_permute = 0.0; + T norm_permute = 0.0; if(left) { if(layout == blas::Layout::RowMajor) norm_permute = (A_row - perm * B).norm(); @@ -197,16 +197,16 @@ class TestLMIGET : public::testing::Test int ld = (left) ? m : n; if(layout == blas::Layout::ColMajor) - RandBLAS::fht_dispatch(left, layout, A_col.data(), std::log2(ld), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, std::log2(ld), A_col.data()); else - RandBLAS::fht_dispatch(left, layout, A_row.data(), std::log2(ld), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, std::log2(ld), A_row.data()); - std::vector H_vec = generate_hadamard(std::log2(ld)); + std::vector H_vec = generate_hadamard(std::log2(ld)); //TODO: Should have a check here to enforce that `m` and `n` are powers of 2 (since // my `generate_hadamard` function does not take care to pad an input matrix) MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(ld))), int(std::pow(2, std::log2(ld)))); - double norm_hadamard = 0.0; + T norm_hadamard = 0.0; if(left) { if(layout == blas::Layout::RowMajor) norm_hadamard = (A_row - H * B).norm(); @@ -228,7 +228,7 @@ class TestLMIGET : public::testing::Test // Scales all rows/cols by -1 and checks if A == -A std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); - double norm_diag = 0.0; + T norm_diag = 0.0; if(layout == blas::Layout::RowMajor) { RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); norm_diag = (A_row + B).norm(); @@ -250,41 +250,43 @@ class TestLMIGET : public::testing::Test uint32_t seed, int64_t m, // Generated data matrix, `A` is of size `(m x n)` int64_t n, - int64_t d, // #rows/cols that will be permuted + int64_t d, bool left, blas::Layout layout, - double epsilon=1e-5 + T epsilon=1e-5 ) { // Grabbing a random matrix - std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + // Eigen::Matrix A_col(m, n); Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + // A_col.setRandom(); Eigen::Map> A_row(A_vec.data(), m, n); // Deep copy MatrixXd B; if(layout == blas::Layout::RowMajor) - B = A_row; + B = A_row; else - B = A_col; + B = A_col; //// Performing \Pi H D // Step 1: setup the diagonal scaling - std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + std::vector buff = left ? std::vector(m, -1) : std::vector(n, -1); if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); } else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); } // Step 2: apply the hadamard transform int ld = (left) ? m : n; if(layout == blas::Layout::ColMajor){ - RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_col.data()); } else { - RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_row.data()); } // Step 3: Permuting @@ -293,15 +295,15 @@ class TestLMIGET : public::testing::Test std::iota(indices.begin(), indices.end(), 1); if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_col.data()); } //// Performing D H \Pi @@ -311,42 +313,42 @@ class TestLMIGET : public::testing::Test if(left) { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_col.data()); } else { if(layout == blas::Layout::RowMajor) - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_row.data()); else - RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_col.data()); } // Step-2: Apply H^{-1} if(layout == blas::Layout::ColMajor) { - RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_col.data()); A_col = A_col * 1/std::pow(2, int(std::log2(ld))); } else { - RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_row.data()); A_row = A_row * 1/std::pow(2, int(std::log2(ld))); } //Step-3: Inverting `D` if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); } else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); } - double norm_inverse = 0.0; + T norm_inverse = 0.0; if(layout == blas::Layout::RowMajor) { - norm_inverse = (A_row - B).norm(); + norm_inverse = (A_row - B).norm(); } else { - norm_inverse = (A_col - B).norm(); + norm_inverse = (A_col - B).norm(); } randblas_require(norm_inverse < epsilon); @@ -507,12 +509,20 @@ TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { ); } +//////////////////////////////////////////////////////////////////////// +// +// +// Verifying invertibility of the transform +// +// +//////////////////////////////////////////////////////////////////////// + TEST_F(TestLMIGET, test_inverse_left_colmajor) { for(uint32_t seed: keys) inverse_transform( seed, 128, - 128, + 100, 25, true, blas::Layout::ColMajor @@ -536,7 +546,7 @@ TEST_F(TestLMIGET, test_inverse_left_rowmajor) { inverse_transform( seed, 128, - 128, + 100, 25, true, blas::Layout::RowMajor From 0bcd76e93675ba3e6ec9e3ea39eaeab9d97ff836 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Fri, 25 Oct 2024 13:09:27 -0700 Subject: [PATCH 23/26] `HadamardMixingOp`, `lmiget|rmiget` -> `miget` --- RandBLAS/trig_skops.hh | 213 ++++++++++++++++------------ test/test_matmul_cores/test_trig.cc | 127 ++++++++++++++++- 2 files changed, 246 insertions(+), 94 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index 22f8311..f2a5245 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -10,23 +10,24 @@ #include #include #include +#include #include #include #include +#include #define MAX(a, b) (((a) < (b)) ? (b) : (a)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) namespace RandBLAS { - // ============================================================================= /// WARNING: None of the following functions or overloads thereof are part of the /// public API /// // Generates a vector of Rademacher entries using the Random123 library - template + template RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RNGState seed_state) { RNG rng; auto [ctr, key] = seed_state; @@ -47,7 +48,7 @@ namespace RandBLAS { // Catch-all method for applying the diagonal Rademacher // entries in-place to an input matrix, `A` - template + template void apply_diagonal_rademacher( bool left, // Pre-multiplying? blas::Layout layout, @@ -88,7 +89,7 @@ namespace RandBLAS { } } - template + template void permute_rows_to_top( blas::Layout layout, int64_t rows, @@ -102,28 +103,28 @@ namespace RandBLAS { //TODO: discuss precise semantics of `selected_rows` in this function if(layout == blas::Layout::ColMajor) { for (int64_t i=0; i < d; i++) { - randblas_error_if_msg(selected_rows[i] == top, - "The list of provided indices should be unique"); if (selected_rows[i] != top) { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' blas::swap(cols, &A[top], rows, &A[selected_rows[i]], rows); } + else + continue; } } else { // For `RowMajor` ordering for (int64_t i=0; i < d; i++) { - randblas_error_if_msg(selected_rows[i] == top, - "The list of provided indices should be unique"); if (selected_rows[i] != top) { blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); } + else + continue; } } } - template + template void permute_cols_to_left( blas::Layout layout, int64_t rows, @@ -141,6 +142,8 @@ namespace RandBLAS { // Swapping col 'selected' with col 'top' blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); } + else + continue; } } else { @@ -149,6 +152,8 @@ namespace RandBLAS { if (selected_cols[i] != left) { blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } + else + continue; } } } @@ -314,6 +319,86 @@ namespace RandBLAS { namespace RandBLAS::trig { +template +struct HadamardMixingOp{ + sint_t* diag_scale; + sint_t* selected_idxs; + blas::Layout layout; + int64_t m; + int64_t n; + int64_t d; + bool left; + bool filled = false; // will be updated by `miget` + + // Constructor + HadamardMixingOp(bool left, + blas::Layout layout, + int64_t m, + int64_t n, + int64_t d + ) : left(left), layout(layout), m(m), n(n), d(d) { + if(left) + diag_scale = new sint_t[m]; + else + diag_scale = new sint_t[n]; + selected_idxs = new sint_t[d]; + } + + // Destructor + ~HadamardMixingOp() { + free(this->diag_scale); + free(this->selected_idxs); + } + + private: +}; + +/* + * A free-function that performs an inversion of a matrix transformed by `lmiget | rmiget` + * the inversion is also performed in-place +*/ +template +void invert( + HadamardMixingOp &hmo, // details about the transform + T* SA // sketched matrix +) { + // We have to make sure we apply the operation in the inverted order + // with the operations appropriately inverted + randblas_error_if_msg(!hmo.filled, "You are trying to call `invert` on an untransformed matrix,\ + please call `miget` on your matrix before calling `invert`"); + + // Creating a vector out of `selected_idxs` to be able to conveniently reverse + std::vector selected_idxs(hmo.selected_idxs, hmo.selected_idxs + hmo.d); + // Reversing the indices for performing the inverse + std::reverse(selected_idxs.begin(), selected_idxs.end()); + + //Step 1: Permute the rows/cols + // Perform the permutation (for the way we define permutations + // invSelected_idxs = reverse(Selected_idxs)) + if(hmo.left) + permute_rows_to_top(hmo.layout, hmo.m, hmo.n, selected_idxs.data(), hmo.d, SA); + else + permute_cols_to_left(hmo.layout, hmo.m, hmo.n, selected_idxs.data(), hmo.d, SA); + + //Step 2: Apply the Hadamard transform (invH = H.T = H) + int ld = (hmo.left) ? hmo.m : hmo.n; + + T log_sz = std::log2(ld); + T log_int_sz, log_final_sz; + T log_frac_sz = std::modf(log_sz, &log_int_sz); + + if(log_frac_sz < 1e-3) + log_final_sz = log_int_sz; + else + log_final_sz = log_int_sz + 1; + + fht_dispatch(hmo.left, hmo.layout, hmo.m, hmo.n, log_final_sz, SA); + blas::scal(hmo.m * hmo.n, 1/std::pow(2, int(std::log2(ld))), SA, 1); + + //Step 3: Scale with `D` (invD = D for rademacher entries) + apply_diagonal_rademacher(hmo.left, hmo.layout, hmo.m, hmo.n, SA, hmo.diag_scale); +} + /* * These functions apply an in-place, SRHT-like transform to the input matrix * i.e. A <- (\Pi H D)A OR A <- A(D H \Pi) (which is equivalent to A <- A(\Pi H D)^{-1}) @@ -321,100 +406,48 @@ namespace RandBLAS::trig { * A: (m x n), input dimensions of `A` * d: The number of rows/columns that will be permuted by the action of $\Pi$ */ -template -inline RandBLAS::RNGState lmiget( - blas::Layout layout, - const RandBLAS::RNGState &random_state, - int64_t m, // `A` is `(m x n)` - int64_t n, - int64_t d, // `d` is the number of rows that have to be permuted by `\Pi` - T* A // data-matrix +template +inline RNGState miget( + HadamardMixingOp &hmo, // All information about `A` && the $\mathbb{\Pi\text{RHT}}$ + const RNGState &random_state, + T* A // The data-matrix ) { - // Size of the Rademacher entries = |A_cols| - //TODO: Change `diag` to float/doubles (same data type as the matrix) - sint_t* diag = new sint_t[n]; - sint_t* selected_rows = new sint_t[d]; - auto [ctr, key] = random_state; //Step 1: Scale with `D` //Populating `diag` - generate_rademacher_vector_r123(diag, n, random_state); - apply_diagonal_rademacher(true, layout, m, n, A, diag); + RNGState state_idxs = generate_rademacher_vector_r123(hmo.diag_scale, hmo.n, random_state); + apply_diagonal_rademacher(hmo.left, hmo.layout, hmo.m, hmo.n, A, hmo.diag_scale); //Step 2: Apply the Hadamard transform - fht_dispatch(true, layout, m, n, std::log2(MAX(m, n)), A); + int ld = (hmo.left) ? hmo.m : hmo.n; + T log_sz = std::log2(ld); + T log_int_sz, log_final_sz; + T log_frac_sz = std::modf(log_sz, &log_int_sz); - //Step 3: Permute the rows - std::vector idxs_minor(d); // Placeholder - std::vector vals(d); // Placeholder - - // Populating `selected_rows` - //TODO: Do I return this at some point? - RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( - random_state, - d, // Number of samples (vec_nnz) - m, // Total number of elements (dim_major) - 1, // Single sample round (dim_minor) - selected_rows, // Holds the required output - idxs_minor.data(), // Placeholder - vals.data() // Placeholder - ); - - permute_rows_to_top(layout, m, n, selected_rows, d, A); - - free(diag); - free(selected_rows); - - return next_state; -} - - -template -inline RandBLAS::RNGState rmiget( - blas::Layout layout, - const RandBLAS::RNGState &random_state, - int64_t m, // `A` is `(m x n)` - int64_t n, - int64_t d, // `d` is the number of cols that have to be permuted by `\Pi` - T* A // data-matrix -) -{ - // Size of the Rademacher entries = |A_cols| - //TODO: Change `diag` to float/doubles (same data type as the matrix) - sint_t* diag = new sint_t[m]; - sint_t* selected_cols = new sint_t[d]; - - auto [ctr, key] = random_state; - - //Step 1: Scale with `D` - //Populating `diag` - generate_rademacher_vector_r123(diag, n, random_state); - apply_diagonal_rademacher(false, layout, m, n, A, diag); - - //Step 2: Apply the Hadamard transform - fht_dispatch(false, layout, m, n, std::log2(MAX(m, n)), A); + if(log_frac_sz < 1e-3) + log_final_sz = log_int_sz; + else + log_final_sz = log_int_sz + 1; + fht_dispatch(hmo.left, hmo.layout, hmo.m, hmo.n, log_final_sz, A); //Step 3: Permute the rows - std::vector idxs_minor(d); // Placeholder - std::vector vals(d); // Placeholder - - // Populating `selected_rows` - //TODO: Do I return this at some point? - RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( - random_state, - d, // Number of samples (vec_nnz) - m, // Total number of elements (dim_major) - 1, // Single sample round (dim_minor) - selected_cols, // Holds the required output - idxs_minor.data(), // Placeholder - vals.data() // Placeholder + // Uniformly samples `d` entries from the index set [0, ..., m - 1] + RNGState next_state = repeated_fisher_yates( + hmo.d, + hmo.m, + 1, + hmo.selected_idxs, + state_idxs ); - permute_cols_to_left(layout, m, n, selected_cols, d, A); + if(hmo.left) + permute_rows_to_top(hmo.layout, hmo.m, hmo.n, hmo.selected_idxs, hmo.d, A); + else + permute_cols_to_left(hmo.layout, hmo.m, hmo.n, hmo.selected_idxs, hmo.d, A); - free(diag); - free(selected_cols); + // `invert` can now be called with this instance of `HadamardMixingOp` + hmo.filled = true; return next_state; } diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index d5dd9f1..ca84258 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -27,7 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#include "RandBLAS.hh" +// #include "RandBLAS.hh" #include "RandBLAS/base.hh" #include "RandBLAS/trig_skops.hh" #include @@ -53,7 +53,7 @@ class TestLMIGET : public::testing::Test virtual void TearDown(){}; - inline static std::vector keys {0, 42}; + inline static std::vector keys {42}; // Helper function for explicitly generating a Hadamard matrix // (Note that ColMajor and RowMajor storage is identical for `H`) @@ -118,7 +118,7 @@ class TestLMIGET : public::testing::Test int64_t n, bool left, blas::Layout layout, - T epsilon=1e-5 + T epsilon=RandBLAS::sqrt_epsilon() ) { // Grabbing a random matrix std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); @@ -253,7 +253,7 @@ class TestLMIGET : public::testing::Test int64_t d, bool left, blas::Layout layout, - T epsilon=1e-5 + T epsilon=RandBLAS::sqrt_epsilon() ) { // Grabbing a random matrix std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); @@ -355,6 +355,68 @@ class TestLMIGET : public::testing::Test } + + template + static void drivers_inverse( + uint32_t seed, + int64_t m, + int64_t n, + int64_t d, + int64_t left, + blas::Layout layout, + T epsilon=RandBLAS::sqrt_epsilon() + ) { + // Grabbing a random matrix + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + Eigen::Map> A_row(A_vec.data(), m, n); + RandBLAS::RNGState seed_state(seed); + + // Deep copy + MatrixXd B; + if(layout == blas::Layout::RowMajor) + B = A_row; + else + B = A_col; + + // Aggregating information about the matrix + RandBLAS::trig::HadamardMixingOp<> hmo( + left, + layout, + m, + n, + d + ); + + // Sketching the matrix + if(layout == blas::Layout::RowMajor) { + RandBLAS::trig::miget(hmo, seed_state, A_row.data()); + RandBLAS::trig::invert(hmo, A_row.data()); + } + else { + // std::cout << A_col << std::endl; + // std::cout << "<-------x------->" << std::endl; + RandBLAS::trig::miget(hmo, seed_state, A_col.data()); + // for(int i = 0; i < d; i ++) + // std::cout << hmo.selected_idxs[i] << std::endl; + // std::cout << A_col << std::endl; + // std::cout << "<-------x------->" << std::endl; + RandBLAS::trig::invert(hmo, A_col.data()); + // std::cout << A_col << std::endl; + // std::cout << "<-------x------->" << std::endl; + } + + T norm_inverse = 0.0; + + if(layout == blas::Layout::RowMajor) { + norm_inverse = (A_row - B).norm(); + } + else { + norm_inverse = (A_col - B).norm(); + } + + randblas_require(norm_inverse < epsilon); + } }; //////////////////////////////////////////////////////////////////////// @@ -564,3 +626,60 @@ TEST_F(TestLMIGET, test_inverse_right_rowmajor) { blas::Layout::RowMajor ); } + +//////////////////////////////////////////////////////////////////////// +// +// +// Verifying correctness (in invertibility) of user-facing functions +// +// +//////////////////////////////////////////////////////////////////////// + + +TEST_F(TestLMIGET, test_user_inverse_left_colmajor) { + for(uint32_t seed: keys) + drivers_inverse( + seed, + 128, + 128, + 25, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_user_inverse_right_colmajor) { + for(uint32_t seed: keys) + drivers_inverse( + seed, + 128, + 128, + 25, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_user_inverse_left_rowmajor) { + for(uint32_t seed: keys) + drivers_inverse( + seed, + 128, + 128, + 25, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_user_inverse_right_rowmajor) { + for(uint32_t seed: keys) + drivers_inverse( + seed, + 128, + 128, + 25, + false, + blas::Layout::RowMajor + ); +} From 6da14634d608c7f2ee2f5e886397b4abdba53c46 Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Fri, 25 Oct 2024 13:11:36 -0700 Subject: [PATCH 24/26] Removed comments --- test/test_matmul_cores/test_trig.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index ca84258..67555be 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -394,16 +394,8 @@ class TestLMIGET : public::testing::Test RandBLAS::trig::invert(hmo, A_row.data()); } else { - // std::cout << A_col << std::endl; - // std::cout << "<-------x------->" << std::endl; RandBLAS::trig::miget(hmo, seed_state, A_col.data()); - // for(int i = 0; i < d; i ++) - // std::cout << hmo.selected_idxs[i] << std::endl; - // std::cout << A_col << std::endl; - // std::cout << "<-------x------->" << std::endl; RandBLAS::trig::invert(hmo, A_col.data()); - // std::cout << A_col << std::endl; - // std::cout << "<-------x------->" << std::endl; } T norm_inverse = 0.0; From 6ade6168f442b70e48dff62d6df78f040dc5351c Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Mon, 4 Nov 2024 10:13:59 -0800 Subject: [PATCH 25/26] BIG, removes Eigen-test dep. && review --- RandBLAS/trig_skops.hh | 50 ++- test/test_matmul_cores/test_trig.cc | 474 +++++++++++++--------------- 2 files changed, 232 insertions(+), 292 deletions(-) diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh index f2a5245..7548191 100644 --- a/RandBLAS/trig_skops.hh +++ b/RandBLAS/trig_skops.hh @@ -1,7 +1,7 @@ #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/random_gen.hh" -#include +#include "RandBLAS/sparse_skops.hh" #include #include @@ -27,13 +27,13 @@ namespace RandBLAS { /// // Generates a vector of Rademacher entries using the Random123 library - template - RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RNGState seed_state) { - RNG rng; + template> + state_t generate_rademacher_vector_r123(sint_t* buff, int64_t n, state_t &seed_state) { + DefaultRNG rng; auto [ctr, key] = seed_state; for (int64_t i = 0; i < n; ++i) { - typename RNG::ctr_type r = rng(ctr, key); + typename DefaultRNG::ctr_type r = rng(ctr, key); float rand_value = r123::u01fixedpt(r.v[0]); @@ -43,7 +43,7 @@ namespace RandBLAS { } // Return the updated RNGState (with the incremented counter) - return RNGState {ctr, key}; + return state_t {ctr, key}; } // Catch-all method for applying the diagonal Rademacher @@ -107,9 +107,7 @@ namespace RandBLAS { // Use BLAS swap to swap the entire rows // Swapping row 'selected' with row 'top' blas::swap(cols, &A[top], rows, &A[selected_rows[i]], rows); - } - else - continue; + } // else, continue; } } else { @@ -117,9 +115,7 @@ namespace RandBLAS { for (int64_t i=0; i < d; i++) { if (selected_rows[i] != top) { blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); - } - else - continue; + } // else, continue; } } } @@ -142,8 +138,6 @@ namespace RandBLAS { // Swapping col 'selected' with col 'top' blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); } - else - continue; } } else { @@ -152,8 +146,6 @@ namespace RandBLAS { if (selected_cols[i] != left) { blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); } - else - continue; } } } @@ -208,8 +200,8 @@ namespace RandBLAS { for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros - bool b1 = j + k < num_rows; - bool b2 = j + k + s1 < num_rows; + bool b1 = (j + k) * num_cols + col < num_rows * num_cols; + bool b2 = (j + k + s1) * num_cols + col < num_rows * num_cols; T u = b1 ? buf[(j + k) * num_cols + col] : 0; T v = b2 ? buf[(j + k + s1) * num_cols + col] : 0; if(b1 && b2) { @@ -277,8 +269,8 @@ namespace RandBLAS { for (int64_t k = 0; k < s1; ++k) { // For implicitly padding the input we just have to make sure // we replace all out-of-bounds accesses with zeros - bool b1 = j + k < num_cols; - bool b2 = j + k + s1 < num_cols; + bool b1 = (j + k) * num_rows + row < num_cols * num_rows; + bool b2 = (j + k + s1) * num_rows + row < num_cols * num_rows; T u = b1 ? buf[(j + k) * num_rows + row] : 0; T v = b2 ? buf[(j + k + s1) * num_rows + row] : 0; if(b1 && b2) { @@ -346,8 +338,8 @@ struct HadamardMixingOp{ // Destructor ~HadamardMixingOp() { - free(this->diag_scale); - free(this->selected_idxs); + delete [] this->diag_scale; + delete [] this->selected_idxs; } private: @@ -358,7 +350,7 @@ struct HadamardMixingOp{ * the inversion is also performed in-place */ template -void invert( +void invert_hadamard( HadamardMixingOp &hmo, // details about the transform T* SA // sketched matrix ) { @@ -406,17 +398,17 @@ void invert( * A: (m x n), input dimensions of `A` * d: The number of rows/columns that will be permuted by the action of $\Pi$ */ -template -inline RNGState miget( +template , SignedInteger sint_t = int64_t> +inline state_t miget( HadamardMixingOp &hmo, // All information about `A` && the $\mathbb{\Pi\text{RHT}}$ - const RNGState &random_state, + const state_t &random_state, T* A // The data-matrix ) { auto [ctr, key] = random_state; //Step 1: Scale with `D` //Populating `diag` - RNGState state_idxs = generate_rademacher_vector_r123(hmo.diag_scale, hmo.n, random_state); + auto next_state = generate_rademacher_vector_r123(hmo.diag_scale, hmo.n, random_state); apply_diagonal_rademacher(hmo.left, hmo.layout, hmo.m, hmo.n, A, hmo.diag_scale); //Step 2: Apply the Hadamard transform @@ -433,12 +425,12 @@ inline RNGState miget( //Step 3: Permute the rows // Uniformly samples `d` entries from the index set [0, ..., m - 1] - RNGState next_state = repeated_fisher_yates( + next_state = repeated_fisher_yates( hmo.d, hmo.m, 1, hmo.selected_idxs, - state_idxs + next_state ); if(hmo.left) diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 67555be..8cf04c3 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -27,25 +27,16 @@ // POSSIBILITY OF SUCH DAMAGE. // -// #include "RandBLAS.hh" +#include "RandBLAS.hh" #include "RandBLAS/base.hh" #include "RandBLAS/trig_skops.hh" +#include "RandBLAS/util.hh" #include -#include -#include #include #include #include -using RandBLAS::trig::lmiget; -using RandBLAS::trig::rmiget; -using RandBLAS::generate_rademacher_vector_r123; -using RandBLAS::apply_diagonal_rademacher; -using RandBLAS::fht_dispatch; -using Eigen::MatrixXd; - - class TestLMIGET : public::testing::Test { protected: @@ -55,6 +46,23 @@ class TestLMIGET : public::testing::Test inline static std::vector keys {42}; + // Helper function for generating a row-permutation matrix + // in ColMajor format + //NOTE: Need to deallocate memory explicitly + template + static T* generate_row_permutation_row_major(int size, const std::vector& final_row_order) { + // Allocate memory for a size x size matrix and initialize with zeros + T* perm_matrix = new T[size * size](); + + // Fill in the matrix with '1's at positions corresponding to the final row order + for (int i = 0; i < size; ++i) { + int final_row = final_row_order[i]; + perm_matrix[i * size + final_row] = 1.0; // Set the '1' in the permuted position (RowMajor) + } + + return perm_matrix; + } + // Helper function for explicitly generating a Hadamard matrix // (Note that ColMajor and RowMajor storage is identical for `H`) template @@ -118,26 +126,17 @@ class TestLMIGET : public::testing::Test int64_t n, bool left, blas::Layout layout, - T epsilon=RandBLAS::sqrt_epsilon() + T tol=RandBLAS::sqrt_epsilon() ) { // Grabbing a random matrix std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); - Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); - Eigen::Map> A_row(A_vec.data(), m, n); - - // Deep copy - MatrixXd B; - if(layout == blas::Layout::RowMajor) - B = A_row; - else - B = A_col; + std::vector B_vec(A_vec); switch (transform) { case transforms::permute: { - // Simply compares against Eigen::PermutationMatrix - Eigen::PermutationMatrix perm(5); + int ld = (left) ? m : n; - std::vector V = left ? std::vector(m) : std::vector(n); + std::vector V = std::vector(ld); int cnt = 0; // int cnt = 0; @@ -151,44 +150,101 @@ class TestLMIGET : public::testing::Test cnt++; } - Eigen::VectorXi indices = Eigen::Map(V.data(), V.size()); - - // Set the indices in the permutation matrix - perm.indices() = indices; + T* perm_matrix = generate_row_permutation_row_major(ld, V); + T* true_perm_matrix = new T[m * n]; sint_t* v = new sint_t; *v = V.size() - 1; if(left) { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_row.data()); - else - RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_col.data()); + if(layout == blas::Layout::RowMajor) { + blas::gemm( + blas::Layout::RowMajor, + blas::Op::Trans, + blas::Op::NoTrans, + m, + n, + m, + 1.0, + perm_matrix, + m, + A_vec.data(), + n, + 0.0, + true_perm_matrix, + n + ); + RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_vec.data()); + } + else { + blas::gemm( + blas::Layout::ColMajor, + blas::Op::Trans, + blas::Op::NoTrans, + m, + n, + m, + 1.0, + perm_matrix, + m, + A_vec.data(), + m, + 0.0, + true_perm_matrix, + m + ); + RandBLAS::permute_rows_to_top(layout, m, n, v, 1, A_vec.data()); + } } else { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_row.data()); - else - RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_col.data()); + if(layout == blas::Layout::RowMajor) { + blas::gemm( + blas::Layout::RowMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + n, + 1.0, + A_vec.data(), + n, + perm_matrix, + n, + 0.0, + true_perm_matrix, + n + ); + RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_vec.data()); + } + else { + blas::gemm( + blas::Layout::ColMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + n, + 1.0, + A_vec.data(), + m, + perm_matrix, + n, + 0.0, + true_perm_matrix, + m + ); + RandBLAS::permute_cols_to_left(layout, m, n, v, 1, A_vec.data()); + } } - // Or just do A.isApprox(B) T norm_permute = 0.0; - if(left) { - if(layout == blas::Layout::RowMajor) - norm_permute = (A_row - perm * B).norm(); - else - norm_permute = (A_col - perm * B).norm(); - } - else { - if(layout == blas::Layout::RowMajor) - norm_permute = (A_row - B * perm).norm(); - else - norm_permute = (A_col - B * perm).norm(); - } + blas::axpy(m * n, -1.0, A_vec.data(), 1, true_perm_matrix, 1); + norm_permute = blas::nrm2(m * n, true_perm_matrix, 1); + + randblas_require(norm_permute < tol); - // Or do A.isApprox(H * B) - randblas_require(norm_permute < epsilon); + delete [] perm_matrix; + delete [] true_perm_matrix; break; } @@ -196,31 +252,104 @@ class TestLMIGET : public::testing::Test // Here, simply check against explicit application of the Hadamard matrix int ld = (left) ? m : n; - if(layout == blas::Layout::ColMajor) - RandBLAS::fht_dispatch(left, layout, m, n, std::log2(ld), A_col.data()); - else - RandBLAS::fht_dispatch(left, layout, m, n, std::log2(ld), A_row.data()); + RandBLAS::fht_dispatch(left, layout, m, n, std::log2(ld), A_vec.data()); std::vector H_vec = generate_hadamard(std::log2(ld)); //TODO: Should have a check here to enforce that `m` and `n` are powers of 2 (since // my `generate_hadamard` function does not take care to pad an input matrix) - MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(ld))), int(std::pow(2, std::log2(ld)))); + + T* true_H_mult = new T[m * n]; T norm_hadamard = 0.0; + if(left) { - if(layout == blas::Layout::RowMajor) - norm_hadamard = (A_row - H * B).norm(); - else - norm_hadamard = (A_col - H * B).norm(); + if(layout == blas::Layout::RowMajor) { + blas::gemm( + blas::Layout::RowMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + m, + 1.0, + H_vec.data(), + m, + B_vec.data(), + n, + 0.0, + true_H_mult, + n + ); + blas::axpy(m * n, -1.0, A_vec.data(), 1, true_H_mult, 1); + norm_hadamard = blas::nrm2(m * n, true_H_mult, 1); + } + else { + blas::gemm( + blas::Layout::ColMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + m, + 1.0, + H_vec.data(), + m, + B_vec.data(), + m, + 0.0, + true_H_mult, + m + ); + blas::axpy(m * n, -1.0, A_vec.data(), 1, true_H_mult, 1); + norm_hadamard = blas::nrm2(m * n, true_H_mult, 1); + } } else { - if(layout == blas::Layout::RowMajor) - norm_hadamard = (A_row - B * H).norm(); - else - norm_hadamard = (A_col - B * H).norm(); + if(layout == blas::Layout::RowMajor) { + blas::gemm( + blas::Layout::RowMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + n, + 1.0, + B_vec.data(), + n, + H_vec.data(), + n, + 0.0, + true_H_mult, + n + ); + blas::axpy(m * n, -1.0, A_vec.data(), 1, true_H_mult, 1); + norm_hadamard = blas::nrm2(m * n, true_H_mult, 1); + } + else { + blas::gemm( + blas::Layout::ColMajor, + blas::Op::NoTrans, + blas::Op::NoTrans, + m, + n, + n, + 1.0, + B_vec.data(), + m, + H_vec.data(), + n, + 0.0, + true_H_mult, + m + ); + blas::axpy(m * n, -1.0, A_vec.data(), 1, true_H_mult, 1); + norm_hadamard = blas::nrm2(m * n, true_H_mult, 1); + } } - randblas_require(norm_hadamard < epsilon); + randblas_require(norm_hadamard < tol); + + delete [] true_H_mult; break; } @@ -228,134 +357,25 @@ class TestLMIGET : public::testing::Test // Scales all rows/cols by -1 and checks if A == -A std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_vec.data(), buff.data()); + T norm_diag = 0.0; if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); - norm_diag = (A_row + B).norm(); + blas::axpy(m * n, 1.0, A_vec.data(), 1, B_vec.data(), 1); + norm_diag = blas::nrm2(m * n, B_vec.data(), 1); } else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); - norm_diag = (A_col + B).norm(); + blas::axpy(m * n, 1.0, A_vec.data(), 1, B_vec.data(), 1); + norm_diag = blas::nrm2(m * n, B_vec.data(), 1); } - randblas_require(norm_diag < epsilon); + randblas_require(norm_diag < tol); break; } } } - template - static void inverse_transform( - uint32_t seed, - int64_t m, // Generated data matrix, `A` is of size `(m x n)` - int64_t n, - int64_t d, - bool left, - blas::Layout layout, - T epsilon=RandBLAS::sqrt_epsilon() - ) { - // Grabbing a random matrix - std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); - // Eigen::Matrix A_col(m, n); - Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); - // A_col.setRandom(); - Eigen::Map> A_row(A_vec.data(), m, n); - - // Deep copy - MatrixXd B; - if(layout == blas::Layout::RowMajor) - B = A_row; - else - B = A_col; - - //// Performing \Pi H D - // Step 1: setup the diagonal scaling - std::vector buff = left ? std::vector(m, -1) : std::vector(n, -1); - - if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); - } - else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); - } - - // Step 2: apply the hadamard transform - int ld = (left) ? m : n; - if(layout == blas::Layout::ColMajor){ - RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_col.data()); - } - else { - RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_row.data()); - } - - // Step 3: Permuting - std::vector indices(d); - - std::iota(indices.begin(), indices.end(), 1); - if(left) { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_row.data()); - else - RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_col.data()); - } - else { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_row.data()); - else - RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_col.data()); - } - - //// Performing D H \Pi - - //Step 1: Un-permute - std::reverse(indices.begin(), indices.end()); - - if(left) { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_row.data()); - else - RandBLAS::permute_rows_to_top(layout, m, n, indices.data(), d, A_col.data()); - } - else { - if(layout == blas::Layout::RowMajor) - RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_row.data()); - else - RandBLAS::permute_cols_to_left(layout, m, n, indices.data(), d, A_col.data()); - } - - // Step-2: Apply H^{-1} - if(layout == blas::Layout::ColMajor) { - RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_col.data()); - A_col = A_col * 1/std::pow(2, int(std::log2(ld))); - } - else { - RandBLAS::fht_dispatch(left, layout, m, n, int(std::log2(ld)), A_row.data()); - A_row = A_row * 1/std::pow(2, int(std::log2(ld))); - } - - //Step-3: Inverting `D` - if(layout == blas::Layout::RowMajor) { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); - } - else { - RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); - } - - T norm_inverse = 0.0; - - if(layout == blas::Layout::RowMajor) { - norm_inverse = (A_row - B).norm(); - } - else { - norm_inverse = (A_col - B).norm(); - } - - randblas_require(norm_inverse < epsilon); - - - } - template static void drivers_inverse( uint32_t seed, @@ -364,20 +384,13 @@ class TestLMIGET : public::testing::Test int64_t d, int64_t left, blas::Layout layout, - T epsilon=RandBLAS::sqrt_epsilon() + T tol=RandBLAS::sqrt_epsilon() ) { // Grabbing a random matrix std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); - Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); - Eigen::Map> A_row(A_vec.data(), m, n); - RandBLAS::RNGState seed_state(seed); + std::vector B_vec(A_vec); - // Deep copy - MatrixXd B; - if(layout == blas::Layout::RowMajor) - B = A_row; - else - B = A_col; + RandBLAS::RNGState seed_state(seed); // Aggregating information about the matrix RandBLAS::trig::HadamardMixingOp<> hmo( @@ -388,26 +401,17 @@ class TestLMIGET : public::testing::Test d ); - // Sketching the matrix - if(layout == blas::Layout::RowMajor) { - RandBLAS::trig::miget(hmo, seed_state, A_row.data()); - RandBLAS::trig::invert(hmo, A_row.data()); - } - else { - RandBLAS::trig::miget(hmo, seed_state, A_col.data()); - RandBLAS::trig::invert(hmo, A_col.data()); - } + // Performing the transform and... + RandBLAS::trig::miget(hmo, seed_state, A_vec.data()); + RandBLAS::trig::invert_hadamard(hmo, A_vec.data()); + // ... inverting T norm_inverse = 0.0; - if(layout == blas::Layout::RowMajor) { - norm_inverse = (A_row - B).norm(); - } - else { - norm_inverse = (A_col - B).norm(); - } + blas::axpy(m * n, -1.0, A_vec.data(), 1, B_vec.data(), 1); + norm_inverse = blas::nrm2(m * n, B_vec.data(), 1); - randblas_require(norm_inverse < epsilon); + randblas_require(norm_inverse < tol); } }; @@ -563,62 +567,6 @@ TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { ); } -//////////////////////////////////////////////////////////////////////// -// -// -// Verifying invertibility of the transform -// -// -//////////////////////////////////////////////////////////////////////// - -TEST_F(TestLMIGET, test_inverse_left_colmajor) { - for(uint32_t seed: keys) - inverse_transform( - seed, - 128, - 100, - 25, - true, - blas::Layout::ColMajor - ); -} - -TEST_F(TestLMIGET, test_inverse_right_colmajor) { - for(uint32_t seed: keys) - inverse_transform( - seed, - 100, - 128, - 25, - false, - blas::Layout::ColMajor - ); -} - -TEST_F(TestLMIGET, test_inverse_left_rowmajor) { - for(uint32_t seed: keys) - inverse_transform( - seed, - 128, - 100, - 25, - true, - blas::Layout::RowMajor - ); -} - -TEST_F(TestLMIGET, test_inverse_right_rowmajor) { - for(uint32_t seed: keys) - inverse_transform( - seed, - 100, - 128, - 25, - false, - blas::Layout::RowMajor - ); -} - //////////////////////////////////////////////////////////////////////// // // From 09ea44d3bd8196c1fc71509b309917b6acf0094a Mon Sep 17 00:00:00 2001 From: Aryaman Jeendgar Date: Mon, 4 Nov 2024 18:10:26 -0800 Subject: [PATCH 26/26] Simplifying correctness test for `diag` --- test/test_matmul_cores/test_trig.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc index 8cf04c3..e00b141 100644 --- a/test/test_matmul_cores/test_trig.cc +++ b/test/test_matmul_cores/test_trig.cc @@ -360,14 +360,8 @@ class TestLMIGET : public::testing::Test RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_vec.data(), buff.data()); T norm_diag = 0.0; - if(layout == blas::Layout::RowMajor) { - blas::axpy(m * n, 1.0, A_vec.data(), 1, B_vec.data(), 1); - norm_diag = blas::nrm2(m * n, B_vec.data(), 1); - } - else { - blas::axpy(m * n, 1.0, A_vec.data(), 1, B_vec.data(), 1); - norm_diag = blas::nrm2(m * n, B_vec.data(), 1); - } + blas::axpy(m * n, 1.0, A_vec.data(), 1, B_vec.data(), 1); + norm_diag = blas::nrm2(m * n, B_vec.data(), 1); randblas_require(norm_diag < tol);