Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand and update examples, improve spmm performance #89

Merged
merged 44 commits into from
May 30, 2024
Merged
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
8a5e294
update examples
rileyjmurray Apr 17, 2024
8744efa
have data set up for low-rank approx of sparse matrix
rileyjmurray Apr 19, 2024
37087b8
svd example compiles
rileyjmurray Apr 23, 2024
b481b5b
accuracy check and runtime
rileyjmurray Apr 26, 2024
a7b4ab0
directory
rileyjmurray Apr 28, 2024
8e6c035
ignore directory contents
rileyjmurray Apr 28, 2024
74122e1
fast matrix market folder
rileyjmurray Apr 28, 2024
c8b8378
include examples that use fast-matrix-market
rileyjmurray Apr 28, 2024
cb8f8d2
interesting demo using test data from fast-matrix-market
rileyjmurray Apr 28, 2024
8d5ac2d
tweak
rileyjmurray Apr 28, 2024
e19a266
clean up svd example (using a new macro for timing)
rileyjmurray Apr 28, 2024
299cbc3
ternary assignment
rileyjmurray Apr 29, 2024
f66d209
proper QRCP demo
rileyjmurray May 1, 2024
698d272
standalone Eigen example (currently very messy)
rileyjmurray May 1, 2024
2dff390
update eigen example
rileyjmurray May 2, 2024
5676def
remove Eigen example (its sparse QR doesnt do pivoting to reveal nume…
rileyjmurray May 2, 2024
669acbd
restructure examples
rileyjmurray May 7, 2024
4af1221
fix bug in examples (those using lu_stabilize). Create special implem…
rileyjmurray May 8, 2024
1eb7b80
properly check for OpenMP before calling omp_get_thread_num()
rileyjmurray May 9, 2024
db4adcb
fix thread number
rileyjmurray May 9, 2024
6e70336
graphs examples
rileyjmurray May 9, 2024
2408585
check in
rileyjmurray May 10, 2024
18f91eb
check in
rileyjmurray May 10, 2024
cb929a2
fix logging bug
rileyjmurray May 10, 2024
09244d3
debugging
rileyjmurray May 24, 2024
8966842
fixed sparse QRCP example
rileyjmurray May 24, 2024
9a13962
Update lu_row_stabilize so that it errors when necessary. Modulo read…
rileyjmurray May 24, 2024
a86d2c5
clean up notes in SpMatrix concept and remove requirement that preclu…
rileyjmurray May 29, 2024
862121e
remove messy graphs example. Rename fmm_svd.cc
rileyjmurray May 29, 2024
daac3fd
rename qrcp.cc example, update CMakeLists.txt
rileyjmurray May 29, 2024
bb16d37
remove added whitespace
rileyjmurray May 29, 2024
fc25e2d
clean up svd_matrixmarket example
rileyjmurray May 29, 2024
4274ad3
Polish example for low-rank QRCP of sparse matrices
rileyjmurray May 29, 2024
90d6678
blank line at end of file
rileyjmurray May 29, 2024
703ccef
specify μs (microseconds) instead of ms (milliseconds)
rileyjmurray May 29, 2024
a226f21
revise TLS examples
rileyjmurray May 29, 2024
96588fa
change fetcher to a README
rileyjmurray May 29, 2024
a7ab7bb
README and CMakeLists.txt
rileyjmurray May 29, 2024
a91397c
make sure to deallocate before returning
rileyjmurray May 29, 2024
d9eacf9
dev notes
rileyjmurray May 29, 2024
1f8ae46
better devnotes
rileyjmurray May 29, 2024
d0125c1
devnotes
rileyjmurray May 29, 2024
5754ce1
better devnotes
rileyjmurray May 30, 2024
fd5a987
tweak
rileyjmurray May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
revise TLS examples
rileyjmurray committed May 29, 2024
commit a226f2126e67dc2d05fe7d5894b9a62bfea76e9a
136 changes: 66 additions & 70 deletions examples/total-least-squares/tls_dense_skop.cc
Original file line number Diff line number Diff line change
@@ -32,22 +32,43 @@ void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){
RandBLAS::fill_dense(Dist_A, AB, state); //Fill A to be a random gaussian
RandBLAS::fill_dense(Dist_eps, eps, state1); //Fill A to be a random gaussian

blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, m, d, n, 1.0, AB, m, target_x, n, 0.0, &AB[m*n], m);
blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, m, d, n, 1, AB, m, target_x, n, 0, &AB[m*n], m);

for (int i = 0; i < m*d; i++){
AB[m*n + i] += eps[i]; // Add Gaussian Noise to right hand side
}
}

/* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m.
template <typename T>
void total_least_squares(int64_t m, int64_t n, T* AB, int64_t ldab, T* x, T* work_s, T* work_vt) {
// AB is m-by-(n+1) and stored in column-major format with leading dimension "ldab".
// Its first n columns contain a matrix "A", and its last column contains a vector "B".
//
// This function overwrites x with the solution to
// (A+E)x = B+R
// where (E, R) solve
// solve min{ ||[E, R]||_F : B+R in range(A+E) }.
//
// On exit, AB will have been overwritten by its matrix of left singular vectors,
// its singular values will be stored in work_s, and its (transposed) right singular
// vectors will be stored in work_vt.
lapack::gesdd(lapack::Job::OverwriteVec, m, n+1, AB, ldab, work_s, nullptr, 1, work_vt, n+1);
T scale = work_vt[(n+1)*(n+1)-1];
for (int i = 0; i < n; i++) {
x[i] = -work_vt[n + i*(n+1)] / scale;
}
return;
}

/* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m.
* In ordinary least squares it assumes that the error lies only in the right hand side vector b,
* and it aims to find a vector x that minimizes ||A*x - b||_2.
* On the other hand, total least squares assumes that the input data matrix A could also incur errors.
* Total least squares aims to find a solution where the error is orthogonal to the regression model.
*/

// To call the executable run ./TLS_DenseSkOp <m> <n> where <m> <n> are the number of rows and columns
// of A respectively. We expect m > 2*n.
// of A respectively. We expect m > 2*n.
int main(int argc, char* argv[]){

// Initialize dimensions
@@ -69,49 +90,34 @@ int main(int argc, char* argv[]){
exit(1);
}

// Define number or rows of the sketching operator
int64_t sk_dim = 2*(n+1);

// Initialize workspace
double *AB = new double[m*(n + 1)]; // Store [A B] in column major format
double *AB = new double[m*(n+1)];
double *SAB = new double[sk_dim*(n+1)];
double *X = new double[n];
double *res = new double[n];

// Initialize workspace for the sketched svd
double *U = new double[sk_dim*sk_dim];
double *sketch_x = new double[n];
double *svals = new double[n+1];
double *VT = new double[(n+1)*(n+1)];

// Initialize noisy gaussian data
init_noisy_data(m, n, 1, AB);

// Define properties of the sketching operator

// Initialize seed for random number generation
uint32_t seed = 0;
std::cout << "\nDimensions of the augmented matrix [A|B] : " << m << " by " << n+1 << '\n';
std::cout << "Embedding dimension : " << sk_dim << '\n';

// Define the dense distribution that the sketching operator will sample from
/* Additional dense distributions: RandBLAS::DenseDistName::Gaussian - entries are iid standard normal
* RandBLAS::DenseDistName::BlackBox - entires are user provided through a buffer
*/
// Sample the sketching operator
auto time_constructsketch1 = high_resolution_clock::now();
RandBLAS::DenseDistName dn = RandBLAS::DenseDistName::Uniform;

// Initialize dense distribution struct for the sketching operator
RandBLAS::DenseDist Dist(sk_dim, // Number of rows of the sketching operator
m, // Number of columns of the sketching operator
dn); // Distribution of the entires

//Construct the dense sketching operator
RandBLAS::DenseSkOp<double> S(Dist, seed);
auto t = omp_get_num_threads();
omp_set_num_threads(8);
RandBLAS::DenseDist Dist{ sk_dim, m };
uint32_t seed = 1997;
RandBLAS::DenseSkOp<double> S(Dist, seed);
RandBLAS::fill_dense(S);
omp_set_num_threads(t);
auto time_constructsketch2 = high_resolution_clock::now();
double sampling_time = (double) duration_cast<milliseconds>(time_constructsketch2 - time_constructsketch1).count()/1000;
std::cout << "\nTime to sample S : " << sampling_time << " seconds" << '\n';

// Sketch AB
// SAB = alpha * \op(S) * \op(AB) + beta * SAB
// SAB = 1.0 * S * AB + 0.0 * SAB
auto time_sketch1 = high_resolution_clock::now();
RandBLAS::sketch_general(
blas::Layout::ColMajor, // Matrix storage layout of AB and SAB
@@ -121,57 +127,47 @@ int main(int argc, char* argv[]){
n + 1, // Number of columns of AB and SAB
m, // Number of rows of AB and columns of S
1.0, // Scalar alpha - if alpha is zero AB is not accessed
S, // A DenseSkOp or SparseSkOp sketching operator
S, // A DenseSkOp or SparseSkOp
AB, // Matrix to be sketched
m, // Leading dimension of AB
0.0, // Scalar beta - if beta is zero SAB is not accessed
0.0, // Scalar beta - if beta is zero the initial value of SAB is not accessed
SAB, // Sketched matrix SAB
sk_dim // Leading dimension of SAB
);
auto time_sketch2 = high_resolution_clock::now();
double sketching_time = (double) duration_cast<milliseconds>(time_sketch2 - time_sketch1).count()/1000;
std::cout << "Time to compute SAB = S * AB : " << sketching_time << " seconds\n";

auto time_sketched_TLS1 = high_resolution_clock::now();
total_least_squares(sk_dim, n, SAB, sk_dim, sketch_x, svals, VT);
auto time_sketched_TLS2 = high_resolution_clock::now();
double sketched_solve_time = (double) duration_cast<milliseconds>(time_sketched_TLS2 - time_sketched_TLS1).count()/1000;
std::cout << "Time to perform TLS on sketched data : " << sketched_solve_time << " seconds\n\n";

double total_randomized_time = sampling_time + sketching_time + sketched_solve_time;
std::cout << "Total time for the randomized TLS method : " << total_randomized_time << " seconds\n";

double* true_x = new double[n];
auto time_true_TLS1 = high_resolution_clock::now();
total_least_squares(m, n, AB, m, true_x, svals, VT);
auto time_true_TLS2 = high_resolution_clock::now();
double true_solve_time = (double) duration_cast<milliseconds>(time_true_TLS2 - time_true_TLS1).count()/1000;
std::cout << "Time for the classical TLS method : " << true_solve_time << " seconds" << "\n";

std::cout << "Speedup of sketched vs classical method : " << true_solve_time / total_randomized_time << "\n\n";

// Perform SVD operation on SAB
auto time_TLS1 = high_resolution_clock::now();
lapack::gesdd(lapack::Job::AllVec, sk_dim, (n+1), SAB, sk_dim, svals, U, sk_dim, VT, n+1);

for (int i = 0; i < n; i++) {
X[i] = VT[n + i*(n+1)]; // Take the right n by 1 block of V
}

// Scale X by the inverse of the 1 by 1 bottom right block of V
blas::scal(n, -1/VT[(n+1)*(n+1)-1], X, 1);
auto time_TLS2 = high_resolution_clock::now();

//Check TLS solution. Expected to be close to a vector of 1's
double res_infnorm = 0;
double res_twonorm = 0;

for (int i = 0; i < n; i++) {
res[i] = abs(X[i] - 1);
res_twonorm += res[i]*res[i];
if (res_infnorm < res[i]) {
res_infnorm = res[i];
}
}

std::cout << "Matrix dimensions: " << m << " by " << n+1 << '\n';
std::cout << "Sketch dimension: " << sk_dim << '\n';
std::cout << "Time to create dense sketching operator: " << (double) duration_cast<milliseconds>(time_constructsketch2 - time_constructsketch1).count()/1000 << " seconds" << '\n';
std::cout << "Time to sketch AB: " << (double) duration_cast<milliseconds>(time_sketch2 - time_sketch1).count()/1000 << " seconds" <<'\n';
std::cout << "Time to perform TLS on sketched matrix: " << (double) duration_cast<milliseconds>(time_TLS2 - time_TLS1).count()/1000 << " seconds" << '\n';
std::cout << "Inf-norm distance from TLS solution to vector of all ones: " << res_infnorm << '\n';
std::cout << "Two-norm distance from TLS solution to vector of all ones: " << sqrt(res_twonorm) << '\n';
double* delta = new double[n];
blas::copy(n, sketch_x, 1, delta, 1);
blas::axpy(n, -1, true_x, 1, delta, 1);
double distance = blas::nrm2(n, delta, 1);
double scale = blas::nrm2(n, true_x, 1);
std::cout << "||sketch_x - true_x|| / ||true_x|| : " << distance/scale << "\n\n";

delete[] true_x;
delete[] AB;
delete[] SAB;
delete[] X;
delete[] res;
delete[] U;
delete[] sketch_x;
delete[] svals;
delete[] VT;
return 0;
}




125 changes: 66 additions & 59 deletions examples/total-least-squares/tls_sparse_skop.cc
Original file line number Diff line number Diff line change
@@ -17,7 +17,6 @@ using std::chrono::duration;
using std::chrono::milliseconds;


//TODO: Read in matrix dimensions
//TODO: Have the user choose between dense and sketch sketching operator (4 nnz per col)

void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){
@@ -42,6 +41,27 @@ void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){
}
}

template <typename T>
void total_least_squares(int64_t m, int64_t n, T* AB, int64_t ldab, T* x, T* work_s, T* work_vt) {
// AB is m-by-(n+1) and stored in column-major format with leading dimension "ldab".
// Its first n columns contain a matrix "A", and its last column contains a vector "B".
//
// This function overwrites x with the solution to
// (A+E)x = B+R
// where (E, R) solve
// solve min{ ||[E, R]||_F : B+R in range(A+E) }.
//
// On exit, AB will have been overwritten by its matrix of left singular vectors,
// its singular values will be stored in work_s, and its (transposed) right singular
// vectors will be stored in work_vt.
lapack::gesdd(lapack::Job::OverwriteVec, m, n+1, AB, ldab, work_s, nullptr, 1, work_vt, n+1);
T scale = work_vt[(n+1)*(n+1)-1];
for (int i = 0; i < n; i++) {
x[i] = -work_vt[n + i*(n+1)] / scale;
}
return;
}

/* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m.
* In ordinary least squares it assumes that the error lies only in the right hand side vector b,
* and it aims to find a vector x that minimizes ||A*x - b||_2.
@@ -76,38 +96,35 @@ int main(int argc, char* argv[]){
int64_t sk_dim = 2*(n+1);

// Initialize workspace
double *AB = new double[m*(n+1)]; // Store [A B] in column major format
double *AB = new double[m*(n+1)];
double *SAB = new double[sk_dim*(n+1)];
double *X = new double[n];
double *res = new double[n];

// Initialize workspace for the sketched svd
double *U = new double[sk_dim*sk_dim];
double *sketch_x = new double[n];
double *svals = new double[n+1];
double *VT = new double[(n+1)*(n+1)];

// Initialize noisy gaussian data
init_noisy_data(m, n, 1, AB);

std::cout << "\nDimensions of the augmented matrix [A|B] : " << m << " by " << n+1 << '\n';
std::cout << "Embedding dimension : " << sk_dim << '\n';

// Define the parameters of the sparse distribution
// Sample the sketching operator
auto time_constructsketch1 = high_resolution_clock::now();

// Initialize sparse distribution struct for the sketching operator
uint32_t seed = 0; // Initialize seed for random number generation
RandBLAS::SparseDist Dist = {.n_rows = sk_dim, // Number of rows of the sketching operator
.n_cols = m, // Number of columns of the sketching operator
.vec_nnz = 8, // Number of non-zero entires per major axis
.major_axis = RandBLAS::MajorAxis::Short // Defines the major axis of the sketching operator
};

//Construct the sparse sketching operator
RandBLAS::SparseDist Dist = {
.n_rows = sk_dim, // Number of rows of the sketching operator
.n_cols = m, // Number of columns of the sketching operator
.vec_nnz = 8, // Number of non-zero entires per major-axis vector
.major_axis = RandBLAS::MajorAxis::Short // A "SASO" (aka SJLT, aka OSNAP, aka generalized CountSketch)
};
uint32_t seed = 1997;
RandBLAS::SparseSkOp<double> S(Dist, seed);
RandBLAS::fill_sparse(S);
auto time_constructsketch2 = high_resolution_clock::now();
double sampling_time = (double) duration_cast<milliseconds>(time_constructsketch2 - time_constructsketch1).count()/1000;
std::cout << "\nTime to sample S : " << sampling_time << " seconds" << '\n';

// Sketch AB
// SAB = alpha * \op(S) * \op(AB) + beta * SAB
// SAB = 1.0 * S * AB + 0.0 * SAB
auto time_sketch1 = high_resolution_clock::now();
RandBLAS::sketch_general(
blas::Layout::ColMajor, // Matrix storage layout of AB and SAB
@@ -117,57 +134,47 @@ int main(int argc, char* argv[]){
n + 1, // Number of columns of AB and SAB
m, // Number of rows of AB and columns of S
1.0, // Scalar alpha - if alpha is zero AB is not accessed
S, // A DenseSkOp or SparseSkOp sketching operator
S, // A DenseSkOp or SparseSkOp
AB, // Matrix to be sketched
m, // Leading dimension of AB
0.0, // Scalar beta - if beta is zero SAB is not accessed
0.0, // Scalar beta - if beta is zero the initial value of SAB is not accessed
SAB, // Sketched matrix SAB
sk_dim // Leading dimension of SAB
);
auto time_sketch2 = high_resolution_clock::now();
double sketching_time = (double) duration_cast<milliseconds>(time_sketch2 - time_sketch1).count()/1000;
std::cout << "Time to compute SAB = S * AB : " << sketching_time << " seconds\n";

auto time_sketched_TLS1 = high_resolution_clock::now();
total_least_squares(sk_dim, n, SAB, sk_dim, sketch_x, svals, VT);
auto time_sketched_TLS2 = high_resolution_clock::now();
double sketched_solve_time = (double) duration_cast<milliseconds>(time_sketched_TLS2 - time_sketched_TLS1).count()/1000;
std::cout << "Time to perform TLS on sketched data : " << sketched_solve_time << " seconds\n\n";

double total_randomized_time = sampling_time + sketching_time + sketched_solve_time;
std::cout << "Total time for the randomized TLS method : " << total_randomized_time << " seconds\n";

double* true_x = new double[n];
auto time_true_TLS1 = high_resolution_clock::now();
total_least_squares(m, n, AB, m, true_x, svals, VT);
auto time_true_TLS2 = high_resolution_clock::now();
double true_solve_time = (double) duration_cast<milliseconds>(time_true_TLS2 - time_true_TLS1).count()/1000;
std::cout << "Time for the classical TLS method : " << true_solve_time << " seconds" << "\n";

std::cout << "Speedup of sketched vs classical method : " << true_solve_time / total_randomized_time << "\n\n";

// Perform SVD operation on SAB
auto time_TLS1 = high_resolution_clock::now();
lapack::gesdd(lapack::Job::AllVec, sk_dim, (n+1), SAB, sk_dim, svals, U, sk_dim, VT, n+1);

for (int i = 0; i < n; i++) {
X[i] = VT[n + i*(n+1)]; // Take the right n by 1 block of V
}

// Scale X by the inverse of the 1 by 1 bottom right block of V
blas::scal(n, -1/VT[(n+1)*(n+1)-1], X, 1);
auto time_TLS2 = high_resolution_clock::now();

//Check TLS solution. Expected to be a vector of 1's
double res_infnorm = 0;
double res_twonorm = 0;

for (int i = 0; i < n; i++) {
res[i] = abs(X[i] - 1);
res_twonorm += res[i]*res[i];
if (res_infnorm < res[i]) {
res_infnorm = res[i];
}
}

std::cout << "Matrix dimensions: " << m << " by " << n+1 << '\n';
std::cout << "Sketch dimension: " << sk_dim << '\n';
std::cout << "Time to create sparse sketching operator: " << (double) duration_cast<milliseconds>(time_constructsketch2 - time_constructsketch1).count()/1000 << " seconds" << '\n';
std::cout << "Time to sketch AB: " << (double) duration_cast<milliseconds>(time_sketch2 - time_sketch1).count()/1000 << " seconds" <<'\n';
std::cout << "Time to perform TLS on sketched matrix: " << (double) duration_cast<milliseconds>(time_TLS2 - time_TLS1).count()/1000 << " seconds" << '\n';
std::cout << "Inf-norm distance from TLS solution to vector of all ones: " << res_infnorm << '\n';
std::cout << "Two-norm distance from TLS solution to vector of all ones: " << sqrt(res_twonorm) << '\n';
double* delta = new double[n];
blas::copy(n, sketch_x, 1, delta, 1);
blas::axpy(n, -1, true_x, 1, delta, 1);
double distance = blas::nrm2(n, delta, 1);
double scale = blas::nrm2(n, true_x, 1);
std::cout << "||sketch_x - true_x|| / ||true_x|| : " << distance/scale << "\n\n";

delete[] true_x;
delete[] AB;
delete[] SAB;
delete[] X;
delete[] res;
delete[] U;
delete[] sketch_x;
delete[] svals;
delete[] VT;
return 0;
}