From a226f2126e67dc2d05fe7d5894b9a62bfea76e9a Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 29 May 2024 09:41:49 -0400 Subject: [PATCH] revise TLS examples --- .../total-least-squares/tls_dense_skop.cc | 136 +++++++++--------- .../total-least-squares/tls_sparse_skop.cc | 125 ++++++++-------- 2 files changed, 132 insertions(+), 129 deletions(-) diff --git a/examples/total-least-squares/tls_dense_skop.cc b/examples/total-least-squares/tls_dense_skop.cc index 927a9a62..46500461 100644 --- a/examples/total-least-squares/tls_dense_skop.cc +++ b/examples/total-least-squares/tls_dense_skop.cc @@ -32,14 +32,35 @@ void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){ RandBLAS::fill_dense(Dist_A, AB, state); //Fill A to be a random gaussian RandBLAS::fill_dense(Dist_eps, eps, state1); //Fill A to be a random gaussian - blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, m, d, n, 1.0, AB, m, target_x, n, 0.0, &AB[m*n], m); + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, m, d, n, 1, AB, m, target_x, n, 0, &AB[m*n], m); for (int i = 0; i < m*d; i++){ AB[m*n + i] += eps[i]; // Add Gaussian Noise to right hand side } } -/* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m. +template +void total_least_squares(int64_t m, int64_t n, T* AB, int64_t ldab, T* x, T* work_s, T* work_vt) { + // AB is m-by-(n+1) and stored in column-major format with leading dimension "ldab". + // Its first n columns contain a matrix "A", and its last column contains a vector "B". + // + // This function overwrites x with the solution to + // (A+E)x = B+R + // where (E, R) solve + // solve min{ ||[E, R]||_F : B+R in range(A+E) }. + // + // On exit, AB will have been overwritten by its matrix of left singular vectors, + // its singular values will be stored in work_s, and its (transposed) right singular + // vectors will be stored in work_vt. + lapack::gesdd(lapack::Job::OverwriteVec, m, n+1, AB, ldab, work_s, nullptr, 1, work_vt, n+1); + T scale = work_vt[(n+1)*(n+1)-1]; + for (int i = 0; i < n; i++) { + x[i] = -work_vt[n + i*(n+1)] / scale; + } + return; +} + +/* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m. * In ordinary least squares it assumes that the error lies only in the right hand side vector b, * and it aims to find a vector x that minimizes ||A*x - b||_2. * On the other hand, total least squares assumes that the input data matrix A could also incur errors. @@ -47,7 +68,7 @@ void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){ */ // To call the executable run ./TLS_DenseSkOp where are the number of rows and columns -// of A respectively. We expect m > 2*n. +// of A respectively. We expect m > 2*n. int main(int argc, char* argv[]){ // Initialize dimensions @@ -69,49 +90,34 @@ int main(int argc, char* argv[]){ exit(1); } + // Define number or rows of the sketching operator int64_t sk_dim = 2*(n+1); // Initialize workspace - double *AB = new double[m*(n + 1)]; // Store [A B] in column major format + double *AB = new double[m*(n+1)]; double *SAB = new double[sk_dim*(n+1)]; - double *X = new double[n]; - double *res = new double[n]; - - // Initialize workspace for the sketched svd - double *U = new double[sk_dim*sk_dim]; + double *sketch_x = new double[n]; double *svals = new double[n+1]; double *VT = new double[(n+1)*(n+1)]; // Initialize noisy gaussian data init_noisy_data(m, n, 1, AB); - // Define properties of the sketching operator - - // Initialize seed for random number generation - uint32_t seed = 0; + std::cout << "\nDimensions of the augmented matrix [A|B] : " << m << " by " << n+1 << '\n'; + std::cout << "Embedding dimension : " << sk_dim << '\n'; - // Define the dense distribution that the sketching operator will sample from - /* Additional dense distributions: RandBLAS::DenseDistName::Gaussian - entries are iid standard normal - * RandBLAS::DenseDistName::BlackBox - entires are user provided through a buffer - */ + // Sample the sketching operator auto time_constructsketch1 = high_resolution_clock::now(); - RandBLAS::DenseDistName dn = RandBLAS::DenseDistName::Uniform; - - // Initialize dense distribution struct for the sketching operator - RandBLAS::DenseDist Dist(sk_dim, // Number of rows of the sketching operator - m, // Number of columns of the sketching operator - dn); // Distribution of the entires - - //Construct the dense sketching operator - RandBLAS::DenseSkOp S(Dist, seed); - auto t = omp_get_num_threads(); - omp_set_num_threads(8); + RandBLAS::DenseDist Dist{ sk_dim, m }; + uint32_t seed = 1997; + RandBLAS::DenseSkOp S(Dist, seed); RandBLAS::fill_dense(S); - omp_set_num_threads(t); auto time_constructsketch2 = high_resolution_clock::now(); + double sampling_time = (double) duration_cast(time_constructsketch2 - time_constructsketch1).count()/1000; + std::cout << "\nTime to sample S : " << sampling_time << " seconds" << '\n'; // Sketch AB - // SAB = alpha * \op(S) * \op(AB) + beta * SAB + // SAB = 1.0 * S * AB + 0.0 * SAB auto time_sketch1 = high_resolution_clock::now(); RandBLAS::sketch_general( blas::Layout::ColMajor, // Matrix storage layout of AB and SAB @@ -121,57 +127,47 @@ int main(int argc, char* argv[]){ n + 1, // Number of columns of AB and SAB m, // Number of rows of AB and columns of S 1.0, // Scalar alpha - if alpha is zero AB is not accessed - S, // A DenseSkOp or SparseSkOp sketching operator + S, // A DenseSkOp or SparseSkOp AB, // Matrix to be sketched m, // Leading dimension of AB - 0.0, // Scalar beta - if beta is zero SAB is not accessed + 0.0, // Scalar beta - if beta is zero the initial value of SAB is not accessed SAB, // Sketched matrix SAB sk_dim // Leading dimension of SAB ); auto time_sketch2 = high_resolution_clock::now(); + double sketching_time = (double) duration_cast(time_sketch2 - time_sketch1).count()/1000; + std::cout << "Time to compute SAB = S * AB : " << sketching_time << " seconds\n"; + + auto time_sketched_TLS1 = high_resolution_clock::now(); + total_least_squares(sk_dim, n, SAB, sk_dim, sketch_x, svals, VT); + auto time_sketched_TLS2 = high_resolution_clock::now(); + double sketched_solve_time = (double) duration_cast(time_sketched_TLS2 - time_sketched_TLS1).count()/1000; + std::cout << "Time to perform TLS on sketched data : " << sketched_solve_time << " seconds\n\n"; + + double total_randomized_time = sampling_time + sketching_time + sketched_solve_time; + std::cout << "Total time for the randomized TLS method : " << total_randomized_time << " seconds\n"; + + double* true_x = new double[n]; + auto time_true_TLS1 = high_resolution_clock::now(); + total_least_squares(m, n, AB, m, true_x, svals, VT); + auto time_true_TLS2 = high_resolution_clock::now(); + double true_solve_time = (double) duration_cast(time_true_TLS2 - time_true_TLS1).count()/1000; + std::cout << "Time for the classical TLS method : " << true_solve_time << " seconds" << "\n"; + + std::cout << "Speedup of sketched vs classical method : " << true_solve_time / total_randomized_time << "\n\n"; - // Perform SVD operation on SAB - auto time_TLS1 = high_resolution_clock::now(); - lapack::gesdd(lapack::Job::AllVec, sk_dim, (n+1), SAB, sk_dim, svals, U, sk_dim, VT, n+1); - - for (int i = 0; i < n; i++) { - X[i] = VT[n + i*(n+1)]; // Take the right n by 1 block of V - } - - // Scale X by the inverse of the 1 by 1 bottom right block of V - blas::scal(n, -1/VT[(n+1)*(n+1)-1], X, 1); - auto time_TLS2 = high_resolution_clock::now(); - - //Check TLS solution. Expected to be close to a vector of 1's - double res_infnorm = 0; - double res_twonorm = 0; - - for (int i = 0; i < n; i++) { - res[i] = abs(X[i] - 1); - res_twonorm += res[i]*res[i]; - if (res_infnorm < res[i]) { - res_infnorm = res[i]; - } - } - - std::cout << "Matrix dimensions: " << m << " by " << n+1 << '\n'; - std::cout << "Sketch dimension: " << sk_dim << '\n'; - std::cout << "Time to create dense sketching operator: " << (double) duration_cast(time_constructsketch2 - time_constructsketch1).count()/1000 << " seconds" << '\n'; - std::cout << "Time to sketch AB: " << (double) duration_cast(time_sketch2 - time_sketch1).count()/1000 << " seconds" <<'\n'; - std::cout << "Time to perform TLS on sketched matrix: " << (double) duration_cast(time_TLS2 - time_TLS1).count()/1000 << " seconds" << '\n'; - std::cout << "Inf-norm distance from TLS solution to vector of all ones: " << res_infnorm << '\n'; - std::cout << "Two-norm distance from TLS solution to vector of all ones: " << sqrt(res_twonorm) << '\n'; + double* delta = new double[n]; + blas::copy(n, sketch_x, 1, delta, 1); + blas::axpy(n, -1, true_x, 1, delta, 1); + double distance = blas::nrm2(n, delta, 1); + double scale = blas::nrm2(n, true_x, 1); + std::cout << "||sketch_x - true_x|| / ||true_x|| : " << distance/scale << "\n\n"; + delete[] true_x; delete[] AB; delete[] SAB; - delete[] X; - delete[] res; - delete[] U; + delete[] sketch_x; delete[] svals; delete[] VT; return 0; } - - - - diff --git a/examples/total-least-squares/tls_sparse_skop.cc b/examples/total-least-squares/tls_sparse_skop.cc index fa208b39..298660f2 100644 --- a/examples/total-least-squares/tls_sparse_skop.cc +++ b/examples/total-least-squares/tls_sparse_skop.cc @@ -17,7 +17,6 @@ using std::chrono::duration; using std::chrono::milliseconds; -//TODO: Read in matrix dimensions //TODO: Have the user choose between dense and sketch sketching operator (4 nnz per col) void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){ @@ -42,6 +41,27 @@ void init_noisy_data(int64_t m, int64_t n, int64_t d, double* AB){ } } +template +void total_least_squares(int64_t m, int64_t n, T* AB, int64_t ldab, T* x, T* work_s, T* work_vt) { + // AB is m-by-(n+1) and stored in column-major format with leading dimension "ldab". + // Its first n columns contain a matrix "A", and its last column contains a vector "B". + // + // This function overwrites x with the solution to + // (A+E)x = B+R + // where (E, R) solve + // solve min{ ||[E, R]||_F : B+R in range(A+E) }. + // + // On exit, AB will have been overwritten by its matrix of left singular vectors, + // its singular values will be stored in work_s, and its (transposed) right singular + // vectors will be stored in work_vt. + lapack::gesdd(lapack::Job::OverwriteVec, m, n+1, AB, ldab, work_s, nullptr, 1, work_vt, n+1); + T scale = work_vt[(n+1)*(n+1)-1]; + for (int i = 0; i < n; i++) { + x[i] = -work_vt[n + i*(n+1)] / scale; + } + return; +} + /* Let A be a tall data matrix of dimensions m by n where m > n and b be a vector of dimension m. * In ordinary least squares it assumes that the error lies only in the right hand side vector b, * and it aims to find a vector x that minimizes ||A*x - b||_2. @@ -76,38 +96,35 @@ int main(int argc, char* argv[]){ int64_t sk_dim = 2*(n+1); // Initialize workspace - double *AB = new double[m*(n+1)]; // Store [A B] in column major format + double *AB = new double[m*(n+1)]; double *SAB = new double[sk_dim*(n+1)]; - double *X = new double[n]; - double *res = new double[n]; - - // Initialize workspace for the sketched svd - double *U = new double[sk_dim*sk_dim]; + double *sketch_x = new double[n]; double *svals = new double[n+1]; double *VT = new double[(n+1)*(n+1)]; // Initialize noisy gaussian data init_noisy_data(m, n, 1, AB); + std::cout << "\nDimensions of the augmented matrix [A|B] : " << m << " by " << n+1 << '\n'; + std::cout << "Embedding dimension : " << sk_dim << '\n'; - // Define the parameters of the sparse distribution + // Sample the sketching operator auto time_constructsketch1 = high_resolution_clock::now(); - - // Initialize sparse distribution struct for the sketching operator - uint32_t seed = 0; // Initialize seed for random number generation - RandBLAS::SparseDist Dist = {.n_rows = sk_dim, // Number of rows of the sketching operator - .n_cols = m, // Number of columns of the sketching operator - .vec_nnz = 8, // Number of non-zero entires per major axis - .major_axis = RandBLAS::MajorAxis::Short // Defines the major axis of the sketching operator - }; - - //Construct the sparse sketching operator + RandBLAS::SparseDist Dist = { + .n_rows = sk_dim, // Number of rows of the sketching operator + .n_cols = m, // Number of columns of the sketching operator + .vec_nnz = 8, // Number of non-zero entires per major-axis vector + .major_axis = RandBLAS::MajorAxis::Short // A "SASO" (aka SJLT, aka OSNAP, aka generalized CountSketch) + }; + uint32_t seed = 1997; RandBLAS::SparseSkOp S(Dist, seed); RandBLAS::fill_sparse(S); auto time_constructsketch2 = high_resolution_clock::now(); + double sampling_time = (double) duration_cast(time_constructsketch2 - time_constructsketch1).count()/1000; + std::cout << "\nTime to sample S : " << sampling_time << " seconds" << '\n'; // Sketch AB - // SAB = alpha * \op(S) * \op(AB) + beta * SAB + // SAB = 1.0 * S * AB + 0.0 * SAB auto time_sketch1 = high_resolution_clock::now(); RandBLAS::sketch_general( blas::Layout::ColMajor, // Matrix storage layout of AB and SAB @@ -117,57 +134,47 @@ int main(int argc, char* argv[]){ n + 1, // Number of columns of AB and SAB m, // Number of rows of AB and columns of S 1.0, // Scalar alpha - if alpha is zero AB is not accessed - S, // A DenseSkOp or SparseSkOp sketching operator + S, // A DenseSkOp or SparseSkOp AB, // Matrix to be sketched m, // Leading dimension of AB - 0.0, // Scalar beta - if beta is zero SAB is not accessed + 0.0, // Scalar beta - if beta is zero the initial value of SAB is not accessed SAB, // Sketched matrix SAB sk_dim // Leading dimension of SAB ); auto time_sketch2 = high_resolution_clock::now(); + double sketching_time = (double) duration_cast(time_sketch2 - time_sketch1).count()/1000; + std::cout << "Time to compute SAB = S * AB : " << sketching_time << " seconds\n"; + + auto time_sketched_TLS1 = high_resolution_clock::now(); + total_least_squares(sk_dim, n, SAB, sk_dim, sketch_x, svals, VT); + auto time_sketched_TLS2 = high_resolution_clock::now(); + double sketched_solve_time = (double) duration_cast(time_sketched_TLS2 - time_sketched_TLS1).count()/1000; + std::cout << "Time to perform TLS on sketched data : " << sketched_solve_time << " seconds\n\n"; + + double total_randomized_time = sampling_time + sketching_time + sketched_solve_time; + std::cout << "Total time for the randomized TLS method : " << total_randomized_time << " seconds\n"; + + double* true_x = new double[n]; + auto time_true_TLS1 = high_resolution_clock::now(); + total_least_squares(m, n, AB, m, true_x, svals, VT); + auto time_true_TLS2 = high_resolution_clock::now(); + double true_solve_time = (double) duration_cast(time_true_TLS2 - time_true_TLS1).count()/1000; + std::cout << "Time for the classical TLS method : " << true_solve_time << " seconds" << "\n"; + + std::cout << "Speedup of sketched vs classical method : " << true_solve_time / total_randomized_time << "\n\n"; - // Perform SVD operation on SAB - auto time_TLS1 = high_resolution_clock::now(); - lapack::gesdd(lapack::Job::AllVec, sk_dim, (n+1), SAB, sk_dim, svals, U, sk_dim, VT, n+1); - - for (int i = 0; i < n; i++) { - X[i] = VT[n + i*(n+1)]; // Take the right n by 1 block of V - } - - // Scale X by the inverse of the 1 by 1 bottom right block of V - blas::scal(n, -1/VT[(n+1)*(n+1)-1], X, 1); - auto time_TLS2 = high_resolution_clock::now(); - - //Check TLS solution. Expected to be a vector of 1's - double res_infnorm = 0; - double res_twonorm = 0; - - for (int i = 0; i < n; i++) { - res[i] = abs(X[i] - 1); - res_twonorm += res[i]*res[i]; - if (res_infnorm < res[i]) { - res_infnorm = res[i]; - } - } - - std::cout << "Matrix dimensions: " << m << " by " << n+1 << '\n'; - std::cout << "Sketch dimension: " << sk_dim << '\n'; - std::cout << "Time to create sparse sketching operator: " << (double) duration_cast(time_constructsketch2 - time_constructsketch1).count()/1000 << " seconds" << '\n'; - std::cout << "Time to sketch AB: " << (double) duration_cast(time_sketch2 - time_sketch1).count()/1000 << " seconds" <<'\n'; - std::cout << "Time to perform TLS on sketched matrix: " << (double) duration_cast(time_TLS2 - time_TLS1).count()/1000 << " seconds" << '\n'; - std::cout << "Inf-norm distance from TLS solution to vector of all ones: " << res_infnorm << '\n'; - std::cout << "Two-norm distance from TLS solution to vector of all ones: " << sqrt(res_twonorm) << '\n'; + double* delta = new double[n]; + blas::copy(n, sketch_x, 1, delta, 1); + blas::axpy(n, -1, true_x, 1, delta, 1); + double distance = blas::nrm2(n, delta, 1); + double scale = blas::nrm2(n, true_x, 1); + std::cout << "||sketch_x - true_x|| / ||true_x|| : " << distance/scale << "\n\n"; + delete[] true_x; delete[] AB; delete[] SAB; - delete[] X; - delete[] res; - delete[] U; + delete[] sketch_x; delete[] svals; delete[] VT; return 0; } - - - -