diff --git a/CMake/rl_build_options.cmake b/CMake/rl_build_options.cmake index 6e8db234..ed2a32f5 100644 --- a/CMake/rl_build_options.cmake +++ b/CMake/rl_build_options.cmake @@ -1,4 +1,4 @@ -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_POSITION_INDEPENDENT_CODE ON) option(BUILD_SHARED_LIBS OFF "Configure to build shared or static libraries") diff --git a/RandBLAS b/RandBLAS index 03170537..172d0963 160000 --- a/RandBLAS +++ b/RandBLAS @@ -1 +1 @@ -Subproject commit 03170537dbaee636cfc1df0afdf12041398f654f +Subproject commit 172d0963f16743defa646b32e7e0279b52230f99 diff --git a/RandLAPACK/misc/rl_gen.hh b/RandLAPACK/misc/rl_gen.hh index 5cbd4bb7..0761ac43 100644 --- a/RandLAPACK/misc/rl_gen.hh +++ b/RandLAPACK/misc/rl_gen.hh @@ -59,6 +59,7 @@ struct mat_gen_info { exponent = 1.0; theta = 1.0; perturb = 1.0; + check_true_rank = false; } }; @@ -250,7 +251,7 @@ void gen_spiked_mat( /// sample from [m] without replacement. Get the row indices for a tall LASO with a single column. RandBLAS::SparseDist DS = {.n_rows = m, .n_cols = 1, .vec_nnz = num_rows_sampled, .major_axis = RandBLAS::MajorAxis::Long}; - RandBLAS::SparseSkOp S(DS, state); + RandBLAS::SparseSkOp S(DS, state); state = RandBLAS::fill_sparse(S); T* V = ( T * ) calloc( n * n, sizeof( T ) ); diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 03106f0b..56a50e2c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.10) project(benchmark) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) message(STATUS "Checking for OpenMP ... ") diff --git a/benchmark/Gemm_vs_ormqr.cc b/benchmark/Gemm_vs_ormqr.cc index 91c8a203..3e4de94f 100644 --- a/benchmark/Gemm_vs_ormqr.cc +++ b/benchmark/Gemm_vs_ormqr.cc @@ -13,7 +13,10 @@ using namespace RandLAPACK; template static void -test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState const_state) { +test_speed(int64_t m, + int64_t n, + int64_t runs, + RandBLAS::RNGState const_state) { // Matrix to decompose. std::vector A(m * n, 0.0); @@ -36,8 +39,8 @@ test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState const_sta auto state = const_state; RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, A, state); - RandLAPACK::gen::mat_gen(m_info, B1, state); + RandLAPACK::gen::mat_gen(m_info, A, state); + RandLAPACK::gen::mat_gen(m_info, B1, state); lapack::lacpy(MatrixType::General, m, n, B1_dat, m, B2_dat, m); // Get the implicit Q-factor in A_dat @@ -66,11 +69,11 @@ test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState const_sta int main() { auto state = RandBLAS::RNGState(); - test_speed(std::pow(2, 10), std::pow(2, 5), 10, state); - test_speed(std::pow(2, 11), std::pow(2, 6), 10, state); - test_speed(std::pow(2, 12), std::pow(2, 7), 10, state); - test_speed(std::pow(2, 13), std::pow(2, 8), 10, state); - test_speed(std::pow(2, 14), std::pow(2, 9), 10, state); - test_speed(std::pow(2, 15), std::pow(2, 10), 10, state); + test_speed(std::pow(2, 10), std::pow(2, 5), 10, state); + test_speed(std::pow(2, 11), std::pow(2, 6), 10, state); + test_speed(std::pow(2, 12), std::pow(2, 7), 10, state); + test_speed(std::pow(2, 13), std::pow(2, 8), 10, state); + test_speed(std::pow(2, 14), std::pow(2, 9), 10, state); + test_speed(std::pow(2, 15), std::pow(2, 10), 10, state); return 0; } diff --git a/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc b/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc index 4bf840de..42a170f9 100644 --- a/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc +++ b/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc @@ -40,13 +40,13 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_speed_benchmark_data &all_data, RandBLAS::RNGState &state) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); std::fill(all_data.J.begin(), all_data.J.end(), 0); } // Re-generate and clear data -template +template static std::vector get_norms( QR_speed_benchmark_data &all_data) { int64_t m = all_data.row; @@ -82,16 +82,16 @@ static void R_norm_ratio( std::iota(all_data.J.begin(), all_data.J.end(), 1); //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, 0, 0, state, (T*) nullptr); lapack::geqp3(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data()); - std::vector R_norms_HQRRP = get_norms(all_data); + std::vector R_norms_HQRRP = get_norms(all_data); printf("\nDone with HQRRP\n"); // Clear and re-generate data - data_regen(m_info, all_data, state); + data_regen(m_info, all_data, state); printf("\nStarting CQRRP\n"); // Running CQRRP CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state); - std::vector R_norms_CQRRP = get_norms(all_data); + std::vector R_norms_CQRRP = get_norms(all_data); // Declare a data file std::fstream file1("data_out/QR_R_norm_ratios_rows_" + std::to_string(m) @@ -139,7 +139,7 @@ static void sv_ratio( lapack::gesdd(Job::NoVec, m, n, all_data.A.data(), m, all_data.S.data(), (T*) nullptr, m, (T*) nullptr, n); // Clear and re-generate data - data_regen(m_info, all_data, state); + data_regen(m_info, all_data, state); // Running GEQP3 std::iota(all_data.J.begin(), all_data.J.end(), 1); @@ -153,7 +153,7 @@ static void sv_ratio( file2 << ",\n"; // Clear and re-generate data - data_regen(m_info, all_data, state1); + data_regen(m_info, all_data, state1); // Running CQRRP CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state); @@ -184,12 +184,12 @@ int main() { //m_info.cond_num = std::pow(10, 10); //m_info.rank = n; //m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); #if !defined(__APPLE__) - R_norm_ratio(m_info, b_sz, all_data, state_constant1); + R_norm_ratio(m_info, b_sz, all_data, state_constant1); printf("R done\n"); - sv_ratio(m_info, b_sz, all_data, state_constant2); + sv_ratio(m_info, b_sz, all_data, state_constant2); printf("SV done\n\n"); #endif } \ No newline at end of file diff --git a/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc b/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc index cb7fae7e..91fecc38 100644 --- a/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc +++ b/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc @@ -48,7 +48,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_speed_benchmark_data &all_data, RandBLAS::RNGState &state) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); std::fill(all_data.J.begin(), all_data.J.end(), 0); } @@ -95,7 +95,7 @@ static std::vector call_all_algs( state_gen_0 = state; state_alg_0 = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen_0); + data_regen(m_info, all_data, state_gen_0); } return inner_timing_best; @@ -120,7 +120,7 @@ int main() { QR_speed_benchmark_data all_data(m, n, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); // Declare a data file std::fstream file("CQRRP_inner_speed_" + std::to_string(m) @@ -132,7 +132,7 @@ int main() { #if !defined(__APPLE__) for (;b_sz_start <= b_sz_end; b_sz_start *= 2) { - res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant); + res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant); file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ", " << res[4] << ", " << res[5] << ", " << res[6] << ", " << res[7] << ", " << res[8] << ", " << res[9] << ", " << res[10] << ", " << res[11] << ",\n"; } #endif diff --git a/benchmark/bench_CQRRP/CQRRP_single_precision.cc b/benchmark/bench_CQRRP/CQRRP_single_precision.cc index ff238ad0..c3a9a14c 100644 --- a/benchmark/bench_CQRRP/CQRRP_single_precision.cc +++ b/benchmark/bench_CQRRP/CQRRP_single_precision.cc @@ -38,7 +38,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_speed_benchmark_data &all_data, RandBLAS::RNGState &state, int apply_itoa) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); if (apply_itoa) { std::iota(all_data.J.begin(), all_data.J.end(), 1); @@ -47,7 +47,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, } } -template +template static std::vector call_all_algs( RandLAPACK::gen::mat_gen_info m_info_cqrrp, RandLAPACK::gen::mat_gen_info m_info_rest, @@ -67,8 +67,6 @@ static std::vector call_all_algs( CQRRP_blocked.nnz = 2; CQRRP_blocked.num_threads = 48; // We are nbot using panel pivoting in performance testing. - int panel_pivoting = 0; - // timing vars long dur_cqrrp = 0; long dur_geqrf = 0; @@ -87,12 +85,12 @@ static std::vector call_all_algs( auto start_getrf = high_resolution_clock::now(); lapack::getrf(m, n, all_data_rest.A.data(), m, all_data_rest.J.data()); auto stop_getrf = high_resolution_clock::now(); - auto dur_getrf = duration_cast(stop_getrf - start_getrf).count(); + dur_getrf = duration_cast(stop_getrf - start_getrf).count(); printf("TOTAL TIME FOR GETRF %ld\n", dur_getrf); // Update best timing i == 0 ? t_getrf_best = dur_getrf : (dur_getrf < t_getrf_best) ? t_getrf_best = dur_getrf : NULL; - data_regen(m_info_rest, all_data_rest, state_gen, 0); + data_regen(m_info_rest, all_data_rest, state_gen, 0); state_gen = state; // Testing GEQRF @@ -105,7 +103,7 @@ static std::vector call_all_algs( i == 0 ? t_geqrf_best = dur_geqrf : (dur_geqrf < t_geqrf_best) ? t_geqrf_best = dur_geqrf : NULL; // Clear and re-generate data - data_regen(m_info_rest, all_data_rest, state_gen, 0); + data_regen(m_info_rest, all_data_rest, state_gen, 0); state_gen = state; // Testing CQRRP - best setup @@ -118,7 +116,7 @@ static std::vector call_all_algs( i == 0 ? t_cqrrp_best = dur_cqrrp : (dur_cqrrp < t_cqrrp_best) ? t_cqrrp_best = dur_cqrrp : NULL; // Clear and re-generate data - data_regen(m_info_cqrrp, all_data_cqrrp, state_gen, 1); + data_regen(m_info_cqrrp, all_data_cqrrp, state_gen, 1); state_gen = state; state_alg = state; } @@ -148,13 +146,13 @@ int main() { QR_speed_benchmark_data all_data_d(m, n, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info_d(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info_d, all_data_d.A.data(), state); + RandLAPACK::gen::mat_gen(m_info_d, all_data_d.A.data(), state); // Allocate basic workspace - float QR_speed_benchmark_data all_data_f(m, n, (float) tol, (float) d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info_f(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info_f, all_data_f.A.data(), state_cpy); + RandLAPACK::gen::mat_gen(m_info_f, all_data_f.A.data(), state_cpy); // Declare a data file std::fstream file("Apple_QR_time_raw_rows_" + std::to_string(m) @@ -165,7 +163,7 @@ int main() { + ".dat", std::fstream::app); #if !defined(__APPLE__) for (;b_sz_start <= b_sz_end; b_sz_start *= 2) { - res = call_all_algs(m_info_f, m_info_d, numruns, b_sz_start, all_data_f, all_data_d, state_constant); + res = call_all_algs(m_info_f, m_info_d, numruns, b_sz_start, all_data_f, all_data_d, state_constant); file << res[0] << ", " << res[1] << ", " << res[2] << ",\n"; } #endif diff --git a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc index 82498e37..e283062a 100644 --- a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc +++ b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc @@ -45,7 +45,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_speed_benchmark_data &all_data, RandBLAS::RNGState &state, int apply_itoa) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); if (apply_itoa) { std::iota(all_data.J.begin(), all_data.J.end(), 1); @@ -99,11 +99,11 @@ static std::vector call_all_algs( auto dur_geqp3 = duration_cast(stop_geqp3 - start_geqp3).count(); printf("TOTAL TIME FOR GEQP3 %ld\n", dur_geqp3); - data_regen(m_info, all_data, state_buf, 0); + data_regen(m_info, all_data, state_buf, 0); // Testing GEQRF auto start_geqrf = high_resolution_clock::now(); - //lapack::geqrf(m, n, all_data.A.data(), m, all_data.tau.data()); + lapack::geqrf(m, n, all_data.A.data(), m, all_data.tau.data()); auto stop_geqrf = high_resolution_clock::now(); dur_geqrf = duration_cast(stop_geqrf - start_geqrf).count(); printf("TOTAL TIME FOR GEQRF %ld\n", dur_geqrf); @@ -114,7 +114,7 @@ static std::vector call_all_algs( auto state_gen_1 = state_gen_0; auto state_alg_1 = state_alg_0; // Clear and re-generate data - data_regen(m_info, all_data, state_gen_0, 0); + data_regen(m_info, all_data, state_gen_0, 0); // Testing CQRRP - best setup auto start_cqrrp = high_resolution_clock::now(); @@ -128,11 +128,11 @@ static std::vector call_all_algs( auto state_gen_3 = state_gen_1; auto state_alg_3 = state_alg_1; // Clear and re-generate data - data_regen(m_info, all_data, state_gen_1, 1); + data_regen(m_info, all_data, state_gen_1, 1); // Testing HQRRP with GEQRF auto start_hqrrp_geqrf = high_resolution_clock::now(); - //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg_1, (T*) nullptr); + RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg_1, (T*) nullptr); auto stop_hqrrp_geqrf = high_resolution_clock::now(); dur_hqrrp_geqrf = duration_cast(stop_hqrrp_geqrf - start_hqrrp_geqrf).count(); printf("TOTAL TIME FOR HQRRP WITH GEQRF %ld\n", dur_hqrrp_geqrf); @@ -143,11 +143,11 @@ static std::vector call_all_algs( auto state_gen_4 = state_gen_3; auto state_alg_4 = state_alg_3; // Clear and re-generate data - data_regen(m_info, all_data, state_gen_3, 1); + data_regen(m_info, all_data, state_gen_3, 1); // Testing HQRRP with Cholqr auto start_hqrrp_cholqr = high_resolution_clock::now(); - //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg_3, (T*) nullptr); + RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg_3, (T*) nullptr); auto stop_hqrrp_cholqr = high_resolution_clock::now(); dur_hqrrp_cholqr = duration_cast(stop_hqrrp_cholqr - start_hqrrp_cholqr).count(); printf("TOTAL TIME FOR HQRRP WITH CHOLQRQ %ld\n", dur_hqrrp_cholqr); @@ -159,7 +159,7 @@ static std::vector call_all_algs( state_alg_0 = state_alg_4; state_buf = state_gen_4; // Clear and re-generate data - data_regen(m_info, all_data, state_gen_4, 0); + data_regen(m_info, all_data, state_gen_4, 0); } printf("CQRRP takes %ld μs\n", t_cqrrp_best); @@ -190,7 +190,7 @@ int main() { QR_speed_benchmark_data all_data(m, n, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); // Declare a data file std::fstream file("ICQRRP_QP3_QR_time_raw_rows_" + std::to_string(m) @@ -201,7 +201,7 @@ int main() { + ".dat", std::fstream::app); #if !defined(__APPLE__) for (;b_sz_start <= b_sz_end; b_sz_start *= 2) { - res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant); + res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant); file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ",\n"; } #endif diff --git a/benchmark/bench_CQRRPT/CQRRPT_pivot_quality.cc b/benchmark/bench_CQRRPT/CQRRPT_pivot_quality.cc index 034935ad..97b713ab 100644 --- a/benchmark/bench_CQRRPT/CQRRPT_pivot_quality.cc +++ b/benchmark/bench_CQRRPT/CQRRPT_pivot_quality.cc @@ -42,14 +42,14 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_benchmark_data &all_data, RandBLAS::RNGState &state) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.R.begin(), all_data.R.end(), 0.0); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); std::fill(all_data.J.begin(), all_data.J.end(), 0); } // Re-generate and clear data -template +template static std::vector get_norms( QR_benchmark_data &all_data) { int64_t m = all_data.row; @@ -79,14 +79,14 @@ static void R_norm_ratio( // Running HQRRP lapack::geqp3(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data()); - std::vector R_norms_HQRRP = get_norms(all_data); + std::vector R_norms_HQRRP = get_norms(all_data); // Clear and re-generate data - data_regen(m_info, all_data, state); + data_regen(m_info, all_data, state); // Running CQRRP CQRRPT.call(m, n, all_data.A.data(), m, all_data.R.data(), n, all_data.J.data(), d_factor, state); - std::vector R_norms_CQRRPT = get_norms(all_data); + std::vector R_norms_CQRRPT = get_norms(all_data); // Declare a data file std::fstream file1("data_out/QR_R_norm_ratios_rows_" + std::to_string(m) @@ -131,7 +131,7 @@ static void sv_ratio( lapack::gesdd(Job::NoVec, m, n, all_data.A.data(), m, all_data.S.data(), (T*) nullptr, m, (T*) nullptr, n); // Clear and re-generate data - data_regen(m_info, all_data, state); + data_regen(m_info, all_data, state); // Running GEQP3 std::iota(all_data.J.begin(), all_data.J.end(), 1); @@ -143,7 +143,7 @@ static void sv_ratio( file2 << ",\n"; // Clear and re-generate data - data_regen(m_info, all_data, state1); + data_regen(m_info, all_data, state1); // Running CQRRP CQRRPT.call(m, n, all_data.A.data(), m, all_data.R.data(), n, all_data.J.data(), d_factor, state); @@ -175,10 +175,10 @@ int main() { m_info.cond_num = std::pow(10, 10); m_info.rank = n; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - R_norm_ratio(m_info, all_data, state_constant1); + R_norm_ratio(m_info, all_data, state_constant1); printf("R done\n"); - sv_ratio(m_info, all_data, state_constant2); + sv_ratio(m_info, all_data, state_constant2); printf("SV done\n\n"); } \ No newline at end of file diff --git a/benchmark/bench_CQRRPT/CQRRPT_runtime_breakdown.cc b/benchmark/bench_CQRRPT/CQRRPT_runtime_breakdown.cc index 8bb7b469..004ec811 100644 --- a/benchmark/bench_CQRRPT/CQRRPT_runtime_breakdown.cc +++ b/benchmark/bench_CQRRPT/CQRRPT_runtime_breakdown.cc @@ -46,7 +46,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_benchmark_data &all_data, RandBLAS::RNGState &state) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.R.begin(), all_data.R.end(), 0.0); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); std::fill(all_data.J.begin(), all_data.J.end(), 0); @@ -90,7 +90,7 @@ static std::vector call_all_algs( state_alg = state; state_gen = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); } return inner_timing_best; @@ -114,7 +114,7 @@ int main() { QR_benchmark_data all_data(m, n_stop, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info(m, n_stop, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); // Declare a data file std::fstream file("CQRRPT_inner_speed_" + std::to_string(m) @@ -124,7 +124,7 @@ int main() { + ".dat", std::fstream::app); for (;n_start <= n_stop; n_start *= 2) { - res = call_all_algs(m_info, numruns, n_start, all_data, state_constant); + res = call_all_algs(m_info, numruns, n_start, all_data, state_constant); file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ", " << res[4] << ", " << res[5] << ", " << res[6] << ", " << res[7] << ",\n"; } } \ No newline at end of file diff --git a/benchmark/bench_CQRRPT/CQRRPT_speed_comparisons.cc b/benchmark/bench_CQRRPT/CQRRPT_speed_comparisons.cc index 7f501878..488cc2b6 100644 --- a/benchmark/bench_CQRRPT/CQRRPT_speed_comparisons.cc +++ b/benchmark/bench_CQRRPT/CQRRPT_speed_comparisons.cc @@ -47,7 +47,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, QR_benchmark_data &all_data, RandBLAS::RNGState &state) { - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.R.begin(), all_data.R.end(), 0.0); std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0); std::fill(all_data.J.begin(), all_data.J.end(), 0); @@ -97,7 +97,7 @@ static std::vector call_all_algs( dur_geqp3 = duration_cast(stop_geqp3 - start_geqp3).count(); state_gen = state; - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); // Testing GEQRF auto start_geqrf = high_resolution_clock::now(); @@ -106,7 +106,7 @@ static std::vector call_all_algs( dur_geqrf = duration_cast(stop_geqrf - start_geqrf).count(); state_gen = state; - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); // Testing CQRRPT auto start_cqrrp = high_resolution_clock::now(); @@ -116,7 +116,7 @@ static std::vector call_all_algs( state_gen = state; state_alg = state; - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); // Testing SCHOLQR3 auto start_scholqr = high_resolution_clock::now(); @@ -141,7 +141,7 @@ static std::vector call_all_algs( dur_scholqr = duration_cast(stop_scholqr - start_scholqr).count(); auto state_gen = state; - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); // Testing GEQR + GEQPT auto start_geqpt = high_resolution_clock::now(); @@ -164,7 +164,7 @@ static std::vector call_all_algs( dur_geqpt = duration_cast(stop_geqpt - start_geqpt).count(); state_gen = state; - data_regen(m_info, all_data, state_gen); + data_regen(m_info, all_data, state_gen); i == 0 ? t_cqrrpt_best = dur_cqrrpt : (dur_cqrrpt < t_cqrrpt_best) ? t_cqrrpt_best = dur_cqrrpt : NULL; i == 0 ? t_geqpt_best = dur_geqpt : (dur_geqpt < t_geqpt_best) ? t_geqpt_best = dur_geqpt : NULL; @@ -197,7 +197,7 @@ int main() { QR_benchmark_data all_data(m, n_stop, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. RandLAPACK::gen::mat_gen_info m_info(m, n_stop, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); // Declare a data file std::fstream file("CQRRPT_speed_comp_" + std::to_string(m) @@ -207,7 +207,7 @@ int main() { + ".dat", std::fstream::app); for (;n_start <= n_stop; n_start *= 2) { - res = call_all_algs(m_info, numruns, n_start, all_data, state_constant); + res = call_all_algs(m_info, numruns, n_start, all_data, state_constant); file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ", " << res[4] << ", " << res[5] << ",\n"; } } diff --git a/benchmark/bench_RBKI/RBKI_runtime_breakdown.cc b/benchmark/bench_RBKI/RBKI_runtime_breakdown.cc index 3de93820..7b39491c 100644 --- a/benchmark/bench_RBKI/RBKI_runtime_breakdown.cc +++ b/benchmark/bench_RBKI/RBKI_runtime_breakdown.cc @@ -58,7 +58,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, RandBLAS::RNGState &state, int overwrite_A) { if (overwrite_A) - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.U.begin(), all_data.U.end(), 0.0); std::fill(all_data.V.begin(), all_data.V.end(), 0.0); std::fill(all_data.Sigma.begin(), all_data.Sigma.end(), 0.0); @@ -104,7 +104,7 @@ static void call_all_algs( file << "\n"; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 0); + data_regen(m_info, all_data, state_gen, 0); state_gen = state; } } @@ -136,7 +136,7 @@ int main(int argc, char *argv[]) { m_info.filename = argv[1]; m_info.workspace_query_mod = 1; // Workspace query; - RandLAPACK::gen::mat_gen(m_info, NULL, state); + RandLAPACK::gen::mat_gen(m_info, NULL, state); // Update basic params. m = m_info.rows; @@ -148,7 +148,7 @@ int main(int argc, char *argv[]) { RBKI_benchmark_data all_data(m, n, k_stop, tol); // Fill the data matrix; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); printf("Finished data preparation\n"); @@ -163,7 +163,7 @@ int main(int argc, char *argv[]) { for (;k_start <= k_stop; k_start *=2) { for (;num_krylov_iters_curr <= num_krylov_iters_stop; num_krylov_iters_curr *=2) { - call_all_algs(m_info, numruns, k_start, num_krylov_iters_curr, all_data, state_constant, output_filename); + call_all_algs(m_info, numruns, k_start, num_krylov_iters_curr, all_data, state_constant, output_filename); } num_krylov_iters_curr = num_krylov_iters_start; } diff --git a/benchmark/bench_RBKI/RBKI_speed_comparisons.cc b/benchmark/bench_RBKI/RBKI_speed_comparisons.cc index f88a6e7c..8b37e221 100644 --- a/benchmark/bench_RBKI/RBKI_speed_comparisons.cc +++ b/benchmark/bench_RBKI/RBKI_speed_comparisons.cc @@ -46,7 +46,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info m_info, RandBLAS::RNGState &state, int overwrite_A) { if (overwrite_A) - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); std::fill(all_data.U.begin(), all_data.U.end(), 0.0); std::fill(all_data.VT.begin(), all_data.VT.end(), 0.0); std::fill(all_data.Sigma.begin(), all_data.Sigma.end(), 0.0); @@ -161,7 +161,7 @@ static void call_all_algs( std::ofstream file(output_filename, std::ios::app); file << b_sz << ", " << RBKI.max_krylov_iters << ", " << target_rank << ", " << custom_rank << ", " << residual_err_target << ", " << residual_err_custom << ", " << dur_rbki << ", " << dur_svd << ",\n"; state_gen = state; - data_regen(m_info, all_data, state_gen, 0); + data_regen(m_info, all_data, state_gen, 0); } } @@ -194,7 +194,7 @@ int main(int argc, char *argv[]) { m_info.filename = argv[1]; m_info.workspace_query_mod = 1; // Workspace query; - RandLAPACK::gen::mat_gen(m_info, NULL, state); + RandLAPACK::gen::mat_gen(m_info, NULL, state); // Update basic params. m = m_info.rows; @@ -206,7 +206,7 @@ int main(int argc, char *argv[]) { RBKI_benchmark_data all_data(m, n, tol); // Fill the data matrix; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); printf("Finished data preparation\n"); @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) { for (;b_sz_start <= b_sz_stop; b_sz_start *=2) { for (;num_matmuls_curr <= num_matmuls_stop; ++num_matmuls_curr) { - call_all_algs(m_info, numruns, b_sz_start, num_matmuls_curr, custom_rank, all_data, state_constant, output_filename, dur_svd); + call_all_algs(m_info, numruns, b_sz_start, num_matmuls_curr, custom_rank, all_data, state_constant, output_filename, dur_svd); } num_matmuls_curr = num_matmuls_start; } diff --git a/benchmark/bench_general/Chol_check.cc b/benchmark/bench_general/Chol_check.cc index 22da8a4e..c3e0150c 100644 --- a/benchmark/bench_general/Chol_check.cc +++ b/benchmark/bench_general/Chol_check.cc @@ -7,7 +7,9 @@ using namespace RandLAPACK; template static void -chol_check(int64_t m, int64_t k, RandBLAS::RNGState state) { +chol_check(int64_t m, + int64_t k, + RandBLAS::RNGState state) { std::vector A(m * m, 0.0); std::vector A_leading_submat_symm(k * k, 0.0); @@ -15,7 +17,7 @@ chol_check(int64_t m, int64_t k, RandBLAS::RNGState state) { RandLAPACK::gen::mat_gen_info m_info(m, m, RandLAPACK::gen::polynomial); m_info.cond_num = std::pow(10, 8); - RandLAPACK::gen::mat_gen(m_info, A.data(), state); + RandLAPACK::gen::mat_gen(m_info, A.data(), state); T* A_dat = A.data(); T* A_leading_submat_symm_dat = A_leading_submat_symm.data(); @@ -51,7 +53,7 @@ chol_check(int64_t m, int64_t k, RandBLAS::RNGState state) { int main() { for(int i = 0; i < 10; ++i) { auto state = RandBLAS::RNGState(i); - chol_check(1000, 500, state); + chol_check(1000, 500, state); } return 0; } \ No newline at end of file diff --git a/benchmark/bench_general/GEMM_flop_count.cc b/benchmark/bench_general/GEMM_flop_count.cc index 64b0a8a8..e7282676 100644 --- a/benchmark/bench_general/GEMM_flop_count.cc +++ b/benchmark/bench_general/GEMM_flop_count.cc @@ -13,7 +13,8 @@ using namespace RandLAPACK; template static void -test_flops(int64_t k, RandBLAS::RNGState state) { +test_flops(int64_t k, + RandBLAS::RNGState state) { int size = k * k; // Flops in gemm of given size - overflows @@ -34,8 +35,8 @@ test_flops(int64_t k, RandBLAS::RNGState state) { T* C_dat = C.data(); RandLAPACK::gen::mat_gen_info m_info(k, k, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, A.data(), state); - RandLAPACK::gen::mat_gen(m_info, B.data(), state); + RandLAPACK::gen::mat_gen(m_info, A.data(), state); + RandLAPACK::gen::mat_gen(m_info, B.data(), state); // Get the timing auto start = high_resolution_clock::now(); @@ -55,6 +56,6 @@ test_flops(int64_t k, RandBLAS::RNGState state) { int main() { auto state = RandBLAS::RNGState(); - test_flops(1000, state); + test_flops(1000, state); return 0; } diff --git a/test/comps/test_orth.cc b/test/comps/test_orth.cc index 9d4c29bd..02fcb16b 100644 --- a/test/comps/test_orth.cc +++ b/test/comps/test_orth.cc @@ -70,7 +70,7 @@ class TestOrth : public ::testing::Test /// Tests orthogonality of a matrix Q, obtained by orthogonalizing a Gaussian sketch. /// Checks I - \transpose{Q}Q. - template + template static void test_orth_sketch( OrthTestData &all_data, RandLAPACK::CholQRQ &CholQRQ @@ -113,8 +113,8 @@ TEST_F(TestOrth, Test_CholQRQ) RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::polynomial); m_info.cond_num = 2; m_info.rank = k; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - sketch_and_copy_computational_helper(state, all_data); - test_orth_sketch(all_data, CholQRQ); + sketch_and_copy_computational_helper(state, all_data); + test_orth_sketch(all_data, CholQRQ); } diff --git a/test/comps/test_preconditioners.cc b/test/comps/test_preconditioners.cc index f9d03901..3b7c3fc6 100644 --- a/test/comps/test_preconditioners.cc +++ b/test/comps/test_preconditioners.cc @@ -278,7 +278,7 @@ TEST_F(TestNystromPrecond, basictest) { mat_info.exponent = 2.0; std::vector A(m * m, 0.0); RandBLAS::RNGState data_state(0); - RandLAPACK::gen::mat_gen(mat_info, A.data(), data_state); + RandLAPACK::gen::mat_gen(mat_info, A.data(), data_state); std::vector G(m * m, 0.0); blas::syrk(Layout::ColMajor, Uplo::Lower, Op::NoTrans, m, m, 1.0, A.data(), m, 0.0, G.data(), m diff --git a/test/comps/test_qb.cc b/test/comps/test_qb.cc index 90bdcea9..e0973054 100644 --- a/test/comps/test_qb.cc +++ b/test/comps/test_qb.cc @@ -251,10 +251,10 @@ TEST_F(TestQB, Polynomial_Decay_general1) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); - svd_and_copy_computational_helper(*all_data); - test_QB2_low_exact_rank>(block_sz, tol, *all_data, *all_algs, state); + svd_and_copy_computational_helper(*all_data); + test_QB2_low_exact_rank(block_sz, tol, *all_data, *all_algs, state); delete all_data; delete all_algs; @@ -283,10 +283,10 @@ TEST_F(TestQB, Polynomial_Decay_general2) m_info.cond_num = 6.7; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); - svd_and_copy_computational_helper(*all_data); - test_QB2_low_exact_rank>(block_sz, tol, *all_data, *all_algs, state); + svd_and_copy_computational_helper(*all_data); + test_QB2_low_exact_rank(block_sz, tol, *all_data, *all_algs, state); delete all_data; delete all_algs; @@ -315,10 +315,10 @@ TEST_F(TestQB, Polynomial_Decay_zero_tol1) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); double norm_A = lapack::lange(Norm::Fro, m, n, (*all_data).A.data(), m); - test_QB2_k_eq_min(block_sz, tol, norm_A, *all_data, *all_algs, state); + test_QB2_k_eq_min(block_sz, tol, norm_A, *all_data, *all_algs, state); delete all_data; delete all_algs; @@ -347,10 +347,10 @@ TEST_F(TestQB, Polynomial_Decay_zero_tol2) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); double norm_A = lapack::lange(Norm::Fro, m, n, (*all_data).A.data(), m); - test_QB2_k_eq_min(block_sz, tol, norm_A, *all_data, *all_algs, state); + test_QB2_k_eq_min(block_sz, tol, norm_A, *all_data, *all_algs, state); delete all_data; delete all_algs; diff --git a/test/comps/test_rf.cc b/test/comps/test_rf.cc index 2729ce5d..7eacea6d 100644 --- a/test/comps/test_rf.cc +++ b/test/comps/test_rf.cc @@ -64,7 +64,7 @@ class TestRF : public ::testing::Test {} }; - template + template static void orth_and_copy_computational_helper(RFTestData &all_data) { auto m = all_data.row; @@ -160,11 +160,11 @@ TEST_F(TestRF, Polynomial_Decay_general1) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); - orth_and_copy_computational_helper(*all_data); + orth_and_copy_computational_helper(*all_data); - test_RF_general>(*all_data, *all_algs, state); + test_RF_general(*all_data, *all_algs, state); delete all_data; delete all_algs; @@ -190,11 +190,11 @@ TEST_F(TestRF, Polynomial_Decay_general2) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); - orth_and_copy_computational_helper(*all_data); + orth_and_copy_computational_helper(*all_data); - test_RF_general>(*all_data, *all_algs, state); + test_RF_general(*all_data, *all_algs, state); delete all_data; delete all_algs; diff --git a/test/comps/test_syrf.cc b/test/comps/test_syrf.cc index e897f603..25d5dd22 100644 --- a/test/comps/test_syrf.cc +++ b/test/comps/test_syrf.cc @@ -61,7 +61,7 @@ class TestSYRF : public ::testing::Test {} }; - template + template static void orth_and_copy_computational_helper(SYRFTestData &all_data) { auto m = all_data.row; @@ -159,11 +159,11 @@ TEST_F(TestSYRF, Polynomial_Decay_general1) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); algorithm_objects all_algs(verbosity, cond_check, p, passes_per_iteration); - orth_and_copy_computational_helper(all_data); - test_SYRF_general(state, all_data, all_algs); + orth_and_copy_computational_helper(all_data); + test_SYRF_general(state, all_data, all_algs); } TEST_F(TestSYRF, Polynomial_Decay_general2) @@ -184,9 +184,9 @@ TEST_F(TestSYRF, Polynomial_Decay_general2) m_info.cond_num = 2025; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); algorithm_objects all_algs(verbosity, cond_check, p, passes_per_iteration); - orth_and_copy_computational_helper(all_data); - test_SYRF_general(state, all_data, all_algs); + orth_and_copy_computational_helper(all_data); + test_SYRF_general(state, all_data, all_algs); } diff --git a/test/comps/test_util.cc b/test/comps/test_util.cc index 6e440b6b..64fc8aea 100644 --- a/test/comps/test_util.cc +++ b/test/comps/test_util.cc @@ -3,7 +3,7 @@ #include "rl_gen.hh" #include -#include +#include #include #include @@ -116,10 +116,10 @@ TEST_F(TestUtil, test_spectral_norm_polynomial_decay_double_precision) { m_info.cond_num = 2025; m_info.rank = n; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, all_data.A_cpy.data(), m); - test_spectral_norm(state, all_data); + test_spectral_norm(state, all_data); } TEST_F(TestUtil, test_spectral_norm_rank_def_mat_double_precision) { @@ -132,10 +132,10 @@ TEST_F(TestUtil, test_spectral_norm_rank_def_mat_double_precision) { RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::adverserial); m_info.scaling = std::pow(10, 15); m_info.rank = n; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, all_data.A_cpy.data(), m); - test_spectral_norm(state, all_data); + test_spectral_norm(state, all_data); } TEST_F(TestUtil, test_spectral_norm_polynomial_decay_single_precision) { @@ -149,10 +149,10 @@ TEST_F(TestUtil, test_spectral_norm_polynomial_decay_single_precision) { m_info.cond_num = 2; m_info.rank = n; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, all_data.A_cpy.data(), m); - test_spectral_norm(state, all_data); + test_spectral_norm(state, all_data); } TEST_F(TestUtil, test_spectral_norm_rank_def_mat_single_precision) { @@ -165,10 +165,10 @@ TEST_F(TestUtil, test_spectral_norm_rank_def_mat_single_precision) { RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::adverserial); m_info.scaling = std::pow(10, 7); m_info.rank = n; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, all_data.A_cpy.data(), m); - test_spectral_norm(state, all_data); + test_spectral_norm(state, all_data); } TEST_F(TestUtil, test_normc) { @@ -176,7 +176,7 @@ TEST_F(TestUtil, test_normc) { int64_t n = 1; NormcTestData all_data(m, n); - test_normc(all_data); + test_normc(all_data); } TEST_F(TestUtil, test_binary_rank_search_zero_mat) { @@ -184,10 +184,9 @@ TEST_F(TestUtil, test_binary_rank_search_zero_mat) { int64_t n = 100; std::vector A(m * n, 0.0); - test_binary_rank_search_zero_mat(m, n, A); + test_binary_rank_search_zero_mat(m, n, A); } - class Test_Inplace_Square_Transpose : public ::testing::Test { protected: @@ -205,7 +204,7 @@ class Test_Inplace_Square_Transpose : public ::testing::Test double *A2 = new double[n*n]; blas::copy(n*n, A1, 1, A2, 1); RandLAPACK::util::transpose_square(A2, n); - RandBLAS_Testing::Util::matrices_approx_equal( + test::comparison::matrices_approx_equal( layout, blas::Op::Trans, n, n, A1, n, A2, n, __PRETTY_FUNCTION__, __FILE__, __LINE__ ); @@ -215,6 +214,7 @@ class Test_Inplace_Square_Transpose : public ::testing::Test }; + TEST_F(Test_Inplace_Square_Transpose, random_matrix_colmajor) { apply(blas::Layout::ColMajor); } diff --git a/test/drivers/test_cqrrp.cc b/test/drivers/test_cqrrp.cc index 88a019c7..6624c639 100644 --- a/test/drivers/test_cqrrp.cc +++ b/test/drivers/test_cqrrp.cc @@ -45,7 +45,7 @@ class TestCQRRP : public ::testing::Test } }; - template + template static void norm_and_copy_computational_helper(T &norm_A, CQRRPTestData &all_data) { auto m = all_data.row; auto n = all_data.col; @@ -160,11 +160,11 @@ TEST_F(TestCQRRP, CQRRP_blocked_full_rank_basic) { //m_info.cond_num = 2; //m_info.rank = k; //m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); + norm_and_copy_computational_helper(norm_A, all_data); #if !defined(__APPLE__) - test_CQRRP_general>(d_factor, norm_A, all_data, CQRRP_blocked, state); + test_CQRRP_general(d_factor, norm_A, all_data, CQRRP_blocked, state); #endif } @@ -189,11 +189,11 @@ TEST_F(TestCQRRP, CQRRP_blocked_full_rank_block_change) { //m_info.cond_num = 2; //m_info.rank = k; //m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); + norm_and_copy_computational_helper(norm_A, all_data); #if !defined(__APPLE__) - test_CQRRP_general>(d_factor, norm_A, all_data, CQRRP_blocked, state); + test_CQRRP_general(d_factor, norm_A, all_data, CQRRP_blocked, state); #endif } @@ -219,11 +219,11 @@ TEST_F(TestCQRRP, CQRRP_blocked_low_rank) { //m_info.cond_num = 2; //m_info.rank = k; //m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); + norm_and_copy_computational_helper(norm_A, all_data); #if !defined(__APPLE__) - test_CQRRP_general>(d_factor, norm_A, all_data, CQRRP_blocked, state); + test_CQRRP_general(d_factor, norm_A, all_data, CQRRP_blocked, state); #endif } @@ -245,9 +245,9 @@ TEST_F(TestCQRRP, something) { std::vector tau(n * 2, 0.0); RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, A.data(), state); - RandLAPACK::gen::mat_gen(m_info, B.data(), state); - RandLAPACK::gen::mat_gen(m_info, D.data(), state); + RandLAPACK::gen::mat_gen(m_info, A.data(), state); + RandLAPACK::gen::mat_gen(m_info, B.data(), state); + RandLAPACK::gen::mat_gen(m_info, D.data(), state); lapack::lacpy(MatrixType::General, m, n, D.data(), m, D_cpy.data(), m); lapack::geqrf(m, n, A.data(), m, tau.data()); diff --git a/test/drivers/test_cqrrpt.cc b/test/drivers/test_cqrrpt.cc index 1e99bb54..310fc928 100644 --- a/test/drivers/test_cqrrpt.cc +++ b/test/drivers/test_cqrrpt.cc @@ -42,7 +42,7 @@ class TestCQRRPT : public ::testing::Test } }; - template + template static void norm_and_copy_computational_helper(T &norm_A, CQRRPTTestData &all_data) { auto m = all_data.row; auto n = all_data.col; @@ -148,10 +148,10 @@ TEST_F(TestCQRRPT, CQRRPT_full_rank_no_hqrrp) { m_info.cond_num = 2; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); - test_CQRRPT_general>(d_factor, norm_A, all_data, CQRRPT, state); + norm_and_copy_computational_helper(norm_A, all_data); + test_CQRRPT_general(d_factor, norm_A, all_data, CQRRPT, state); } TEST_F(TestCQRRPT, CQRRPT_low_rank_with_hqrrp) { @@ -173,10 +173,10 @@ TEST_F(TestCQRRPT, CQRRPT_low_rank_with_hqrrp) { m_info.cond_num = 2; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); - test_CQRRPT_general>(d_factor, norm_A, all_data, CQRRPT, state); + norm_and_copy_computational_helper(norm_A, all_data); + test_CQRRPT_general(d_factor, norm_A, all_data, CQRRPT, state); } // Using L2 norm rank estimation here is similar to using raive estimation. @@ -198,10 +198,10 @@ TEST_F(TestCQRRPT, CQRRPT_bad_orth) { RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::adverserial); m_info.scaling = 1e7; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); - test_CQRRPT_general>(d_factor, norm_A, all_data, CQRRPT, state); + norm_and_copy_computational_helper(norm_A, all_data); + test_CQRRPT_general(d_factor, norm_A, all_data, CQRRPT, state); } diff --git a/test/drivers/test_hqrrp.cc b/test/drivers/test_hqrrp.cc index f08e6ee2..633255eb 100644 --- a/test/drivers/test_hqrrp.cc +++ b/test/drivers/test_hqrrp.cc @@ -45,7 +45,7 @@ class TestHQRRP : public ::testing::Test } }; - template + template static void norm_and_copy_computational_helper(T &norm_A, HQRRPtestData &all_data) { auto m = all_data.row; auto n = all_data.col; @@ -159,11 +159,11 @@ TEST_F(TestHQRRP, HQRRP_full_rank_cholqr) { m_info.cond_num = 2; m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - norm_and_copy_computational_helper(norm_A, all_data); + norm_and_copy_computational_helper(norm_A, all_data); // This test uses orhr_col #if !defined(__APPLE__) - test_HQRRP_general(d_factor, b_sz, use_cholqr, panel_pivoting, norm_A, all_data, state); + test_HQRRP_general(d_factor, b_sz, use_cholqr, panel_pivoting, norm_A, all_data, state); #endif } diff --git a/test/drivers/test_rbki.cc b/test/drivers/test_rbki.cc index 2f30faea..f14874da 100644 --- a/test/drivers/test_rbki.cc +++ b/test/drivers/test_rbki.cc @@ -117,7 +117,7 @@ TEST_F(TestRBKI, RBKI_basic) { RBKI.num_threads_rest = 16; RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::gaussian); - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - test_RBKI_general>(b_sz, target_rank, custom_rank, all_data, RBKI, state); + test_RBKI_general(b_sz, target_rank, custom_rank, all_data, RBKI, state); } diff --git a/test/drivers/test_revd2.cc b/test/drivers/test_revd2.cc index c0c184a6..4feaf31b 100644 --- a/test/drivers/test_revd2.cc +++ b/test/drivers/test_revd2.cc @@ -97,7 +97,7 @@ class TestREVD2 : public ::testing::Test {} }; - template + template static void symm_mat_and_copy_computational_helper(T &norm_A, REVD2TestData &all_data) { auto m = all_data.dim; // We're using Nystrom, the original must be positive semidefinite @@ -113,7 +113,7 @@ class TestREVD2 : public ::testing::Test norm_A = lapack::lange(Norm::Fro, m, m, all_data.A_cpy.data(), m); } - template + template static void uplo_computational_helper(REVD2UploTestData &all_data) { auto m = all_data.dim; T* A_u_dat = all_data.A_u.data(); @@ -249,10 +249,10 @@ TEST_F(TestREVD2, Underestimation1) { m_info.cond_num = std::pow(10, 8); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); - symm_mat_and_copy_computational_helper(norm_A, all_data); - test_REVD2_general( + symm_mat_and_copy_computational_helper(norm_A, all_data); + test_REVD2_general( k_start, tol, rank_expectation, err_expectation, norm_A, all_data, all_algs, state ); } @@ -287,10 +287,10 @@ TEST_F(TestREVD2, Underestimation2) { m_info.cond_num = std::pow(10, 8); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); - symm_mat_and_copy_computational_helper(norm_A, all_data); - test_REVD2_general( + symm_mat_and_copy_computational_helper(norm_A, all_data); + test_REVD2_general( k_start, tol, rank_expectation, err_expectation, norm_A, all_data, all_algs, state ); } @@ -325,10 +325,10 @@ TEST_F(TestREVD2, Overestimation1) { m_info.cond_num = std::pow(10, 2); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); - symm_mat_and_copy_computational_helper(norm_A, all_data); - test_REVD2_general( + symm_mat_and_copy_computational_helper(norm_A, all_data); + test_REVD2_general( k_start, tol, rank_expectation, err_expectation, norm_A, all_data, all_algs, state ); } @@ -363,10 +363,10 @@ TEST_F(TestREVD2, Oversetimation2) { m_info.cond_num = std::pow(10, 2); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); - symm_mat_and_copy_computational_helper(norm_A, all_data); - test_REVD2_general( + symm_mat_and_copy_computational_helper(norm_A, all_data); + test_REVD2_general( k_start, tol, rank_expectation, err_expectation, norm_A, all_data, all_algs, state ); } @@ -401,10 +401,10 @@ TEST_F(TestREVD2, Exactness) { m_info.cond_num = std::pow(10, 2); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A_cpy.data(), state); - symm_mat_and_copy_computational_helper(norm_A, all_data); - test_REVD2_general( + symm_mat_and_copy_computational_helper(norm_A, all_data); + test_REVD2_general( k_start, tol, rank_expectation, err_expectation, norm_A, all_data, all_algs, state ); } @@ -437,9 +437,9 @@ TEST_F(TestREVD2, Uplo) { m_info.cond_num = std::pow(10, 2); m_info.rank = k; m_info.exponent = 2.0; - RandLAPACK::gen::mat_gen(m_info, all_data.work.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.work.data(), state); - uplo_computational_helper(all_data); + uplo_computational_helper(all_data); - test_REVD2_uplo(k_start, tol, err_expectation, all_data, all_algs, state); + test_REVD2_uplo(k_start, tol, err_expectation, all_data, all_algs, state); } diff --git a/test/drivers/test_rsvd.cc b/test/drivers/test_rsvd.cc index 39ba1609..20dc1e5f 100644 --- a/test/drivers/test_rsvd.cc +++ b/test/drivers/test_rsvd.cc @@ -182,8 +182,8 @@ TEST_F(TestRSVD, SimpleTest) RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::polynomial); m_info.cond_num = 2; m_info.rank = k; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); - computational_helper(all_data); - test_RSVD1_general(tol, all_data, all_algs, state); + computational_helper(all_data); + test_RSVD1_general(tol, all_data, all_algs, state); }