Skip to content

Commit

Permalink
Updating RandLAPACK to work with the most recent version of RandBLAS (#…
Browse files Browse the repository at this point in the history
…68)

Consider adding some changes resolving the existing issues.
  • Loading branch information
TeachRaccooon authored Apr 16, 2024
1 parent 5432731 commit f8e4226
Showing 28 changed files with 187 additions and 182 deletions.
2 changes: 1 addition & 1 deletion CMake/rl_build_options.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

option(BUILD_SHARED_LIBS OFF "Configure to build shared or static libraries")
2 changes: 1 addition & 1 deletion RandBLAS
Submodule RandBLAS updated 77 files
+1 −1 CMake/rb_build_options.cmake
+1 −1 CMakeLists.txt
+1 −1 INSTALL.md
+76 −0 LICENSE
+3 −21 README.md
+3 −3 RandBLAS.hh
+0 −91 RandBLAS/base.cc
+127 −22 RandBLAS/base.hh
+71 −440 RandBLAS/dense_skops.hh
+1 −1 RandBLAS/exceptions.hh
+6 −6 RandBLAS/random_gen.hh
+597 −390 RandBLAS/skge.hh
+339 −0 RandBLAS/skge3_to_gemm.hh
+301 −0 RandBLAS/skges_to_spmm.hh
+0 −979 RandBLAS/sparse.hh
+162 −0 RandBLAS/sparse_data/base.hh
+184 −0 RandBLAS/sparse_data/conversions.hh
+383 −0 RandBLAS/sparse_data/coo_matrix.hh
+135 −0 RandBLAS/sparse_data/coo_spmm_impl.hh
+223 −0 RandBLAS/sparse_data/csc_matrix.hh
+125 −0 RandBLAS/sparse_data/csc_spmm_impl.hh
+236 −0 RandBLAS/sparse_data/csr_matrix.hh
+91 −0 RandBLAS/sparse_data/csr_spmm_impl.hh
+278 −0 RandBLAS/sparse_data/sksp.hh
+339 −0 RandBLAS/sparse_data/sksp3_to_spmm.hh
+348 −0 RandBLAS/sparse_data/spmm_dispatch.hh
+461 −0 RandBLAS/sparse_skops.hh
+0 −449 RandBLAS/test_util.hh
+33 −0 RandBLAS/util.hh
+51 −0 examples/CMakeLists.txt
+174 −0 examples/TLS_DenseSkOp.cc
+173 −0 examples/TLS_SparseSkOp.cc
+1 −0 rtd/howwebuiltthis.md
+1 −0 rtd/requirements.txt
+2 −1 rtd/source/Doxyfile
+19 −0 rtd/source/api_reference/index.rst
+21 −0 rtd/source/api_reference/other_sparse.rst
+41 −0 rtd/source/api_reference/sketch_dense.rst
+20 −0 rtd/source/api_reference/sketch_sparse.rst
+57 −0 rtd/source/api_reference/skops_and_dists.rst
+31 −0 rtd/source/api_reference/sparse_matrices.rst
+2 −0 rtd/source/assets/sparse_vs_dense_diagram_no_header.html
+5 −3 rtd/source/conf.py
+15 −9 rtd/source/index.rst
+19 −0 rtd/source/tutorial/_incomplete_sketching.rst
+89 −0 rtd/source/tutorial/distributions.rst
+105 −0 rtd/source/tutorial/gemm.rst
+59 −0 rtd/source/tutorial/index.rst
+132 −0 rtd/source/tutorial/sampling_skops.rst
+76 −0 rtd/source/tutorial/submatrices.rst
+68 −0 rtd/source/tutorial/temp.rst
+0 −43 rtd/source/user_guide/index.rst
+0 −93 rtd/source/user_guide/operators.rst
+0 −46 rtd/source/user_guide/rng_details.rst
+0 −38 rtd/source/user_guide/sketching.rst
+7 −0 rtd/themes/randblas_rtd/static/theme_overrides.css
+29 −8 test/CMakeLists.txt
+208 −0 test/comparison.hh
+689 −0 test/linop_common.hh
+0 −412 test/test_dense/test_sketch_gefr3.cc
+52 −12 test/test_dense_skops/test_construction.cc
+21 −153 test/test_dense_skops/test_lskge3.cc
+273 −0 test/test_dense_skops/test_rskge3.cc
+3 −3 test/test_rng_speed.cc
+190 −0 test/test_sketch_vector.cc
+0 −144 test/test_sparse/test_construction.cc
+0 −823 test/test_sparse/test_sketch_gefls.cc
+130 −0 test/test_sparse_data/common.hh
+89 −0 test/test_sparse_data/test_conversions.cc
+581 −0 test/test_sparse_data/test_coo.cc
+485 −0 test/test_sparse_data/test_csc.cc
+514 −0 test/test_sparse_data/test_csr.cc
+89 −0 test/test_sparse_data/test_left_multiply.hh
+81 −0 test/test_sparse_data/test_right_multiply.hh
+218 −0 test/test_sparse_skops/test_construction.cc
+561 −0 test/test_sparse_skops/test_lskges.cc
+53 −167 test/test_sparse_skops/test_rskges.cc
3 changes: 2 additions & 1 deletion RandLAPACK/misc/rl_gen.hh
Original file line number Diff line number Diff line change
@@ -59,6 +59,7 @@ struct mat_gen_info {
exponent = 1.0;
theta = 1.0;
perturb = 1.0;
check_true_rank = false;
}
};

@@ -250,7 +251,7 @@ void gen_spiked_mat(

/// sample from [m] without replacement. Get the row indices for a tall LASO with a single column.
RandBLAS::SparseDist DS = {.n_rows = m, .n_cols = 1, .vec_nnz = num_rows_sampled, .major_axis = RandBLAS::MajorAxis::Long};
RandBLAS::SparseSkOp<T, RNG> S(DS, state);
RandBLAS::SparseSkOp<T> S(DS, state);
state = RandBLAS::fill_sparse(S);

T* V = ( T * ) calloc( n * n, sizeof( T ) );
2 changes: 1 addition & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.10)
project(benchmark)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)

message(STATUS "Checking for OpenMP ... ")
21 changes: 12 additions & 9 deletions benchmark/Gemm_vs_ormqr.cc
Original file line number Diff line number Diff line change
@@ -13,7 +13,10 @@ using namespace RandLAPACK;

template <typename T, typename RNG>
static void
test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState<RNG> const_state) {
test_speed(int64_t m,
int64_t n,
int64_t runs,
RandBLAS::RNGState<RNG> const_state) {

// Matrix to decompose.
std::vector<T> A(m * n, 0.0);
@@ -36,8 +39,8 @@ test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState<RNG> const_sta
auto state = const_state;

RandLAPACK::gen::mat_gen_info<double> m_info(m, n, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, A, state);
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, B1, state);
RandLAPACK::gen::mat_gen(m_info, A, state);
RandLAPACK::gen::mat_gen(m_info, B1, state);
lapack::lacpy(MatrixType::General, m, n, B1_dat, m, B2_dat, m);

// Get the implicit Q-factor in A_dat
@@ -66,11 +69,11 @@ test_speed(int64_t m, int64_t n, int64_t runs, RandBLAS::RNGState<RNG> const_sta

int main() {
auto state = RandBLAS::RNGState();
test_speed<double, r123::Philox4x32>(std::pow(2, 10), std::pow(2, 5), 10, state);
test_speed<double, r123::Philox4x32>(std::pow(2, 11), std::pow(2, 6), 10, state);
test_speed<double, r123::Philox4x32>(std::pow(2, 12), std::pow(2, 7), 10, state);
test_speed<double, r123::Philox4x32>(std::pow(2, 13), std::pow(2, 8), 10, state);
test_speed<double, r123::Philox4x32>(std::pow(2, 14), std::pow(2, 9), 10, state);
test_speed<double, r123::Philox4x32>(std::pow(2, 15), std::pow(2, 10), 10, state);
test_speed<double>(std::pow(2, 10), std::pow(2, 5), 10, state);
test_speed<double>(std::pow(2, 11), std::pow(2, 6), 10, state);
test_speed<double>(std::pow(2, 12), std::pow(2, 7), 10, state);
test_speed<double>(std::pow(2, 13), std::pow(2, 8), 10, state);
test_speed<double>(std::pow(2, 14), std::pow(2, 9), 10, state);
test_speed<double>(std::pow(2, 15), std::pow(2, 10), 10, state);
return 0;
}
20 changes: 10 additions & 10 deletions benchmark/bench_CQRRP/CQRRP_pivot_quality.cc
Original file line number Diff line number Diff line change
@@ -40,13 +40,13 @@ static void data_regen(RandLAPACK::gen::mat_gen_info<T> m_info,
QR_speed_benchmark_data<T> &all_data,
RandBLAS::RNGState<RNG> &state) {

RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);
std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0);
std::fill(all_data.J.begin(), all_data.J.end(), 0);
}

// Re-generate and clear data
template <typename T, typename RNG>
template <typename T>
static std::vector<T> get_norms( QR_speed_benchmark_data<T> &all_data) {

int64_t m = all_data.row;
@@ -82,16 +82,16 @@ static void R_norm_ratio(
std::iota(all_data.J.begin(), all_data.J.end(), 1);
//RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, 0, 0, state, (T*) nullptr);
lapack::geqp3(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data());
std::vector<T> R_norms_HQRRP = get_norms<T, RNG>(all_data);
std::vector<T> R_norms_HQRRP = get_norms(all_data);
printf("\nDone with HQRRP\n");

// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state);
data_regen(m_info, all_data, state);

printf("\nStarting CQRRP\n");
// Running CQRRP
CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state);
std::vector<T> R_norms_CQRRP = get_norms<T, RNG>(all_data);
std::vector<T> R_norms_CQRRP = get_norms(all_data);

// Declare a data file
std::fstream file1("data_out/QR_R_norm_ratios_rows_" + std::to_string(m)
@@ -139,7 +139,7 @@ static void sv_ratio(
lapack::gesdd(Job::NoVec, m, n, all_data.A.data(), m, all_data.S.data(), (T*) nullptr, m, (T*) nullptr, n);

// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state);
data_regen(m_info, all_data, state);

// Running GEQP3
std::iota(all_data.J.begin(), all_data.J.end(), 1);
@@ -153,7 +153,7 @@ static void sv_ratio(
file2 << ",\n";

// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state1);
data_regen(m_info, all_data, state1);

// Running CQRRP
CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state);
@@ -184,12 +184,12 @@ int main() {
//m_info.cond_num = std::pow(10, 10);
//m_info.rank = n;
//m_info.exponent = 2.0;
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);

#if !defined(__APPLE__)
R_norm_ratio<double, r123::Philox4x32>(m_info, b_sz, all_data, state_constant1);
R_norm_ratio(m_info, b_sz, all_data, state_constant1);
printf("R done\n");
sv_ratio<double, r123::Philox4x32>(m_info, b_sz, all_data, state_constant2);
sv_ratio(m_info, b_sz, all_data, state_constant2);
printf("SV done\n\n");
#endif
}
8 changes: 4 additions & 4 deletions benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc
Original file line number Diff line number Diff line change
@@ -48,7 +48,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info<T> m_info,
QR_speed_benchmark_data<T> &all_data,
RandBLAS::RNGState<RNG> &state) {

RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);
std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0);
std::fill(all_data.J.begin(), all_data.J.end(), 0);
}
@@ -95,7 +95,7 @@ static std::vector<long> call_all_algs(
state_gen_0 = state;
state_alg_0 = state;
// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state_gen_0);
data_regen(m_info, all_data, state_gen_0);
}

return inner_timing_best;
@@ -120,7 +120,7 @@ int main() {
QR_speed_benchmark_data<double> all_data(m, n, tol, d_factor);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<double> m_info(m, n, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);

// Declare a data file
std::fstream file("CQRRP_inner_speed_" + std::to_string(m)
@@ -132,7 +132,7 @@ int main() {

#if !defined(__APPLE__)
for (;b_sz_start <= b_sz_end; b_sz_start *= 2) {
res = call_all_algs<double, r123::Philox4x32>(m_info, numruns, b_sz_start, all_data, state_constant);
res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant);
file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ", " << res[4] << ", " << res[5] << ", " << res[6] << ", " << res[7] << ", " << res[8] << ", " << res[9] << ", " << res[10] << ", " << res[11] << ",\n";
}
#endif
20 changes: 9 additions & 11 deletions benchmark/bench_CQRRP/CQRRP_single_precision.cc
Original file line number Diff line number Diff line change
@@ -38,7 +38,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info<T> m_info,
QR_speed_benchmark_data<T> &all_data,
RandBLAS::RNGState<RNG> &state, int apply_itoa) {

RandLAPACK::gen::mat_gen<T, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);
std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0);
if (apply_itoa) {
std::iota(all_data.J.begin(), all_data.J.end(), 1);
@@ -47,7 +47,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info<T> m_info,
}
}

template <typename T_rest, typename T_cqrrp, typename RNG>
template <typename T_rest, typename RNG, typename T_cqrrp>
static std::vector<long> call_all_algs(
RandLAPACK::gen::mat_gen_info<T_cqrrp> m_info_cqrrp,
RandLAPACK::gen::mat_gen_info<T_rest> m_info_rest,
@@ -67,8 +67,6 @@ static std::vector<long> call_all_algs(
CQRRP_blocked.nnz = 2;
CQRRP_blocked.num_threads = 48;
// We are not using panel pivoting in performance testing.
int panel_pivoting = 0;

// timing vars
long dur_cqrrp = 0;
long dur_geqrf = 0;
@@ -87,12 +85,12 @@ static std::vector<long> call_all_algs(
auto start_getrf = high_resolution_clock::now();
lapack::getrf(m, n, all_data_rest.A.data(), m, all_data_rest.J.data());
auto stop_getrf = high_resolution_clock::now();
auto dur_getrf = duration_cast<microseconds>(stop_getrf - start_getrf).count();
dur_getrf = duration_cast<microseconds>(stop_getrf - start_getrf).count();
printf("TOTAL TIME FOR GETRF %ld\n", dur_getrf);
// Update best timing
i == 0 ? t_getrf_best = dur_getrf : (dur_getrf < t_getrf_best) ? t_getrf_best = dur_getrf : NULL;

data_regen<T_rest, RNG>(m_info_rest, all_data_rest, state_gen, 0);
data_regen(m_info_rest, all_data_rest, state_gen, 0);
state_gen = state;

// Testing GEQRF
@@ -105,7 +103,7 @@ static std::vector<long> call_all_algs(
i == 0 ? t_geqrf_best = dur_geqrf : (dur_geqrf < t_geqrf_best) ? t_geqrf_best = dur_geqrf : NULL;

// Clear and re-generate data
data_regen<T_rest, RNG>(m_info_rest, all_data_rest, state_gen, 0);
data_regen(m_info_rest, all_data_rest, state_gen, 0);
state_gen = state;

// Testing CQRRP - best setup
@@ -118,7 +116,7 @@ static std::vector<long> call_all_algs(
i == 0 ? t_cqrrp_best = dur_cqrrp : (dur_cqrrp < t_cqrrp_best) ? t_cqrrp_best = dur_cqrrp : NULL;

// Clear and re-generate data
data_regen<T_cqrrp, RNG>(m_info_cqrrp, all_data_cqrrp, state_gen, 1);
data_regen(m_info_cqrrp, all_data_cqrrp, state_gen, 1);
state_gen = state;
state_alg = state;
}
@@ -148,13 +146,13 @@ int main() {
QR_speed_benchmark_data<double> all_data_d(m, n, tol, d_factor);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<double> m_info_d(m, n, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info_d, all_data_d.A.data(), state);
RandLAPACK::gen::mat_gen(m_info_d, all_data_d.A.data(), state);

// Allocate basic workspace - float
QR_speed_benchmark_data<float> all_data_f(m, n, (float) tol, (float) d_factor);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<float> m_info_f(m, n, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen<float, r123::Philox4x32>(m_info_f, all_data_f.A.data(), state_cpy);
RandLAPACK::gen::mat_gen(m_info_f, all_data_f.A.data(), state_cpy);

// Declare a data file
std::fstream file("Apple_QR_time_raw_rows_" + std::to_string(m)
@@ -165,7 +163,7 @@ int main() {
+ ".dat", std::fstream::app);
#if !defined(__APPLE__)
for (;b_sz_start <= b_sz_end; b_sz_start *= 2) {
res = call_all_algs<double, float, r123::Philox4x32>(m_info_f, m_info_d, numruns, b_sz_start, all_data_f, all_data_d, state_constant);
res = call_all_algs(m_info_f, m_info_d, numruns, b_sz_start, all_data_f, all_data_d, state_constant);
file << res[0] << ", " << res[1] << ", " << res[2] << ",\n";
}
#endif
22 changes: 11 additions & 11 deletions benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc
Original file line number Diff line number Diff line change
@@ -45,7 +45,7 @@ static void data_regen(RandLAPACK::gen::mat_gen_info<T> m_info,
QR_speed_benchmark_data<T> &all_data,
RandBLAS::RNGState<RNG> &state, int apply_itoa) {

RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);
std::fill(all_data.tau.begin(), all_data.tau.end(), 0.0);
if (apply_itoa) {
std::iota(all_data.J.begin(), all_data.J.end(), 1);
@@ -99,11 +99,11 @@ static std::vector<long> call_all_algs(
auto dur_geqp3 = duration_cast<microseconds>(stop_geqp3 - start_geqp3).count();
printf("TOTAL TIME FOR GEQP3 %ld\n", dur_geqp3);

data_regen<T, RNG>(m_info, all_data, state_buf, 0);
data_regen(m_info, all_data, state_buf, 0);

// Testing GEQRF
auto start_geqrf = high_resolution_clock::now();
//lapack::geqrf(m, n, all_data.A.data(), m, all_data.tau.data());
lapack::geqrf(m, n, all_data.A.data(), m, all_data.tau.data());
auto stop_geqrf = high_resolution_clock::now();
dur_geqrf = duration_cast<microseconds>(stop_geqrf - start_geqrf).count();
printf("TOTAL TIME FOR GEQRF %ld\n", dur_geqrf);
@@ -114,7 +114,7 @@ static std::vector<long> call_all_algs(
auto state_gen_1 = state_gen_0;
auto state_alg_1 = state_alg_0;
// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state_gen_0, 0);
data_regen(m_info, all_data, state_gen_0, 0);

// Testing CQRRP - best setup
auto start_cqrrp = high_resolution_clock::now();
@@ -128,11 +128,11 @@ static std::vector<long> call_all_algs(
auto state_gen_3 = state_gen_1;
auto state_alg_3 = state_alg_1;
// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state_gen_1, 1);
data_regen(m_info, all_data, state_gen_1, 1);

// Testing HQRRP with GEQRF
auto start_hqrrp_geqrf = high_resolution_clock::now();
//RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg_1, (T*) nullptr);
RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg_1, (T*) nullptr);
auto stop_hqrrp_geqrf = high_resolution_clock::now();
dur_hqrrp_geqrf = duration_cast<microseconds>(stop_hqrrp_geqrf - start_hqrrp_geqrf).count();
printf("TOTAL TIME FOR HQRRP WITH GEQRF %ld\n", dur_hqrrp_geqrf);
@@ -143,11 +143,11 @@ static std::vector<long> call_all_algs(
auto state_gen_4 = state_gen_3;
auto state_alg_4 = state_alg_3;
// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state_gen_3, 1);
data_regen(m_info, all_data, state_gen_3, 1);

// Testing HQRRP with Cholqr
auto start_hqrrp_cholqr = high_resolution_clock::now();
//RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg_3, (T*) nullptr);
RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg_3, (T*) nullptr);
auto stop_hqrrp_cholqr = high_resolution_clock::now();
dur_hqrrp_cholqr = duration_cast<microseconds>(stop_hqrrp_cholqr - start_hqrrp_cholqr).count();
printf("TOTAL TIME FOR HQRRP WITH CHOLQRQ %ld\n", dur_hqrrp_cholqr);
@@ -159,7 +159,7 @@ static std::vector<long> call_all_algs(
state_alg_0 = state_alg_4;
state_buf = state_gen_4;
// Clear and re-generate data
data_regen<T, RNG>(m_info, all_data, state_gen_4, 0);
data_regen(m_info, all_data, state_gen_4, 0);
}

printf("CQRRP takes %ld μs\n", t_cqrrp_best);
@@ -190,7 +190,7 @@ int main() {
QR_speed_benchmark_data<double> all_data(m, n, tol, d_factor);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<double> m_info(m, n, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen<double, r123::Philox4x32>(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);

// Declare a data file
std::fstream file("ICQRRP_QP3_QR_time_raw_rows_" + std::to_string(m)
@@ -201,7 +201,7 @@ int main() {
+ ".dat", std::fstream::app);
#if !defined(__APPLE__)
for (;b_sz_start <= b_sz_end; b_sz_start *= 2) {
res = call_all_algs<double, r123::Philox4x32>(m_info, numruns, b_sz_start, all_data, state_constant);
res = call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant);
file << res[0] << ", " << res[1] << ", " << res[2] << ", " << res[3] << ",\n";
}
#endif
Loading

0 comments on commit f8e4226

Please sign in to comment.