From f3a69cc37211b215a3ce30912f897cf37485410a Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 16:09:43 -0400 Subject: [PATCH 1/9] remove include guards and use "#pragma once" instead --- RandBLAS/base.hh | 5 +- RandBLAS/config.h.in | 5 +- RandBLAS/dense_skops.hh | 6 +-- RandBLAS/exceptions.hh | 5 -- RandBLAS/random_gen.hh | 4 +- RandBLAS/skge.hh | 7 +-- RandBLAS/sksy.hh | 4 +- RandBLAS/skve.hh | 5 +- RandBLAS/sparse_data/base.hh | 8 +-- RandBLAS/sparse_data/conversions.hh | 5 +- RandBLAS/sparse_data/coo_matrix.hh | 5 +- RandBLAS/sparse_data/coo_spmm_impl.hh | 6 +-- RandBLAS/sparse_data/csc_matrix.hh | 9 ++-- RandBLAS/sparse_data/csc_spmm_impl.hh | 6 +-- RandBLAS/sparse_data/csr_matrix.hh | 5 +- RandBLAS/sparse_data/csr_spmm_impl.hh | 6 +-- RandBLAS/sparse_data/sksp.hh | 6 +-- RandBLAS/sparse_data/spmm_dispatch.hh | 6 +-- RandBLAS/sparse_skops.hh | 6 +-- RandBLAS/util.hh | 77 +++++++++++++++++++++++++-- 20 files changed, 99 insertions(+), 87 deletions(-) diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh index 76280170..62aa456a 100644 --- a/RandBLAS/base.hh +++ b/RandBLAS/base.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_base_hh -#define randblas_base_hh +#pragma once /// @file @@ -212,5 +211,3 @@ std::ostream &operator<<( } } // end namespace RandBLAS::base - -#endif diff --git a/RandBLAS/config.h.in b/RandBLAS/config.h.in index 0070edb2..128a4959 100644 --- a/RandBLAS/config.h.in +++ b/RandBLAS/config.h.in @@ -1,5 +1,4 @@ -#ifndef RandBLAS_config_h -#define RandBLAS_config_h +#pragma once #define RandBLAS_FULL_VERSION "@RandBLAS_FULL_VERSION@" #define RandBLAS_VERSION_MAJOR @RandBLAS_VERSION_MAJOR@ @@ -53,5 +52,3 @@ // // if you are linking to OpenMP. // - -#endif diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh index c7e60e90..ac92ecbd 100644 --- a/RandBLAS/dense_skops.hh +++ b/RandBLAS/dense_skops.hh @@ -26,9 +26,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // - -#ifndef randblas_dense_hh -#define randblas_dense_hh +#pragma once #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" @@ -640,5 +638,3 @@ RNGState fill_dense( } } // end namespace RandBLAS - -#endif \ No newline at end of file diff --git a/RandBLAS/exceptions.hh b/RandBLAS/exceptions.hh index 15c51405..898f0bbe 100644 --- a/RandBLAS/exceptions.hh +++ b/RandBLAS/exceptions.hh @@ -29,9 +29,6 @@ #pragma once -#ifndef RandBLAS_EXCEPTIONS_HH -#define RandBLAS_EXCEPTIONS_HH - #include #include #include @@ -164,5 +161,3 @@ inline void abort_if( bool cond, const char* func, const char* format, ... ) { #endif } // namespace RandBLAS::exceptions - -#endif \ No newline at end of file diff --git a/RandBLAS/random_gen.hh b/RandBLAS/random_gen.hh index 2bec35c4..d7dcf127 100644 --- a/RandBLAS/random_gen.hh +++ b/RandBLAS/random_gen.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef random_gen_hh -#define random_gen_hh +#pragma once /// @file @@ -178,4 +177,3 @@ struct uneg11 } // end of namespace r123ext -#endif diff --git a/RandBLAS/skge.hh b/RandBLAS/skge.hh index 647e02dd..61590105 100644 --- a/RandBLAS/skge.hh +++ b/RandBLAS/skge.hh @@ -26,9 +26,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // - -#ifndef randblas_skge_hh -#define randblas_skge_hh +#pragma once #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" @@ -1231,6 +1229,3 @@ inline void sketch_general( }; } // end namespace RandBLAS - - -#endif diff --git a/RandBLAS/sksy.hh b/RandBLAS/sksy.hh index 19aa232d..63c0954d 100644 --- a/RandBLAS/sksy.hh +++ b/RandBLAS/sksy.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sksy_hh -#define randblas_sksy_hh +#pragma once #include "RandBLAS/util.hh" #include "RandBLAS/base.hh" @@ -538,4 +537,3 @@ inline void sketch_symmetric( } } // end namespace RandBLAS -#endif diff --git a/RandBLAS/skve.hh b/RandBLAS/skve.hh index da473502..9657c23f 100644 --- a/RandBLAS/skve.hh +++ b/RandBLAS/skve.hh @@ -26,9 +26,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // - -#ifndef randblas_skve_hh -#define randblas_skve_hh +#pragma once #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" @@ -260,4 +258,3 @@ inline void sketch_vector( } } // end namespace RandBLAS -#endif diff --git a/RandBLAS/sparse_data/base.hh b/RandBLAS/sparse_data/base.hh index ac3c1ddd..fa9abafe 100644 --- a/RandBLAS/sparse_data/base.hh +++ b/RandBLAS/sparse_data/base.hh @@ -26,9 +26,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // - -#ifndef randblas_sparse_data_hh -#define randblas_sparse_data_hh +#pragma once #include "RandBLAS/config.h" #include "RandBLAS/base.hh" @@ -193,7 +191,3 @@ namespace RandBLAS { using RandBLAS::sparse_data::IndexBase; using RandBLAS::sparse_data::SparseMatrix; } - - - -#endif diff --git a/RandBLAS/sparse_data/conversions.hh b/RandBLAS/sparse_data/conversions.hh index 497053ab..1db914b6 100644 --- a/RandBLAS/sparse_data/conversions.hh +++ b/RandBLAS/sparse_data/conversions.hh @@ -26,9 +26,8 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // +#pragma once -#ifndef randblas_sparse_data_conversions -#define randblas_sparse_data_conversions #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -209,5 +208,3 @@ void reindex_inplace(COOMatrix &A, IndexBase desired) { } } // end namespace RandBLAS::sparse_data::conversions - -#endif diff --git a/RandBLAS/sparse_data/coo_matrix.hh b/RandBLAS/sparse_data/coo_matrix.hh index 962f93f4..76f587f6 100644 --- a/RandBLAS/sparse_data/coo_matrix.hh +++ b/RandBLAS/sparse_data/coo_matrix.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_coo -#define randblas_sparse_data_coo +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -409,4 +409,3 @@ void coo_to_dense(const COOMatrix &spmat, Layout layout, T *mat) { } // end namespace RandBLAS::sparse_data::coo -#endif diff --git a/RandBLAS/sparse_data/coo_spmm_impl.hh b/RandBLAS/sparse_data/coo_spmm_impl.hh index 60f1d397..2f798eae 100644 --- a/RandBLAS/sparse_data/coo_spmm_impl.hh +++ b/RandBLAS/sparse_data/coo_spmm_impl.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_coo_multiply -#define randblas_sparse_data_coo_multiply +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -163,5 +163,3 @@ static void apply_coo_left_jki_p11( } // end namespace - -#endif diff --git a/RandBLAS/sparse_data/csc_matrix.hh b/RandBLAS/sparse_data/csc_matrix.hh index c296666b..eb0fa6bd 100644 --- a/RandBLAS/sparse_data/csc_matrix.hh +++ b/RandBLAS/sparse_data/csc_matrix.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_csc -#define randblas_sparse_data_csc +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -246,7 +246,4 @@ void dense_to_csc(Layout layout, T* mat, T abs_tol, CSCMatrix &spmat) { return; } - -} - -#endif \ No newline at end of file +} // end namespace RandBLAS::sparse_data::csc diff --git a/RandBLAS/sparse_data/csc_spmm_impl.hh b/RandBLAS/sparse_data/csc_spmm_impl.hh index 8932e38a..77d63654 100644 --- a/RandBLAS/sparse_data/csc_spmm_impl.hh +++ b/RandBLAS/sparse_data/csc_spmm_impl.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_csc_multiply -#define randblas_sparse_data_csc_multiply +#pragma once #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -213,5 +212,4 @@ static void apply_csc_left_kib_rowmajor_1p1( return; } -} -#endif +} // end namespace RandBLAS::sparse_data::csc diff --git a/RandBLAS/sparse_data/csr_matrix.hh b/RandBLAS/sparse_data/csr_matrix.hh index 6844137b..9335b3b0 100644 --- a/RandBLAS/sparse_data/csr_matrix.hh +++ b/RandBLAS/sparse_data/csr_matrix.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_csr -#define randblas_sparse_data_csr +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -262,4 +262,3 @@ void dense_to_csr(Layout layout, T* mat, T abs_tol, CSRMatrix &spmat) { } // end namespace RandBLAS::sparse_data::csr -#endif diff --git a/RandBLAS/sparse_data/csr_spmm_impl.hh b/RandBLAS/sparse_data/csr_spmm_impl.hh index d752064b..1665c6ec 100644 --- a/RandBLAS/sparse_data/csr_spmm_impl.hh +++ b/RandBLAS/sparse_data/csr_spmm_impl.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_csr_multiply -#define randblas_sparse_data_csr_multiply +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -156,5 +156,3 @@ static void apply_csr_left_ikb_rowmajor( } } // end namespace RandBLAS::sparse_data::csr - -#endif diff --git a/RandBLAS/sparse_data/sksp.hh b/RandBLAS/sparse_data/sksp.hh index 02085c7a..6be7912b 100644 --- a/RandBLAS/sparse_data/sksp.hh +++ b/RandBLAS/sparse_data/sksp.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sksp_hh -#define randblas_sksp_hh +#pragma once #include "RandBLAS/base.hh" #include "RandBLAS/dense_skops.hh" @@ -625,6 +624,3 @@ inline void sketch_sparse( } } // end namespace RandBLAS - - -#endif diff --git a/RandBLAS/sparse_data/spmm_dispatch.hh b/RandBLAS/sparse_data/spmm_dispatch.hh index 4ea17544..ef6aa826 100644 --- a/RandBLAS/sparse_data/spmm_dispatch.hh +++ b/RandBLAS/sparse_data/spmm_dispatch.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_sparse_data_spmm_dispatch -#define randblas_sparse_data_spmm_dispatch +#pragma once + #include "RandBLAS/base.hh" #include "RandBLAS/exceptions.hh" #include "RandBLAS/sparse_data/base.hh" @@ -384,5 +384,3 @@ inline void spmm(blas::Layout layout, blas::Op opA, blas::Op opB, int64_t m, int } } - -#endif diff --git a/RandBLAS/sparse_skops.hh b/RandBLAS/sparse_skops.hh index 770d5076..fe7c8aab 100644 --- a/RandBLAS/sparse_skops.hh +++ b/RandBLAS/sparse_skops.hh @@ -26,9 +26,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // - -#ifndef randblas_sparse_skops_hh -#define randblas_sparse_skops_hh +#pragma once #include "RandBLAS/config.h" #include "RandBLAS/base.hh" @@ -483,5 +481,3 @@ static auto transpose(SKOP const &S) { } } // end namespace RandBLAS::sparse - -#endif diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index b05c51ed..81babb0b 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_util_hh -#define randblas_util_hh +#pragma once #include #include @@ -243,6 +242,76 @@ void flip_layout(blas::Layout layout_in, int64_t m, int64_t n, std::vector &A return; } -} // end namespace RandBLAS::util -#endif +template +void weights_to_cdf(T* p, int64_t len_p) { + T sum = 0.0; + for (int64_t i = 0; i < len_p; ++i) { + sum += p[i]; + p[i] = sum; + } + blas::scal(len_p, ((T)1.0)/sum, p, 1); +} + +/*** + * Assume cdf is a buffer specifying a cumulative probability distribution function. + * TF is a template parameter for a real floating point type. + * + * This function produces "num_samples" from the distribution specified by "cdf" + * and stores them in "samples". + */ +template +RNGState sample_indices_iid( + TF* cdf, int64_t len_cdf, int64_t* samples , int64_t num_samples, RandBLAS::RNGState state +) { + auto [ctr, key] = state; + RNG gen; + auto rv_array = r123ext::uneg11::generate(gen, ctr, key); + int64_t len_c = (int64_t) state.len_c; + int64_t rv_index = 0; + for (int64_t i = 0; i < num_samples; ++i) { + if ((i+1) % len_c == 1) { + ctr.incr(1); + rv_array = r123ext::uneg11::generate(gen, ctr, key); + rv_index = 0; + } + TF random_unif01 = ((TF) (rv_array[rv_index] + 1.0)) / ((TF) 2.0); + int64_t sample_index = std::lower_bound(cdf, cdf + len_cdf, random_unif01) - cdf; + // ^ uses binary search to set sample_index to the smallest value for which + // random_unif01 < cdf[sample_index]. + samples[i] = sample_index; + rv_index += 1; + } + return RNGState(ctr, key); +} + +/*** + * This function produces "num_samples" from the uniform distribution over + * {0, ..., max_index_exclusive - 1} and stores them in "samples". + */ +template +RNGState sample_indices_iid_uniform( + int64_t max_index_exclusive, int64_t* samples , int64_t num_samples, RandBLAS::RNGState state +) { + auto [ctr, key] = state; + RNG gen; + auto rv_array = r123ext::uneg11::generate(gen, ctr, key); + int64_t len_c = (int64_t) state.len_c; + int64_t rv_index = 0; + double dmie = (double) max_index_exclusive; + for (int64_t i = 0; i < num_samples; ++i) { + if ((i+1) % len_c == 1) { + ctr.incr(1); + rv_array = r123ext::uneg11::generate(gen, ctr, key); + rv_index = 0; + } + double random_unif01 = (double) (rv_array[rv_index] + 1.0) / 2.0; + int64_t sample_index = (int64_t) dmie * random_unif01; + samples[i] = sample_index; + rv_index += 1; + } + return RNGState(ctr, key); +} + + +} // end namespace RandBLAS::util From c1c917b64ac4996d0cd13480f06c2ba6a3a9a598 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 16:10:53 -0400 Subject: [PATCH 2/9] improve tests for thread invariance of generating dense sketching operators --- test/test_datastructures/test_denseskop.cc | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/test_datastructures/test_denseskop.cc b/test/test_datastructures/test_denseskop.cc index a17e3ee6..c7ae4544 100644 --- a/test/test_datastructures/test_denseskop.cc +++ b/test/test_datastructures/test_denseskop.cc @@ -304,9 +304,7 @@ TEST_F(TestSubmatGeneration, diag) #if defined(RandBLAS_HAS_OpenMP) template -void DenseThreadTest() { - int64_t m = 32; - int64_t n = 8; +void DenseThreadTest(int64_t m, int64_t n) { int64_t d = m*n; std::vector base(d); @@ -319,10 +317,9 @@ void DenseThreadTest() { std::cerr << "with 1 thread: " << base << std::endl; // run with different numbers of threads, and check that the result is the same - int n_hyper = std::thread::hardware_concurrency(); - int n_threads = std::max(n_hyper / 2, 3); - + int n_threads = std::thread::hardware_concurrency(); for (int i = 2; i <= n_threads; ++i) { + std::fill(test.begin(), test.end(), (T) 0.0); omp_set_num_threads(i); RandBLAS::dense::fill_dense_submat_impl(n, test.data(), m, n, 0, state); std::cerr << "with " << i << " threads: " << test << std::endl; @@ -333,11 +330,19 @@ void DenseThreadTest() { } TEST(TestDenseThreading, UniformPhilox) { - DenseThreadTest(); + for (int i = 0; i < 10; ++i) { + DenseThreadTest(32, 8); + DenseThreadTest(1, 5); + DenseThreadTest(5, 1); + } } TEST(TestDenseThreading, GaussianPhilox) { - DenseThreadTest(); + for (int i = 0; i < 10; ++i) { + DenseThreadTest(32, 8); + DenseThreadTest(1, 5); + DenseThreadTest(5, 1); + } } #endif From 0083cf9bf0fc0162fe60f9b140b59b2f099ed13a Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 16:11:45 -0400 Subject: [PATCH 3/9] replace more include guards --- test/test_datastructures/test_spmats/common.hh | 7 ++----- test/test_matmul_cores/linop_common.hh | 6 ++---- test/test_matmul_cores/test_spmm/spmm_test_helpers.hh | 2 ++ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/test/test_datastructures/test_spmats/common.hh b/test/test_datastructures/test_spmats/common.hh index 173304c7..8a7ffa37 100644 --- a/test/test_datastructures/test_spmats/common.hh +++ b/test/test_datastructures/test_spmats/common.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_test_sparse_data_common_hh -#define randblas_test_sparse_data_common_hh +#pragma once + #include "RandBLAS/config.h" #include "RandBLAS/base.hh" #include "RandBLAS/dense_skops.hh" @@ -129,7 +129,4 @@ void coo_from_diag( return; } - } - -#endif diff --git a/test/test_matmul_cores/linop_common.hh b/test/test_matmul_cores/linop_common.hh index 0383f63d..e07c1888 100644 --- a/test/test_matmul_cores/linop_common.hh +++ b/test/test_matmul_cores/linop_common.hh @@ -27,8 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_test_linop_common_hh -#define randblas_test_linop_common_hh +#pragma once + #include "RandBLAS/config.h" #include "RandBLAS/base.hh" #include "RandBLAS/dense_skops.hh" @@ -713,5 +713,3 @@ void test_right_apply_to_transposed( } } // end namespace test::linop_common - -#endif \ No newline at end of file diff --git a/test/test_matmul_cores/test_spmm/spmm_test_helpers.hh b/test/test_matmul_cores/test_spmm/spmm_test_helpers.hh index 5190e256..c07f16a4 100644 --- a/test/test_matmul_cores/test_spmm/spmm_test_helpers.hh +++ b/test/test_matmul_cores/test_spmm/spmm_test_helpers.hh @@ -27,6 +27,8 @@ // POSSIBILITY OF SUCH DAMAGE. // +#pragma once + #include "test/test_datastructures/test_spmats/common.hh" #include "test/test_matmul_cores/linop_common.hh" #include From 544d7133cc7fb549254b5db3b22b42eefdf8aff2 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 16:13:32 -0400 Subject: [PATCH 4/9] babys first statistical tests --- test/CMakeLists.txt | 37 ++-- test/comparison.hh | 5 +- test/test_basic_rng/rng_common.hh | 186 +++++++++++++++++++++ test/test_basic_rng/test_sample_indices.cc | 163 ++++++++++++++++++ 4 files changed, 377 insertions(+), 14 deletions(-) create mode 100644 test/test_basic_rng/rng_common.hh create mode 100644 test/test_basic_rng/test_sample_indices.cc diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 822c1701..dfe6f94b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,6 +5,12 @@ set(tmp FALSE) if (GTest_FOUND) set(tmp TRUE) + ##################################################################### + # + # Tests with dense data and wrappers + # + ##################################################################### + add_executable(RandBLAS_tests comparison.hh test_datastructures/test_denseskop.cc @@ -19,13 +25,15 @@ if (GTest_FOUND) test_matmul_wrappers/test_sketch_vector.cc test_matmul_wrappers/test_sketch_symmetric.cc ) - target_link_libraries(RandBLAS_tests - RandBLAS - GTest::GTest - GTest::Main - ) + target_link_libraries(RandBLAS_tests RandBLAS GTest::GTest GTest::Main) gtest_discover_tests(RandBLAS_tests) + ##################################################################### + # + # Tests with sparse data + # + ##################################################################### + add_executable(SparseRandBLAS_tests comparison.hh @@ -40,13 +48,22 @@ if (GTest_FOUND) test_matmul_cores/test_spmm/test_spmm_csr.cc test_matmul_cores/test_spmm/test_spmm_coo.cc ) - target_link_libraries(SparseRandBLAS_tests - RandBLAS - GTest::GTest - GTest::Main - ) + target_link_libraries(SparseRandBLAS_tests RandBLAS GTest::GTest GTest::Main) gtest_discover_tests(SparseRandBLAS_tests) + ##################################################################### + # + # Statistical tests + # + ##################################################################### + + add_executable(RandBLAS_stats + test_basic_rng/rng_common.hh + test_basic_rng/test_sample_indices.cc + ) + target_link_libraries(RandBLAS_stats RandBLAS GTest::GTest GTest::Main) + gtest_discover_tests(RandBLAS_stats) + endif() message(STATUS "Checking for regression tests ... ${tmp}") diff --git a/test/comparison.hh b/test/comparison.hh index c8f69b27..15efd9f1 100644 --- a/test/comparison.hh +++ b/test/comparison.hh @@ -27,8 +27,7 @@ // POSSIBILITY OF SUCH DAMAGE. // -#ifndef randblas_test_comparisons_hh -#define randblas_test_comparisons_hh +#pragma once #include "RandBLAS.hh" #include @@ -239,5 +238,3 @@ void matrices_approx_equal( } } // end namespace test::comparison - -#endif diff --git a/test/test_basic_rng/rng_common.hh b/test/test_basic_rng/rng_common.hh new file mode 100644 index 00000000..fc66a748 --- /dev/null +++ b/test/test_basic_rng/rng_common.hh @@ -0,0 +1,186 @@ +#pragma once + +#include "RandBLAS.hh" +#include + +namespace RandBLAS_StatTests { + +// +// MARK: constants +// ^ and functions to perform lookups for the constants we store. + +namespace KolmogorovSmirnovConstants { + + +/*** From scipy.stats: critical_value = kstwo.ppf(1-significance, sample_size) */ + +const int SMALLEST_SAMPLE = 8; +const int LARGEST_SAMPLE = 16777216; + +const std::vector sample_sizes { + 8, 16, 32, 64, 128, 256, + 512, 1024, 2048, 4096, 8192, 16384, + 32768, 65536, 131072, 262144, 524288, 1048576, + 2097152, 4194304, 8388608, 16777216 +}; + +const double WEAKEST_SIGNIFICANCE = 0.05; +const double STRONGEST_SIGNIFICANCE = 1e-6; + +const std::vector significance_levels { + 0.05, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6 +}; + +const std::vector> critical_values {{ + // significance of 0.05 + 4.54266591e-01, 3.27333470e-01, 2.34240860e-01, 1.66933746e-01, + 1.18658276e-01, 8.42018587e-02, 5.96844982e-02, 4.22742678e-02, + 2.99273847e-02, 2.11791555e-02, 1.49845091e-02, 1.05999173e-02, + 7.49739992e-03, 5.30252270e-03, 3.74997893e-03, 2.65189975e-03, + 1.87530828e-03, 1.32610915e-03, 9.37733728e-04, 6.63094351e-04, + 4.68886747e-04, 3.31557115e-04 +}, { + // significance of 1e-2 + 5.41792524e-01, 3.92007307e-01, 2.80935776e-01, 2.00288899e-01, + 1.42362543e-01, 1.01005285e-01, 7.15810977e-02, 5.06916722e-02, + 3.58812433e-02, 2.53898259e-02, 1.79621350e-02, 1.27054989e-02, + 8.98630003e-03, 6.35534434e-03, 4.49443988e-03, 3.17831443e-03, + 2.24754012e-03, 1.58931697e-03, 1.12384982e-03, 7.94698321e-04, + 5.61944814e-04, 3.97359107e-04 +}, { + // significance of 1e-3 + 6.40978605e-01, 4.67504918e-01, 3.36105510e-01, 2.39914323e-01, + 1.70596759e-01, 1.21045341e-01, 8.57783224e-02, 6.07400729e-02, + 4.29898739e-02, 3.04175670e-02, 2.15177021e-02, 1.52198122e-02, + 1.07642402e-02, 7.61255632e-03, 5.38342952e-03, 3.80692734e-03, + 2.69203739e-03, 1.90362429e-03, 1.34609876e-03, 9.51852090e-04, + 6.73069322e-04, 4.75936005e-04 +}, { + // significance of 1e-4 + 7.20107998e-01, 5.30358433e-01, 3.82763541e-01, 2.73655631e-01, + 1.94715231e-01, 1.38189709e-01, 9.79338402e-02, 6.93467436e-02, + 4.90797287e-02, 3.47251628e-02, 2.45641333e-02, 1.73741413e-02, + 1.22876435e-02, 8.68978729e-03, 6.14515467e-03, 4.34555112e-03, + 3.07290290e-03, 2.17293722e-03, 1.53653188e-03, 1.08650868e-03, + 7.68285928e-04, 5.43264318e-04 +}, { + // significance of 1e-5 + 7.84314235e-01, 5.84534035e-01, 4.23688590e-01, 3.03463697e-01, + 2.16091906e-01, 1.53407046e-01, 1.08732306e-01, 7.69956230e-02, + 5.44929246e-02, 3.85544959e-02, 2.72724540e-02, 1.92894157e-02, + 1.36420186e-02, 9.64750036e-03, 6.82236902e-03, 4.82441714e-03, + 3.41151342e-03, 2.41237141e-03, 1.70583756e-03, 1.20622593e-03, + 8.52938821e-04, 6.03122960e-0 +}, { + // significance of 1e-6 + 8.36962528e-01, 6.32173765e-01, 4.60387149e-01, 3.30395198e-01, + 2.35470356e-01, 1.67220735e-01, 1.18543880e-01, 8.39483725e-02, + 5.94144379e-02, 4.20363448e-02, 2.97351313e-02, 2.10310168e-02, + 1.48735960e-02, 1.05183852e-02, 7.43818754e-03, 5.25987011e-03, + 3.71942641e-03, 2.63009921e-03, 1.85979452e-03, 1.31508999e-03, + 9.29917360e-04, 6.57555013e-04 +}}; + +/*** + * Returns the index in significance_levels for the "least significant" value + * that is "more significant" than "sig". + * + * The correctness of this function depends on significance_levels being sorted + * in decreasing order (which corresponds to weakest to strongest significances). + */ +int significance_rep(double sig) { + randblas_require(STRONGEST_SIGNIFICANCE <= sig && sig <= WEAKEST_SIGNIFICANCE); + int num_siglevels = (int) significance_levels.size(); + for (int i = 0; i < num_siglevels; ++i) { + if (significance_levels[i] <= sig) + return i; + } + // This code shouldn't be reachable! + randblas_require(false); + return -1; +} + +/*** + * Returns the index in sample_sizes for the smallest sample size that's >= n. + * + * The correctness of this function depends on sample_sizes being sorted in + * increasing order. + */ +int sample_size_rep(int n) { + randblas_require(SMALLEST_SAMPLE <= n && n <= LARGEST_SAMPLE); + int num_sample_sizes = (int) sample_sizes.size(); + for (int i = 0; i < num_sample_sizes; ++i) { + if (sample_sizes[i] <= n) + return i; + } + // This code shouldn't be reachable! + randblas_require(false); + return -1; +} + +std::tuple critical_value_rep(int n, double sig) { + int i = significance_rep(sig); + auto override_sig = significance_levels[i]; + int j = sample_size_rep(n); + auto override_sample = sample_sizes[j]; + auto cv = critical_values[i][j]; + return {cv, override_sample, override_sig}; +} + +} + +// +// MARK: combinatorics +// + +double log_binomial_coefficient(int64_t n, int64_t k) { + double result = 0.0; + for (int64_t i = 1; i <= k; ++i) { + result += std::log(static_cast(n - i + 1)) - std::log(static_cast(i)); + } + return result; +} + + +// +// MARK: hypergeometric +// + +/*** + * Compute the probability mass function of the hypergeometric distribution with parameters N, K, D. + * Concretely ... + * + * Suppose we draw D items without replacement from a set of size N that has K distinguished elements. + * This function returns the probability that the sample of D items will contain observed_k elements + * from the distinguished set. + */ +double hypergeometric_pmf(int64_t N, int64_t K, int64_t D, int64_t observed_k) { + randblas_require(0 <= K && K <= N); + randblas_require(0 <= D && D <= N); + randblas_require(0 <= observed_k && observed_k <= K); + double lognum = log_binomial_coefficient(N - K, D - observed_k) + log_binomial_coefficient(K, observed_k); + double logden = log_binomial_coefficient(N, D); + double exparg = lognum - logden; + double out = std::exp(exparg); + return out; +} + +double hypergeometric_mean(int64_t N, int64_t K, int64_t D) { + double dN = (double) N; + double dK = (double) K; + double dD = (double) D; + return dD * dK / dN; +} + +double hypergeometric_variance(int64_t N, int64_t K, int64_t D) { + double dN = (double) N; + double dK = (double) K; + double dD = (double) D; + + auto t1 = dK / dN; + auto t2 = (dN - dK) / dN; + auto t3 = (dN - dD) / (dN - 1.0); + return dD * t1 * t2 * t3; +} + +} // end namespace RandBLAS_StatTests diff --git a/test/test_basic_rng/test_sample_indices.cc b/test/test_basic_rng/test_sample_indices.cc new file mode 100644 index 00000000..fe6a80fc --- /dev/null +++ b/test/test_basic_rng/test_sample_indices.cc @@ -0,0 +1,163 @@ +// Copyright, 2024. See LICENSE for copyright holder information. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// (1) Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// (2) Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// (3) Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#include "RandBLAS/config.h" +#include "RandBLAS/base.hh" +#include "RandBLAS/util.hh" +using RandBLAS::RNGState; +#include "rng_common.hh" + +#include +#include +#include +#include +#include +#include +#include + + +class TestSampleIndices : public ::testing::Test +{ + protected: + + virtual void SetUp(){}; + + virtual void TearDown(){}; + + static void test_iid_uniform_smoke(int64_t N, int64_t k, uint32_t seed) { + RNGState state(seed); + std::vector samples(k, -1); + RandBLAS::util::sample_indices_iid_uniform(N, samples.data(), k, state); + int64_t* data = samples.data(); + for (int64_t i = 0; i < k; ++i) { + ASSERT_LT(data[i], N); + ASSERT_GE(data[i], 0); + } + return; + } + + static void test_iid_uniform_kolmogorov_smirnov(int64_t N, double significance, int64_t num_samples, uint32_t seed) { + randblas_require(N <= (int64_t) 1e6); + + using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep; + auto [critical_value, override_ns, override_sig] = critical_value_rep(num_samples, significance); + significance = (double) override_sig; + num_samples = (int64_t) override_ns; + + RNGState state(seed); + std::vector samples(num_samples, -1); + RandBLAS::util::sample_indices_iid_uniform(N, samples.data(), num_samples, state); + std::vector sample_cdf(N, 0.0); + for (int64_t s : samples) + sample_cdf[s] += 1; + RandBLAS::util::weights_to_cdf(sample_cdf.data(), N); + + std::vector true_cdf(N, 1.0); + RandBLAS::util::weights_to_cdf(true_cdf.data(), N); + + + for (int i = 0; i < num_samples; ++i) { + float diff = std::abs(sample_cdf[i] - true_cdf[i]); + ASSERT_LT(diff, critical_value); + } + return; + } + +}; + + +TEST_F(TestSampleIndices, smoke_3_x_10) { + for (uint32_t i = 0; i < 10; ++i) + test_iid_uniform_smoke(3, 10, i); +} + +TEST_F(TestSampleIndices, smoke_10_x_3) { + for (uint32_t i = 0; i < 10; ++i) + test_iid_uniform_smoke(10, 3, i); +} + +TEST_F(TestSampleIndices, smoke_med) { + for (uint32_t i = 0; i < 10; ++i) + test_iid_uniform_smoke((int) 1e6 , 6000, i); +} + +TEST_F(TestSampleIndices, smoke_big) { + int64_t huge_N = std::numeric_limits::max() / 2; + for (uint32_t i = 0; i < 10; ++i) + test_iid_uniform_smoke(huge_N, 1000, i); +} + +TEST_F(TestSampleIndices, iid_uniform_ks_generous) { + double s = 1e-6; + test_iid_uniform_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_uniform_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_uniform_kolmogorov_smirnov(1000000, s, 1000, 0); +} + +TEST_F(TestSampleIndices, iid_uniform_ks_moderate) { + float s = 1e-4; + test_iid_uniform_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_uniform_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_uniform_kolmogorov_smirnov(1000000, s, 1000, 0); +} + +TEST_F(TestSampleIndices, iid_uniform_ks_skeptical) { + float s = 1e-2; + test_iid_uniform_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_uniform_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_uniform_kolmogorov_smirnov(1000000, s, 1000, 0); +} + + + +// class TestSampleIndices : public ::testing::Test +// { +// protected: + +// virtual void SetUp(){}; + +// virtual void TearDown(){}; + +// template +// static void test_basic( + +// ) { +// return; +// } + +// }; + + +// TEST_F(TestSampleIndices, smoke) +// { +// // do something +// } + + + From 4bfe1e03828d59a25c4cb6a269dfd63d3b9b3e5a Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 16:40:21 -0400 Subject: [PATCH 5/9] tests for sampling from nonuniform distributions --- test/test_basic_rng/rng_common.hh | 10 +++ test/test_basic_rng/test_sample_indices.cc | 73 +++++++++++++++++----- 2 files changed, 69 insertions(+), 14 deletions(-) diff --git a/test/test_basic_rng/rng_common.hh b/test/test_basic_rng/rng_common.hh index fc66a748..af5b1d32 100644 --- a/test/test_basic_rng/rng_common.hh +++ b/test/test_basic_rng/rng_common.hh @@ -127,6 +127,16 @@ std::tuple critical_value_rep(int n, double sig) { return {cv, override_sample, override_sig}; } +template +double critical_value_rep_mutator(TI &n, double &sig) { + int i = significance_rep(sig); + sig = significance_levels[i]; + int j = sample_size_rep(n); + n = (TI) sample_sizes[j]; + double cv = critical_values[i][j]; + return cv; +} + } // diff --git a/test/test_basic_rng/test_sample_indices.cc b/test/test_basic_rng/test_sample_indices.cc index fe6a80fc..aaea689a 100644 --- a/test/test_basic_rng/test_sample_indices.cc +++ b/test/test_basic_rng/test_sample_indices.cc @@ -62,30 +62,53 @@ class TestSampleIndices : public ::testing::Test return; } + static void index_set_kolmogorov_smirnov_tester( + std::vector &samples, std::vector &true_cdf, double critical_value + ) { + auto num_samples = (int) samples.size(); + auto N = (int64_t) true_cdf.size(); + std::vector sample_cdf(N, 0.0); + for (int64_t s : samples) + sample_cdf[s] += 1; + RandBLAS::util::weights_to_cdf(sample_cdf.data(), N); + + for (int i = 0; i < num_samples; ++i) { + auto diff = (double) std::abs(sample_cdf[i] - true_cdf[i]); + ASSERT_LT(diff, critical_value); + } + return; + } + static void test_iid_uniform_kolmogorov_smirnov(int64_t N, double significance, int64_t num_samples, uint32_t seed) { - randblas_require(N <= (int64_t) 1e6); + using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; + auto critical_value = critical_value_rep_mutator(num_samples, significance); - using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep; - auto [critical_value, override_ns, override_sig] = critical_value_rep(num_samples, significance); - significance = (double) override_sig; - num_samples = (int64_t) override_ns; + std::vector true_cdf(N, 1.0); + RandBLAS::util::weights_to_cdf(true_cdf.data(), N); RNGState state(seed); std::vector samples(num_samples, -1); RandBLAS::util::sample_indices_iid_uniform(N, samples.data(), num_samples, state); - std::vector sample_cdf(N, 0.0); - for (int64_t s : samples) - sample_cdf[s] += 1; - RandBLAS::util::weights_to_cdf(sample_cdf.data(), N); - std::vector true_cdf(N, 1.0); + index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); + return; + } + + static void test_iid_kolmogorov_smirnov(int64_t N, double significance, int64_t num_samples, uint32_t seed) { + using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; + auto critical_value = critical_value_rep_mutator(num_samples, significance); + + // Make the true CDF + std::vector true_cdf{}; + for (int i = 0; i < N; ++i) + true_cdf.push_back(1.0/((float)i + 1.0)); RandBLAS::util::weights_to_cdf(true_cdf.data(), N); + RNGState state(seed); + std::vector samples(num_samples, -1); + RandBLAS::util::sample_indices_iid(true_cdf.data(), N, samples.data(), num_samples, state); - for (int i = 0; i < num_samples; ++i) { - float diff = std::abs(sample_cdf[i] - true_cdf[i]); - ASSERT_LT(diff, critical_value); - } + index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; } @@ -135,6 +158,28 @@ TEST_F(TestSampleIndices, iid_uniform_ks_skeptical) { } +TEST_F(TestSampleIndices, iid_ks_generous) { + double s = 1e-6; + test_iid_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); +} + +TEST_F(TestSampleIndices, iid_ks_moderate) { + float s = 1e-4; + test_iid_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); +} + +TEST_F(TestSampleIndices, iid_ks_skeptical) { + float s = 1e-2; + test_iid_kolmogorov_smirnov(100, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); +} + + // class TestSampleIndices : public ::testing::Test // { From adab6c667ea7c13cb12bc05164a8e566ce0fdc03 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Wed, 3 Jul 2024 21:16:09 -0400 Subject: [PATCH 6/9] API tweaks, plus docs --- RandBLAS/base.hh | 13 +++++- RandBLAS/util.hh | 49 +++++++++++++--------- test/test_basic_rng/test_sample_indices.cc | 8 ++-- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh index 62aa456a..a2382e80 100644 --- a/RandBLAS/base.hh +++ b/RandBLAS/base.hh @@ -53,9 +53,18 @@ /// code common across the project namespace RandBLAS { +/** + * Stores stride information for a matrix represented as a buffer. + * The intended semantics for a buffer "A" and the conceptualized + * matrix "mat(A)" are + * + * mat(A)[i, j] == A[i * inter_row_stride + j * inter_col_stride]. + * + * for all (i, j) within the bounds of mat(A). + */ struct stride_64t { - int64_t inter_row_stride; - int64_t inter_col_stride; + int64_t inter_row_stride; // step down a column + int64_t inter_col_stride; // step along a row }; inline stride_64t layout_to_strides(blas::Layout layout, int64_t ldim) { diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index 81babb0b..4dadf795 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -244,39 +244,50 @@ void flip_layout(blas::Layout layout_in, int64_t m, int64_t n, std::vector &A template -void weights_to_cdf(T* p, int64_t len_p) { +void weights_to_cdf(int64_t n, T* w, T error_if_below = -std::numeric_limits::epsilon()) { T sum = 0.0; - for (int64_t i = 0; i < len_p; ++i) { - sum += p[i]; - p[i] = sum; + for (int64_t i = 0; i < n; ++i) { + T val = w[i]; + randblas_require(val >= error_if_below); + val = std::max(val, (T) 0.0); + sum += val; + w[i] = sum; } - blas::scal(len_p, ((T)1.0)/sum, p, 1); + randblas_require(sum >= ((T) std::sqrt(n)) * std::numeric_limits::epsilon()); + blas::scal(n, ((T)1.0) / sum, w, 1); + return; +} + +template +static inline TO uneg11_to_uneg01(TI in) { + return ((TO) in + (TO) 1.0)/ ((TO) 2.0); } /*** - * Assume cdf is a buffer specifying a cumulative probability distribution function. + * cdf represents a cumulative distribution function over {0, ..., n - 1}. + * * TF is a template parameter for a real floating point type. * - * This function produces "num_samples" from the distribution specified by "cdf" - * and stores them in "samples". + * We overwrite the "samples" buffer with k (independent) samples from the + * distribution specified by cdf. */ template RNGState sample_indices_iid( - TF* cdf, int64_t len_cdf, int64_t* samples , int64_t num_samples, RandBLAS::RNGState state + int64_t n, TF* cdf, int64_t k, int64_t* samples, RandBLAS::RNGState state ) { auto [ctr, key] = state; RNG gen; auto rv_array = r123ext::uneg11::generate(gen, ctr, key); int64_t len_c = (int64_t) state.len_c; int64_t rv_index = 0; - for (int64_t i = 0; i < num_samples; ++i) { + for (int64_t i = 0; i < k; ++i) { if ((i+1) % len_c == 1) { ctr.incr(1); rv_array = r123ext::uneg11::generate(gen, ctr, key); rv_index = 0; } - TF random_unif01 = ((TF) (rv_array[rv_index] + 1.0)) / ((TF) 2.0); - int64_t sample_index = std::lower_bound(cdf, cdf + len_cdf, random_unif01) - cdf; + auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); + int64_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf; // ^ uses binary search to set sample_index to the smallest value for which // random_unif01 < cdf[sample_index]. samples[i] = sample_index; @@ -286,27 +297,27 @@ RNGState sample_indices_iid( } /*** - * This function produces "num_samples" from the uniform distribution over - * {0, ..., max_index_exclusive - 1} and stores them in "samples". + * Overwrite the "samples" buffer with k (independent) samples from the + * uniform distribution over {0, ..., n - 1}. */ template RNGState sample_indices_iid_uniform( - int64_t max_index_exclusive, int64_t* samples , int64_t num_samples, RandBLAS::RNGState state + int64_t n, int64_t* samples , int64_t k, RandBLAS::RNGState state ) { auto [ctr, key] = state; RNG gen; auto rv_array = r123ext::uneg11::generate(gen, ctr, key); int64_t len_c = (int64_t) state.len_c; int64_t rv_index = 0; - double dmie = (double) max_index_exclusive; - for (int64_t i = 0; i < num_samples; ++i) { + double dN = (double) n; + for (int64_t i = 0; i < k; ++i) { if ((i+1) % len_c == 1) { ctr.incr(1); rv_array = r123ext::uneg11::generate(gen, ctr, key); rv_index = 0; } - double random_unif01 = (double) (rv_array[rv_index] + 1.0) / 2.0; - int64_t sample_index = (int64_t) dmie * random_unif01; + auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); + int64_t sample_index = (int64_t) dN * random_unif01; samples[i] = sample_index; rv_index += 1; } diff --git a/test/test_basic_rng/test_sample_indices.cc b/test/test_basic_rng/test_sample_indices.cc index aaea689a..08557a8a 100644 --- a/test/test_basic_rng/test_sample_indices.cc +++ b/test/test_basic_rng/test_sample_indices.cc @@ -70,7 +70,7 @@ class TestSampleIndices : public ::testing::Test std::vector sample_cdf(N, 0.0); for (int64_t s : samples) sample_cdf[s] += 1; - RandBLAS::util::weights_to_cdf(sample_cdf.data(), N); + RandBLAS::util::weights_to_cdf(N, sample_cdf.data()); for (int i = 0; i < num_samples; ++i) { auto diff = (double) std::abs(sample_cdf[i] - true_cdf[i]); @@ -84,7 +84,7 @@ class TestSampleIndices : public ::testing::Test auto critical_value = critical_value_rep_mutator(num_samples, significance); std::vector true_cdf(N, 1.0); - RandBLAS::util::weights_to_cdf(true_cdf.data(), N); + RandBLAS::util::weights_to_cdf(N, true_cdf.data()); RNGState state(seed); std::vector samples(num_samples, -1); @@ -102,11 +102,11 @@ class TestSampleIndices : public ::testing::Test std::vector true_cdf{}; for (int i = 0; i < N; ++i) true_cdf.push_back(1.0/((float)i + 1.0)); - RandBLAS::util::weights_to_cdf(true_cdf.data(), N); + RandBLAS::util::weights_to_cdf(N, true_cdf.data()); RNGState state(seed); std::vector samples(num_samples, -1); - RandBLAS::util::sample_indices_iid(true_cdf.data(), N, samples.data(), num_samples, state); + RandBLAS::util::sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; From a191110299866ebb668dee106b1f45387f2a74d2 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Thu, 4 Jul 2024 08:34:15 -0400 Subject: [PATCH 7/9] tests for degenerate distributions --- RandBLAS/util.hh | 2 - test/test_basic_rng/test_sample_indices.cc | 100 +++++++++++++-------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index 4dadf795..177a1f9a 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -288,8 +288,6 @@ RNGState sample_indices_iid( } auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); int64_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf; - // ^ uses binary search to set sample_index to the smallest value for which - // random_unif01 < cdf[sample_index]. samples[i] = sample_index; rv_index += 1; } diff --git a/test/test_basic_rng/test_sample_indices.cc b/test/test_basic_rng/test_sample_indices.cc index 08557a8a..5cf041f1 100644 --- a/test/test_basic_rng/test_sample_indices.cc +++ b/test/test_basic_rng/test_sample_indices.cc @@ -94,14 +94,14 @@ class TestSampleIndices : public ::testing::Test return; } - static void test_iid_kolmogorov_smirnov(int64_t N, double significance, int64_t num_samples, uint32_t seed) { + static void test_iid_kolmogorov_smirnov(int64_t N, int exponent, double significance, int64_t num_samples, uint32_t seed) { using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; auto critical_value = critical_value_rep_mutator(num_samples, significance); // Make the true CDF std::vector true_cdf{}; for (int i = 0; i < N; ++i) - true_cdf.push_back(1.0/((float)i + 1.0)); + true_cdf.push_back(std::pow(1.0/((float)i + 1.0), exponent)); RandBLAS::util::weights_to_cdf(N, true_cdf.data()); RNGState state(seed); @@ -111,6 +111,42 @@ class TestSampleIndices : public ::testing::Test index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; } + + static void test_iid_degenerate_distributions(uint32_t seed) { + int64_t N = 100; + int64_t num_samples = N*N; + std::vector samples(num_samples, -1); + RNGState state(seed); + + using RandBLAS::util::weights_to_cdf; + using RandBLAS::util::sample_indices_iid; + + // Test case 1: distribution is nonuniform, with mass only on even elements != 10. + std::vector true_cdf(N, 0.0); + for (int i = 0; i < N; i = i + 2) + true_cdf[i] = 1.0f / ((float) i + 1.0f); + true_cdf[10] = 0.0; + weights_to_cdf(N, true_cdf.data()); + sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); + for (auto s : samples) { + ASSERT_FALSE(s == 10 || s % 2 == 1) << "s = " << s; + } + + // Test case 2: distribution is trivial (a delta function), + // and a negative weight needs to be clipped without error. + std::fill(true_cdf.begin(), true_cdf.end(), 0.0); + std::fill(samples.begin(), samples.end(), -1); + true_cdf[17] = 99.0f; + true_cdf[3] = -std::numeric_limits::epsilon()/10; + randblas_require(true_cdf[3] < 0); + weights_to_cdf(N, true_cdf.data()); + ASSERT_GE(true_cdf[17], 0.0f); + sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); + for (auto s : samples) { + ASSERT_EQ(s, 17); + } + return; + } }; @@ -136,6 +172,11 @@ TEST_F(TestSampleIndices, smoke_big) { test_iid_uniform_smoke(huge_N, 1000, i); } +TEST_F(TestSampleIndices, support_of_degenerate_distributions) { + for (uint32_t i = 789; i < 799; ++i) + test_iid_degenerate_distributions(i); +} + TEST_F(TestSampleIndices, iid_uniform_ks_generous) { double s = 1e-6; test_iid_uniform_kolmogorov_smirnov(100, s, 100000, 0); @@ -160,49 +201,30 @@ TEST_F(TestSampleIndices, iid_uniform_ks_skeptical) { TEST_F(TestSampleIndices, iid_ks_generous) { double s = 1e-6; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } TEST_F(TestSampleIndices, iid_ks_moderate) { float s = 1e-4; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } TEST_F(TestSampleIndices, iid_ks_skeptical) { float s = 1e-2; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } - - - -// class TestSampleIndices : public ::testing::Test -// { -// protected: - -// virtual void SetUp(){}; - -// virtual void TearDown(){}; - -// template -// static void test_basic( - -// ) { -// return; -// } - -// }; - - -// TEST_F(TestSampleIndices, smoke) -// { -// // do something -// } - - - From 8ebaca640e43a2014ac319abe721227043596fa1 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Tue, 16 Jul 2024 16:26:13 -0400 Subject: [PATCH 8/9] remove unused (and kind of gross) utility function --- RandBLAS/util.hh | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index 177a1f9a..c912b159 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -57,26 +57,6 @@ void safe_scal(int64_t n, T a, T* x, int64_t inc_x) { } } -template -void genmat( - int64_t n_rows, - int64_t n_cols, - T* mat, - uint64_t seed) -{ - typedef r123::Philox2x64 CBRNG; - CBRNG::key_type key = {{seed}}; - CBRNG::ctr_type ctr = {{0,0}}; - CBRNG g; - uint64_t prod = n_rows * n_cols; - for (uint64_t i = 0; i < prod; ++i) - { - ctr[0] = i; - CBRNG::ctr_type rand = g(ctr, key); - mat[i] = r123::uneg11(rand.v[0]); - } -} - template void print_colmaj(int64_t n_rows, int64_t n_cols, T *a, char label[]) { From e0f173e9a585ae20f7a1f92df5daafae2d52dc29 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Tue, 16 Jul 2024 17:20:45 -0400 Subject: [PATCH 9/9] try suggestion from Robot Friend to resolve compiler error with gcc --- test/test_basic_rng/rng_common.hh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_basic_rng/rng_common.hh b/test/test_basic_rng/rng_common.hh index af5b1d32..f1103b4b 100644 --- a/test/test_basic_rng/rng_common.hh +++ b/test/test_basic_rng/rng_common.hh @@ -2,6 +2,7 @@ #include "RandBLAS.hh" #include +#include namespace RandBLAS_StatTests { @@ -31,7 +32,7 @@ const std::vector significance_levels { 0.05, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6 }; -const std::vector> critical_values {{ +const std::array, 6> critical_values {{{ // significance of 0.05 4.54266591e-01, 3.27333470e-01, 2.34240860e-01, 1.66933746e-01, 1.18658276e-01, 8.42018587e-02, 5.96844982e-02, 4.22742678e-02, @@ -79,7 +80,7 @@ const std::vector> critical_values {{ 1.48735960e-02, 1.05183852e-02, 7.43818754e-03, 5.25987011e-03, 3.71942641e-03, 2.63009921e-03, 1.85979452e-03, 1.31508999e-03, 9.29917360e-04, 6.57555013e-04 -}}; +}}}; /*** * Returns the index in significance_levels for the "least significant" value