From 1489103b3df578c55604c7c08bb7a4dce60665ca Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Sun, 29 Sep 2024 16:15:29 -0400 Subject: [PATCH] Release of RandBLAS 1.0.1: bugfixes for sampling from index sets (#121) Changes since 1.0.0: * Resolved #118. * Resolved minor bugs in values of RNGState objects returned from sample_indices_iid and sample_indices_iid_uniform. * Changed certain template defaults from ``r123::Philox4x32`` to ``DefaultRNG``. * Replaced the RNGState constructor which previously accepted ``RNG::key_type::value_type`` to now accept``std::uint64_t``. This has the benefit of allowing any initial key for RandBLAS' default RNG, as opposed to only allowing keys of the form ``{k, 0}`` for some uint32 ``k``. * Added ``sqrt_epsilon()`` to the web documentation. * Clarify that any functions which appear in the web documentation are part of our public API, for purposes of semantic versioning. --- .gitignore | 3 + RandBLAS/base.hh | 96 +++++++---- RandBLAS/dense_skops.hh | 16 +- RandBLAS/sparse_skops.hh | 12 +- RandBLAS/util.hh | 24 ++- rtd/source/api_reference/utilities.rst | 7 +- rtd/source/updates/index.rst | 16 +- test/test_basic_rng/test_discrete.cc | 166 +++++++++++++++----- test/test_basic_rng/test_r123.cc | 31 ++++ test/test_datastructures/test_sparseskop.cc | 36 +++++ 10 files changed, 310 insertions(+), 97 deletions(-) diff --git a/.gitignore b/.gitignore index 2ced0696..72b39562 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,9 @@ ## Local +RandBLAS/config.h +temp_vscode + .vscode/* .idea/*.xml diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh index 72db3769..2fb3fcd0 100644 --- a/RandBLAS/base.hh +++ b/RandBLAS/base.hh @@ -35,9 +35,7 @@ #include "RandBLAS/random_gen.hh" #include -#include #include -#include #include #include #include @@ -47,22 +45,22 @@ #endif #include -#include /// code common across the project namespace RandBLAS { typedef r123::Philox4x32 DefaultRNG; +using std::uint64_t; -/** A representation of the state of a counter-based random number generator - * (CBRNG) defined in Random123. The representation consists of two arrays: - * the counter and the key. The arrays' types are statically sized, small - * (typically of length 2 or 4), and can be distinct from one another. - * - * The template parameter RNG is a CBRNG type in defined in Random123. We've found - * that Philox-based CBRNGs work best for our purposes, but we also support Threefry-based CBRNGS. - */ +/// ------------------------------------------------------------------- +/// This is a stateful version of a +/// *counter-based random number generator* (CBRNG) from Random123. +/// It packages a CBRNG together with two arrays, called "counter" and "key," +/// which are interpreted as extended-width unsigned integers. +/// +/// RNGStates are used in every RandBLAS function that involves random sampling. +/// template struct RNGState { @@ -78,32 +76,50 @@ struct RNGState { // ^ An array type defined in Random123. using ctr_uint = typename RNG::ctr_type::value_type; // ^ The unsigned integer type used in this RNGState's counter array. - - /// ------------------------------------------------------------------- - /// The unsigned integer type used in this RNGState's key array. - /// This is typically std::uint32_t, but it can be std::uint64_t. using key_uint = typename RNG::key_type::value_type; + // ^ The unsigned integer type used in this RNGState's key array. - const static int len_c = RNG::ctr_type::static_size; - static_assert(len_c >= 2); - const static int len_k = RNG::key_type::static_size; - + /// ------------------------------------------------------------------ + /// This is a Random123-defined statically-sized array of unsigned integers. + /// With RandBLAS' default, it contains four 32-bit unsigned ints + /// and is interpreted as one 128-bit unsigned int. + /// + /// This member specifies a "location" in the random stream + /// defined by RNGState::generator and RNGState::key. + /// Random sampling functions in RandBLAS effectively consume elements + /// of the random stream starting from this location. + /// + /// **RandBLAS functions do not mutate input RNGStates.** Free-functions + /// return new RNGStates with suitably updated counters. Constructors + /// for SketchingOperator objects store updated RNGStates in the + /// object's next_state member. typename RNG::ctr_type counter; - // ^ This RNGState's counter array. Exclude from doxygen comments - // since end-users shouldn't touch it. /// ------------------------------------------------------------------ - /// This RNGState's key array. If you want to manually advance the key - /// by an integer increment of size "step," then you do so by calling - /// this->key.incr(step). + /// This is a Random123-defined statically-sized array of unsigned integers. + /// With RandBLAS' default, it contains two 32-bit unsigned ints + /// and is interpreted as one 64-bit unsigned int. + /// + /// This member specifices a sequece of pseudo-random numbers + /// that RNGState::generator can produce. Any fixed sequence has + /// fairly large period (\math{2^{132},} with RandBLAS' default) and + /// is statistically independent from sequences induced by different keys. + /// + /// To increment the key by "step," call \math{\ttt{key.incr(step)}}. typename RNG::key_type key; + const static int len_c = RNG::ctr_type::static_size; + static_assert(len_c >= 2); + const static int len_k = RNG::key_type::static_size; + + /// Initialize the counter and key to zero. + RNGState() : counter{}, key{} {} - /// Initialize the counter and key arrays to all zeros. - RNGState() : counter{{0}}, key(key_type{{}}) {} + /// Initialize the counter and key to zero, then increment the key by k. + RNGState(uint64_t k) : counter{}, key{} { key.incr(k); } // construct from a key - RNGState(key_type const &k) : counter{{0}}, key(k) {} + RNGState(key_type const &k) : counter{}, key(k) {} // Initialize counter and key arrays at the given values. RNGState(ctr_type const &c, key_type const &k) : counter(c), key(k) {} @@ -111,13 +127,12 @@ struct RNGState { // move construct from an initial counter and key RNGState(ctr_type &&c, key_type &&k) : counter(std::move(c)), key(std::move(k)) {} - /// Initialize the counter array to all zeros. Initialize the key array to have first - /// element equal to k and all other elements equal to zero. - RNGState(key_uint k) : counter{{0}}, key{{k}} {} + // move constructor. + RNGState(RNGState &&s) : RNGState(std::move(s.counter), std::move(s.key)) {}; ~RNGState() {}; - /// A copy constructor. + /// Copy constructor. RNGState(const RNGState &s) : RNGState(s.counter, s.key) {}; // A copy-assignment operator. @@ -127,6 +142,25 @@ struct RNGState { return *this; }; + // + // Comparators (for now, these are just for testing and debugging) + // + + bool operator==(const RNGState &s) const { + // the compiler should only allow comparisons between RNGStates of the same type. + for (int i = 0; i < len_c; ++i) { + if (counter.v[i] != s.counter.v[i]) { return false; } + } + for (int i = 0; i < len_k; ++i) { + if (key.v[i] != s.key.v[i]) { return false; } + } + return true; + }; + + bool operator!=(const RNGState &s) const { + return !(this == s); + }; + }; template diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh index 295d9a9c..b25d46b6 100644 --- a/RandBLAS/dense_skops.hh +++ b/RandBLAS/dense_skops.hh @@ -204,17 +204,15 @@ inline blas::Layout natural_layout(Axis major_axis, int64_t n_rows, int64_t n_co namespace RandBLAS { // ============================================================================= -/// We support two distributions for dense sketching operators: those whose -/// entries are iid Gaussians or iid uniform over a symmetric interval. -/// +/// Names for mean-zero variance-one distributions on the reals. enum class ScalarDist : char { // --------------------------------------------------------------------------- - /// Indicates the Gaussian distribution with mean 0 and variance 1. + /// The Gaussian distribution with mean 0 and variance 1. Gaussian = 'G', // --------------------------------------------------------------------------- - /// Indicates the uniform distribution over [-r, r] where r := sqrt(3) - /// is the radius that provides for a variance of 1. + /// The uniform distribution over \math{[-r, r],} where + /// \math{r := \sqrt{3}} provides for a variance of 1. Uniform = 'U' }; @@ -333,7 +331,7 @@ static_assert(SketchingDistribution); /// A sample from a distribution over matrices whose entries are iid /// mean-zero variance-one random variables. /// This type conforms to the SketchingOperator concept. -template +template struct DenseSkOp { // --------------------------------------------------------------------------- @@ -537,7 +535,7 @@ static_assert(SketchingOperator>); /// - Used to define :math:`\mtxS` as a sample from :math:`\D.` /// /// @endverbatim -template +template RNGState fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t ro_s, int64_t co_s, T* buff, const RNGState &seed) { using RandBLAS::dense::fill_dense_submat_impl; randblas_require(D.n_rows >= n_rows + ro_s); @@ -597,7 +595,7 @@ RNGState fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64 /// A CBRNG state /// - Used to define \math{\mat(\buff)} as a sample from \math{\D}. /// -template +template RNGState fill_dense(const DenseDist &D, T *buff, const RNGState &seed) { return fill_dense_unpacked(D.natural_layout, D, D.n_rows, D.n_cols, 0, 0, buff, seed); } diff --git a/RandBLAS/sparse_skops.hh b/RandBLAS/sparse_skops.hh index 6ab2473c..9664bd9f 100644 --- a/RandBLAS/sparse_skops.hh +++ b/RandBLAS/sparse_skops.hh @@ -101,6 +101,7 @@ static state_t repeated_fisher_yates( vec_work[ell] = swap; } } + ctr.incr(dim_minor * vec_nnz); return state_t {ctr, key}; } @@ -239,7 +240,7 @@ inline state_t repeated_fisher_yates( return sparse::repeated_fisher_yates(state, k, n, r, samples, (sint_t*) nullptr, (double*) nullptr); } -template +template RNGState compute_next_state(SparseDist dist, RNGState state) { int64_t num_mavec, incrs_per_mavec; if (dist.major_axis == Axis::Short) { @@ -249,7 +250,7 @@ RNGState compute_next_state(SparseDist dist, RNGState state) { // See repeated_fisher_yates. } else { num_mavec = std::min(dist.n_rows, dist.n_cols); - incrs_per_mavec = dist.vec_nnz * ((int64_t) state.len_c/2); + incrs_per_mavec = (int64_t) std::ceil((double) dist.vec_nnz / ((double) state.len_c/2)); // ^ LASOs do try to be frugal with CBRNG increments. // See sample_indices_iid_uniform. } @@ -262,7 +263,7 @@ RNGState compute_next_state(SparseDist dist, RNGState state) { /// A sample from a distribution over structured sparse matrices with either /// independent rows or independent columns. This type conforms to the /// SketchingOperator concept. -template +template struct SparseSkOp { // --------------------------------------------------------------------------- @@ -483,6 +484,10 @@ void laso_merge_long_axis_vector_coo_data( /// a COO sparse matrix representation of the SparseSkOp /// defined by \math{(\D,\ttt{seed_state)}.} /// +/// Note: the "nosub" suffix in this function name reflects how it has no ability +/// to sample submatrices of sparse sketching operators. A future release of +/// RandBLAS will add a function called "fill_sparse_unpacked()" with capabilities +/// that are completely analogous to fill_dense_unpacked(). /// template state_t fill_sparse_unpacked_nosub( @@ -570,6 +575,7 @@ void fill_sparse(SparseSkOp &S) { randblas_require(S.cols != nullptr); randblas_require(S.vals != nullptr); fill_sparse_unpacked_nosub(S.dist, S.nnz, S.vals, S.rows, S.cols, S.seed_state); + // ^ We ignore the return value from that function call. return; } diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index a273c4a9..bd55736b 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -391,13 +391,19 @@ void transpose_square(T* A, int64_t n, int64_t lda) { return; } +// ============================================================================= +/// \fn sqrt_epsilon() +/// Alias for sqrt(numeric_limits::epsilon()). For example, +/// \math{\ttt{sqrt_epsilon()} \approx 0.0003452}, and +/// \math{\ttt{sqrt_epsilon()} \approx 1.4901\text{e-}8}. +/// template T sqrt_epsilon() { return std::sqrt(std::numeric_limits::epsilon()); } // ============================================================================= -/// \fn weights_to_cdf(int64_t n, T* w, T error_if_below = -std::numeric_limits::epsilon()) +/// \fn weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon()) /// @verbatim embed:rst:leading-slashes /// Checks if all elements of length-:math:`n` array ":math:`w`" are at no smaller than /// :math:`\ttt{error_if_below}.` If this check passes, then we (implicitly) initialize :math:`v := w`` @@ -426,7 +432,7 @@ void weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon()) { } template -static inline TO uneg11_to_uneg01(TI in) { +static inline TO uneg11_to_u01(TI in) { return ((TO) in + (TO) 1.0)/ ((TO) 2.0); } @@ -453,16 +459,17 @@ state_t sample_indices_iid(int64_t n, const T* cdf, int64_t k, sint_t* samples, int64_t len_c = (int64_t) state.len_c; int64_t rv_index = 0; for (int64_t i = 0; i < k; ++i) { - if ((i+1) % len_c == 1) { + auto random_unif01 = uneg11_to_u01(rv_array[rv_index]); + sint_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf; + samples[i] = sample_index; + rv_index += 1; + if (rv_index == len_c) { ctr.incr(1); rv_array = r123ext::uneg11::generate(gen, ctr, key); rv_index = 0; } - auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); - sint_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf; - samples[i] = sample_index; - rv_index += 1; } + if (0 < rv_index) ctr.incr(1); return state_t(ctr, key); } @@ -480,7 +487,7 @@ state_t sample_indices_iid_uniform(int64_t n, int64_t k, sint_t* samples, T* rad int64_t rv_index = 0; double dN = (double) n; for (int64_t i = 0; i < k; ++i) { - auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); + auto random_unif01 = uneg11_to_u01(rv_array[rv_index]); sint_t sample_index = (sint_t) dN * random_unif01; samples[i] = sample_index; rv_index += 1; @@ -494,6 +501,7 @@ state_t sample_indices_iid_uniform(int64_t n, int64_t k, sint_t* samples, T* rad rv_index = 0; } } + if (0 < rv_index) ctr.incr(1); return state_t(ctr, key); } diff --git a/rtd/source/api_reference/utilities.rst b/rtd/source/api_reference/utilities.rst index 8de9e71f..69586502 100644 --- a/rtd/source/api_reference/utilities.rst +++ b/rtd/source/api_reference/utilities.rst @@ -10,10 +10,15 @@ Utilities ############################################################ +Numerical tolerances +==================== +.. doxygenfunction:: RandBLAS::sqrt_epsilon + :project: RandBLAS + Random sampling from index sets =============================== -.. doxygenfunction:: RandBLAS::weights_to_cdf(int64_t n, T* w, T error_if_below = -std::numeric_limits::epsilon()) +.. doxygenfunction:: RandBLAS::weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon()) :project: RandBLAS .. doxygenfunction:: RandBLAS::sample_indices_iid(int64_t n, const T* cdf, int64_t k, sint_t* samples, const state_t &state) diff --git a/rtd/source/updates/index.rst b/rtd/source/updates/index.rst index d43723ca..d0f00cec 100644 --- a/rtd/source/updates/index.rst +++ b/rtd/source/updates/index.rst @@ -2,17 +2,20 @@ Changes to RandBLAS =================== -This page details changes made to RandBLAS over time, in reverse chronological order. +This page reviews changes made to RandBLAS over time, in reverse chronological order. We have a tentative policy of providing bugfix support for any release of RandBLAS upon request, no matter how old. With any luck, this project will grow enough that we'll have to change this policy. -RandBLAS follows `Semantic Versioning `_. +RandBLAS follows `Semantic Versioning `_. Any function documented +on this website is part of the public API. There are many functions which are not +part of our public API, but could be added to it if there is user interest. +RandBLAS' latest version is :ref:`1.0.1 `. RandBLAS 1.0 ------------ -*Release date: September 12, 2024. Release manager: Riley Murray.* +*Original release date: September 12, 2024. Release manager: Riley Murray.* Today marks RandBLAS' second-ever release, its first *stable* release, and its first release featuring the contributions of someone who showed @@ -94,6 +97,13 @@ National Technology and Engineering Solutions of Sandia, LLC., a wholly owned su of Honeywell International, Inc., for the U.S. Department of Energy’s National Nuclear Security Administration under contract DE-NA-0003525. +.. _v10x_patches: + +Patch releases in series 1.0.x +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Version 1.0.1 (September 29, 2024). This patches bugs in values of RNGStates +returned from functions for sampling from index sets. See GitHub for more details. RandBLAS 0.2 diff --git a/test/test_basic_rng/test_discrete.cc b/test/test_basic_rng/test_discrete.cc index 6b485dca..43536b66 100644 --- a/test/test_basic_rng/test_discrete.cc +++ b/test/test_basic_rng/test_discrete.cc @@ -34,7 +34,12 @@ #include #include using RandBLAS::RNGState; +using RandBLAS::weights_to_cdf; +using RandBLAS::sample_indices_iid; +using RandBLAS::sample_indices_iid_uniform; +using RandBLAS::repeated_fisher_yates; #include "rng_common.hh" +#include "../comparison.hh" #include #include @@ -50,6 +55,8 @@ using RandBLAS::RNGState; #include #include +using std::vector; + class TestSampleIndices : public ::testing::Test { @@ -60,12 +67,13 @@ class TestSampleIndices : public ::testing::Test virtual void TearDown(){}; // - // Mark: With Replacement Tests + // MARK: With Replacement // + static void test_iid_uniform_smoke(int64_t N, int64_t k, uint32_t seed) { RNGState state(seed); - std::vector samples(k, -1); - RandBLAS::sample_indices_iid_uniform(N, k, samples.data(), state); + vector samples(k, -1); + sample_indices_iid_uniform(N, k, samples.data(), state); int64_t* data = samples.data(); for (int64_t i = 0; i < k; ++i) { ASSERT_LT(data[i], N); @@ -75,13 +83,13 @@ class TestSampleIndices : public ::testing::Test } static void index_set_kolmogorov_smirnov_tester( - std::vector &samples, std::vector &true_cdf, double critical_value + vector &samples, vector &true_cdf, double critical_value ) { auto N = (int64_t) true_cdf.size(); - std::vector sample_cdf(N, 0.0); + vector sample_cdf(N, 0.0); for (int64_t s : samples) sample_cdf[s] += 1; - RandBLAS::weights_to_cdf(N, sample_cdf.data()); + weights_to_cdf(N, sample_cdf.data()); for (int i = 0; i < N; ++i) { float F_empirical = sample_cdf[i]; @@ -96,12 +104,12 @@ class TestSampleIndices : public ::testing::Test using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; auto critical_value = critical_value_rep_mutator(num_samples, significance); - std::vector true_cdf(N, 1.0); - RandBLAS::weights_to_cdf(N, true_cdf.data()); + vector true_cdf(N, 1.0); + weights_to_cdf(N, true_cdf.data()); RNGState state(seed); - std::vector samples(num_samples, -1); - RandBLAS::sample_indices_iid_uniform(N, num_samples, samples.data(), state); + vector samples(num_samples, -1); + sample_indices_iid_uniform(N, num_samples, samples.data(), state); index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; @@ -112,14 +120,14 @@ class TestSampleIndices : public ::testing::Test auto critical_value = critical_value_rep_mutator(num_samples, significance); // Make the true CDF - std::vector true_cdf{}; + vector true_cdf{}; for (int i = 0; i < N; ++i) true_cdf.push_back(std::pow(1.0/((float)i + 1.0), exponent)); - RandBLAS::weights_to_cdf(N, true_cdf.data()); + weights_to_cdf(N, true_cdf.data()); RNGState state(seed); - std::vector samples(num_samples, -1); - RandBLAS::sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); + vector samples(num_samples, -1); + sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; @@ -128,14 +136,11 @@ class TestSampleIndices : public ::testing::Test static void test_iid_degenerate_distributions(uint32_t seed) { int64_t N = 100; int64_t num_samples = N*N; - std::vector samples(num_samples, -1); + vector samples(num_samples, -1); RNGState state(seed); - using RandBLAS::weights_to_cdf; - using RandBLAS::sample_indices_iid; - // Test case 1: distribution is nonuniform, with mass only on even elements != 10. - std::vector true_cdf(N, 0.0); + vector true_cdf(N, 0.0); for (int i = 0; i < N; i = i + 2) true_cdf[i] = 1.0f / ((float) i + 1.0f); true_cdf[10] = 0.0; @@ -161,31 +166,74 @@ class TestSampleIndices : public ::testing::Test return; } + static void test_updated_rngstates_iid_uniform() { + RNGState seed; + int offset = 3456; + seed.counter.incr(offset); + int n = 40; + int k = 17; + vector unimportant(2*k); + auto s1 = sample_indices_iid_uniform(n, k, unimportant.data(), seed); + auto s2 = sample_indices_iid_uniform(n, k, unimportant.data(), s1); + // check that counter increments are the same for the two samples of k indices. + auto total_2call = s2.counter.v[0]; + EXPECT_EQ(total_2call-offset, 2*(s1.counter.v[0]-offset)); + + // check that the counter increment for a single sample of size 2k is (a) no larger + // than the total increment for two samples of size k, and (b) is at most one less + // than the total increment for two samples of size k. + auto t = sample_indices_iid_uniform(n, 2*k, unimportant.data(), seed); + auto total_1call = t.counter.v[0]; + EXPECT_LE( total_1call, total_2call ); + EXPECT_LE( total_2call, total_1call + 1); + } + + static void test_updated_rngstates_iid() { + RNGState seed; + int offset = 8675309; + seed.counter.incr(offset); + int n = 29; + int k = 13; + vector unimportant(2*k); + vector cdf(n, 1.0); + weights_to_cdf(n, cdf.data()); + + auto s1 = sample_indices_iid(n, cdf.data(), k, unimportant.data(), seed); + auto s2 = sample_indices_iid(n, cdf.data(), k, unimportant.data(), s1); + // check that counter increments are the same for the two samples of k indices. + auto total_2call = s2.counter.v[0]; + EXPECT_EQ(total_2call-offset, 2*(s1.counter.v[0]-offset)); + + // check that the counter increment for a single sample of size 2k is (a) no larger + // than the total increment for two samples of size k, and (b) is at most one less + // than the total increment for two samples of size k. + auto t = sample_indices_iid(n, cdf.data(), 2*k, unimportant.data(), seed); + auto total_1call = t.counter.v[0]; + EXPECT_LE( total_1call, total_2call ); + EXPECT_LE( total_2call, total_1call + 1); + } + // - // Mark: Without Replacement Tests + // MARK: Without Replacement // - static std::vector fisher_yates_cdf(const std::vector &idxs_major, int64_t K, int64_t num_samples) { - using RandBLAS::weights_to_cdf; - std::vector empirical_cdf; + static vector fisher_yates_cdf(const vector &idxs_major, int64_t K, int64_t num_samples) { + vector empirical_cdf; // If K is 0, then there's nothing to count over and we should just return 1 if (K == 0) { empirical_cdf.push_back(1.0); - } - else { + } else { // Count how many values in idxs_major are less than K across the samples - std::vector counter(K + 1, 0); + vector counter(K + 1, 0); for (int64_t i = 0; i < K * num_samples; i += K) { int count = 0; for (int64_t j = 0; j < K; ++j) { - if (idxs_major[i + j] < K) { + if (idxs_major[i + j] < K) count += 1; - } } counter[count] += 1; } - // Copy counter and normalize to get empirical_cdf empirical_cdf.resize(counter.size()); std::copy(counter.begin(), counter.end(), empirical_cdf.begin()); @@ -196,38 +244,33 @@ class TestSampleIndices : public ::testing::Test } static void fisher_yates_kolmogorov_smirnov_tester( - const std::vector &idxs_major, std::vector &true_cdf, double critical_value, int64_t N, int64_t K, int64_t num_samples + const vector &idxs_major, vector &true_cdf, double critical_value, int64_t N, int64_t K, int64_t num_samples ) { using RandBLAS_StatTests::ks_check_critval; // Calculate the empirical cdf and check critval - std::vector empirical_cdf = fisher_yates_cdf(idxs_major, K, num_samples); + vector empirical_cdf = fisher_yates_cdf(idxs_major, K, num_samples); std::pair result = ks_check_critval(true_cdf, empirical_cdf, critical_value); - - std::cout << std::endl; - if (result.first != -1) { - std::cout << std::endl; - std::cout << "KS test failed at index " << result.first << " with difference " << result.second << " and critical value " << critical_value << std::endl; - std::cout << "Test parameters: " << "N=" << N << " " << "K=" << K << " " << "num_samples=" << num_samples << std::endl; - } + EXPECT_EQ(result.first, -1) + << "\nKS test failed at index " << result.first + << " with difference " << result.second << " and critical value " << critical_value + << "\nTest parameters: " << "N=" << N << " " << "K=" << K << " " << "num_samples=" << num_samples << std::endl; } static void single_test_fisher_yates_kolmogorov_smirnov(int64_t N, int64_t K, double significance, int64_t num_samples, uint32_t seed) { - using RandBLAS::sparse::repeated_fisher_yates; using RandBLAS_StatTests::hypergeometric_pmf_arr; - using RandBLAS::weights_to_cdf; using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; auto critical_value = critical_value_rep_mutator(num_samples, significance); // Initialize arguments for repeated_fisher_yates - std::vector indices(K * num_samples); + vector indices(K * num_samples); RNGState state(seed); // Generate repeated Fisher-Yates in idxs_major state = repeated_fisher_yates(K, N, num_samples, indices.data(), state); // Generate the true hypergeometric cdf (get the pdf first) - std::vector true_cdf = hypergeometric_pmf_arr(N, K, K); + vector true_cdf = hypergeometric_pmf_arr(N, K, K); weights_to_cdf(true_cdf.size(), true_cdf.data()); // Compute the critval and check against it @@ -258,8 +301,47 @@ class TestSampleIndices : public ::testing::Test } } + static void test_updated_rngstates_fisher_yates() { + RNGState seed; + int offset = 306; + seed.counter.incr(offset); + int n = 29; + int k = 17; + int r1 = 1; + int r2 = 3; + int r_total = r1 + r2; + vector twocall(r_total*k); + vector onecall(r_total*k); + + auto s1 = repeated_fisher_yates(k, n, r1, twocall.data(), seed); + auto s2 = repeated_fisher_yates(k, n, r2, twocall.data() + r1*k, s1); + auto ctr_twocall = (int) s2.counter.v[0]; + auto expect_incr = (int) std::ceil(((float)r_total/r1)*(s1.counter.v[0]-offset)); + EXPECT_EQ(ctr_twocall - offset, expect_incr); + + auto t = repeated_fisher_yates(k, n, r_total, onecall.data(), seed); + auto ctr_onecall = t.counter.v[0]; + EXPECT_EQ( ctr_onecall, ctr_twocall ); + + test::comparison::buffs_approx_equal( + onecall.data(), twocall.data(), r_total*k, __PRETTY_FUNCTION__, __FILE__, __LINE__ + ); + } + }; +TEST_F(TestSampleIndices, rngstate_updates_iid_uniform) { + test_updated_rngstates_iid_uniform(); +} + +TEST_F(TestSampleIndices, rngstate_updates_iid) { + test_updated_rngstates_iid(); +} + +TEST_F(TestSampleIndices, rngstate_updates_fisher_yates) { + test_updated_rngstates_fisher_yates(); +} + TEST_F(TestSampleIndices, smoke_3_x_10) { for (uint32_t i = 0; i < 10; ++i) diff --git a/test/test_basic_rng/test_r123.cc b/test/test_basic_rng/test_r123.cc index b63a4ab8..8049a4de 100644 --- a/test/test_basic_rng/test_r123.cc +++ b/test/test_basic_rng/test_r123.cc @@ -672,6 +672,37 @@ void run_ut_uniform(){ // MARK: my tests + Googletest +class TestRNGState : public ::testing::Test { + + protected: + + void test_uint_key_constructors() { + using RNG = r123::Philox4x32; + int len_k = RNG::key_type::static_size; + ASSERT_EQ(len_k, 2); + // No-arugment constructor + RandBLAS::RNGState s; + ASSERT_EQ(s.key[0], 0); + ASSERT_EQ(s.key[1], 0); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ(s.counter[i], 0) << "Failed at index " << i; + } + // unsigned-int constructor + RandBLAS::RNGState t(42); + ASSERT_EQ(t.key[0], 42); + ASSERT_EQ(t.key[1], 0); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ(t.counter[i], 0) << "Failed at index " << i; + } + return; + } +}; + +TEST_F(TestRNGState, uint_key_constructors) { + test_uint_key_constructors(); +} + + class TestRandom123 : public ::testing::Test { protected: diff --git a/test/test_datastructures/test_sparseskop.cc b/test/test_datastructures/test_sparseskop.cc index e329bb7b..38cc1250 100644 --- a/test/test_datastructures/test_sparseskop.cc +++ b/test/test_datastructures/test_sparseskop.cc @@ -34,12 +34,14 @@ #include #include +using std::vector; using RandBLAS::RNGState; using RandBLAS::SignedInteger; using RandBLAS::SparseDist; using RandBLAS::SparseSkOp; using RandBLAS::Axis; using RandBLAS::fill_sparse; +using RandBLAS::fill_sparse_unpacked_nosub; class TestSparseSkOpConstruction : public ::testing::Test @@ -143,6 +145,31 @@ class TestSparseSkOpConstruction : public ::testing::Test test::comparison::buffs_approx_equal(vals.data(), vals_copy.data(), sd.full_nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__, (T) 0, (T) 0); return; } + + void unpacked_nosub(const SparseDist &D) { + RNGState s(1); + SparseSkOp S(D, s); + auto expect_next = S.next_state; + fill_sparse(S); + vector rows(D.full_nnz); + vector cols(D.full_nnz); + vector vals(D.full_nnz); + int64_t nnz = 0; + auto actual_next = fill_sparse_unpacked_nosub( + D, nnz, vals.data(), rows.data(), cols.data(), s + ); + EXPECT_EQ(S.nnz, nnz); + EXPECT_TRUE(actual_next == expect_next); + test::comparison::buffs_approx_equal( + vals.data(), S.vals, nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__ + ); + test::comparison::buffs_approx_equal( + rows.data(), S.rows, nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__ + ); + test::comparison::buffs_approx_equal( + cols.data(), S.cols, nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__ + ); + } }; TEST_F(TestSparseSkOpConstruction, respect_ownership) { @@ -155,6 +182,15 @@ TEST_F(TestSparseSkOpConstruction, respect_ownership) { respect_ownership(7, 20); } +TEST_F(TestSparseSkOpConstruction, fill_unpacked_nosub_saso) { + unpacked_nosub({10,20,7,Axis::Short}); + unpacked_nosub({20,10,7,Axis::Short}); +} + +TEST_F(TestSparseSkOpConstruction, fill_unpacked_nosub_laso) { + unpacked_nosub({10,20,7,Axis::Long}); + unpacked_nosub({20,10,7,Axis::Long}); +} //////////////////////////////////////////////////////////////////////// //