From 1489103b3df578c55604c7c08bb7a4dce60665ca Mon Sep 17 00:00:00 2001
From: Riley Murray <rileyjmurray@users.noreply.github.com>
Date: Sun, 29 Sep 2024 16:15:29 -0400
Subject: [PATCH] Release of RandBLAS 1.0.1: bugfixes for sampling from index
 sets (#121)

Changes since 1.0.0:
 * Resolved #118.
* Resolved minor bugs in values of RNGState objects returned from
sample_indices_iid and sample_indices_iid_uniform.
* Changed certain template defaults from ``r123::Philox4x32`` to
``DefaultRNG``.
* Replaced the RNGState constructor which previously accepted
``RNG::key_type::value_type`` to now accept``std::uint64_t``. This has
the benefit of allowing any initial key for RandBLAS' default RNG, as
opposed to only allowing keys of the form ``{k, 0}`` for some uint32
``k``.
 * Added ``sqrt_epsilon<T>()`` to the web documentation.
* Clarify that any functions which appear in the web documentation are
part of our public API, for purposes of semantic versioning.
---
 .gitignore                                  |   3 +
 RandBLAS/base.hh                            |  96 +++++++----
 RandBLAS/dense_skops.hh                     |  16 +-
 RandBLAS/sparse_skops.hh                    |  12 +-
 RandBLAS/util.hh                            |  24 ++-
 rtd/source/api_reference/utilities.rst      |   7 +-
 rtd/source/updates/index.rst                |  16 +-
 test/test_basic_rng/test_discrete.cc        | 166 +++++++++++++++-----
 test/test_basic_rng/test_r123.cc            |  31 ++++
 test/test_datastructures/test_sparseskop.cc |  36 +++++
 10 files changed, 310 insertions(+), 97 deletions(-)
diff --git a/.gitignore b/.gitignore
index 2ced0696..72b39562 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,9 @@
 
 ## Local 
 
+RandBLAS/config.h
+temp_vscode
+
 .vscode/*
 .idea/*.xml
 
diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh
index 72db3769..2fb3fcd0 100644
--- a/RandBLAS/base.hh
+++ b/RandBLAS/base.hh
@@ -35,9 +35,7 @@
 #include "RandBLAS/random_gen.hh"
 
 #include <blas.hh>
-#include <tuple>
 #include <utility>
-#include <type_traits>
 #include <cstring>
 #include <cstdint>
 #include <iostream>
@@ -47,22 +45,22 @@
 #endif
 
 #include<iostream>
-#include<numeric>
 
 
 /// code common across the project
 namespace RandBLAS {
 
 typedef r123::Philox4x32 DefaultRNG;
+using std::uint64_t;
 
-/** A representation of the state of a counter-based random number generator
- * (CBRNG) defined in Random123. The representation consists of two arrays:
- * the counter and the key. The arrays' types are statically sized, small
- * (typically of length 2 or 4), and can be distinct from one another.
- * 
- * The template parameter RNG is a CBRNG type in defined in Random123. We've found
- * that Philox-based CBRNGs work best for our purposes, but we also support Threefry-based CBRNGS.
- */
+/// -------------------------------------------------------------------
+/// This is a stateful version of a
+/// *counter-based random number generator* (CBRNG) from Random123.
+/// It packages a CBRNG together with two arrays, called "counter" and "key,"
+/// which are interpreted as extended-width unsigned integers.
+/// 
+/// RNGStates are used in every RandBLAS function that involves random sampling.
+///
 template <typename RNG = DefaultRNG>
 struct RNGState {
 
@@ -78,32 +76,50 @@ struct RNGState {
     // ^ An array type defined in Random123.
     using ctr_uint = typename RNG::ctr_type::value_type;
     // ^ The unsigned integer type used in this RNGState's counter array.
-
-    /// -------------------------------------------------------------------
-    /// The unsigned integer type used in this RNGState's key array.
-    /// This is typically std::uint32_t, but it can be std::uint64_t.
     using key_uint = typename RNG::key_type::value_type;
+    // ^ The unsigned integer type used in this RNGState's key array.
 
-    const static int len_c = RNG::ctr_type::static_size;
-    static_assert(len_c >= 2);
-    const static int len_k = RNG::key_type::static_size;
-
+    /// ------------------------------------------------------------------
+    /// This is a Random123-defined statically-sized array of unsigned integers.
+    /// With RandBLAS' default, it contains four 32-bit unsigned ints
+    /// and is interpreted as one 128-bit unsigned int.
+    /// 
+    /// This member specifies a "location" in the random stream
+    /// defined by RNGState::generator and RNGState::key.
+    /// Random sampling functions in RandBLAS effectively consume elements
+    /// of the random stream starting from this location.
+    ///
+    /// **RandBLAS functions do not mutate input RNGStates.** Free-functions 
+    /// return new RNGStates with suitably updated counters. Constructors
+    /// for SketchingOperator objects store updated RNGStates in the
+    /// object's next_state member.
     typename RNG::ctr_type counter;
-    // ^ This RNGState's counter array.  Exclude from doxygen comments
-    //   since end-users shouldn't touch it.
 
     /// ------------------------------------------------------------------
-    /// This RNGState's key array. If you want to manually advance the key
-    /// by an integer increment of size "step," then you do so by calling 
-    /// this->key.incr(step).
+    /// This is a Random123-defined statically-sized array of unsigned integers.
+    /// With RandBLAS' default, it contains two 32-bit unsigned ints
+    /// and is interpreted as one 64-bit unsigned int.
+    ///
+    /// This member specifices a sequece of pseudo-random numbers
+    /// that RNGState::generator can produce. Any fixed sequence has
+    /// fairly large period (\math{2^{132},} with RandBLAS' default) and
+    /// is statistically independent from sequences induced by different keys.
+    ///
+    /// To increment the key by "step," call \math{\ttt{key.incr(step)}}.
     typename RNG::key_type key;
 
+    const static int len_c = RNG::ctr_type::static_size;
+    static_assert(len_c >= 2);
+    const static int len_k = RNG::key_type::static_size;
+
+    /// Initialize the counter and key to zero.
+    RNGState() : counter{}, key{} {}
 
-    /// Initialize the counter and key arrays to all zeros.
-    RNGState() : counter{{0}}, key(key_type{{}}) {}
+    /// Initialize the counter and key to zero, then increment the key by k.
+    RNGState(uint64_t k) : counter{}, key{} { key.incr(k); }
 
     // construct from a key
-    RNGState(key_type const &k) : counter{{0}}, key(k) {}
+    RNGState(key_type const &k) : counter{}, key(k) {}
 
     // Initialize counter and key arrays at the given values.
     RNGState(ctr_type const &c, key_type const &k) : counter(c), key(k) {}
@@ -111,13 +127,12 @@ struct RNGState {
     // move construct from an initial counter and key
     RNGState(ctr_type &&c, key_type &&k) : counter(std::move(c)), key(std::move(k)) {}
 
-    /// Initialize the counter array to all zeros. Initialize the key array to have first
-    /// element equal to k and all other elements equal to zero.
-    RNGState(key_uint k) : counter{{0}}, key{{k}} {}
+    // move constructor.
+    RNGState(RNGState<RNG> &&s) : RNGState(std::move(s.counter), std::move(s.key)) {};
 
     ~RNGState() {};
 
-    /// A copy constructor.
+    /// Copy constructor.
     RNGState(const RNGState<RNG> &s) : RNGState(s.counter, s.key) {};
 
     // A copy-assignment operator.
@@ -127,6 +142,25 @@ struct RNGState {
         return *this;
     };
 
+    //
+    // Comparators (for now, these are just for testing and debugging)
+    // 
+
+    bool operator==(const RNGState<RNG> &s) const {
+        // the compiler should only allow comparisons between RNGStates of the same type.
+        for (int i = 0; i < len_c; ++i) {
+            if (counter.v[i] != s.counter.v[i]) { return false; }
+        }
+        for (int i = 0; i < len_k; ++i) {
+            if (key.v[i] != s.key.v[i]) { return false; }
+        }
+        return true;
+    };
+
+    bool operator!=(const RNGState<RNG> &s) const {
+        return !(this == s);
+    };
+
 };
 
 template <typename RNG>
diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh
index 295d9a9c..b25d46b6 100644
--- a/RandBLAS/dense_skops.hh
+++ b/RandBLAS/dense_skops.hh
@@ -204,17 +204,15 @@ inline blas::Layout natural_layout(Axis major_axis, int64_t n_rows, int64_t n_co
 namespace RandBLAS {
 
 // =============================================================================
-/// We support two distributions for dense sketching operators: those whose
-/// entries are iid Gaussians or iid uniform over a symmetric interval.
-/// 
+/// Names for mean-zero variance-one distributions on the reals.
 enum class ScalarDist : char {
     // ---------------------------------------------------------------------------
-    ///  Indicates the Gaussian distribution with mean 0 and variance 1.
+    ///  The Gaussian distribution with mean 0 and variance 1.
     Gaussian = 'G',
 
     // ---------------------------------------------------------------------------
-    ///  Indicates the uniform distribution over [-r, r] where r := sqrt(3)
-    ///  is the radius that provides for a variance of 1.
+    ///  The uniform distribution over \math{[-r, r],} where 
+    ///  \math{r := \sqrt{3}} provides for a variance of 1.
     Uniform = 'U'
 };
 
@@ -333,7 +331,7 @@ static_assert(SketchingDistribution<DenseDist>);
 ///  A sample from a distribution over matrices whose entries are iid
 ///  mean-zero variance-one random variables.
 ///  This type conforms to the SketchingOperator concept.
-template <typename T, typename RNG = r123::Philox4x32>
+template <typename T, typename RNG = DefaultRNG>
 struct DenseSkOp {
 
     // ---------------------------------------------------------------------------
@@ -537,7 +535,7 @@ static_assert(SketchingOperator<DenseSkOp<double>>);
 ///      - Used to define :math:`\mtxS` as a sample from :math:`\D.`
 ///
 /// @endverbatim
-template<typename T, typename RNG = r123::Philox4x32>
+template<typename T, typename RNG = DefaultRNG>
 RNGState<RNG> fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t ro_s, int64_t co_s, T* buff, const RNGState<RNG> &seed) {
     using RandBLAS::dense::fill_dense_submat_impl;
     randblas_require(D.n_rows >= n_rows + ro_s);
@@ -597,7 +595,7 @@ RNGState<RNG> fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64
 ///      A CBRNG state
 ///      - Used to define \math{\mat(\buff)} as a sample from \math{\D}.
 ///
-template <typename T, typename RNG = r123::Philox4x32>
+template <typename T, typename RNG = DefaultRNG>
 RNGState<RNG> fill_dense(const DenseDist &D, T *buff, const RNGState<RNG> &seed) {
     return fill_dense_unpacked(D.natural_layout, D, D.n_rows, D.n_cols, 0, 0, buff, seed);
 }
diff --git a/RandBLAS/sparse_skops.hh b/RandBLAS/sparse_skops.hh
index 6ab2473c..9664bd9f 100644
--- a/RandBLAS/sparse_skops.hh
+++ b/RandBLAS/sparse_skops.hh
@@ -101,6 +101,7 @@ static state_t repeated_fisher_yates(
             vec_work[ell] = swap;
         }
     }
+    ctr.incr(dim_minor * vec_nnz);
     return state_t {ctr, key};
 }
 
@@ -239,7 +240,7 @@ inline state_t repeated_fisher_yates(
     return sparse::repeated_fisher_yates(state, k, n, r, samples, (sint_t*) nullptr, (double*) nullptr);
 }
 
-template <typename RNG>
+template <typename RNG = DefaultRNG>
 RNGState<RNG> compute_next_state(SparseDist dist, RNGState<RNG> state) {
     int64_t num_mavec, incrs_per_mavec;
     if (dist.major_axis == Axis::Short) {
@@ -249,7 +250,7 @@ RNGState<RNG> compute_next_state(SparseDist dist, RNGState<RNG> state) {
         //   See repeated_fisher_yates.
     } else {
         num_mavec = std::min(dist.n_rows, dist.n_cols);
-        incrs_per_mavec = dist.vec_nnz * ((int64_t) state.len_c/2);
+        incrs_per_mavec = (int64_t) std::ceil((double) dist.vec_nnz / ((double) state.len_c/2));
         // ^ LASOs do try to be frugal with CBRNG increments.
         //   See sample_indices_iid_uniform.
     }
@@ -262,7 +263,7 @@ RNGState<RNG> compute_next_state(SparseDist dist, RNGState<RNG> state) {
 /// A sample from a distribution over structured sparse matrices with either
 /// independent rows or independent columns. This type conforms to the
 /// SketchingOperator concept.
-template <typename T, typename RNG = r123::Philox4x32, SignedInteger sint_t = int64_t>
+template <typename T, typename RNG = DefaultRNG, SignedInteger sint_t = int64_t>
 struct SparseSkOp {
 
     // ---------------------------------------------------------------------------
@@ -483,6 +484,10 @@ void laso_merge_long_axis_vector_coo_data(
 /// a COO sparse matrix representation of the SparseSkOp
 /// defined by \math{(\D,\ttt{seed_state)}.}
 ///
+/// Note: the "nosub" suffix in this function name reflects how it has no ability
+/// to sample submatrices of sparse sketching operators. A future release of
+/// RandBLAS will add a function called "fill_sparse_unpacked()" with capabilities
+/// that are completely analogous to fill_dense_unpacked().
 /// 
 template <typename T, typename sint_t, typename state_t>
 state_t fill_sparse_unpacked_nosub(
@@ -570,6 +575,7 @@ void fill_sparse(SparseSkOp &S) {
     randblas_require(S.cols != nullptr);
     randblas_require(S.vals != nullptr);
     fill_sparse_unpacked_nosub(S.dist, S.nnz, S.vals, S.rows, S.cols, S.seed_state);
+    // ^ We ignore the return value from that function call.
     return;
 }
 
diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh
index a273c4a9..bd55736b 100644
--- a/RandBLAS/util.hh
+++ b/RandBLAS/util.hh
@@ -391,13 +391,19 @@ void transpose_square(T* A, int64_t n, int64_t lda) {
     return;
 }
 
+// =============================================================================
+/// \fn sqrt_epsilon()
+/// Alias for sqrt(numeric_limits<T>::epsilon()). For example,
+/// \math{\ttt{sqrt_epsilon<float>()} \approx 0.0003452}, and 
+/// \math{\ttt{sqrt_epsilon<double>()} \approx 1.4901\text{e-}8}.
+/// 
 template <typename T>
 T sqrt_epsilon() {
     return std::sqrt(std::numeric_limits<T>::epsilon());
 }
 
 // =============================================================================
-/// \fn weights_to_cdf(int64_t n, T* w, T error_if_below = -std::numeric_limits<T>::epsilon())
+/// \fn weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon<T>())
 /// @verbatim embed:rst:leading-slashes
 /// Checks if all elements of length-:math:`n` array ":math:`w`" are at no smaller than 
 /// :math:`\ttt{error_if_below}.` If this check passes, then we (implicitly) initialize :math:`v := w`` 
@@ -426,7 +432,7 @@ void weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon<T>()) {
 }
 
 template <typename TO, typename TI>
-static inline TO uneg11_to_uneg01(TI in) {
+static inline TO uneg11_to_u01(TI in) {
     return ((TO) in + (TO) 1.0)/ ((TO) 2.0);
 }
 
@@ -453,16 +459,17 @@ state_t sample_indices_iid(int64_t n, const T* cdf, int64_t k, sint_t* samples,
     int64_t len_c = (int64_t) state.len_c;
     int64_t rv_index = 0;
     for (int64_t i = 0; i < k; ++i) {
-        if ((i+1) % len_c == 1) {
+        auto random_unif01 = uneg11_to_u01<T>(rv_array[rv_index]);
+        sint_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf;
+        samples[i] = sample_index;
+        rv_index += 1;
+        if (rv_index == len_c) {
             ctr.incr(1);
             rv_array = r123ext::uneg11::generate(gen, ctr, key);
             rv_index = 0;
         }
-        auto random_unif01 = uneg11_to_uneg01<T>(rv_array[rv_index]);
-        sint_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf;
-        samples[i] = sample_index;
-        rv_index += 1;
     }
+    if (0 < rv_index) ctr.incr(1);
     return state_t(ctr, key);
 }
  
@@ -480,7 +487,7 @@ state_t sample_indices_iid_uniform(int64_t n, int64_t k, sint_t* samples, T* rad
     int64_t rv_index = 0;
     double dN = (double) n;
     for (int64_t i = 0; i < k; ++i) {
-        auto random_unif01 = uneg11_to_uneg01<double>(rv_array[rv_index]);
+        auto random_unif01 = uneg11_to_u01<double>(rv_array[rv_index]);
         sint_t sample_index = (sint_t) dN * random_unif01;
         samples[i] = sample_index;
         rv_index += 1;
@@ -494,6 +501,7 @@ state_t sample_indices_iid_uniform(int64_t n, int64_t k, sint_t* samples, T* rad
             rv_index = 0;
         }
     }
+    if (0 < rv_index) ctr.incr(1);
     return state_t(ctr, key);
 }
 
diff --git a/rtd/source/api_reference/utilities.rst b/rtd/source/api_reference/utilities.rst
index 8de9e71f..69586502 100644
--- a/rtd/source/api_reference/utilities.rst
+++ b/rtd/source/api_reference/utilities.rst
@@ -10,10 +10,15 @@
 Utilities
 ############################################################
 
+Numerical tolerances
+====================
+.. doxygenfunction:: RandBLAS::sqrt_epsilon
+  :project: RandBLAS
+
 Random sampling from index sets
 ===============================
 
-.. doxygenfunction:: RandBLAS::weights_to_cdf(int64_t n, T* w, T error_if_below = -std::numeric_limits<T>::epsilon())
+.. doxygenfunction:: RandBLAS::weights_to_cdf(int64_t n, T* w, T error_if_below = -sqrt_epsilon<T>())
   :project: RandBLAS
 
 .. doxygenfunction:: RandBLAS::sample_indices_iid(int64_t n, const T* cdf, int64_t k, sint_t* samples, const state_t &state)
diff --git a/rtd/source/updates/index.rst b/rtd/source/updates/index.rst
index d43723ca..d0f00cec 100644
--- a/rtd/source/updates/index.rst
+++ b/rtd/source/updates/index.rst
@@ -2,17 +2,20 @@
 Changes to RandBLAS
 ===================
 
-This page details changes made to RandBLAS over time, in reverse chronological order.
+This page reviews changes made to RandBLAS over time, in reverse chronological order.
 We have a tentative policy of providing bugfix support for any release of 
 RandBLAS upon request, no matter how old. With any luck, this project will grow enough
 that we'll have to change this policy.
 
-RandBLAS follows `Semantic Versioning <https://semver.org>`_.
+RandBLAS follows `Semantic Versioning <https://semver.org>`_. Any function documented
+on this website is part of the public API. There are many functions which are not
+part of our public API, but could be added to it if there is user interest. 
 
+RandBLAS' latest version is :ref:`1.0.1 <v10x_patches>`.
 
 RandBLAS 1.0
 ------------
-*Release date: September 12, 2024. Release manager: Riley Murray.*
+*Original release date: September 12, 2024. Release manager: Riley Murray.*
 
 Today marks RandBLAS' second-ever release, its first *stable* release,
 and its first release featuring the contributions of someone who showed
@@ -94,6 +97,13 @@ National Technology and Engineering Solutions of Sandia, LLC., a wholly owned su
 of Honeywell International, Inc., for the U.S. Department of Energy’s National Nuclear
 Security Administration under contract DE-NA-0003525.
 
+.. _v10x_patches:
+
+Patch releases in series 1.0.x
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Version 1.0.1 (September 29, 2024). This patches bugs in values of RNGStates
+returned from functions for sampling from index sets. See GitHub for more details.
 
 
 RandBLAS 0.2
diff --git a/test/test_basic_rng/test_discrete.cc b/test/test_basic_rng/test_discrete.cc
index 6b485dca..43536b66 100644
--- a/test/test_basic_rng/test_discrete.cc
+++ b/test/test_basic_rng/test_discrete.cc
@@ -34,7 +34,12 @@
 #include <RandBLAS/exceptions.hh>
 #include <RandBLAS/sparse_skops.hh>
 using RandBLAS::RNGState;
+using RandBLAS::weights_to_cdf;
+using RandBLAS::sample_indices_iid;
+using RandBLAS::sample_indices_iid_uniform;
+using RandBLAS::repeated_fisher_yates;
 #include "rng_common.hh"
+#include "../comparison.hh"
 
 #include <algorithm>
 #include <iostream>
@@ -50,6 +55,8 @@ using RandBLAS::RNGState;
 #include <cassert>
 #include <gtest/gtest.h>
 
+using std::vector;
+
 
 class TestSampleIndices : public ::testing::Test
 {
@@ -60,12 +67,13 @@ class TestSampleIndices : public ::testing::Test
     virtual void TearDown(){};
 
     //
-    // Mark: With Replacement Tests
+    // MARK: With Replacement
     //
+
     static void test_iid_uniform_smoke(int64_t N, int64_t k, uint32_t seed) { 
         RNGState state(seed);
-        std::vector<int64_t> samples(k, -1);
-        RandBLAS::sample_indices_iid_uniform(N, k, samples.data(), state);
+        vector<int64_t> samples(k, -1);
+        sample_indices_iid_uniform(N, k, samples.data(), state);
         int64_t* data = samples.data();
         for (int64_t i = 0; i < k; ++i) {
             ASSERT_LT(data[i], N);
@@ -75,13 +83,13 @@ class TestSampleIndices : public ::testing::Test
     }
 
     static void index_set_kolmogorov_smirnov_tester(
-        std::vector<int64_t> &samples, std::vector<float> &true_cdf, double critical_value
+        vector<int64_t> &samples, vector<float> &true_cdf, double critical_value
     ) {
         auto N = (int64_t) true_cdf.size();
-        std::vector<float> sample_cdf(N, 0.0);
+        vector<float> sample_cdf(N, 0.0);
         for (int64_t s : samples)
             sample_cdf[s] += 1;
-        RandBLAS::weights_to_cdf(N, sample_cdf.data());
+        weights_to_cdf(N, sample_cdf.data());
 
         for (int i = 0; i < N; ++i) {
             float F_empirical = sample_cdf[i];
@@ -96,12 +104,12 @@ class TestSampleIndices : public ::testing::Test
         using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator;
         auto critical_value = critical_value_rep_mutator(num_samples, significance);
 
-        std::vector<float> true_cdf(N, 1.0);
-        RandBLAS::weights_to_cdf(N, true_cdf.data());
+        vector<float> true_cdf(N, 1.0);
+        weights_to_cdf(N, true_cdf.data());
 
         RNGState state(seed);
-        std::vector<int64_t> samples(num_samples, -1);
-        RandBLAS::sample_indices_iid_uniform(N, num_samples, samples.data(), state);
+        vector<int64_t> samples(num_samples, -1);
+        sample_indices_iid_uniform(N, num_samples, samples.data(), state);
 
         index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value);
         return;
@@ -112,14 +120,14 @@ class TestSampleIndices : public ::testing::Test
         auto critical_value = critical_value_rep_mutator(num_samples, significance);
 
         // Make the true CDF 
-        std::vector<float> true_cdf{};
+        vector<float> true_cdf{};
         for (int i = 0; i < N; ++i)
             true_cdf.push_back(std::pow(1.0/((float)i + 1.0), exponent));
-        RandBLAS::weights_to_cdf(N, true_cdf.data());
+        weights_to_cdf(N, true_cdf.data());
 
         RNGState state(seed);
-        std::vector<int64_t> samples(num_samples, -1);
-        RandBLAS::sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state);
+        vector<int64_t> samples(num_samples, -1);
+        sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state);
 
         index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value);
         return;
@@ -128,14 +136,11 @@ class TestSampleIndices : public ::testing::Test
     static void test_iid_degenerate_distributions(uint32_t seed) {
         int64_t N = 100;
         int64_t num_samples = N*N;
-        std::vector<int64_t> samples(num_samples, -1);
+        vector<int64_t> samples(num_samples, -1);
         RNGState state(seed);
 
-        using RandBLAS::weights_to_cdf;
-        using RandBLAS::sample_indices_iid;
-
         // Test case 1: distribution is nonuniform, with mass only on even elements != 10.
-        std::vector<float> true_cdf(N, 0.0);
+        vector<float> true_cdf(N, 0.0);
         for (int i = 0; i < N; i = i + 2)
             true_cdf[i] = 1.0f / ((float) i + 1.0f);
         true_cdf[10] = 0.0;
@@ -161,31 +166,74 @@ class TestSampleIndices : public ::testing::Test
         return;
     }
 
+    static void test_updated_rngstates_iid_uniform() {
+        RNGState seed;
+        int offset = 3456;
+        seed.counter.incr(offset);
+        int n = 40;
+        int k = 17;
+        vector<int> unimportant(2*k);
+        auto s1 = sample_indices_iid_uniform(n, k, unimportant.data(), seed);
+        auto s2 = sample_indices_iid_uniform(n, k, unimportant.data(), s1);
+        // check that counter increments are the same for the two samples of k indices.
+        auto total_2call = s2.counter.v[0];
+        EXPECT_EQ(total_2call-offset, 2*(s1.counter.v[0]-offset));
+
+        // check that the counter increment for a single sample of size 2k is (a) no larger
+        // than the total increment for two samples of size k, and (b) is at most one less
+        // than the total increment for two samples of size k.
+        auto t = sample_indices_iid_uniform(n, 2*k, unimportant.data(), seed);
+        auto total_1call = t.counter.v[0];
+        EXPECT_LE( total_1call, total_2call    );
+        EXPECT_LE( total_2call, total_1call + 1);
+    }
+
+    static void test_updated_rngstates_iid() {
+        RNGState seed;
+        int offset = 8675309;
+        seed.counter.incr(offset);
+        int n = 29;
+        int k = 13;
+        vector<int> unimportant(2*k);
+        vector<float> cdf(n, 1.0);
+        weights_to_cdf(n, cdf.data());
+
+        auto s1 = sample_indices_iid(n, cdf.data(), k, unimportant.data(), seed);
+        auto s2 = sample_indices_iid(n, cdf.data(), k, unimportant.data(), s1);
+        // check that counter increments are the same for the two samples of k indices.
+        auto total_2call = s2.counter.v[0];
+        EXPECT_EQ(total_2call-offset, 2*(s1.counter.v[0]-offset));
+
+        // check that the counter increment for a single sample of size 2k is (a) no larger
+        // than the total increment for two samples of size k, and (b) is at most one less
+        // than the total increment for two samples of size k.
+        auto t = sample_indices_iid(n, cdf.data(), 2*k, unimportant.data(), seed);
+        auto total_1call = t.counter.v[0];
+        EXPECT_LE( total_1call, total_2call    );
+        EXPECT_LE( total_2call, total_1call + 1);
+    }
+
     //
-    // Mark: Without Replacement Tests
+    // MARK: Without Replacement
     //
 
-    static std::vector<float> fisher_yates_cdf(const std::vector<int64_t> &idxs_major, int64_t K, int64_t num_samples) {
-        using RandBLAS::weights_to_cdf;
-        std::vector<float> empirical_cdf;
+    static vector<float> fisher_yates_cdf(const vector<int64_t> &idxs_major, int64_t K, int64_t num_samples) {
+        vector<float> empirical_cdf;
 
         // If K is 0, then there's nothing to count over and we should just return 1
         if (K == 0) {
             empirical_cdf.push_back(1.0);
-        }
-        else {
+        } else {
             // Count how many values in idxs_major are less than K across the samples
-            std::vector<int64_t> counter(K + 1, 0);
+            vector<int64_t> counter(K + 1, 0);
             for (int64_t i = 0; i < K * num_samples; i += K) {
                 int count = 0;
                 for (int64_t j = 0; j < K; ++j) {
-                    if (idxs_major[i + j] < K) {
+                    if (idxs_major[i + j] < K)
                         count += 1;
-                    }
                 }
                 counter[count] += 1;
             }
-
             // Copy counter and normalize to get empirical_cdf
             empirical_cdf.resize(counter.size());
             std::copy(counter.begin(), counter.end(), empirical_cdf.begin());
@@ -196,38 +244,33 @@ class TestSampleIndices : public ::testing::Test
     }
 
     static void fisher_yates_kolmogorov_smirnov_tester(
-        const std::vector<int64_t> &idxs_major, std::vector<float> &true_cdf, double critical_value, int64_t N, int64_t K, int64_t num_samples
+        const vector<int64_t> &idxs_major, vector<float> &true_cdf, double critical_value, int64_t N, int64_t K, int64_t num_samples
     ) {
         using RandBLAS_StatTests::ks_check_critval;
         // Calculate the empirical cdf and check critval
-        std::vector<float> empirical_cdf = fisher_yates_cdf(idxs_major, K, num_samples);
+        vector<float> empirical_cdf = fisher_yates_cdf(idxs_major, K, num_samples);
         std::pair<int, double> result = ks_check_critval(true_cdf, empirical_cdf, critical_value);
-
-        std::cout << std::endl;
-        if (result.first != -1) {
-            std::cout << std::endl;
-            std::cout << "KS test failed at index " << result.first << " with difference " << result.second << " and critical value " << critical_value << std::endl;
-            std::cout << "Test parameters: " << "N=" << N << " " << "K=" << K << " " << "num_samples=" << num_samples << std::endl;
-        }
+        EXPECT_EQ(result.first, -1) 
+            << "\nKS test failed at index " << result.first 
+            << " with difference " << result.second << " and critical value " << critical_value
+            << "\nTest parameters: " << "N=" << N << " " << "K=" << K << " " << "num_samples=" << num_samples << std::endl;
     }
 
     static void single_test_fisher_yates_kolmogorov_smirnov(int64_t N, int64_t K, double significance, int64_t num_samples, uint32_t seed) {
-        using RandBLAS::sparse::repeated_fisher_yates;
         using RandBLAS_StatTests::hypergeometric_pmf_arr;
-        using RandBLAS::weights_to_cdf;
         using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator;
 
         auto critical_value = critical_value_rep_mutator(num_samples, significance);
 
         // Initialize arguments for repeated_fisher_yates
-        std::vector<int64_t> indices(K * num_samples);
+        vector<int64_t> indices(K * num_samples);
         RNGState state(seed);
 
         // Generate repeated Fisher-Yates in idxs_major
         state = repeated_fisher_yates(K, N, num_samples, indices.data(), state);
 
         // Generate the true hypergeometric cdf (get the pdf first)
-        std::vector<float> true_cdf = hypergeometric_pmf_arr<float>(N, K, K);
+        vector<float> true_cdf = hypergeometric_pmf_arr<float>(N, K, K);
         weights_to_cdf(true_cdf.size(), true_cdf.data());
 
         // Compute the critval and check against it
@@ -258,8 +301,47 @@ class TestSampleIndices : public ::testing::Test
         }
     }
 
+    static void test_updated_rngstates_fisher_yates() {
+        RNGState seed;
+        int offset = 306;
+        seed.counter.incr(offset);
+        int n = 29;
+        int k = 17;
+        int r1 = 1;
+        int r2 = 3;
+        int r_total  = r1 + r2;
+        vector<int> twocall(r_total*k);
+        vector<int> onecall(r_total*k);
+
+        auto s1 = repeated_fisher_yates(k, n, r1, twocall.data(), seed);
+        auto s2 = repeated_fisher_yates(k, n, r2, twocall.data() + r1*k, s1);
+        auto ctr_twocall = (int) s2.counter.v[0];
+        auto expect_incr = (int) std::ceil(((float)r_total/r1)*(s1.counter.v[0]-offset));
+        EXPECT_EQ(ctr_twocall - offset, expect_incr);
+
+        auto t = repeated_fisher_yates(k, n, r_total, onecall.data(), seed);
+        auto ctr_onecall = t.counter.v[0];
+        EXPECT_EQ( ctr_onecall, ctr_twocall );
+
+        test::comparison::buffs_approx_equal(
+            onecall.data(), twocall.data(), r_total*k, __PRETTY_FUNCTION__, __FILE__, __LINE__
+        );
+    }
+
 };
 
+TEST_F(TestSampleIndices, rngstate_updates_iid_uniform) {
+    test_updated_rngstates_iid_uniform();
+}
+
+TEST_F(TestSampleIndices, rngstate_updates_iid) {
+    test_updated_rngstates_iid();
+}
+
+TEST_F(TestSampleIndices, rngstate_updates_fisher_yates) {
+    test_updated_rngstates_fisher_yates();
+}
+
 
 TEST_F(TestSampleIndices, smoke_3_x_10) {
     for (uint32_t i = 0; i < 10; ++i)
diff --git a/test/test_basic_rng/test_r123.cc b/test/test_basic_rng/test_r123.cc
index b63a4ab8..8049a4de 100644
--- a/test/test_basic_rng/test_r123.cc
+++ b/test/test_basic_rng/test_r123.cc
@@ -672,6 +672,37 @@ void run_ut_uniform(){
 
 // MARK: my tests + Googletest
 
+class TestRNGState : public ::testing::Test {
+
+    protected:
+
+    void test_uint_key_constructors() {
+        using RNG = r123::Philox4x32;
+        int len_k = RNG::key_type::static_size;
+        ASSERT_EQ(len_k, 2);
+        // No-arugment constructor
+        RandBLAS::RNGState<RNG> s;
+        ASSERT_EQ(s.key[0], 0);
+        ASSERT_EQ(s.key[1], 0);
+        for (int i = 0; i < 4; ++i) {
+            ASSERT_EQ(s.counter[i], 0) << "Failed at index " << i;
+        }
+        // unsigned-int constructor
+        RandBLAS::RNGState<RNG> t(42);
+        ASSERT_EQ(t.key[0], 42);
+        ASSERT_EQ(t.key[1], 0);
+        for (int i = 0; i < 4; ++i) {
+            ASSERT_EQ(t.counter[i], 0) << "Failed at index " << i;
+        }
+        return;
+    }
+};
+
+TEST_F(TestRNGState, uint_key_constructors) {
+    test_uint_key_constructors();
+}
+
+
 class TestRandom123 : public ::testing::Test { 
 
     protected:
diff --git a/test/test_datastructures/test_sparseskop.cc b/test/test_datastructures/test_sparseskop.cc
index e329bb7b..38cc1250 100644
--- a/test/test_datastructures/test_sparseskop.cc
+++ b/test/test_datastructures/test_sparseskop.cc
@@ -34,12 +34,14 @@
 #include <gtest/gtest.h>
 #include <cmath>
 
+using std::vector;
 using RandBLAS::RNGState;
 using RandBLAS::SignedInteger;
 using RandBLAS::SparseDist;
 using RandBLAS::SparseSkOp;
 using RandBLAS::Axis;
 using RandBLAS::fill_sparse;
+using RandBLAS::fill_sparse_unpacked_nosub;
 
 
 class TestSparseSkOpConstruction : public ::testing::Test
@@ -143,6 +145,31 @@ class TestSparseSkOpConstruction : public ::testing::Test
         test::comparison::buffs_approx_equal(vals.data(), vals_copy.data(), sd.full_nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__, (T) 0, (T) 0);
         return;
     }
+
+    void unpacked_nosub(const SparseDist &D) {
+        RNGState<RandBLAS::DefaultRNG> s(1);
+        SparseSkOp<float> S(D, s);
+        auto expect_next = S.next_state;
+        fill_sparse(S);
+        vector<int64_t> rows(D.full_nnz);
+        vector<int64_t> cols(D.full_nnz);
+        vector<float>   vals(D.full_nnz);
+        int64_t nnz = 0;
+        auto actual_next = fill_sparse_unpacked_nosub(
+            D, nnz, vals.data(), rows.data(), cols.data(), s
+        );
+        EXPECT_EQ(S.nnz, nnz);
+        EXPECT_TRUE(actual_next == expect_next);
+        test::comparison::buffs_approx_equal(
+            vals.data(), S.vals, nnz, __PRETTY_FUNCTION__, __FILE__, __LINE__
+        );
+        test::comparison::buffs_approx_equal(
+            rows.data(), S.rows, nnz,  __PRETTY_FUNCTION__, __FILE__, __LINE__
+        );
+        test::comparison::buffs_approx_equal(
+            cols.data(), S.cols, nnz,  __PRETTY_FUNCTION__, __FILE__, __LINE__
+        );
+    }
 };
 
 TEST_F(TestSparseSkOpConstruction, respect_ownership) {
@@ -155,6 +182,15 @@ TEST_F(TestSparseSkOpConstruction, respect_ownership) {
     respect_ownership<int>(7, 20);
 }
 
+TEST_F(TestSparseSkOpConstruction, fill_unpacked_nosub_saso) {
+    unpacked_nosub({10,20,7,Axis::Short});
+    unpacked_nosub({20,10,7,Axis::Short});
+}
+
+TEST_F(TestSparseSkOpConstruction, fill_unpacked_nosub_laso) {
+    unpacked_nosub({10,20,7,Axis::Long});
+    unpacked_nosub({20,10,7,Axis::Long});
+}
 
 ////////////////////////////////////////////////////////////////////////
 //