From a191110299866ebb668dee106b1f45387f2a74d2 Mon Sep 17 00:00:00 2001 From: Riley Murray Date: Thu, 4 Jul 2024 08:34:15 -0400 Subject: [PATCH] tests for degenerate distributions --- RandBLAS/util.hh | 2 - test/test_basic_rng/test_sample_indices.cc | 100 +++++++++++++-------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/RandBLAS/util.hh b/RandBLAS/util.hh index 4dadf795..177a1f9a 100644 --- a/RandBLAS/util.hh +++ b/RandBLAS/util.hh @@ -288,8 +288,6 @@ RNGState sample_indices_iid( } auto random_unif01 = uneg11_to_uneg01(rv_array[rv_index]); int64_t sample_index = std::lower_bound(cdf, cdf + n, random_unif01) - cdf; - // ^ uses binary search to set sample_index to the smallest value for which - // random_unif01 < cdf[sample_index]. samples[i] = sample_index; rv_index += 1; } diff --git a/test/test_basic_rng/test_sample_indices.cc b/test/test_basic_rng/test_sample_indices.cc index 08557a8a..5cf041f1 100644 --- a/test/test_basic_rng/test_sample_indices.cc +++ b/test/test_basic_rng/test_sample_indices.cc @@ -94,14 +94,14 @@ class TestSampleIndices : public ::testing::Test return; } - static void test_iid_kolmogorov_smirnov(int64_t N, double significance, int64_t num_samples, uint32_t seed) { + static void test_iid_kolmogorov_smirnov(int64_t N, int exponent, double significance, int64_t num_samples, uint32_t seed) { using RandBLAS_StatTests::KolmogorovSmirnovConstants::critical_value_rep_mutator; auto critical_value = critical_value_rep_mutator(num_samples, significance); // Make the true CDF std::vector true_cdf{}; for (int i = 0; i < N; ++i) - true_cdf.push_back(1.0/((float)i + 1.0)); + true_cdf.push_back(std::pow(1.0/((float)i + 1.0), exponent)); RandBLAS::util::weights_to_cdf(N, true_cdf.data()); RNGState state(seed); @@ -111,6 +111,42 @@ class TestSampleIndices : public ::testing::Test index_set_kolmogorov_smirnov_tester(samples, true_cdf, critical_value); return; } + + static void test_iid_degenerate_distributions(uint32_t seed) { + int64_t N = 100; + int64_t num_samples = N*N; + std::vector samples(num_samples, -1); + RNGState state(seed); + + using RandBLAS::util::weights_to_cdf; + using RandBLAS::util::sample_indices_iid; + + // Test case 1: distribution is nonuniform, with mass only on even elements != 10. + std::vector true_cdf(N, 0.0); + for (int i = 0; i < N; i = i + 2) + true_cdf[i] = 1.0f / ((float) i + 1.0f); + true_cdf[10] = 0.0; + weights_to_cdf(N, true_cdf.data()); + sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); + for (auto s : samples) { + ASSERT_FALSE(s == 10 || s % 2 == 1) << "s = " << s; + } + + // Test case 2: distribution is trivial (a delta function), + // and a negative weight needs to be clipped without error. + std::fill(true_cdf.begin(), true_cdf.end(), 0.0); + std::fill(samples.begin(), samples.end(), -1); + true_cdf[17] = 99.0f; + true_cdf[3] = -std::numeric_limits::epsilon()/10; + randblas_require(true_cdf[3] < 0); + weights_to_cdf(N, true_cdf.data()); + ASSERT_GE(true_cdf[17], 0.0f); + sample_indices_iid(N, true_cdf.data(), num_samples, samples.data(), state); + for (auto s : samples) { + ASSERT_EQ(s, 17); + } + return; + } }; @@ -136,6 +172,11 @@ TEST_F(TestSampleIndices, smoke_big) { test_iid_uniform_smoke(huge_N, 1000, i); } +TEST_F(TestSampleIndices, support_of_degenerate_distributions) { + for (uint32_t i = 789; i < 799; ++i) + test_iid_degenerate_distributions(i); +} + TEST_F(TestSampleIndices, iid_uniform_ks_generous) { double s = 1e-6; test_iid_uniform_kolmogorov_smirnov(100, s, 100000, 0); @@ -160,49 +201,30 @@ TEST_F(TestSampleIndices, iid_uniform_ks_skeptical) { TEST_F(TestSampleIndices, iid_ks_generous) { double s = 1e-6; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } TEST_F(TestSampleIndices, iid_ks_moderate) { float s = 1e-4; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } TEST_F(TestSampleIndices, iid_ks_skeptical) { float s = 1e-2; - test_iid_kolmogorov_smirnov(100, s, 100000, 0); - test_iid_kolmogorov_smirnov(10000, s, 1000, 0); - test_iid_kolmogorov_smirnov(1000000, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 1, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 1, s, 1000, 0); + test_iid_kolmogorov_smirnov(100, 3, s, 100000, 0); + test_iid_kolmogorov_smirnov(10000, 3, s, 1000, 0); + test_iid_kolmogorov_smirnov(1000000, 3, s, 1000, 0); } - - - -// class TestSampleIndices : public ::testing::Test -// { -// protected: - -// virtual void SetUp(){}; - -// virtual void TearDown(){}; - -// template -// static void test_basic( - -// ) { -// return; -// } - -// }; - - -// TEST_F(TestSampleIndices, smoke) -// { -// // do something -// } - - -