From 4015fbc8c3236cedcc26ad6d6bdd937841563b75 Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 8 Jul 2020 22:35:28 -0500 Subject: [PATCH 01/83] Initial Commit --- .../special_functions/prime_functions.hpp | 118 ++++++++++++++++++ .../prime_functions_performance.cpp | 42 +++++++ test/test_prime_functions.cpp | 113 +++++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 include/boost/math/special_functions/prime_functions.hpp create mode 100644 reporting/performance/prime_functions_performance.cpp create mode 100644 test/test_prime_functions.cpp diff --git a/include/boost/math/special_functions/prime_functions.hpp b/include/boost/math/special_functions/prime_functions.hpp new file mode 100644 index 0000000000..6c4d1de423 --- /dev/null +++ b/include/boost/math/special_functions/prime_functions.hpp @@ -0,0 +1,118 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost { namespace math +{ + +// https://mathworld.wolfram.com/SieveofEratosthenes.html +// https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf +template +auto prime_sieve(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +{ + static_assert(std::is_integral::value, "No primes for floating point types"); + std::vector least_divisors; + std::deque primes; + + try + { + least_divisors.reserve(upper_bound + 1); + for (size_t i{}; i < upper_bound + 1; ++i) + { + least_divisors.emplace_back(0); + } + } + + catch (const std::exception &e) + { + // If exception is thrown it is most likely std::bad_alloc + std::cerr << e.what() << '\n'; + throw; + } + + + for (Z i{2}; i <= upper_bound; ++i) + { 
+ if (least_divisors[i] == 0) + { + least_divisors[i] = i; + primes.emplace_back(i); + } + + for (size_t j{}; j < least_divisors.size(); ++j) + { + if (j >= primes.size()) + { + break; + } + + if (primes[j] > least_divisors[i]) + { + break; + } + + if (i * primes[j] > upper_bound) + { + break; + } + + least_divisors[i * primes[j]] = primes[j]; + } + } + + auto it{primes.begin()}; + while (*it < lower_bound && it != primes.end()) + { + primes.pop_front(); + ++it; + } + + return std::move(primes.begin(), primes.end(), output); +} + +template +auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +{ + if (upper_bound <= 104729) + { + Z i{2}; + unsigned counter {}; + std::deque primes; + while (i <= upper_bound) + { + if (i >= lower_bound) + { + primes.emplace_back(i); + } + ++counter; + i = static_cast(boost::math::prime(counter)); + } + + return std::move(primes.begin(), primes.end(), output); + } else + { + return prime_sieve(lower_bound, upper_bound, output); + } +} + +template +inline auto prime_range(Z upper_bound, OutputIterator output) -> decltype(output) +{ + return prime_range(static_cast(2), upper_bound, output); +} +}} + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP diff --git a/reporting/performance/prime_functions_performance.cpp b/reporting/performance/prime_functions_performance.cpp new file mode 100644 index 0000000000..cf08027d6f --- /dev/null +++ b/reporting/performance/prime_functions_performance.cpp @@ -0,0 +1,42 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "../../include/boost/math/special_functions/prime_functions.hpp" + +#include + +template +void prime_sieve(benchmark::State& state) +{ + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(boost::math::prime_sieve(static_cast(2), upper, std::back_inserter(primes))); + } +} + +template +void prime_sieve_partial_range(benchmark::State& state) +{ + Z upper = static_cast(state.range(0)); + Z lower = static_cast(state.range(0)) > 2 ? static_cast(state.range(0)) : 2; + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(boost::math::prime_sieve(lower, upper, std::back_inserter(primes))); + } +} + +BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); + +BENCHMARK_MAIN(); diff --git a/test/test_prime_functions.cpp b/test/test_prime_functions.cpp new file mode 100644 index 0000000000..6aef4e9851 --- /dev/null +++ b/test/test_prime_functions.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "../include/boost/math/special_functions/prime_functions.hpp" + +#include +#include +#include +#include + +template +void test_prime_sieve() +{ + std::vector primes; + Z ref {168}; // Calculated with wolfram-alpha + + // Does the function work with a vector + boost::math::prime_sieve(2, 1000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); + + // Tests for correctness + // 100 + primes.clear(); + boost::math::prime_sieve(2, 100, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 25); + + // 10'000 + primes.clear(); + boost::math::prime_sieve(2, 10000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 1229); + + // 100'000 + primes.clear(); + boost::math::prime_sieve(2, 100000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 9592); + + // 1'000'000 + primes.clear(); + boost::math::prime_sieve(2, 1000000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 78498); + + // Does the function work with a list? + std::list l_primes; + boost::math::prime_sieve(2, 1000, std::back_inserter(l_primes)); + BOOST_TEST_EQ(l_primes.size(), ref); + + // Does the function work with a deque? + std::deque d_primes; + boost::math::prime_sieve(2, 1000, std::back_inserter(d_primes)); + BOOST_TEST_EQ(d_primes.size(), ref); +} + +template +void test_prime_range() +{ + std::vector primes; + Z ref {168}; // Calculated with wolfram-alpha + + // Does the upper and lower bound call work + boost::math::prime_range(2, 1000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); + + // Does the upper bound call work + primes.clear(); + boost::math::prime_range(1000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); + + // Does it work with a deque? + std::deque d_primes; + boost::math::prime_range(1000, std::back_inserter(d_primes)); + BOOST_TEST_EQ(d_primes.size(), ref); + + // Does it work with a list? 
+ std::list l_primes; + boost::math::prime_range(1000, std::front_inserter(l_primes)); + BOOST_TEST_EQ(l_primes.size(), ref); + + // Does the lower bound change the results? + ref = 143; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(100, 1000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); + + // Does it work with 0 difference? + primes.clear(); + boost::math::prime_range(2, 2, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 1); + + // Will it call the sieve for large input + ref = 78498; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(1000000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); +} + +int main() +{ + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + + boost::report_errors(); +} From 3ee737beb0944e0609a8168301046bd1e8f66eda Mon Sep 17 00:00:00 2001 From: mborland Date: Thu, 9 Jul 2020 22:13:01 -0500 Subject: [PATCH 02/83] Changed init of least_divisors --- .../special_functions/prime_functions.hpp | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/include/boost/math/special_functions/prime_functions.hpp b/include/boost/math/special_functions/prime_functions.hpp index 6c4d1de423..020b5fccbc 100644 --- a/include/boost/math/special_functions/prime_functions.hpp +++ b/include/boost/math/special_functions/prime_functions.hpp @@ -24,26 +24,9 @@ template auto prime_sieve(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); - std::vector least_divisors; + std::vector least_divisors(upper_bound + 1, 0); std::deque primes; - try - { - least_divisors.reserve(upper_bound + 1); - for (size_t i{}; i < upper_bound + 1; ++i) - { - least_divisors.emplace_back(0); - } - } - - catch (const std::exception 
&e) - { - // If exception is thrown it is most likely std::bad_alloc - std::cerr << e.what() << '\n'; - throw; - } - - for (Z i{2}; i <= upper_bound; ++i) { if (least_divisors[i] == 0) From 9512bb6c5e4a008f2930fe21cc3aa627bb60c891 Mon Sep 17 00:00:00 2001 From: mborland Date: Thu, 9 Jul 2020 22:52:12 -0500 Subject: [PATCH 03/83] Cleanup --- .../boost/math/special_functions/prime_functions.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/boost/math/special_functions/prime_functions.hpp b/include/boost/math/special_functions/prime_functions.hpp index 020b5fccbc..e74298c743 100644 --- a/include/boost/math/special_functions/prime_functions.hpp +++ b/include/boost/math/special_functions/prime_functions.hpp @@ -13,7 +13,6 @@ #include #include #include -#include namespace boost { namespace math { @@ -42,17 +41,20 @@ auto prime_sieve(Z lower_bound, Z upper_bound, OutputIterator output) -> decltyp break; } - if (primes[j] > least_divisors[i]) + else if (primes[j] > least_divisors[i]) { break; } - if (i * primes[j] > upper_bound) + else if (i * primes[j] > upper_bound) { break; } - least_divisors[i * primes[j]] = primes[j]; + else + { + least_divisors[i * primes[j]] = primes[j]; + } } } From d762398dd8278094b9226c65ae11dc21485c7452 Mon Sep 17 00:00:00 2001 From: mborland Date: Thu, 9 Jul 2020 22:56:29 -0500 Subject: [PATCH 04/83] Cleanup --- include/boost/math/special_functions/prime_functions.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/prime_functions.hpp b/include/boost/math/special_functions/prime_functions.hpp index e74298c743..59271eaf34 100644 --- a/include/boost/math/special_functions/prime_functions.hpp +++ b/include/boost/math/special_functions/prime_functions.hpp @@ -82,12 +82,15 @@ auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltyp { primes.emplace_back(i); } + ++counter; i = static_cast(boost::math::prime(counter)); } return 
std::move(primes.begin(), primes.end(), output); - } else + } + + else { return prime_sieve(lower_bound, upper_bound, output); } From 5375a1d692716ce34d3d5dc892c47ac99b5f9e7c Mon Sep 17 00:00:00 2001 From: mborland Date: Fri, 10 Jul 2020 13:59:13 -0500 Subject: [PATCH 05/83] Added additional tests, benchmarks, and overflow checks --- .../{prime_functions.hpp => prime_sieve.hpp} | 2 ++ ...rmance.cpp => prime_sieve_performance.cpp} | 28 +++++++++++++++++-- ...ime_functions.cpp => test_prime_sieve.cpp} | 18 ++++++++++-- 3 files changed, 44 insertions(+), 4 deletions(-) rename include/boost/math/special_functions/{prime_functions.hpp => prime_sieve.hpp} (96%) rename reporting/performance/{prime_functions_performance.cpp => prime_sieve_performance.cpp} (52%) rename test/{test_prime_functions.cpp => test_prime_sieve.cpp} (87%) diff --git a/include/boost/math/special_functions/prime_functions.hpp b/include/boost/math/special_functions/prime_sieve.hpp similarity index 96% rename from include/boost/math/special_functions/prime_functions.hpp rename to include/boost/math/special_functions/prime_sieve.hpp index 59271eaf34..709274e1fe 100644 --- a/include/boost/math/special_functions/prime_functions.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace boost { namespace math { @@ -23,6 +24,7 @@ template auto prime_sieve(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); + BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); std::vector least_divisors(upper_bound + 1, 0); std::deque primes; diff --git a/reporting/performance/prime_functions_performance.cpp b/reporting/performance/prime_sieve_performance.cpp similarity index 52% rename from reporting/performance/prime_functions_performance.cpp rename to reporting/performance/prime_sieve_performance.cpp index 
cf08027d6f..2b900b1091 100644 --- a/reporting/performance/prime_functions_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -5,8 +5,7 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include "../../include/boost/math/special_functions/prime_functions.hpp" - +#include #include template @@ -18,6 +17,19 @@ void prime_sieve(benchmark::State& state) std::vector primes; benchmark::DoNotOptimize(boost::math::prime_sieve(static_cast(2), upper, std::back_inserter(primes))); } + state.SetComplexityN(state.range(0)); +} + +template +void prime_range(benchmark::State& state) +{ + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(boost::math::prime_range(static_cast(2), upper, std::back_inserter(primes))); + } + state.SetComplexityN(state.range(0)); } template @@ -30,6 +42,7 @@ void prime_sieve_partial_range(benchmark::State& state) std::vector primes; benchmark::DoNotOptimize(boost::math::prime_sieve(lower, upper, std::back_inserter(primes))); } + state.SetComplexityN(state.range(0)); } BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); @@ -38,5 +51,16 @@ BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 < BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); + +// Direct comparison 
of lookup vs sieve using only range of lookup +BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); +BENCHMARK_TEMPLATE(prime_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); BENCHMARK_MAIN(); diff --git a/test/test_prime_functions.cpp b/test/test_prime_sieve.cpp similarity index 87% rename from test/test_prime_functions.cpp rename to test/test_prime_sieve.cpp index 6aef4e9851..158a6e5b0b 100644 --- a/test/test_prime_functions.cpp +++ b/test/test_prime_sieve.cpp @@ -5,9 +5,9 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include "../include/boost/math/special_functions/prime_functions.hpp" - +#include #include +#include #include #include #include @@ -97,6 +97,16 @@ void test_prime_range() BOOST_TEST_EQ(primes.size(), ref); } +template +void test_prime_sieve_overflow() +{ + std::vector primes; + + // Should die with call to BOOST_ASSERT + boost::math::prime_sieve(static_cast(2), static_cast(std::numeric_limits::max()), + std::back_inserter(primes)); +} + int main() { test_prime_sieve(); @@ -109,5 +119,9 @@ int main() test_prime_range(); test_prime_range(); + test_prime_sieve(); + + //test_prime_sieve_overflow(); + boost::report_errors(); } From a684dbd3c57f7f6fb297d72e24df66b3b3ce2bae Mon Sep 17 00:00:00 2001 From: mborland Date: Fri, 10 Jul 2020 15:55:33 -0500 Subject: [PATCH 06/83] Fix include guard naming --- include/boost/math/special_functions/prime_sieve.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 709274e1fe..a4b79ac526 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -5,8 +5,8 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP -#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP #include #include @@ -105,4 +105,4 @@ inline auto prime_range(Z upper_bound, OutputIterator output) -> decltype(output } }} -#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_FUNCTIONS_HPP +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP From 3e4db8a6097c1ac5f34f340463a8555fe50bb335 Mon Sep 17 00:00:00 2001 From: mborland Date: Sat, 11 Jul 2020 19:35:32 -0500 Subject: [PATCH 07/83] Complete revamp of algorithm. Hide implementation behind detail namespace. 
[CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 140 +++++++++++++----- .../performance/prime_sieve_performance.cpp | 27 +--- test/test_prime_sieve.cpp | 17 +-- 3 files changed, 117 insertions(+), 67 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index a4b79ac526..bf6be6b56e 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -14,88 +14,162 @@ #include #include #include +#include +#include -namespace boost { namespace math +namespace boost { namespace math { namespace detail { - // https://mathworld.wolfram.com/SieveofEratosthenes.html // https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf -template -auto prime_sieve(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +template +void linear_sieve(Z upper_bound, Container &c) { - static_assert(std::is_integral::value, "No primes for floating point types"); - BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - std::vector least_divisors(upper_bound + 1, 0); - std::deque primes; + Z least_divisors_size{upper_bound + 1}; + Z *least_divisors{new Z[least_divisors_size]{0}}; for (Z i{2}; i <= upper_bound; ++i) { if (least_divisors[i] == 0) { least_divisors[i] = i; - primes.emplace_back(i); + c.emplace_back(i); } - for (size_t j{}; j < least_divisors.size(); ++j) + for (size_t j{}; j < least_divisors_size; ++j) { - if (j >= primes.size()) + if (j >= c.size()) { break; } - else if (primes[j] > least_divisors[i]) + else if (c[j] > least_divisors[i]) { break; } - else if (i * primes[j] > upper_bound) + else if (i * c[j] > upper_bound) { break; } else { - least_divisors[i * primes[j]] = primes[j]; + least_divisors[i * c[j]] = c[j]; } } } - auto it{primes.begin()}; - while (*it < lower_bound && it != primes.end()) + delete[] least_divisors; +} + +template +void prime_table(Z upper_bound, 
Container &c) +{ + Z i{2}; + unsigned counter{}; + + while (i <= upper_bound && counter < 9999) // 10k elements are in the lookup table { - primes.pop_front(); - ++it; + c.emplace_back(i); + ++counter; + i = static_cast(boost::math::prime(counter)); } - - return std::move(primes.begin(), primes.end(), output); } -template -auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +template +void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { - if (upper_bound <= 104729) + Z limit{static_cast(std::floor(std::sqrt(upper_bound))) + 1}; + std::vector primes; + primes.reserve(limit / std::log(limit)); + + boost::math::detail::linear_sieve(limit, primes); + + const Z n{upper_bound - lower_bound + 1}; + bool *mask{new bool[n + 1]{false}}; + + for (size_t i{}; i < primes.size(); ++i) { - Z i{2}; - unsigned counter {}; - std::deque primes; - while (i <= upper_bound) + Z lower_limit = std::floor(lower_bound / primes[i]) * primes[i]; + + if (lower_limit < lower_bound) + { + lower_limit += primes[i]; + } + + if (lower_limit == primes[i]) + { + lower_limit += primes[i]; + } + + for (Z j{lower_limit}; j <= upper_bound; j += primes[i]) + { + mask[j - lower_bound] = true; + } + } + + // Numbers which are not masked in range, are prime + for (Z i{lower_bound}; i <= upper_bound; i++) + { + if (!mask[i - lower_bound]) { if (i >= lower_bound) { - primes.emplace_back(i); + c.emplace_back(i); } - - ++counter; - i = static_cast(boost::math::prime(counter)); } + } + + delete[] mask; +} +} // End namespace detail + +template +auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) +{ + static_assert(std::is_integral::value, "No primes for floating point types"); + BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - return std::move(primes.begin(), primes.end(), output); + std::vector primes; + primes.reserve(upper_bound / std::log(upper_bound)); + + if (upper_bound <= 104729) + { + 
boost::math::detail::prime_table(upper_bound, primes); } else { - return prime_sieve(lower_bound, upper_bound, output); + std::vector small_primes; + small_primes.reserve(1000); + + // Spilt into two vectors and merge after joined to avoid data races + std::thread t1([upper_bound, &small_primes]{boost::math::detail::prime_table(static_cast(104729), small_primes);}); + std::thread t2([upper_bound, &primes]{boost::math::detail::mask_sieve(static_cast(104729), upper_bound, primes);}); + + t1.join(); + t2.join(); + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } + + return std::move(primes.begin(), primes.end(), output); +} + +template +auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +{ + std::vector primes; + primes.reserve(upper_bound / std::log(upper_bound)); + + boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); + + auto it{primes.begin()}; + while(*it < lower_bound && it != primes.end()) + { + ++it; } + + return std::move(it, primes.end(), output); } template diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 2b900b1091..d627b12f9d 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -15,19 +15,7 @@ void prime_sieve(benchmark::State& state) for(auto _ : state) { std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_sieve(static_cast(2), upper, std::back_inserter(primes))); - } - state.SetComplexityN(state.range(0)); -} - -template -void prime_range(benchmark::State& state) -{ - Z upper = static_cast(state.range(0)); - for(auto _ : state) - { - std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_range(static_cast(2), upper, std::back_inserter(primes))); + benchmark::DoNotOptimize(boost::math::prime_sieve(upper, std::back_inserter(primes))); } state.SetComplexityN(state.range(0)); } @@ -40,7 +28,7 @@ void 
prime_sieve_partial_range(benchmark::State& state) for(auto _ : state) { std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_sieve(lower, upper, std::back_inserter(primes))); + benchmark::DoNotOptimize(boost::math::prime_range(lower, upper, std::back_inserter(primes))); } state.SetComplexityN(state.range(0)); } @@ -51,16 +39,5 @@ BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 < BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); - -// Direct comparison of lookup vs sieve using only range of lookup -BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); -BENCHMARK_TEMPLATE(prime_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); BENCHMARK_MAIN(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 158a6e5b0b..83acd17ca1 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -19,38 +19,38 @@ void test_prime_sieve() Z ref {168}; // 
Calculated with wolfram-alpha // Does the function work with a vector - boost::math::prime_sieve(2, 1000, std::back_inserter(primes)); + boost::math::prime_sieve(1000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), ref); // Tests for correctness // 100 primes.clear(); - boost::math::prime_sieve(2, 100, std::back_inserter(primes)); + boost::math::prime_sieve(100, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), 25); // 10'000 primes.clear(); - boost::math::prime_sieve(2, 10000, std::back_inserter(primes)); + boost::math::prime_sieve(10000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(2, 100000, std::back_inserter(primes)); + boost::math::prime_sieve(100000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 primes.clear(); - boost::math::prime_sieve(2, 1000000, std::back_inserter(primes)); + boost::math::prime_sieve(1000000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), 78498); // Does the function work with a list? std::list l_primes; - boost::math::prime_sieve(2, 1000, std::back_inserter(l_primes)); + boost::math::prime_sieve(1000, std::back_inserter(l_primes)); BOOST_TEST_EQ(l_primes.size(), ref); // Does the function work with a deque? 
std::deque d_primes; - boost::math::prime_sieve(2, 1000, std::back_inserter(d_primes)); + boost::math::prime_sieve(1000, std::back_inserter(d_primes)); BOOST_TEST_EQ(d_primes.size(), ref); } @@ -109,6 +109,7 @@ void test_prime_sieve_overflow() int main() { + test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); @@ -119,8 +120,6 @@ int main() test_prime_range(); test_prime_range(); - test_prime_sieve(); - //test_prime_sieve_overflow(); boost::report_errors(); From dd8a61c8641985a24816a32cfd942afe1e5c73ea Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 12 Jul 2020 12:00:12 -0500 Subject: [PATCH 08/83] Re-added support and tests for boost::multiprecision::cpp_int [CI SKIP] --- include/boost/math/special_functions/prime_sieve.hpp | 5 ++--- test/test_prime_sieve.cpp | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index bf6be6b56e..541cb9c9e9 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -79,7 +78,7 @@ void prime_table(Z upper_bound, Container &c) template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { - Z limit{static_cast(std::floor(std::sqrt(upper_bound))) + 1}; + Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; std::vector primes; primes.reserve(limit / std::log(limit)); @@ -159,7 +158,7 @@ template auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { std::vector primes; - primes.reserve(upper_bound / std::log(upper_bound)); + primes.reserve(upper_bound / std::log(static_cast(upper_bound))); boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 83acd17ca1..d7dc8af83a 100644 --- a/test/test_prime_sieve.cpp +++ 
b/test/test_prime_sieve.cpp @@ -122,5 +122,7 @@ int main() //test_prime_sieve_overflow(); + test_prime_sieve(); + boost::report_errors(); } From 4debd0d4a33abeb7cc49c30b45c078c66d41a580 Mon Sep 17 00:00:00 2001 From: mborland Date: Mon, 13 Jul 2020 12:04:03 -0500 Subject: [PATCH 09/83] Changed benchmarks to support threading [CI SKIP] --- reporting/performance/prime_sieve_performance.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index d627b12f9d..f108797ae0 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -33,11 +33,11 @@ void prime_sieve_partial_range(benchmark::State& state) state.SetComplexityN(state.range(0)); } -BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(); +BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); 
+BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_MAIN(); From 2a7e03129f4f4f3e42c703533c4ff7b87420fe2b Mon Sep 17 00:00:00 2001 From: mborland Date: Mon, 13 Jul 2020 21:27:14 -0500 Subject: [PATCH 10/83] Added execution policies. Increased performance for dynamically linked libraries. Fixed -Wextra errors. [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 89 +++++++++++++++---- .../performance/prime_sieve_performance.cpp | 4 +- test/test_prime_sieve.cpp | 55 +++++++++++- 3 files changed, 123 insertions(+), 25 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 541cb9c9e9..f8bccfccb6 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -14,7 +14,11 @@ #include #include #include + +#if __has_include() #include +#include +#endif namespace boost { namespace math { namespace detail { @@ -23,7 +27,7 @@ namespace boost { namespace math { namespace detail template void linear_sieve(Z upper_bound, Container &c) { - Z least_divisors_size{upper_bound + 1}; + size_t least_divisors_size{static_cast(upper_bound + 1)}; Z *least_divisors{new Z[least_divisors_size]{0}}; for (Z i{2}; i <= upper_bound; ++i) @@ -123,8 +127,9 @@ void mask_sieve(Z lower_bound, Z upper_bound, Container &c) } } // End namespace detail -template -auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) +#if __has_include() +template +auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); @@ -132,35 
+137,47 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) std::vector primes; primes.reserve(upper_bound / std::log(upper_bound)); - if (upper_bound <= 104729) + if (upper_bound <= 8192) { - boost::math::detail::prime_table(upper_bound, primes); + boost::math::detail::linear_sieve(upper_bound, primes); } else { - std::vector small_primes; - small_primes.reserve(1000); - - // Spilt into two vectors and merge after joined to avoid data races - std::thread t1([upper_bound, &small_primes]{boost::math::detail::prime_table(static_cast(104729), small_primes);}); - std::thread t2([upper_bound, &primes]{boost::math::detail::mask_sieve(static_cast(104729), upper_bound, primes);}); + if constexpr (std::is_same_v) + { + boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + } - t1.join(); - t2.join(); - primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + else + { + std::vector small_primes; + small_primes.reserve(1000); + + // Split into two vectors and merge after joined to avoid data races + std::thread t1([upper_bound, &small_primes] { + boost::math::detail::prime_table(static_cast(8192), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::mask_sieve(static_cast(8192), upper_bound, primes); + }); + + t1.join(); + t2.join(); + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } } return std::move(primes.begin(), primes.end(), output); } -template -auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +template +auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { std::vector primes; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); + boost::math::prime_sieve(policy, upper_bound, std::back_inserter(primes)); auto it{primes.begin()}; while(*it < lower_bound && it != 
primes.end()) @@ -170,11 +187,45 @@ auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltyp return std::move(it, primes.end(), output); } +#endif //__has_include() + +template +auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) +{ + static_assert(std::is_integral::value, "No primes for floating point types"); + BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); + + std::vector primes; + primes.reserve(upper_bound / std::log(upper_bound)); + + if (upper_bound <= 8192) + { + boost::math::detail::linear_sieve(upper_bound, primes); + } + + else + { + boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + } + + return std::move(primes.begin(), primes.end(), output); +} template -inline auto prime_range(Z upper_bound, OutputIterator output) -> decltype(output) +auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { - return prime_range(static_cast(2), upper_bound, output); + std::vector primes; + primes.reserve(upper_bound / std::log(static_cast(upper_bound))); + + boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); + + auto it{primes.begin()}; + while(*it < lower_bound && it != primes.end()) + { + ++it; + } + + return std::move(it, primes.end(), output); } }} diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index f108797ae0..1f421e26d1 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -15,7 +15,7 @@ void prime_sieve(benchmark::State& state) for(auto _ : state) { std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_sieve(upper, std::back_inserter(primes))); + benchmark::DoNotOptimize(boost::math::prime_sieve(std::execution::par, upper, std::back_inserter(primes))); } state.SetComplexityN(state.range(0)); } @@ -28,7 +28,7 @@ void prime_sieve_partial_range(benchmark::State& 
state) for(auto _ : state) { std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_range(lower, upper, std::back_inserter(primes))); + benchmark::DoNotOptimize(boost::math::prime_range(std::execution::par, lower, upper, std::back_inserter(primes))); } state.SetComplexityN(state.range(0)); } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index d7dc8af83a..c725bb71d8 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -66,17 +66,17 @@ void test_prime_range() // Does the upper bound call work primes.clear(); - boost::math::prime_range(1000, std::back_inserter(primes)); + boost::math::prime_range(2, 1000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), ref); // Does it work with a deque? std::deque d_primes; - boost::math::prime_range(1000, std::back_inserter(d_primes)); + boost::math::prime_range(2, 1000, std::back_inserter(d_primes)); BOOST_TEST_EQ(d_primes.size(), ref); // Does it work with a list? std::list l_primes; - boost::math::prime_range(1000, std::front_inserter(l_primes)); + boost::math::prime_range(2, 1000, std::front_inserter(l_primes)); BOOST_TEST_EQ(l_primes.size(), ref); // Does the lower bound change the results? 
@@ -93,7 +93,7 @@ void test_prime_range() // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(1000000, std::back_inserter(primes)); + boost::math::prime_range(2, 1000000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), ref); } @@ -107,6 +107,48 @@ void test_prime_sieve_overflow() std::back_inserter(primes)); } +template +void test_par_prime_sieve() +{ + std::vector primes; + Z ref {168}; // Calculated with wolfram-alpha + + // Does the function work with a vector + boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); + + // Tests for correctness + // 100 + primes.clear(); + boost::math::prime_sieve(std::execution::par, 100, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 25); + + // 10'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, 10000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 1229); + + // 100'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, 100000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 9592); + + // 1'000'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, 1000000, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 78498); + + // Does the function work with a list? + std::list l_primes; + boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(l_primes)); + BOOST_TEST_EQ(l_primes.size(), ref); + + // Does the function work with a deque? 
+ std::deque d_primes; + boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(d_primes)); + BOOST_TEST_EQ(d_primes.size(), ref); +} + int main() { @@ -124,5 +166,10 @@ int main() test_prime_sieve(); + test_par_prime_sieve(); + test_par_prime_sieve(); + test_par_prime_sieve(); + test_par_prime_sieve(); + boost::report_errors(); } From a68910ed2fc16ec14ff267c1d016a81deb681fc9 Mon Sep 17 00:00:00 2001 From: mborland Date: Tue, 14 Jul 2020 13:31:20 -0500 Subject: [PATCH 11/83] Added massively parallel section to prime_sieve. Increased length of benchmark for int64_t. --- .../math/special_functions/prime_sieve.hpp | 85 ++++++++++++++++--- .../performance/prime_sieve_performance.cpp | 2 +- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index f8bccfccb6..3b9830281e 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -135,28 +135,39 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); std::vector primes; - primes.reserve(upper_bound / std::log(upper_bound)); + primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - if (upper_bound <= 8192) + // Range for when the linear sieve is no longer faster than threading + if (upper_bound < 8192) { boost::math::detail::linear_sieve(upper_bound, primes); } + + else if (std::is_same_v) + { + boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + } + else { - if constexpr (std::is_same_v) + unsigned processor_count {std::thread::hardware_concurrency()}; + + // May return 0 when unable to detect + if(processor_count == 0) { - boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + processor_count = 2; } - else - { - std::vector small_primes; - 
small_primes.reserve(1000); + std::vector small_primes; + small_primes.reserve(1000); + // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads + if(upper_bound <= 16777216 || processor_count == 2) + { // Split into two vectors and merge after joined to avoid data races - std::thread t1([upper_bound, &small_primes] { - boost::math::detail::prime_table(static_cast(8192), small_primes); + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(8192), small_primes); }); std::thread t2([upper_bound, &primes] { boost::math::detail::mask_sieve(static_cast(8192), upper_bound, primes); @@ -166,6 +177,60 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) t2.join(); primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); } + + //If sufficiently large upper bound spawn as many threads as the system has processors for + else + { + std::vector thread_manager; + std::vector> prime_vectors(processor_count - 1); + const Z range_per_thread = upper_bound / (processor_count - 1); + Z current_lower_bound {8192}; + Z current_upper_bound {current_lower_bound + range_per_thread}; + Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + current_lower_bound / std::log(static_cast(current_lower_bound)))}; + + std::thread t1([upper_bound, &small_primes] { + boost::math::detail::linear_sieve(static_cast(8192), small_primes); + }); + thread_manager.push_back(std::move(t1)); + + for(size_t i{1}; i < processor_count - 1; ++i) + { + std::vector temp; + temp.reserve(primes_in_range); + prime_vectors.emplace_back(temp); + std::thread t([current_lower_bound, current_upper_bound, &prime_vectors, i] { + boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, prime_vectors[i]); + }); + + thread_manager.push_back(std::move(t)); + + current_lower_bound = current_upper_bound; + current_upper_bound += 
range_per_thread; + primes_in_range = static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + current_lower_bound / std::log(static_cast(current_lower_bound))); + } + + std::vector temp; + temp.reserve(primes_in_range); + prime_vectors.emplace_back(temp); + std::thread t([current_lower_bound, upper_bound, &prime_vectors] { + boost::math::detail::mask_sieve(current_lower_bound, upper_bound, prime_vectors.back()); + }); + + thread_manager.push_back(std::move(t)); + + for(auto &thread : thread_manager) + { + thread.join(); + } + + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + for(auto &v : prime_vectors) + { + primes.insert(primes.begin(), v.begin(), v.end()); + } + } } return std::move(primes.begin(), primes.end(), output); diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 1f421e26d1..ccfa98bd1f 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -34,7 +34,7 @@ void prime_sieve_partial_range(benchmark::State& state) } BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); From 386cc4d0201154a51f5e39973bc9d981ef484d88 Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 15 Jul 2020 12:54:56 -0500 Subject: [PATCH 12/83] Add 
test for multi-threading section and add to Jamfile [CI SKIP] --- test/Jamfile.v2 | 1 + test/test_prime_sieve.cpp | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 4833b6de3f..16acfcde8e 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -966,6 +966,7 @@ test-suite misc : [ run compile_test/catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ] [ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run test_prime_sieve.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index c725bb71d8..0d989de2d9 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -149,6 +149,17 @@ void test_par_prime_sieve() BOOST_TEST_EQ(d_primes.size(), ref); } +template +void test_par_prime_sieve_large() +{ + std::vector primes; + Z ref {1077871}; // Calculated with wolfram-alpha + + // Force the sieve into the multi-threading section + boost::math::prime_sieve(std::execution::par, 16777217, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), ref); +} + int main() { @@ -171,5 +182,7 @@ int main() test_par_prime_sieve(); test_par_prime_sieve(); + //test_par_prime_sieve_large(); + boost::report_errors(); } From d79eddb6cfb41dd4d088ea8a13e7a22252c7c692 Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 15 
Jul 2020 13:40:02 -0500 Subject: [PATCH 13/83] Added prime sieve to existing prime numbers documentation [CI SKIP] --- doc/sf/number_series.qbk | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 970b3c7937..9ae8d43477 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -259,6 +259,66 @@ Passing a value greater than `max_prime` results in a __domain_error being raise This function is `constexpr` only if the compiler supports C++14 constexpr functions. +[endsect] [/section:primes Prime Numbers] + +[section:primes Prime Sieve] + +[h4 Synopsis] + +`` +#include +`` + +namespace boost { namespace math { + + template + auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) + + template + auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) + + template + auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) + + template + auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) + + +}} // End namespaces + +[h4 Description] + +There are two sets of functions available `prime_sieve` and `prime_range`. `prime_sieve` will return all primes in the +range 2 to `upper_bound` inclusive. `prime_range` will return all the primes in the range `lower_bound` to `upper_bound` +inclusive. + +If you have a C++17 compatible compiler you are able to pass an execution policy to both both functions. Any +policy besides `std::execution::seq` will enable internal multi-threading. + +For upper_bound <= 2^24 two threads will be used. For any value larger than 2^24 `std::thread::hardware_concurrency()` will be called, +and all available concurrency will be used. Additionally, the memory requirements are `O(sqrt(N)`. 
+ +/Nota bene:/ If `std::thread::hardware_concurrency()` returns 0 the max number of threads will be set to 2. + +Example: + // To calculate primes 2 - 1,000,000 in parallel + std::vector primes; + boost::math::prime_sieve(std::execution::par, 1000000, std::back_inserter(primes)); + + // To calculate primes 100 - 1,000 sequentially + std::vector primes; + boost::math::prime_range(100, 1000, std::back_inserter(primes)); + + +/Nota bene:/ For values larger than 2^24 the returned primes will not be sorted + + +[h4 References] +* Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] +* Gries, David and Misra, Jayadev [@https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf A Linear Sieve Algorithm for Finding Prime Numbers] + +[endsect] [/section:primes Prime Sieve] + [endsect] [/section:primes] [endsect] [/Number Series] From 3fdf917d98b2e50f722a833bbb6c097b6a4fddd8 Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 15 Jul 2020 14:34:29 -0500 Subject: [PATCH 14/83] Revisions to documentation and send to CI --- doc/sf/number_series.qbk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 9ae8d43477..069a390448 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -292,11 +292,12 @@ There are two sets of functions available `prime_sieve` and `prime_range`. `prim range 2 to `upper_bound` inclusive. `prime_range` will return all the primes in the range `lower_bound` to `upper_bound` inclusive. -If you have a C++17 compatible compiler you are able to pass an execution policy to both both functions. Any -policy besides `std::execution::seq` will enable internal multi-threading. +If you have a C++17 compatible compiler you are able to pass an execution policy to both functions. Any +policy besides `std::execution::seq` will enable internal multi-threading. 
If your compiler is not C++17 compatible the +sequential overloads will be used. For upper_bound <= 2^24 two threads will be used. For any value larger than 2^24 `std::thread::hardware_concurrency()` will be called, -and all available concurrency will be used. Additionally, the memory requirements are `O(sqrt(N)`. +and all available concurrency will be used. Additionally, the memory requirements are `O(sqrt(N))`. /Nota bene:/ If `std::thread::hardware_concurrency()` returns 0 the max number of threads will be set to 2. From 6ca245be27055ec09dfec82e3efb4d5df91374fe Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 15 Jul 2020 15:16:21 -0500 Subject: [PATCH 15/83] Changed include guards to be compatible with C++11 and 14. --- include/boost/math/special_functions/prime_sieve.hpp | 8 ++++---- test/test_prime_sieve.cpp | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 3b9830281e..05e5390648 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -14,9 +14,9 @@ #include #include #include - -#if __has_include() #include + +#if __cplusplus >= 201703 #include #endif @@ -127,7 +127,7 @@ void mask_sieve(Z lower_bound, Z upper_bound, Container &c) } } // End namespace detail -#if __has_include() +#if __cplusplus >= 201703 template auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) { @@ -252,7 +252,7 @@ auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputI return std::move(it, primes.end(), output); } -#endif //__has_include() +#endif //__cplusplus >= 201703 template auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 0d989de2d9..5d9a9da6fe 100644 --- a/test/test_prime_sieve.cpp +++ 
b/test/test_prime_sieve.cpp @@ -107,6 +107,7 @@ void test_prime_sieve_overflow() std::back_inserter(primes)); } +#if __cplusplus >= 201703 template void test_par_prime_sieve() { @@ -159,6 +160,7 @@ void test_par_prime_sieve_large() boost::math::prime_sieve(std::execution::par, 16777217, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), ref); } +#endif //__cplusplus >= 201703 int main() { @@ -177,12 +179,14 @@ int main() test_prime_sieve(); + #if __cplusplus >= 201703 test_par_prime_sieve(); test_par_prime_sieve(); test_par_prime_sieve(); test_par_prime_sieve(); //test_par_prime_sieve_large(); + #endif boost::report_errors(); } From 7d3a52037f5b0318ed749fec52ee0300ba68439b Mon Sep 17 00:00:00 2001 From: mborland Date: Wed, 15 Jul 2020 17:19:47 -0500 Subject: [PATCH 16/83] Fixed doc, and ensured that primes are sorted --- doc/sf/number_series.qbk | 12 +++--------- include/boost/math/special_functions/prime_sieve.hpp | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 069a390448..4f5b4107c3 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -261,7 +261,7 @@ This function is `constexpr` only if the compiler supports C++14 constexpr funct [endsect] [/section:primes Prime Numbers] -[section:primes Prime Sieve] +[section:prime_sieve Prime Sieve] [h4 Synopsis] @@ -301,7 +301,7 @@ and all available concurrency will be used. Additionally, the memory requirement /Nota bene:/ If `std::thread::hardware_concurrency()` returns 0 the max number of threads will be set to 2. 
-Example: +[h4 Examples] // To calculate primes 2 - 1,000,000 in parallel std::vector primes; boost::math::prime_sieve(std::execution::par, 1000000, std::back_inserter(primes)); @@ -310,19 +310,13 @@ Example: std::vector primes; boost::math::prime_range(100, 1000, std::back_inserter(primes)); - -/Nota bene:/ For values larger than 2^24 the returned primes will not be sorted - - [h4 References] * Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] * Gries, David and Misra, Jayadev [@https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf A Linear Sieve Algorithm for Finding Prime Numbers] [endsect] [/section:primes Prime Sieve] -[endsect] [/section:primes] - -[endsect] [/Number Series] +[endsect] [/section:number_series Number Series] [/ Copyright 2013, 2014 Nikhar Agrawal, Christopher Kormanyos, John Maddock, Paul A. Bristow. diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 05e5390648..97683acc8b 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -228,7 +228,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); for(auto &v : prime_vectors) { - primes.insert(primes.begin(), v.begin(), v.end()); + primes.insert(primes.end(), v.begin(), v.end()); } } } From a1ac504ebbd76faf7b25ac08228c569f80090356 Mon Sep 17 00:00:00 2001 From: mborland Date: Thu, 16 Jul 2020 18:02:24 -0500 Subject: [PATCH 17/83] Fixed documentation. Complete re-design of mask_sieve algo. Pre-generation of primes to reduce duplication and memory usage. Segmentation now fits L1 cache. 
[CI SKIP] --- doc/sf/number_series.qbk | 4 +- .../math/special_functions/prime_sieve.hpp | 159 +++++++++++++----- 2 files changed, 122 insertions(+), 41 deletions(-) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 4f5b4107c3..adb108a4f5 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -296,8 +296,8 @@ If you have a C++17 compatible compiler you are able to pass an execution policy policy besides `std::execution::seq` will enable internal multi-threading. If your compiler is not C++17 compatible the sequential overloads will be used. -For upper_bound <= 2^24 two threads will be used. For any value larger than 2^24 `std::thread::hardware_concurrency()` will be called, -and all available concurrency will be used. Additionally, the memory requirements are `O(sqrt(N))`. +For upper_bound <= 2[super 24] two threads will be used. For any value larger than 2[super 24] `std::thread::hardware_concurrency()` will be called, +and all available concurrency will be used. Additionally, the memory requirements are `bigo[](sqrt(N))`. /Nota bene:/ If `std::thread::hardware_concurrency()` returns 0 the max number of threads will be set to 2. diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 97683acc8b..cad366bf5d 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -79,7 +79,56 @@ void prime_table(Z upper_bound, Container &c) } } -template +//https://core.ac.uk/download/pdf/62440589.pdf +template +void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Container &c) +{ + Z limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + + size_t primes_size; + auto it{primes.begin()}; + while(*it < limit && it != primes.end()) + { + ++primes_size; + ++it; + } + + // Needed for thread sanitizer. Throws FPE without this check. 
+ if(primes_size > primes.size()) + { + primes_size = primes.size(); + } + + // Faster and safer than std::vector which does not behave like all other vectors + const size_t n {static_cast(upper_bound - lower_bound + 1)}; + bool* is_prime {new bool[n]}; + memset(is_prime, true, sizeof(*is_prime) * (n)); + + for(auto i : primes) + { + for(Z j {std::max(i * i, (lower_bound + i - 1) / i * i)}; j <= upper_bound; j += i) + { + is_prime[j - lower_bound] = false; + } + } + + if(lower_bound == 1) + { + is_prime[0] = false; + } + + for(Z i{lower_bound}; i <= upper_bound; ++i) + { + if(is_prime[i - lower_bound]) + { + c.emplace_back(i); + } + } + + delete[] is_prime; +} + +template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; @@ -88,42 +137,53 @@ void mask_sieve(Z lower_bound, Z upper_bound, Container &c) boost::math::detail::linear_sieve(limit, primes); - const Z n{upper_bound - lower_bound + 1}; - bool *mask{new bool[n + 1]{false}}; + boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, c); +} + - for (size_t i{}; i < primes.size(); ++i) + +template +void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes, Container &c) +{ + const Z L1_SIZE {32648}; + const Z interval {static_cast(std::floor(L1_SIZE / sizeof(Z)))}; + Z current_lower_bound{lower_bound}; + Z current_upper_bound{current_lower_bound + interval}; + + if(current_upper_bound > upper_bound) { - Z lower_limit = std::floor(lower_bound / primes[i]) * primes[i]; + current_upper_bound = upper_bound; + } - if (lower_limit < lower_bound) - { - lower_limit += primes[i]; - } + while(current_upper_bound < (upper_bound - interval)) + { + boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, primes, c); + current_lower_bound = current_upper_bound + 1; + current_upper_bound += interval; + } - if (lower_limit == primes[i]) - { - lower_limit += primes[i]; - } + 
boost::math::detail::mask_sieve(current_lower_bound, upper_bound, primes, c); +} - for (Z j{lower_limit}; j <= upper_bound; j += primes[i]) - { - mask[j - lower_bound] = true; - } +template +void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) +{ + Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector primes; + primes.reserve(limit / std::log(limit)); + + // Prepare for max value so you do not have to calculate this again + if(limit < 8192) + { + boost::math::detail::linear_sieve(limit, primes); } - // Numbers which are not masked in range, are prime - for (Z i{lower_bound}; i <= upper_bound; i++) + else { - if (!mask[i - lower_bound]) - { - if (i >= lower_bound) - { - c.emplace_back(i); - } - } + boost::math::detail::mask_sieve(static_cast(2), limit, primes); } - delete[] mask; + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, c); } } // End namespace detail @@ -146,7 +206,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) else if (std::is_same_v) { - boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); } else @@ -170,7 +230,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) boost::math::detail::linear_sieve(static_cast(8192), small_primes); }); std::thread t2([upper_bound, &primes] { - boost::math::detail::mask_sieve(static_cast(8192), upper_bound, primes); + boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); }); t1.join(); @@ -181,6 +241,30 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) //If sufficiently large upper bound spawn as many threads as the system has processors for else { + //Pre-generate all of the primes so that each thread does not have to + Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector pre_generated_primes; + 
pre_generated_primes.reserve(limit / std::log(limit)); + + if(limit < 8192) + { + boost::math::detail::linear_sieve(limit, pre_generated_primes); + } + + else + { + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(8192), small_primes); + }); + std::thread t2([limit, &pre_generated_primes] { + boost::math::detail::segmented_sieve(static_cast(8192), limit, pre_generated_primes); + }); + + t1.join(); + t2.join(); + pre_generated_primes.insert(pre_generated_primes.begin(), small_primes.begin(), small_primes.end()); + } + std::vector thread_manager; std::vector> prime_vectors(processor_count - 1); const Z range_per_thread = upper_bound / (processor_count - 1); @@ -189,18 +273,15 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; - std::thread t1([upper_bound, &small_primes] { - boost::math::detail::linear_sieve(static_cast(8192), small_primes); - }); - thread_manager.push_back(std::move(t1)); - for(size_t i{1}; i < processor_count - 1; ++i) { std::vector temp; temp.reserve(primes_in_range); prime_vectors.emplace_back(temp); - std::thread t([current_lower_bound, current_upper_bound, &prime_vectors, i] { - boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, prime_vectors[i]); + + std::thread t([current_lower_bound, current_upper_bound, &prime_vectors, i, &pre_generated_primes] { + boost::math::detail::segmented_sieve(current_lower_bound, current_upper_bound, pre_generated_primes, + prime_vectors[i]); }); thread_manager.push_back(std::move(t)); @@ -214,10 +295,10 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) std::vector temp; temp.reserve(primes_in_range); prime_vectors.emplace_back(temp); - std::thread t([current_lower_bound, upper_bound, &prime_vectors] { - 
boost::math::detail::mask_sieve(current_lower_bound, upper_bound, prime_vectors.back()); - }); + std::thread t([current_lower_bound, upper_bound, &prime_vectors, &pre_generated_primes] { + boost::math::detail::segmented_sieve(current_lower_bound, upper_bound, pre_generated_primes, prime_vectors.back()); + }); thread_manager.push_back(std::move(t)); for(auto &thread : thread_manager) From 35d2aa1ec7230f24e1d6a21efe81f15103426a5a Mon Sep 17 00:00:00 2001 From: mborland Date: Fri, 17 Jul 2020 14:22:35 -0500 Subject: [PATCH 18/83] All vectors now init {}. Change include guards. Replace raw pointer. --- .../math/special_functions/prime_sieve.hpp | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index cad366bf5d..ef70e108df 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -15,10 +15,17 @@ #include #include #include +#include +#ifdef _MSVC_LANG +#if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +#include +#endif +#else #if __cplusplus >= 201703 #include #endif +#endif namespace boost { namespace math { namespace detail { @@ -28,7 +35,7 @@ template void linear_sieve(Z upper_bound, Container &c) { size_t least_divisors_size{static_cast(upper_bound + 1)}; - Z *least_divisors{new Z[least_divisors_size]{0}}; + std::unique_ptr least_divisors{new Z[least_divisors_size]{0}}; for (Z i{2}; i <= upper_bound; ++i) { @@ -61,8 +68,6 @@ void linear_sieve(Z upper_bound, Container &c) } } } - - delete[] least_divisors; } template @@ -85,20 +90,14 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont { Z limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - size_t primes_size; + size_t primes_size {}; auto it{primes.begin()}; - 
while(*it < limit && it != primes.end()) + while(it != primes.end() && *it < limit) { ++primes_size; ++it; } - // Needed for thread sanitizer. Throws FPE without this check. - if(primes_size > primes.size()) - { - primes_size = primes.size(); - } - // Faster and safer than std::vector which does not behave like all other vectors const size_t n {static_cast(upper_bound - lower_bound + 1)}; bool* is_prime {new bool[n]}; @@ -132,7 +131,7 @@ template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector primes; + std::vector primes {}; primes.reserve(limit / std::log(limit)); boost::math::detail::linear_sieve(limit, primes); @@ -146,7 +145,7 @@ template void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes, Container &c) { const Z L1_SIZE {32648}; - const Z interval {static_cast(std::floor(L1_SIZE / sizeof(Z)))}; + const Z interval {L1_SIZE * 4}; Z current_lower_bound{lower_bound}; Z current_upper_bound{current_lower_bound + interval}; @@ -169,7 +168,7 @@ template void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector primes; + std::vector primes {}; primes.reserve(limit / std::log(limit)); // Prepare for max value so you do not have to calculate this again @@ -194,7 +193,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - std::vector primes; + std::vector primes {}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); // Range for when the linear sieve is no longer faster than threading @@ -219,7 +218,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) processor_count = 2; } - std::vector small_primes; + 
std::vector small_primes {}; small_primes.reserve(1000); // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads @@ -243,7 +242,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) { //Pre-generate all of the primes so that each thread does not have to Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector pre_generated_primes; + std::vector pre_generated_primes {}; pre_generated_primes.reserve(limit / std::log(limit)); if(limit < 8192) @@ -265,7 +264,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) pre_generated_primes.insert(pre_generated_primes.begin(), small_primes.begin(), small_primes.end()); } - std::vector thread_manager; + std::vector thread_manager {}; std::vector> prime_vectors(processor_count - 1); const Z range_per_thread = upper_bound / (processor_count - 1); Z current_lower_bound {8192}; @@ -275,7 +274,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) for(size_t i{1}; i < processor_count - 1; ++i) { - std::vector temp; + std::vector temp {}; temp.reserve(primes_in_range); prime_vectors.emplace_back(temp); @@ -292,7 +291,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) current_lower_bound / std::log(static_cast(current_lower_bound))); } - std::vector temp; + std::vector temp {}; temp.reserve(primes_in_range); prime_vectors.emplace_back(temp); @@ -320,7 +319,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) template auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { - std::vector primes; + std::vector primes {}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); boost::math::prime_sieve(policy, upper_bound, std::back_inserter(primes)); @@ -341,7 +340,7 @@ auto prime_sieve(Z upper_bound, OutputIterator 
output) -> decltype(output) static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - std::vector primes; + std::vector primes{}; primes.reserve(upper_bound / std::log(upper_bound)); if (upper_bound <= 8192) @@ -351,7 +350,7 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) else { - boost::math::detail::mask_sieve(static_cast(2), upper_bound, primes); + boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); } return std::move(primes.begin(), primes.end(), output); @@ -360,7 +359,7 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) template auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { - std::vector primes; + std::vector primes{}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); From 243a2997e1e2c375de9ee63610393c0a549bf305 Mon Sep 17 00:00:00 2001 From: mborland Date: Fri, 17 Jul 2020 20:02:00 -0500 Subject: [PATCH 19/83] Changed from [lower_bound, upper_bound] to [lower_bound, upper_bound). Edge test case removed. Fixed bad vector indexing. Raw pointers replaced with smart pointers. 
--- .../math/special_functions/prime_sieve.hpp | 30 +++++++++---------- test/test_prime_sieve.cpp | 10 ++----- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index ef70e108df..5438c86e1d 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -98,10 +98,9 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont ++it; } - // Faster and safer than std::vector which does not behave like all other vectors const size_t n {static_cast(upper_bound - lower_bound + 1)}; - bool* is_prime {new bool[n]}; - memset(is_prime, true, sizeof(*is_prime) * (n)); + std::unique_ptr is_prime {new bool[n]}; + memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); for(auto i : primes) { @@ -123,8 +122,6 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont c.emplace_back(i); } } - - delete[] is_prime; } template @@ -186,13 +183,15 @@ void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) } } // End namespace detail -#if __cplusplus >= 201703 +#if __cplusplus >= 201703 || _MSVC_LANG >= 201703 template auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); + --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be + std::vector primes {}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); @@ -222,7 +221,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) small_primes.reserve(1000); // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads - if(upper_bound <= 16777216 || processor_count == 
2) + if(upper_bound < 16777216 || processor_count == 2) { // Split into two vectors and merge after joined to avoid data races std::thread t1([&small_primes] { @@ -265,18 +264,16 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) } std::vector thread_manager {}; - std::vector> prime_vectors(processor_count - 1); - const Z range_per_thread = upper_bound / (processor_count - 1); + std::vector> prime_vectors(processor_count); + const Z range_per_thread = upper_bound / (processor_count); Z current_lower_bound {8192}; Z current_upper_bound {current_lower_bound + range_per_thread}; Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; - for(size_t i{1}; i < processor_count - 1; ++i) + for(size_t i{}; i < processor_count - 1; ++i) { - std::vector temp {}; - temp.reserve(primes_in_range); - prime_vectors.emplace_back(temp); + prime_vectors[i].reserve(primes_in_range); std::thread t([current_lower_bound, current_upper_bound, &prime_vectors, i, &pre_generated_primes] { boost::math::detail::segmented_sieve(current_lower_bound, current_upper_bound, pre_generated_primes, @@ -291,9 +288,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) current_lower_bound / std::log(static_cast(current_lower_bound))); } - std::vector temp {}; - temp.reserve(primes_in_range); - prime_vectors.emplace_back(temp); + prime_vectors.back().reserve(primes_in_range); std::thread t([current_lower_bound, upper_bound, &prime_vectors, &pre_generated_primes] { boost::math::detail::segmented_sieve(current_lower_bound, upper_bound, pre_generated_primes, prime_vectors.back()); @@ -319,6 +314,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) template auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { + --upper_bound; // Not inclusive, 
but several methods in boost::math::detail need to be std::vector primes {}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); @@ -340,6 +336,7 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); + --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be std::vector primes{}; primes.reserve(upper_bound / std::log(upper_bound)); @@ -359,6 +356,7 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) template auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) { + --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be std::vector primes{}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 5d9a9da6fe..feeb9174a9 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -85,11 +85,6 @@ void test_prime_range() boost::math::prime_range(100, 1000, std::back_inserter(primes)); BOOST_TEST_EQ(primes.size(), ref); - // Does it work with 0 difference? 
- primes.clear(); - boost::math::prime_range(2, 2, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), 1); - // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); @@ -107,7 +102,7 @@ void test_prime_sieve_overflow() std::back_inserter(primes)); } -#if __cplusplus >= 201703 +#if __cplusplus >= 201703 || _MSVC_LANG >= 201703 template void test_par_prime_sieve() { @@ -164,7 +159,6 @@ void test_par_prime_sieve_large() int main() { - test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); @@ -179,7 +173,7 @@ int main() test_prime_sieve(); - #if __cplusplus >= 201703 + #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 test_par_prime_sieve(); test_par_prime_sieve(); test_par_prime_sieve(); From 2eadeff1e8b64ef798693d7ca94636141e5e64a8 Mon Sep 17 00:00:00 2001 From: mborland Date: Fri, 17 Jul 2020 21:10:39 -0500 Subject: [PATCH 20/83] Fixed documentation [CI SKIP] --- doc/sf/number_series.qbk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index adb108a4f5..ebdd857d0d 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -289,8 +289,7 @@ namespace boost { namespace math { [h4 Description] There are two sets of functions available `prime_sieve` and `prime_range`. `prime_sieve` will return all primes in the -range 2 to `upper_bound` inclusive. `prime_range` will return all the primes in the range `lower_bound` to `upper_bound` -inclusive. +range [2, `upper_bound`). `prime_range` will return all the primes in the range [lower_bound, upper_bound). If you have a C++17 compatible compiler you are able to pass an execution policy to both functions. Any policy besides `std::execution::seq` will enable internal multi-threading. 
If your compiler is not C++17 compatible the From 173ce0d09ba2f7305f621dd4e81ec936f60f74b7 Mon Sep 17 00:00:00 2001 From: mborland Date: Sat, 18 Jul 2020 17:15:29 -0500 Subject: [PATCH 21/83] segmented_sieve now runs using std::async. [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 5438c86e1d..8ba58b271b 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -136,8 +137,6 @@ void mask_sieve(Z lower_bound, Z upper_bound, Container &c) boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, c); } - - template void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes, Container &c) { @@ -151,14 +150,43 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes current_upper_bound = upper_bound; } - while(current_upper_bound < (upper_bound - interval)) + size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + std::vector> prime_vectors(ranges + 1); + std::vector> future_manager(ranges); + + Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + current_lower_bound / std::log(static_cast(current_lower_bound)))}; + + for(size_t i {}; i < ranges; ++i) { - boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, primes, c); + prime_vectors[i].reserve(primes_in_range); + + future_manager.emplace_back(std::async([current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ + boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, 
primes, prime_vectors[i]); + })); + current_lower_bound = current_upper_bound + 1; current_upper_bound += interval; } - boost::math::detail::mask_sieve(current_lower_bound, upper_bound, primes, c); + prime_vectors[ranges].reserve(primes_in_range); + future_manager.emplace_back(std::async([current_lower_bound, upper_bound, &primes, &prime_vectors]{ + boost::math::detail::mask_sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); + })); + + for(auto &&future : future_manager) + { + if(future.valid()) + { + future.get(); + } + } + + for(auto &v : prime_vectors) + { + c.insert(c.end(), v.begin(), v.end()); + } } template From 23fba3681964f0a2a19484693de22a28a5d75201 Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 19 Jul 2020 14:45:20 -0500 Subject: [PATCH 22/83] Removed extraneous operations [CI SKIP] --- include/boost/math/special_functions/prime_sieve.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 8ba58b271b..524dc31299 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -103,9 +103,11 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); - for(auto i : primes) + for(size_t i{}; i < primes_size; ++i) { - for(Z j {std::max(i * i, (lower_bound + i - 1) / i * i)}; j <= upper_bound; j += i) + Z current_prime{primes[i]}; + for(Z j {std::max(current_prime * current_prime, (lower_bound + current_prime - 1) / current_prime * current_prime)}; + j <= upper_bound; j += current_prime) { is_prime[j - lower_bound] = false; } From f7b45fdf33fc34605295f6fb806d8588d6d23c05 Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 19 Jul 2020 17:27:10 -0500 Subject: [PATCH 23/83] Cleanup style, and delete unused function. 
Enable par_unseq mask_sieve. Reduce limit to change sieving methods. [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 524dc31299..7aaca1da3a 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -71,22 +71,8 @@ void linear_sieve(Z upper_bound, Container &c) } } -template -void prime_table(Z upper_bound, Container &c) -{ - Z i{2}; - unsigned counter{}; - - while (i <= upper_bound && counter < 9999) // 10k elements are in the lookup table - { - c.emplace_back(i); - ++counter; - i = static_cast(boost::math::prime(counter)); - } -} - //https://core.ac.uk/download/pdf/62440589.pdf -template +template void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Container &c) { Z limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; @@ -103,6 +89,17 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); + #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 + // Enable use of thread pool, and vectorization if supported + std::for_each(std::execution::par_unseq, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime) + { + for(Z j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) + { + is_prime[j - lower_bound] = false; + } + }); + + #else for(size_t i{}; i < primes_size; ++i) { Z current_prime{primes[i]}; @@ -112,6 +109,8 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont is_prime[j - lower_bound] = false; } } + #endif + if(lower_bound == 1) { @@ -127,7 +126,7 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, 
Cont } } -template +template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; @@ -139,7 +138,7 @@ void mask_sieve(Z lower_bound, Z upper_bound, Container &c) boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, c); } -template +template void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes, Container &c) { const Z L1_SIZE {32648}; @@ -191,7 +190,7 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes } } -template +template void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; @@ -214,7 +213,7 @@ void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) } // End namespace detail #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 -template +template auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); @@ -226,12 +225,11 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) primes.reserve(upper_bound / std::log(static_cast(upper_bound))); // Range for when the linear sieve is no longer faster than threading - if (upper_bound < 8192) + if (upper_bound < 4096) { boost::math::detail::linear_sieve(upper_bound, primes); } - else if (std::is_same_v) { boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); @@ -274,7 +272,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) std::vector pre_generated_primes {}; pre_generated_primes.reserve(limit / std::log(limit)); - if(limit < 8192) + if(limit < 4096) { boost::math::detail::linear_sieve(limit, pre_generated_primes); } @@ -282,10 +280,10 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) else { std::thread t1([&small_primes] { - 
boost::math::detail::linear_sieve(static_cast(8192), small_primes); + boost::math::detail::linear_sieve(static_cast(4096), small_primes); }); std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(8192), limit, pre_generated_primes); + boost::math::detail::segmented_sieve(static_cast(4096), limit, pre_generated_primes); }); t1.join(); @@ -296,7 +294,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) std::vector thread_manager {}; std::vector> prime_vectors(processor_count); const Z range_per_thread = upper_bound / (processor_count); - Z current_lower_bound {8192}; + Z current_lower_bound {limit}; Z current_upper_bound {current_lower_bound + range_per_thread}; Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; @@ -330,7 +328,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) thread.join(); } - primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + primes.insert(primes.begin(), pre_generated_primes.begin(), pre_generated_primes.end()); for(auto &v : prime_vectors) { primes.insert(primes.end(), v.begin(), v.end()); @@ -360,7 +358,7 @@ auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputI } #endif //__cplusplus >= 201703 -template +template auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) { static_assert(std::is_integral::value, "No primes for floating point types"); From d687d5eb8604ff55b71ba2c3da72c786f7a22e58 Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 19 Jul 2020 21:29:40 -0500 Subject: [PATCH 24/83] Small Cleanup and limit reduction for sieving methods. 
--- include/boost/math/special_functions/prime_sieve.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 7aaca1da3a..d459ed4b3f 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -10,7 +10,6 @@ #include #include -#include #include #include #include @@ -198,7 +197,7 @@ void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) primes.reserve(limit / std::log(limit)); // Prepare for max value so you do not have to calculate this again - if(limit < 8192) + if(limit < 4096) { boost::math::detail::linear_sieve(limit, primes); } @@ -368,7 +367,7 @@ auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) std::vector primes{}; primes.reserve(upper_bound / std::log(upper_bound)); - if (upper_bound <= 8192) + if (upper_bound <= 4096) { boost::math::detail::linear_sieve(upper_bound, primes); } From e51d727c32d7ea0eb88cab8d3b749994684829c6 Mon Sep 17 00:00:00 2001 From: mborland Date: Sat, 25 Jul 2020 17:41:04 -0500 Subject: [PATCH 25/83] Add pritchard's sub-linear algo [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 224 +++++++++++++++++- 1 file changed, 222 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index d459ed4b3f..1d7fef2749 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include #ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -89,8 +92,8 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont memset(is_prime.get(), true, 
sizeof(*is_prime.get()) * (n)); #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 - // Enable use of thread pool, and vectorization if supported - std::for_each(std::execution::par_unseq, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime) + // Enable use of thread pool, not SIMD compatible + std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime) { for(Z j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) { @@ -125,6 +128,223 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont } } +// https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 +// Implementing S +template +class SetS +{ +private: + std::deque srec_; + size_t i_ = 0; + +public: + SetS() = default; + + constexpr Z next(const Z x) + { + if(srec_[i_ + 1] > x && srec_[i_] <= x) + { + ++i_; + return srec_[i_]; + } + + for(size_t i {}; i < srec_.size(); ++i) + { + if(srec_[i] > x) + { + i_ = i; + return srec_[i]; + } + } + + return srec_.back(); + } + + constexpr Z prev(const Z x) + { + if(srec_[i_ + 1] > x && srec_[i_ - 1] < x) + { + --i_; + return srec_[i_]; + } + + for(size_t i {}; i < srec_.size(); ++i) + { + if(srec_[i] > x) + { + i_ = i; + return srec_[i - 1]; + } + } + + return srec_.front(); + } + + constexpr Z max() noexcept + { + return srec_.back(); + } + + void remove(const Z x) + { + for(size_t i {}; i < srec_.size(); ++i) + { + if(srec_[i] == x) + { + srec_.erase(srec_.begin() + i); + } + } + } + + void insert(Z x) + { + srec_.push_back(x); + } + + constexpr Z operator[] (const Z index) + { + return srec_[index]; + } + + constexpr size_t size() noexcept + { + return srec_.size(); + } +}; + +template +constexpr bool is_prime(const Z n) +{ + if(n <= 1) + { + return false; + } + + for(Z factor{2}; factor * factor <= n; ++factor) + { + if(n % factor == 0) + { + return false; + } + } + + return true; +} + 
+//https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 +// 7 - A segmented Wheel Sieve [Pritchard '87] +template +void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) +{ + Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + + // Step 1 - Compute the wheel + Z Mk {1}; + Z k {2}; + for(; true; ++k) + { + if(is_prime(k)) + { + if(Mk * k > limit) + { + break; + } + + else + { + Mk *= k; + resultant_primes.emplace_back(k); + } + + } + } + + // Initialze wheel wk + std::vector wk; + wk.emplace_back(static_cast(0)); + for(Z i{1}; i < Mk; ++i) + { + // If the number is not prime + if(std::gcd(i, Mk) != 1) + { + wk.emplace_back(static_cast(0)); + } + + else + { + wk.emplace_back(static_cast(1)); + } + } + + // Part 3 of init wheel + wk.back() = static_cast(2); + for(Z x{Mk - 2}; x > 0; --x) + { + if(wk[x] == 0) + { + continue; + } + + else + { + Z i{x + 1}; + while(wk[i] == 0) + { + ++i; + } + wk[x] = i - x; + } + } + + // Step 2 - Init set S to the kth wheel extended to n + Z d {1}; + SetS S; + while(d < upper_bound) + { + S.insert(d); + d += wk[d % Mk]; + } + + // Step 3 - Run the linear algorithm starting with p := next(S, 1), which is p_k+1 + // 4 - A linear Algorithm + Z p {2}; + + while(p * p <= upper_bound) + { + //Remove Multiples + Z f {p}; + while(p * f <= upper_bound) + { + f = S.next(f); + } + + // Loop down through the values of f + while(f >= p) + { + S.remove(p * f); + f = S.prev(f); + } + + p = S.next(p); + } + + // Step 4 - Write S - {1} and the first k primes to resultant_primes + for(size_t i{1}; i < S.size(); ++i) + { + resultant_primes.emplace_back(S[i]); + } +} + +/* +//https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 +// 8 - A segmented Wheel Sieve [Pritchard '83] +template +void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Container &resultant_primes) +{ + +} +*/ + template void mask_sieve(Z lower_bound, Z 
upper_bound, Container &c) { From 1245d27d54559c67abb563e85c7414fa1307a84f Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 26 Jul 2020 11:09:51 -0500 Subject: [PATCH 26/83] Added pritchards segmented sieve [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 128 +++++++++++++++++- 1 file changed, 122 insertions(+), 6 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 1d7fef2749..51f837c339 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -335,15 +334,132 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) } } -/* -//https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 +// https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 // 8 - A segmented Wheel Sieve [Pritchard '83] -template -void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Container &resultant_primes) +template +void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resultant_primes) { + const Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + const Z interval {upper_bound - lower_bound}; + + // Pre-processing + // 1 + Z Mk {1}; + Z k {2}; + Z counter{}; + + for(; true; ++k) + { + if(is_prime(k)) + { + if(Mk * k > limit) + { + if(Mk * k - limit > limit - Mk) + { + break; + } + + else + { + Mk *= k; + ++counter; + } + } + + else + { + Mk *= k; + ++counter; + } + } + } + + // Initialze wheel wk + std::vector wk; + wk.emplace_back(static_cast(0)); + for(Z i{1}; i < Mk; ++i) + { + // If the number is not prime + if(std::gcd(i, Mk) != 1) + { + 
wk.emplace_back(static_cast(0)); + } + + else + { + wk.emplace_back(static_cast(1)); + } + } + + // Part 3 of init wheel + wk.back() = static_cast(2); + for(Z x{Mk - 2}; x > 0; --x) + { + if(wk[x] == 0) + { + continue; + } + + else + { + Z i{x + 1}; + while(wk[i] == 0) + { + ++i; + } + wk[x] = i - x; + } + } + + // Pre-processing step 2 + std::vector primes; + boost::math::detail::sub_linear_wheel_sieve(limit, primes); + // Pre-processing step 3 + std::vector factor; + for(size_t i{static_cast(counter)}; i < primes.size(); ++i) + { + factor.emplace_back(primes[i]); + } + + // Sieving the interval + // Step 1 + std::unique_ptr mark {new bool [static_cast(interval + 1)]}; + + for(Z x {lower_bound}, i {}; x <= upper_bound; ++x, ++i) + { + if(std::gcd(Mk, x) == 1) + { + mark[i] = 1; + } + } + + // Step 2 + for(Z p {}; p < static_cast(factor.size()); ++p) + { + Z f {factor[p]}; + Z current_prime{factor[p]}; + while(f * current_prime <= upper_bound) + { + if(f * current_prime > lower_bound) + { + mark[f * current_prime - lower_bound] = 0; + } + + f += wk[f % Mk]; + } + factor[p] = f; + } + + for(Z i {}; i < interval; ++i) + { + if(mark[i] == 1) + { + resultant_primes.emplace_back(i + lower_bound); + } + } } -*/ + template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) From 2052081053babf7c891bf107b1b979c5c204613e Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 26 Jul 2020 20:03:07 -0500 Subject: [PATCH 27/83] Implemented binary search in SetS [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 137 +++++++++++++----- 1 file changed, 102 insertions(+), 35 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 51f837c339..ab3eded9e5 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // 
_MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -134,49 +135,95 @@ class SetS { private: std::deque srec_; - size_t i_ = 0; - + public: SetS() = default; - constexpr Z next(const Z x) + constexpr Z next(const Z x) noexcept { - if(srec_[i_ + 1] > x && srec_[i_] <= x) - { - ++i_; - return srec_[i_]; - } + const Z length {static_cast(srec_.size()) - 1}; + Z low {0}; + Z high {length}; - for(size_t i {}; i < srec_.size(); ++i) + while(low <= high) { - if(srec_[i] > x) + Z mid {low + ((high - low) / 2)}; + + if(srec_[mid] < x) + { + low = mid + 1; + } + + else if(srec_[mid] > x) { - i_ = i; - return srec_[i]; + high = mid - 1; + } + + else + { + return srec_[mid + 1]; } } - return srec_.back(); + if(high < 0) + { + return srec_.front(); + } + + else if(low > length) + { + return srec_.back(); + } + + else + { + return (low < high) ? srec_[low + 1] : srec_[high + 1]; + } } - constexpr Z prev(const Z x) + constexpr Z prev(const Z x) noexcept { - if(srec_[i_ + 1] > x && srec_[i_ - 1] < x) + const Z length {static_cast(srec_.size()) - 1}; + Z low {0}; + Z high {length}; + + while(high >= low) + { + Z mid {low + (high - low) / 2}; + + if(srec_[mid] < x) + { + low = mid + 1; + } + + else if(srec_[mid] > x) + { + high = mid - 1; + } + + else + { + return srec_[mid - 1]; + } + } + + if(high < 0) { - --i_; - return srec_[i_]; + return srec_.front(); } - - for(size_t i {}; i < srec_.size(); ++i) + + else { - if(srec_[i] > x) + if(low > length) { - i_ = i; - return srec_[i - 1]; + return srec_.back(); } - } - return srec_.front(); + else + { + return (low < high) ? 
srec_[low] : srec_[high + 1]; + } + } } constexpr Z max() noexcept @@ -186,11 +233,28 @@ class SetS void remove(const Z x) { - for(size_t i {}; i < srec_.size(); ++i) + const Z length {static_cast(srec_.size()) - 1}; + Z low {0}; + Z high {length}; + + while(high >= low) { - if(srec_[i] == x) + Z mid {low + ((high - low) / 2)}; + + if(srec_[mid] < x) + { + low = mid + 1; + } + + else if(srec_[mid] > x) + { + high = mid - 1; + } + + else { - srec_.erase(srec_.begin() + i); + srec_.erase(srec_.begin() + mid); + return; } } } @@ -236,11 +300,13 @@ template void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) { Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + resultant_primes.reserve(upper_bound / std::log(upper_bound)); // Step 1 - Compute the wheel - Z Mk {1}; - Z k {2}; - for(; true; ++k) + Z Mk {2}; + Z k {3}; + resultant_primes.emplace_back(static_cast(2)); + for(; true; k += 2) { if(is_prime(k)) { @@ -254,13 +320,14 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) Mk *= k; resultant_primes.emplace_back(k); } - } } // Initialze wheel wk std::vector wk; + wk.reserve(Mk); wk.emplace_back(static_cast(0)); + for(Z i{1}; i < Mk; ++i) { // If the number is not prime @@ -346,7 +413,7 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul // 1 Z Mk {1}; Z k {2}; - Z counter{}; + Z k_index{}; for(; true; ++k) { @@ -362,14 +429,14 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul else { Mk *= k; - ++counter; + ++k_index; } } else { Mk *= k; - ++counter; + ++k_index; } } } @@ -417,7 +484,7 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul // Pre-processing step 3 std::vector factor; - for(size_t i{static_cast(counter)}; i < primes.size(); ++i) + for(size_t i{static_cast(k_index)}; i < primes.size(); ++i) { factor.emplace_back(primes[i]); } From 9f81e0d9646fe652c9dbd06ea7ccf4bcdbcda522 Mon Sep 17 00:00:00 
2001 From: mborland Date: Sun, 26 Jul 2020 21:54:17 -0500 Subject: [PATCH 28/83] Tests and performance compairsons [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 24 ++-- .../performance/prime_sieve_performance.cpp | 119 +++++++++++++++++- test/test_prime_sieve.cpp | 66 +++++++++- 3 files changed, 188 insertions(+), 21 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index ab3eded9e5..d06284e2ab 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -306,22 +306,13 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) Z Mk {2}; Z k {3}; resultant_primes.emplace_back(static_cast(2)); - for(; true; k += 2) - { - if(is_prime(k)) - { - if(Mk * k > limit) - { - break; - } - else - { - Mk *= k; - resultant_primes.emplace_back(k); - } - } - } + while(Mk * k <= limit) + { + Mk *= k; + resultant_primes.emplace_back(k); + k += 2; + } // Initialze wheel wk std::vector wk; @@ -372,8 +363,9 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) } // Step 3 - Run the linear algorithm starting with p := next(S, 1), which is p_k+1 + // next(S, 1) = S[1] // 4 - A linear Algorithm - Z p {2}; + Z p {S[1]}; while(p * p <= upper_bound) { diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index ccfa98bd1f..01afbd5f1b 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -5,9 +5,94 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include +#include "../../include/boost/math/special_functions/prime_sieve.hpp" +//#include #include +#include +#include +// Individual Algos +// Linear +template +inline auto linear_sieve_helper(Z upper_bound, std::vector c) -> std::vector +{ + 
boost::math::detail::linear_sieve(upper_bound, c); + return c; +} + +template +void linear_sieve(benchmark::State& state) +{ + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(linear_sieve_helper(upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +inline auto sub_linear_sieve_helper(Z upper_bound, std::vector c) -> std::vector +{ + boost::math::detail::sub_linear_wheel_sieve(upper_bound, c); + return c; +} + +template +void sub_linear_sieve(benchmark::State& state) +{ + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(sub_linear_sieve_helper(upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +// Segmented +template +inline auto mask_sieve_helper(Z lower_bound, Z upper_bound, std::vector c) -> std::vector +{ + boost::math::detail::mask_sieve(lower_bound, upper_bound, c); + return c; +} + +template +void mask_sieve(benchmark::State& state) +{ + Z lower = static_cast(2); + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(mask_sieve_helper(lower, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +inline auto segmented_wheel_sieve_helper(Z lower_bound, Z upper_bound, std::vector c) -> std::vector +{ + boost::math::detail::linear_segmented_wheel_sieve(lower_bound, upper_bound, c); + return c; +} + +template +void segmented_wheel_sieve(benchmark::State& state) +{ + Z lower = static_cast(2); + Z upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(segmented_wheel_sieve_helper(lower, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +// Complete Implementations template void prime_sieve(benchmark::State& state) { @@ -33,11 +118,39 @@ void prime_sieve_partial_range(benchmark::State& state) state.SetComplexityN(state.range(0)); } +template +void 
kimwalish_primes(benchmark::State& state) +{ + + Z upper = static_cast(state.range(0)); + for (auto _ : state) + { + std::vector primes; + primesieve::generate_primes(upper, &primes); + benchmark::DoNotOptimize(primes.back()); + } + state.SetComplexityN(state.range(0)); +} + +// Individual Algos + +// Linear +BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(sub_linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); + + +// Segmented +BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +BENCHMARK_TEMPLATE(segmented_wheel_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(); + +/* +// Complete Implementations BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity()->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); - +*/ BENCHMARK_MAIN(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index feeb9174a9..8f9c2a6b95 100644 --- a/test/test_prime_sieve.cpp +++ 
b/test/test_prime_sieve.cpp @@ -5,7 +5,8 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include +#include "../include/boost/math/special_functions/prime_sieve.hpp" +//#include #include #include #include @@ -23,6 +24,11 @@ void test_prime_sieve() BOOST_TEST_EQ(primes.size(), ref); // Tests for correctness + // 2 + primes.clear(); + boost::math::prime_sieve(2, std::back_inserter(primes)); + BOOST_TEST_EQ(primes.size(), 0); + // 100 primes.clear(); boost::math::prime_sieve(100, std::back_inserter(primes)); @@ -92,6 +98,51 @@ void test_prime_range() BOOST_TEST_EQ(primes.size(), ref); } +template +void test_sub_linear_prime_sieve() +{ + std::vector primes; + + // Does the function work with a vector + boost::math::detail::sub_linear_wheel_sieve(100, primes); + BOOST_TEST_EQ(primes.size(), 25); + + // 1'000 + primes.clear(); + boost::math::detail::sub_linear_wheel_sieve(1'000, primes); + BOOST_TEST_EQ(primes.size(), 168); + + // 10'000 + primes.clear(); + boost::math::detail::sub_linear_wheel_sieve(10'000, primes); + BOOST_TEST_EQ(primes.size(), 1229); +} + +template +void test_linear_segmented_sieve() +{ + std::vector primes; + + // 10 - 20: Tests only step 1 + boost::math::detail::linear_segmented_wheel_sieve(10, 20, primes); + BOOST_TEST_EQ(primes.size(), 4); + + // 100 - 200: Tests step 2 + primes.clear(); + boost::math::detail::linear_segmented_wheel_sieve(100, 200, primes); + BOOST_TEST_EQ(primes.size(), 21); + + // 100 - 1'000 + primes.clear(); + boost::math::detail::linear_segmented_wheel_sieve(100, 1'000, primes); + BOOST_TEST_EQ(primes.size(), 143); + + // 1'000 - 10'000 + primes.clear(); + boost::math::detail::linear_segmented_wheel_sieve(1'000, 10'000, primes); + BOOST_TEST_EQ(primes.size(), 1061); +} + template void test_prime_sieve_overflow() { @@ -159,6 +210,17 @@ void test_par_prime_sieve_large() int main() { + test_sub_linear_prime_sieve(); + test_sub_linear_prime_sieve(); + 
test_sub_linear_prime_sieve(); + test_sub_linear_prime_sieve(); + + test_linear_segmented_sieve(); + test_linear_segmented_sieve(); + test_linear_segmented_sieve(); + test_linear_segmented_sieve(); + + /* test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); @@ -181,6 +243,6 @@ int main() //test_par_prime_sieve_large(); #endif - + */ boost::report_errors(); } From 55ea0450faec7d3890c160ae5a396325edab8e21 Mon Sep 17 00:00:00 2001 From: mborland Date: Sun, 26 Jul 2020 23:15:51 -0500 Subject: [PATCH 29/83] More tests. Segmented sieve small cases fix [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 20 +++++++++++++++++++ test/test_prime_sieve.cpp | 16 +++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index d06284e2ab..3dbf94c364 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -307,6 +307,11 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) Z k {3}; resultant_primes.emplace_back(static_cast(2)); + if(upper_bound == 2) + { + return; + } + while(Mk * k <= limit) { Mk *= k; @@ -401,6 +406,21 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul const Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; const Z interval {upper_bound - lower_bound}; + // Solves small cases for benchmark, but needs better remedy + if(lower_bound == 2 && upper_bound <= 10) + { + boost::math::detail::sub_linear_wheel_sieve(upper_bound, resultant_primes); + return; + } + + else if(lower_bound == 2 && upper_bound > 10) + { + boost::math::detail::sub_linear_wheel_sieve(static_cast(10), resultant_primes); + boost::math::detail::linear_segmented_wheel_sieve(static_cast(11), upper_bound, resultant_primes); + return; + } + + // Pre-processing // 1 Z Mk {1}; diff --git a/test/test_prime_sieve.cpp 
b/test/test_prime_sieve.cpp index 8f9c2a6b95..6e7cb213a7 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -107,6 +107,11 @@ void test_sub_linear_prime_sieve() boost::math::detail::sub_linear_wheel_sieve(100, primes); BOOST_TEST_EQ(primes.size(), 25); + // 2 + primes.clear(); + boost::math::detail::sub_linear_wheel_sieve(2, primes); + BOOST_TEST_EQ(primes.size(), 1); + // 1'000 primes.clear(); boost::math::detail::sub_linear_wheel_sieve(1'000, primes); @@ -123,7 +128,12 @@ void test_linear_segmented_sieve() { std::vector primes; + // 2 - 20: + boost::math::detail::linear_segmented_wheel_sieve(2, 20, primes); + BOOST_TEST_EQ(primes.size(), 8); + // 10 - 20: Tests only step 1 + primes.clear(); boost::math::detail::linear_segmented_wheel_sieve(10, 20, primes); BOOST_TEST_EQ(primes.size(), 4); @@ -141,6 +151,12 @@ void test_linear_segmented_sieve() primes.clear(); boost::math::detail::linear_segmented_wheel_sieve(1'000, 10'000, primes); BOOST_TEST_EQ(primes.size(), 1061); + + // 2 - 10'000 + primes.clear(); + primes.clear(); + boost::math::detail::linear_segmented_wheel_sieve(2, 10'000, primes); + BOOST_TEST_EQ(primes.size(), 1229); } template From 31a21058f63242519386f526b5b3f6ef1188aab1 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 28 Jul 2020 19:47:42 -0500 Subject: [PATCH 30/83] Imporve segmented performance [WIP][CI SKIP] --- .gitignore | 3 + .../math/special_functions/prime_sieve.hpp | 64 +++++++------------ 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 9e27eddf76..e1d8210b59 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ inspect.html test/cuda *.DS_Store /**/*.dSYM/ +build/* +*.json + diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 3dbf94c364..342c695ef9 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -100,8 +100,8 @@ void 
mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont is_prime[j - lower_bound] = false; } }); - #else + for(size_t i{}; i < primes_size; ++i) { Z current_prime{primes[i]}; @@ -139,7 +139,7 @@ class SetS public: SetS() = default; - constexpr Z next(const Z x) noexcept + constexpr Z next(const Z x) const noexcept { const Z length {static_cast(srec_.size()) - 1}; Z low {0}; @@ -181,7 +181,7 @@ class SetS } } - constexpr Z prev(const Z x) noexcept + constexpr Z prev(const Z x) const noexcept { const Z length {static_cast(srec_.size()) - 1}; Z low {0}; @@ -226,7 +226,7 @@ class SetS } } - constexpr Z max() noexcept + constexpr Z max() const noexcept { return srec_.back(); } @@ -259,17 +259,17 @@ class SetS } } - void insert(Z x) + void insert(const Z x) { - srec_.push_back(x); + srec_.emplace_back(x); } - constexpr Z operator[] (const Z index) + constexpr Z operator[] (const Z index) const { return srec_[index]; } - constexpr size_t size() noexcept + constexpr size_t size() const noexcept { return srec_.size(); } @@ -409,50 +409,35 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul // Solves small cases for benchmark, but needs better remedy if(lower_bound == 2 && upper_bound <= 10) { - boost::math::detail::sub_linear_wheel_sieve(upper_bound, resultant_primes); + //boost::math::detail::sub_linear_wheel_sieve(upper_bound, resultant_primes); + boost::math::detail::linear_sieve(upper_bound, resultant_primes); return; } else if(lower_bound == 2 && upper_bound > 10) { - boost::math::detail::sub_linear_wheel_sieve(static_cast(10), resultant_primes); + //boost::math::detail::sub_linear_wheel_sieve(static_cast(10), resultant_primes); + boost::math::detail::linear_sieve(static_cast(10), resultant_primes); boost::math::detail::linear_segmented_wheel_sieve(static_cast(11), upper_bound, resultant_primes); return; } - // Pre-processing // 1 - Z Mk {1}; - Z k {2}; - Z k_index{}; + std::vector primes; + 
boost::math::detail::linear_sieve(limit, primes); - for(; true; ++k) - { - if(is_prime(k)) - { - if(Mk * k > limit) - { - if(Mk * k - limit > limit - Mk) - { - break; - } - - else - { - Mk *= k; - ++k_index; - } - } + Z Mk {2}; + Z k {3}; + Z k_index {1}; - else - { - Mk *= k; - ++k_index; - } - } + while(Mk * k <= limit) + { + Mk *= k; + ++k_index; + k = primes[k_index]; } - + // Initialze wheel wk std::vector wk; wk.emplace_back(static_cast(0)); @@ -491,8 +476,7 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul } // Pre-processing step 2 - std::vector primes; - boost::math::detail::sub_linear_wheel_sieve(limit, primes); + // Done as part of step 1 for performance improvement // Pre-processing step 3 std::vector factor; From f545928c4f2eb2dae10c303e48d9ea1c1521db8b Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 28 Jul 2020 22:13:59 -0500 Subject: [PATCH 31/83] Replaced SetS members with stl algos [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 114 ++---------------- 1 file changed, 10 insertions(+), 104 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 342c695ef9..5844674c7a 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -139,93 +140,16 @@ class SetS public: SetS() = default; - constexpr Z next(const Z x) const noexcept + constexpr Z next(const Z x) const { - const Z length {static_cast(srec_.size()) - 1}; - Z low {0}; - Z high {length}; - - while(low <= high) - { - Z mid {low + ((high - low) / 2)}; - - if(srec_[mid] < x) - { - low = mid + 1; - } - - else if(srec_[mid] > x) - { - high = mid - 1; - } - - else - { - return srec_[mid + 1]; - } - } 
- - if(high < 0) - { - return srec_.front(); - } - - else if(low > length) - { - return srec_.back(); - } - - else - { - return (low < high) ? srec_[low + 1] : srec_[high + 1]; - } + return *std::upper_bound(srec_.begin(), srec_.end(), x); } - constexpr Z prev(const Z x) const noexcept + constexpr Z prev(const Z x) const { - const Z length {static_cast(srec_.size()) - 1}; - Z low {0}; - Z high {length}; - - while(high >= low) - { - Z mid {low + (high - low) / 2}; - - if(srec_[mid] < x) - { - low = mid + 1; - } - - else if(srec_[mid] > x) - { - high = mid - 1; - } - - else - { - return srec_[mid - 1]; - } - } - - if(high < 0) - { - return srec_.front(); - } - - else - { - if(low > length) - { - return srec_.back(); - } - - else - { - return (low < high) ? srec_[low] : srec_[high + 1]; - } - } + return *--std::lower_bound(srec_.begin(), srec_.end(), x); } - + constexpr Z max() const noexcept { return srec_.back(); @@ -233,29 +157,11 @@ class SetS void remove(const Z x) { - const Z length {static_cast(srec_.size()) - 1}; - Z low {0}; - Z high {length}; - - while(high >= low) + auto index {std::lower_bound(srec_.begin(), srec_.end(), x)}; + + if(index != srec_.end() && !(x < *index)) { - Z mid {low + ((high - low) / 2)}; - - if(srec_[mid] < x) - { - low = mid + 1; - } - - else if(srec_[mid] > x) - { - high = mid - 1; - } - - else - { - srec_.erase(srec_.begin() + mid); - return; - } + srec_.erase(index); } } From d780db5140820187ea931978b1d30db9546ee9c1 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 31 Jul 2020 18:20:06 -0500 Subject: [PATCH 32/83] Replace searching with tracked index. General performance improvements. 
[WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 76 ++++++++++--------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 5844674c7a..f70e9f7705 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -135,19 +135,34 @@ template class SetS { private: - std::deque srec_; + std::vector srec_; public: SetS() = default; - constexpr Z next(const Z x) const + constexpr explicit SetS(const Z limit) { - return *std::upper_bound(srec_.begin(), srec_.end(), x); + srec_.reserve(limit); } - constexpr Z prev(const Z x) const + constexpr Z next(const Z x) const noexcept { - return *--std::lower_bound(srec_.begin(), srec_.end(), x); + return *std::upper_bound(srec_.cbegin(), srec_.cend(), x); + } + + constexpr auto next_it(const Z x) const noexcept + { + return std::upper_bound(srec_.cbegin(), srec_.cend(), x); + } + + constexpr Z prev(const Z x) const noexcept + { + return *--std::lower_bound(srec_.cbegin(), srec_.cend(), x); + } + + constexpr auto prev_it(const Z x) const noexcept + { + return --std::lower_bound(srec_.cbegin(), srec_.cend(), x); } constexpr Z max() const noexcept @@ -155,22 +170,22 @@ class SetS return srec_.back(); } - void remove(const Z x) + void remove(const Z x) noexcept { - auto index {std::lower_bound(srec_.begin(), srec_.end(), x)}; + auto index {std::lower_bound(srec_.cbegin(), srec_.cend(), x)}; - if(index != srec_.end() && !(x < *index)) + if(index != srec_.cend() && !(x < *index)) { srec_.erase(index); } } - void insert(const Z x) + void insert(const Z x) noexcept { srec_.emplace_back(x); } - constexpr Z operator[] (const Z index) const + constexpr Z operator[] (const Z index) const noexcept { return srec_[index]; } @@ -225,35 +240,23 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) k += 2; } - // Initialze wheel 
wk - std::vector wk; - wk.reserve(Mk); - wk.emplace_back(static_cast(0)); + // Initialze wheel wk + std::unique_ptr wk{new Z[Mk]{0}}; for(Z i{1}; i < Mk; ++i) { - // If the number is not prime - if(std::gcd(i, Mk) != 1) + if(std::gcd(i, Mk) == 1) { - wk.emplace_back(static_cast(0)); - } - - else - { - wk.emplace_back(static_cast(1)); + wk[i] = 1; } } // Part 3 of init wheel - wk.back() = static_cast(2); + wk[Mk - 1] = static_cast(2); + for(Z x{Mk - 2}; x > 0; --x) { - if(wk[x] == 0) - { - continue; - } - - else + if(wk[x] == 1) { Z i{x + 1}; while(wk[i] == 0) @@ -266,7 +269,8 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) // Step 2 - Init set S to the kth wheel extended to n Z d {1}; - SetS S; + SetS S(upper_bound); + while(d < upper_bound) { S.insert(d); @@ -277,24 +281,27 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) // next(S, 1) = S[1] // 4 - A linear Algorithm Z p {S[1]}; + size_t p_index {1}; while(p * p <= upper_bound) { //Remove Multiples Z f {p}; + size_t f_index {p_index}; + while(p * f <= upper_bound) { - f = S.next(f); + f = S[++f_index]; } // Loop down through the values of f while(f >= p) { S.remove(p * f); - f = S.prev(f); + f = S[--f_index]; } - p = S.next(p); + p = S[++p_index]; } // Step 4 - Write S - {1} and the first k primes to resultant_primes @@ -404,7 +411,7 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul } // Step 2 - for(Z p {}; p < static_cast(factor.size()); ++p) + for(Z p {0}; p < static_cast(factor.size()); ++p) { Z f {factor[p]}; Z current_prime{factor[p]}; @@ -429,7 +436,6 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul } } - template void mask_sieve(Z lower_bound, Z upper_bound, Container &c) { From 2639bed7c605ea85e7a907d69564b40d92a9f821 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 31 Jul 2020 22:21:48 -0500 Subject: [PATCH 33/83] Minor change to SetS remove [WIP][CI SKIP] --- 
.../math/special_functions/prime_sieve.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index f70e9f7705..34b3f01f1e 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -170,14 +170,14 @@ class SetS return srec_.back(); } - void remove(const Z x) noexcept - { - auto index {std::lower_bound(srec_.cbegin(), srec_.cend(), x)}; - - if(index != srec_.cend() && !(x < *index)) + void remove(const size_t current_index, const Z x) noexcept + { + auto index {std::lower_bound(srec_.cbegin() + current_index + 1, srec_.cend(), x)}; + + if(index != srec_.cend() && *index == x) { - srec_.erase(index); - } + srec_.erase(index); + } } void insert(const Z x) noexcept @@ -297,7 +297,7 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) // Loop down through the values of f while(f >= p) { - S.remove(p * f); + S.remove(f_index, p * f); f = S[--f_index]; } @@ -306,7 +306,7 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) // Step 4 - Write S - {1} and the first k primes to resultant_primes for(size_t i{1}; i < S.size(); ++i) - { + { resultant_primes.emplace_back(S[i]); } } From 94fc1acba76ab290a942eb589f6ae59ea5084449 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 1 Aug 2020 13:27:11 -0500 Subject: [PATCH 34/83] Pritchard segmented performance improvements [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 34b3f01f1e..6ad4b31a77 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -20,6 +20,7 @@ #include #include #include +#include 
#ifdef _MSVC_LANG #if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ @@ -215,6 +216,13 @@ constexpr bool is_prime(const Z n) return true; } +//https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=56m34s +template +constexpr Z fast_mod(const Z input, const Z ceil) +{ + return input >= ceil ? input % ceil : input; +} + //https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 // 7 - A segmented Wheel Sieve [Pritchard '87] template @@ -352,32 +360,22 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul } // Initialze wheel wk - std::vector wk; - wk.emplace_back(static_cast(0)); + std::unique_ptr wk{new Z[Mk]{0}}; + for(Z i{1}; i < Mk; ++i) { - // If the number is not prime - if(std::gcd(i, Mk) != 1) - { - wk.emplace_back(static_cast(0)); - } - - else + if(std::gcd(i, Mk) == 1) { - wk.emplace_back(static_cast(1)); + wk[i] = 1; } } // Part 3 of init wheel - wk.back() = static_cast(2); + wk[Mk - 1] = static_cast(2); + for(Z x{Mk - 2}; x > 0; --x) { - if(wk[x] == 0) - { - continue; - } - - else + if(wk[x] == 1) { Z i{x + 1}; while(wk[i] == 0) @@ -392,26 +390,28 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul // Done as part of step 1 for performance improvement // Pre-processing step 3 - std::vector factor; + size_t factor_size {primes.size() - k_index}; + std::unique_ptr factor{new Z [factor_size]}; + for(size_t i{static_cast(k_index)}; i < primes.size(); ++i) { - factor.emplace_back(primes[i]); + factor[i - k_index] = primes[i]; } // Sieving the interval // Step 1 std::unique_ptr mark {new bool [static_cast(interval + 1)]}; - - for(Z x {lower_bound}, i {}; x <= upper_bound; ++x, ++i) + + for(Z x = lower_bound; x <= upper_bound; ++x) { if(std::gcd(Mk, x) == 1) { - mark[i] = 1; + mark[x - lower_bound] = 1; } } // Step 2 - for(Z p {0}; p < static_cast(factor.size()); ++p) + for(Z p = 0; p < 
static_cast(factor_size); ++p) { Z f {factor[p]}; Z current_prime{factor[p]}; @@ -422,12 +422,12 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul mark[f * current_prime - lower_bound] = 0; } - f += wk[f % Mk]; + f += wk[fast_mod(f, Mk)]; } factor[p] = f; } - for(Z i {}; i < interval; ++i) + for(Z i {0}; i < interval; ++i) { if(mark[i] == 1) { From 6ebb906fffc73423a78aec41c434fd84b2e502aa Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 1 Aug 2020 15:19:53 -0500 Subject: [PATCH 35/83] Fixed failed test [WIP][CI SKIP] --- include/boost/math/special_functions/prime_sieve.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 6ad4b31a77..d62bd1cea1 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -593,7 +593,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) boost::math::detail::linear_sieve(static_cast(4096), small_primes); }); std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(4096), limit, pre_generated_primes); + boost::math::detail::segmented_sieve(static_cast(4097), limit, pre_generated_primes); }); t1.join(); @@ -604,7 +604,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) std::vector thread_manager {}; std::vector> prime_vectors(processor_count); const Z range_per_thread = upper_bound / (processor_count); - Z current_lower_bound {limit}; + Z current_lower_bound {limit + 1}; Z current_upper_bound {current_lower_bound + range_per_thread}; Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; From 0521854458a5fd2f4dc33e90be5b33a9edff0c21 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 1 Aug 2020 
19:16:34 -0500 Subject: [PATCH 36/83] Various performance improvements [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 101 ++++++++++++++---- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index d62bd1cea1..c442f31f0b 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -9,6 +9,7 @@ #define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP #include +#include #include #include #include @@ -92,7 +93,7 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont const size_t n {static_cast(upper_bound - lower_bound + 1)}; std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); - + #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 // Enable use of thread pool, not SIMD compatible std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime) @@ -130,6 +131,26 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont } } +template +constexpr void prime_table(size_t min_index, Z upper_bound, Container &c) +{ + size_t current_index {min_index}; + Z current_prime {2}; + + while(current_prime < upper_bound) + { + c.emplace_back(current_prime); + ++current_index; + current_prime = prime(current_index); + } +} + +template +constexpr void prime_table(Z upper_bound, Container &c) +{ + prime_table(0, upper_bound, c); +} + // https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 // Implementing S template @@ -253,7 +274,7 @@ void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) for(Z i{1}; i < Mk; ++i) { - if(std::gcd(i, Mk) == 1) + if(boost::math::gcd(i, Mk) == 1) { wk[i] = 1; } @@ -364,7 +385,7 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul for(Z i{1}; i < Mk; 
++i) { - if(std::gcd(i, Mk) == 1) + if(boost::math::gcd(i, Mk) == 1) { wk[i] = 1; } @@ -401,16 +422,18 @@ void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resul // Sieving the interval // Step 1 std::unique_ptr mark {new bool [static_cast(interval + 1)]}; - + + #pragma omp parallel for shared(mark) for(Z x = lower_bound; x <= upper_bound; ++x) { - if(std::gcd(Mk, x) == 1) + if(boost::math::gcd(Mk, x) == 1) { mark[x - lower_bound] = 1; } } // Step 2 + #pragma omp parallel for shared(mark) for(Z p = 0; p < static_cast(factor_size); ++p) { Z f {factor[p]}; @@ -473,7 +496,7 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes { prime_vectors[i].reserve(primes_in_range); - future_manager.emplace_back(std::async([current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, primes, prime_vectors[i]); })); @@ -482,7 +505,7 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes } prime_vectors[ranges].reserve(primes_in_range); - future_manager.emplace_back(std::async([current_lower_bound, upper_bound, &primes, &prime_vectors]{ + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &primes, &prime_vectors]{ boost::math::detail::mask_sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); })); @@ -534,12 +557,12 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) std::vector primes {}; primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - // Range for when the linear sieve is no longer faster than threading - if (upper_bound < 4096) + if(upper_bound <= 32768) { - boost::math::detail::linear_sieve(upper_bound, primes); + boost::math::detail::prime_table(upper_bound, primes); } + 
else if (std::is_same_v) { boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); @@ -556,21 +579,38 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) } std::vector small_primes {}; - small_primes.reserve(1000); + small_primes.reserve(1028); // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads if(upper_bound < 16777216 || processor_count == 2) { // Split into two vectors and merge after joined to avoid data races - std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(8192), small_primes); - }); - std::thread t2([upper_bound, &primes] { - boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); - }); + if(upper_bound <= 104729) + { + std::thread t1([&small_primes]{ + boost::math::detail::prime_table(static_cast(32768), small_primes); + }); + + std::thread t2([upper_bound, &primes]{ + boost::math::detail::prime_table(3512, upper_bound, primes); + }); - t1.join(); - t2.join(); + t1.join(); + t2.join(); + } + + else + { + std::thread t1([&small_primes] { + boost::math::detail::prime_table(static_cast(104729), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::segmented_sieve(static_cast(104729), upper_bound, primes); + }); + + t1.join(); + t2.join(); + } primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); } @@ -581,19 +621,34 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; std::vector pre_generated_primes {}; pre_generated_primes.reserve(limit / std::log(limit)); + + if(limit <= 32768) + { + boost::math::detail::prime_table(limit, pre_generated_primes); + } - if(limit < 4096) + else if(limit <= 104729) { - boost::math::detail::linear_sieve(limit, pre_generated_primes); + std::thread t1([&small_primes]{ + 
boost::math::detail::prime_table(static_cast(32768), small_primes); + }); + + std::thread t2([limit, &pre_generated_primes]{ + boost::math::detail::prime_table(3512, limit, pre_generated_primes); + }); + + t1.join(); + t2.join(); + pre_generated_primes.insert(pre_generated_primes.begin(), small_primes.begin(), small_primes.end()); } else { std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(4096), small_primes); + boost::math::detail::prime_table(static_cast(104729), small_primes); }); std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(4097), limit, pre_generated_primes); + boost::math::detail::segmented_sieve(static_cast(104729), limit, pre_generated_primes); }); t1.join(); From b233a8072913034d09daafc53c007fd3e04a5024 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 2 Aug 2020 11:21:07 -0500 Subject: [PATCH 37/83] Build primes in situ [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index c442f31f0b..c32a583a14 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -545,17 +545,39 @@ void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) } } // End namespace detail +template +struct IsVector +{ + using type = T; + constexpr static bool value = false; +}; + +template +struct IsVector> +{ + using type = std::vector; + constexpr static bool value = true; +}; + +template +constexpr bool is_vector_v = IsVector::value; + #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 -template -auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) +//template +//auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) +template +void 
prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) { static_assert(std::is_integral::value, "No primes for floating point types"); BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - std::vector primes {}; - primes.reserve(upper_bound / std::log(static_cast(upper_bound))); + //std::vector primes {}; + if(is_vector_v) + { + primes.reserve(upper_bound / std::log(static_cast(upper_bound))); + } if(upper_bound <= 32768) { @@ -701,7 +723,7 @@ auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) } } - return std::move(primes.begin(), primes.end(), output); + //return std::move(primes.begin(), primes.end(), output); } template From f95c2cf2f6861c23ab30331f0e2050c833209ddc Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 2 Aug 2020 15:52:20 -0500 Subject: [PATCH 38/83] Refactoring. Now requires C++17 --- .../math/special_functions/prime_sieve.hpp | 569 ++++-------------- .../performance/prime_sieve_performance.cpp | 130 ++-- test/test_prime_sieve.cpp | 198 ++---- 3 files changed, 184 insertions(+), 713 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index c32a583a14..b571119e2a 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -21,66 +21,55 @@ #include #include #include -#include - -#ifdef _MSVC_LANG -#if _MSVC_LANG >= 201703 // _MSVC_LANG == __cplusplus: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ -#include -#endif -#else -#if __cplusplus >= 201703 #include -#endif -#endif -namespace boost { namespace math { namespace detail +namespace boost::math { namespace detail { // https://mathworld.wolfram.com/SieveofEratosthenes.html // https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf -template -void 
linear_sieve(Z upper_bound, Container &c) +template +void linear_sieve(Integer upper_bound, Container &resultant_primes) { size_t least_divisors_size{static_cast(upper_bound + 1)}; - std::unique_ptr least_divisors{new Z[least_divisors_size]{0}}; + std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; - for (Z i{2}; i <= upper_bound; ++i) + for (Integer i{2}; i <= upper_bound; ++i) { if (least_divisors[i] == 0) { least_divisors[i] = i; - c.emplace_back(i); + resultant_primes.emplace_back(i); } for (size_t j{}; j < least_divisors_size; ++j) { - if (j >= c.size()) + if (j >= resultant_primes.size()) { break; } - else if (c[j] > least_divisors[i]) + else if (resultant_primes[j] > least_divisors[i]) { break; } - else if (i * c[j] > upper_bound) + else if (i * resultant_primes[j] > upper_bound) { break; } else { - least_divisors[i * c[j]] = c[j]; + least_divisors[i * resultant_primes[j]] = resultant_primes[j]; } } } } -//https://core.ac.uk/download/pdf/62440589.pdf -template -void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Container &c) +template +void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& primes, Container &resultant_primes) { - Z limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; size_t primes_size {}; auto it{primes.begin()}; @@ -94,390 +83,67 @@ void mask_sieve(Z lower_bound, Z upper_bound, const PrimeContainer& primes, Cont std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); - #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 // Enable use of thread pool, not SIMD compatible - std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime) - { - for(Z j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) + std::for_each(std::execution::par, 
primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime){ + for(Integer j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) { is_prime[j - lower_bound] = false; } }); - #else - - for(size_t i{}; i < primes_size; ++i) - { - Z current_prime{primes[i]}; - for(Z j {std::max(current_prime * current_prime, (lower_bound + current_prime - 1) / current_prime * current_prime)}; - j <= upper_bound; j += current_prime) - { - is_prime[j - lower_bound] = false; - } - } - #endif - if(lower_bound == 1) { is_prime[0] = false; } - for(Z i{lower_bound}; i <= upper_bound; ++i) + for(Integer i{lower_bound}; i <= upper_bound; ++i) { if(is_prime[i - lower_bound]) { - c.emplace_back(i); + resultant_primes.emplace_back(i); } } } -template -constexpr void prime_table(size_t min_index, Z upper_bound, Container &c) -{ - size_t current_index {min_index}; - Z current_prime {2}; - - while(current_prime < upper_bound) - { - c.emplace_back(current_prime); - ++current_index; - current_prime = prime(current_index); - } -} - -template -constexpr void prime_table(Z upper_bound, Container &c) +template +void mask_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) { - prime_table(0, upper_bound, c); -} - -// https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 -// Implementing S -template -class SetS -{ -private: - std::vector srec_; - -public: - SetS() = default; - - constexpr explicit SetS(const Z limit) - { - srec_.reserve(limit); - } - - constexpr Z next(const Z x) const noexcept - { - return *std::upper_bound(srec_.cbegin(), srec_.cend(), x); - } - - constexpr auto next_it(const Z x) const noexcept - { - return std::upper_bound(srec_.cbegin(), srec_.cend(), x); - } - - constexpr Z prev(const Z x) const noexcept - { - return *--std::lower_bound(srec_.cbegin(), srec_.cend(), x); - } - - constexpr auto prev_it(const Z x) const noexcept - { - return --std::lower_bound(srec_.cbegin(), 
srec_.cend(), x); - } - - constexpr Z max() const noexcept - { - return srec_.back(); - } - - void remove(const size_t current_index, const Z x) noexcept - { - auto index {std::lower_bound(srec_.cbegin() + current_index + 1, srec_.cend(), x)}; - - if(index != srec_.cend() && *index == x) - { - srec_.erase(index); - } - } - - void insert(const Z x) noexcept - { - srec_.emplace_back(x); - } - - constexpr Z operator[] (const Z index) const noexcept - { - return srec_[index]; - } - - constexpr size_t size() const noexcept - { - return srec_.size(); - } -}; - -template -constexpr bool is_prime(const Z n) -{ - if(n <= 1) - { - return false; - } - - for(Z factor{2}; factor * factor <= n; ++factor) - { - if(n % factor == 0) - { - return false; - } - } - - return true; -} - -//https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=56m34s -template -constexpr Z fast_mod(const Z input, const Z ceil) -{ - return input >= ceil ? input % ceil : input; -} - -//https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 -// 7 - A segmented Wheel Sieve [Pritchard '87] -template -void sub_linear_wheel_sieve(Z upper_bound, Container &resultant_primes) -{ - Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - resultant_primes.reserve(upper_bound / std::log(upper_bound)); - - // Step 1 - Compute the wheel - Z Mk {2}; - Z k {3}; - resultant_primes.emplace_back(static_cast(2)); - - if(upper_bound == 2) - { - return; - } - - while(Mk * k <= limit) - { - Mk *= k; - resultant_primes.emplace_back(k); - k += 2; - } - - // Initialze wheel wk - std::unique_ptr wk{new Z[Mk]{0}}; - - for(Z i{1}; i < Mk; ++i) - { - if(boost::math::gcd(i, Mk) == 1) - { - wk[i] = 1; - } - } - - // Part 3 of init wheel - wk[Mk - 1] = static_cast(2); - - for(Z x{Mk - 2}; x > 0; --x) - { - if(wk[x] == 1) - { - Z i{x + 1}; - while(wk[i] == 0) - { - ++i; - } - wk[x] = i - x; - } - } - - // Step 2 - Init set S to the kth wheel extended to n - Z d {1}; - SetS S(upper_bound); 
- - while(d < upper_bound) - { - S.insert(d); - d += wk[d % Mk]; - } - - // Step 3 - Run the linear algorithm starting with p := next(S, 1), which is p_k+1 - // next(S, 1) = S[1] - // 4 - A linear Algorithm - Z p {S[1]}; - size_t p_index {1}; - - while(p * p <= upper_bound) - { - //Remove Multiples - Z f {p}; - size_t f_index {p_index}; - - while(p * f <= upper_bound) - { - f = S[++f_index]; - } - - // Loop down through the values of f - while(f >= p) - { - S.remove(f_index, p * f); - f = S[--f_index]; - } + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector primes {}; + primes.reserve(limit / std::log(limit)); - p = S[++p_index]; - } + boost::math::detail::linear_sieve(limit, primes); - // Step 4 - Write S - {1} and the first k primes to resultant_primes - for(size_t i{1}; i < S.size(); ++i) - { - resultant_primes.emplace_back(S[i]); - } + boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, resultant_primes); } -// https://minds.wisconsin.edu/bitstream/handle/1793/59248/TR909.pdf?sequence=1 -// 8 - A segmented Wheel Sieve [Pritchard '83] -template -void linear_segmented_wheel_sieve(Z lower_bound, Z upper_bound, Container &resultant_primes) +template +constexpr void prime_table(size_t min_index, Integer upper_bound, Container &resultant_primes) { - const Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - const Z interval {upper_bound - lower_bound}; - - // Solves small cases for benchmark, but needs better remedy - if(lower_bound == 2 && upper_bound <= 10) - { - //boost::math::detail::sub_linear_wheel_sieve(upper_bound, resultant_primes); - boost::math::detail::linear_sieve(upper_bound, resultant_primes); - return; - } - - else if(lower_bound == 2 && upper_bound > 10) - { - //boost::math::detail::sub_linear_wheel_sieve(static_cast(10), resultant_primes); - boost::math::detail::linear_sieve(static_cast(10), resultant_primes); - 
boost::math::detail::linear_segmented_wheel_sieve(static_cast(11), upper_bound, resultant_primes); - return; - } - - // Pre-processing - // 1 - std::vector primes; - boost::math::detail::linear_sieve(limit, primes); - - Z Mk {2}; - Z k {3}; - Z k_index {1}; - - while(Mk * k <= limit) - { - Mk *= k; - ++k_index; - k = primes[k_index]; - } - - // Initialze wheel wk - std::unique_ptr wk{new Z[Mk]{0}}; - - for(Z i{1}; i < Mk; ++i) - { - if(boost::math::gcd(i, Mk) == 1) - { - wk[i] = 1; - } - } - - // Part 3 of init wheel - wk[Mk - 1] = static_cast(2); - - for(Z x{Mk - 2}; x > 0; --x) - { - if(wk[x] == 1) - { - Z i{x + 1}; - while(wk[i] == 0) - { - ++i; - } - wk[x] = i - x; - } - } - - // Pre-processing step 2 - // Done as part of step 1 for performance improvement - - // Pre-processing step 3 - size_t factor_size {primes.size() - k_index}; - std::unique_ptr factor{new Z [factor_size]}; - - for(size_t i{static_cast(k_index)}; i < primes.size(); ++i) - { - factor[i - k_index] = primes[i]; - } - - // Sieving the interval - // Step 1 - std::unique_ptr mark {new bool [static_cast(interval + 1)]}; - - #pragma omp parallel for shared(mark) - for(Z x = lower_bound; x <= upper_bound; ++x) - { - if(boost::math::gcd(Mk, x) == 1) - { - mark[x - lower_bound] = 1; - } - } - - // Step 2 - #pragma omp parallel for shared(mark) - for(Z p = 0; p < static_cast(factor_size); ++p) - { - Z f {factor[p]}; - Z current_prime{factor[p]}; - while(f * current_prime <= upper_bound) - { - if(f * current_prime > lower_bound) - { - mark[f * current_prime - lower_bound] = 0; - } - - f += wk[fast_mod(f, Mk)]; - } - factor[p] = f; - } + size_t current_index {min_index}; + Integer current_prime {2}; - for(Z i {0}; i < interval; ++i) + while(current_prime < upper_bound) { - if(mark[i] == 1) - { - resultant_primes.emplace_back(i + lower_bound); - } + resultant_primes.emplace_back(current_prime); + ++current_index; + current_prime = prime(current_index); } } -template -void mask_sieve(Z lower_bound, Z 
upper_bound, Container &c) +template +constexpr void prime_table(Integer upper_bound, Container &resultant_primes) { - Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector primes {}; - primes.reserve(limit / std::log(limit)); - - boost::math::detail::linear_sieve(limit, primes); - - boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, c); + prime_table(0, upper_bound, resultant_primes); } -template -void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes, Container &c) +template +void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesContainer &primes, Container &resultant_primes) { - const Z L1_SIZE {32648}; - const Z interval {L1_SIZE * 4}; - Z current_lower_bound{lower_bound}; - Z current_upper_bound{current_lower_bound + interval}; + const Integer L1_SIZE {32648}; + const Integer interval {L1_SIZE * 4}; + Integer current_lower_bound{lower_bound}; + Integer current_upper_bound{current_lower_bound + interval}; if(current_upper_bound > upper_bound) { @@ -486,11 +152,11 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; - std::vector> prime_vectors(ranges + 1); + std::vector> prime_vectors(ranges + 1); std::vector> future_manager(ranges); - Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - - current_lower_bound / std::log(static_cast(current_lower_bound)))}; + Integer primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + current_lower_bound / std::log(static_cast(current_lower_bound)))}; for(size_t i {}; i < ranges; ++i) { @@ -519,15 +185,15 @@ void segmented_sieve(Z lower_bound, Z upper_bound, const PrimesContainer &primes for(auto &v : prime_vectors) { - c.insert(c.end(), v.begin(), v.end()); + resultant_primes.insert(resultant_primes.end(), v.begin(), v.end()); } } -template 
-void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) +template +void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) { - Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector primes {}; + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector primes {}; primes.reserve(limit / std::log(limit)); // Prepare for max value so you do not have to calculate this again @@ -538,12 +204,11 @@ void segmented_sieve(Z lower_bound, Z upper_bound, Container &c) else { - boost::math::detail::mask_sieve(static_cast(2), limit, primes); + boost::math::detail::mask_sieve(static_cast(2), limit, primes); } - boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, c); + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); } -} // End namespace detail template struct IsVector @@ -561,20 +226,22 @@ struct IsVector> template constexpr bool is_vector_v = IsVector::value; +} // End namespace detail -#if __cplusplus >= 201703 || _MSVC_LANG >= 201703 -//template -//auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) -template -void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) +template +void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) { - static_assert(std::is_integral::value, "No primes for floating point types"); - BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); + static_assert(std::is_integral::value, "No primes for floating point types"); + BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); + + if(upper_bound == 2) + { + return; + } --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - //std::vector primes {}; - if(is_vector_v) + if constexpr (detail::is_vector_v) { primes.reserve(upper_bound / 
std::log(static_cast(upper_bound))); } @@ -587,7 +254,7 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) else if (std::is_same_v) { - boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); + boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); } else @@ -600,7 +267,7 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) processor_count = 2; } - std::vector small_primes {}; + std::vector small_primes {}; small_primes.reserve(1028); // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads @@ -610,12 +277,12 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) if(upper_bound <= 104729) { std::thread t1([&small_primes]{ - boost::math::detail::prime_table(static_cast(32768), small_primes); - }); + boost::math::detail::prime_table(static_cast(32768), small_primes); + }); std::thread t2([upper_bound, &primes]{ - boost::math::detail::prime_table(3512, upper_bound, primes); - }); + boost::math::detail::prime_table(3512, upper_bound, primes); + }); t1.join(); t2.join(); @@ -623,11 +290,11 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) else { - std::thread t1([&small_primes] { - boost::math::detail::prime_table(static_cast(104729), small_primes); + std::thread t1([&small_primes]{ + boost::math::detail::prime_table(static_cast(104729), small_primes); }); - std::thread t2([upper_bound, &primes] { - boost::math::detail::segmented_sieve(static_cast(104729), upper_bound, primes); + std::thread t2([upper_bound, &primes]{ + boost::math::detail::segmented_sieve(static_cast(104729), upper_bound, primes); }); t1.join(); @@ -640,8 +307,8 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) else { //Pre-generate all of the primes so that each thread does not have to - Z limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - 
std::vector pre_generated_primes {}; + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector pre_generated_primes {}; pre_generated_primes.reserve(limit / std::log(limit)); if(limit <= 32768) @@ -652,12 +319,12 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) else if(limit <= 104729) { std::thread t1([&small_primes]{ - boost::math::detail::prime_table(static_cast(32768), small_primes); - }); + boost::math::detail::prime_table(static_cast(32768), small_primes); + }); std::thread t2([limit, &pre_generated_primes]{ - boost::math::detail::prime_table(3512, limit, pre_generated_primes); - }); + boost::math::detail::prime_table(3512, limit, pre_generated_primes); + }); t1.join(); t2.join(); @@ -667,10 +334,10 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) else { std::thread t1([&small_primes] { - boost::math::detail::prime_table(static_cast(104729), small_primes); + boost::math::detail::prime_table(static_cast(104729), small_primes); }); std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(104729), limit, pre_generated_primes); + boost::math::detail::segmented_sieve(static_cast(104729), limit, pre_generated_primes); }); t1.join(); @@ -679,11 +346,11 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) } std::vector thread_manager {}; - std::vector> prime_vectors(processor_count); - const Z range_per_thread = upper_bound / (processor_count); - Z current_lower_bound {limit + 1}; - Z current_upper_bound {current_lower_bound + range_per_thread}; - Z primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + std::vector> prime_vectors(processor_count); + const Integer range_per_thread = upper_bound / (processor_count); + Integer current_lower_bound {limit + 1}; + Integer current_upper_bound {current_lower_bound + range_per_thread}; + Integer primes_in_range 
{static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; for(size_t i{}; i < processor_count - 1; ++i) @@ -699,7 +366,7 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) current_lower_bound = current_upper_bound; current_upper_bound += range_per_thread; - primes_in_range = static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - + primes_in_range = static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound))); } @@ -722,60 +389,24 @@ void prime_sieve(ExecutionPolicy&& policy, Z upper_bound, Container &primes) } } } - - //return std::move(primes.begin(), primes.end(), output); } -template -auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) +template +void prime_sieve(Integer upper_bound, Container &primes) { - --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - std::vector primes {}; - primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - - boost::math::prime_sieve(policy, upper_bound, std::back_inserter(primes)); - - auto it{primes.begin()}; - while(*it < lower_bound && it != primes.end()) - { - ++it; - } - - return std::move(it, primes.end(), output); + prime_sieve(std::execution::par, upper_bound, primes); } -#endif //__cplusplus >= 201703 -template -auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) -{ - static_assert(std::is_integral::value, "No primes for floating point types"); - BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - - --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - std::vector primes{}; - primes.reserve(upper_bound / std::log(upper_bound)); - - if (upper_bound <= 4096) - { - 
boost::math::detail::linear_sieve(upper_bound, primes); - } - else +template +void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) +{ + if constexpr (detail::is_vector_v) { - boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); + primes.reserve(upper_bound / std::log(static_cast(upper_bound))); } - return std::move(primes.begin(), primes.end(), output); -} - -template -auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) -{ - --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - std::vector primes{}; - primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - - boost::math::prime_sieve(upper_bound, std::back_inserter(primes)); + boost::math::prime_sieve(policy, upper_bound, primes); auto it{primes.begin()}; while(*it < lower_bound && it != primes.end()) @@ -783,8 +414,14 @@ auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltyp ++it; } - return std::move(it, primes.end(), output); + primes.erase(primes.begin(), it); +} + +template +inline void prime_range(Integer lower_bound, Integer upper_bound, Container &primes) +{ + prime_range(std::execution::par, lower_bound, upper_bound, primes); +} } -}} #endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 01afbd5f1b..47223c54fe 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -5,152 +5,106 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include "../../include/boost/math/special_functions/prime_sieve.hpp" -//#include +#include #include #include #include // Individual Algos -// Linear -template -inline auto linear_sieve_helper(Z upper_bound, std::vector c) -> std::vector +template +inline auto 
linear_sieve_helper(Integer upper_bound, std::vector primes) -> std::vector { - boost::math::detail::linear_sieve(upper_bound, c); - return c; + boost::math::detail::linear_sieve(upper_bound, primes); + return primes; } -template +template void linear_sieve(benchmark::State& state) { - Z upper = static_cast(state.range(0)); + Integer upper = static_cast(state.range(0)); for(auto _ : state) { - std::vector primes; + std::vector primes; benchmark::DoNotOptimize(linear_sieve_helper(upper, primes)); } state.SetComplexityN(state.range(0)); } -template -inline auto sub_linear_sieve_helper(Z upper_bound, std::vector c) -> std::vector -{ - boost::math::detail::sub_linear_wheel_sieve(upper_bound, c); - return c; -} - -template -void sub_linear_sieve(benchmark::State& state) -{ - Z upper = static_cast(state.range(0)); - for(auto _ : state) - { - std::vector primes; - benchmark::DoNotOptimize(sub_linear_sieve_helper(upper, primes)); - } - state.SetComplexityN(state.range(0)); -} - -// Segmented -template -inline auto mask_sieve_helper(Z lower_bound, Z upper_bound, std::vector c) -> std::vector +template +inline auto mask_sieve_helper(Integer lower_bound, Integer upper_bound, std::vector primes) -> std::vector { - boost::math::detail::mask_sieve(lower_bound, upper_bound, c); - return c; + boost::math::detail::mask_sieve(lower_bound, upper_bound, primes); + return primes; } -template +template void mask_sieve(benchmark::State& state) { - Z lower = static_cast(2); - Z upper = static_cast(state.range(0)); + Integer lower = static_cast(2); + Integer upper = static_cast(state.range(0)); for(auto _ : state) { - std::vector primes; + std::vector primes; benchmark::DoNotOptimize(mask_sieve_helper(lower, upper, primes)); } state.SetComplexityN(state.range(0)); } -template -inline auto segmented_wheel_sieve_helper(Z lower_bound, Z upper_bound, std::vector c) -> std::vector -{ - boost::math::detail::linear_segmented_wheel_sieve(lower_bound, upper_bound, c); - return c; -} - -template 
-void segmented_wheel_sieve(benchmark::State& state) +template +inline auto prime_sieve_helper(ExecuitionPolicy policy, Integer upper, Container primes) { - Z lower = static_cast(2); - Z upper = static_cast(state.range(0)); - for(auto _ : state) - { - std::vector primes; - benchmark::DoNotOptimize(segmented_wheel_sieve_helper(lower, upper, primes)); - } - state.SetComplexityN(state.range(0)); + boost::math::prime_sieve(policy, upper, primes); + return primes; } // Complete Implementations -template +template void prime_sieve(benchmark::State& state) { - Z upper = static_cast(state.range(0)); + Integer upper = static_cast(state.range(0)); for(auto _ : state) { - std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_sieve(std::execution::par, upper, std::back_inserter(primes))); + std::vector primes; + benchmark::DoNotOptimize(prime_sieve_helper(std::execution::par, upper, primes)); } state.SetComplexityN(state.range(0)); } -template -void prime_sieve_partial_range(benchmark::State& state) +template +inline auto kimwalish_primes_helper(Integer upper, std::vector primes) -> std::vector { - Z upper = static_cast(state.range(0)); - Z lower = static_cast(state.range(0)) > 2 ? 
static_cast(state.range(0)) : 2; - for(auto _ : state) - { - std::vector primes; - benchmark::DoNotOptimize(boost::math::prime_range(std::execution::par, lower, upper, std::back_inserter(primes))); - } - state.SetComplexityN(state.range(0)); + primesieve::generate_primes(upper, &primes); + return primes; } -template +template void kimwalish_primes(benchmark::State& state) { - Z upper = static_cast(state.range(0)); + Integer upper = static_cast(state.range(0)); for (auto _ : state) { - std::vector primes; - primesieve::generate_primes(upper, &primes); - benchmark::DoNotOptimize(primes.back()); + std::vector primes; + benchmark::DoNotOptimize(kimwalish_primes_helper(upper, primes)); } state.SetComplexityN(state.range(0)); } -// Individual Algos - +// Invidiual Implementations // Linear +BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(sub_linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(); - +BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Segmented +BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -BENCHMARK_TEMPLATE(segmented_wheel_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(); +BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -/* -// Complete Implementations -BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); +// Complete Implemenations +BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 
22)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve_partial_range, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity()->UseRealTime(); -*/ +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); + BENCHMARK_MAIN(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 6e7cb213a7..8b3d774f41 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -5,243 +5,130 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#include "../include/boost/math/special_functions/prime_sieve.hpp" -//#include +#include #include #include #include #include #include -template +template void test_prime_sieve() { - std::vector primes; - Z ref {168}; // Calculated with wolfram-alpha + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha // Does the function work with a vector - boost::math::prime_sieve(1000, std::back_inserter(primes)); + boost::math::prime_sieve(1000, primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does the sequential policy work + primes.clear(); + 
boost::math::prime_sieve(std::execution::seq, 1000, primes); BOOST_TEST_EQ(primes.size(), ref); // Tests for correctness // 2 primes.clear(); - boost::math::prime_sieve(2, std::back_inserter(primes)); + boost::math::prime_sieve(2, primes); BOOST_TEST_EQ(primes.size(), 0); // 100 primes.clear(); - boost::math::prime_sieve(100, std::back_inserter(primes)); + boost::math::prime_sieve(100, primes); BOOST_TEST_EQ(primes.size(), 25); // 10'000 primes.clear(); - boost::math::prime_sieve(10000, std::back_inserter(primes)); + boost::math::prime_sieve(10000, primes); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(100000, std::back_inserter(primes)); + boost::math::prime_sieve(100000, primes); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 primes.clear(); - boost::math::prime_sieve(1000000, std::back_inserter(primes)); + boost::math::prime_sieve(1000000, primes); BOOST_TEST_EQ(primes.size(), 78498); // Does the function work with a list? - std::list l_primes; - boost::math::prime_sieve(1000, std::back_inserter(l_primes)); + std::list l_primes; + boost::math::prime_sieve(1000, l_primes); BOOST_TEST_EQ(l_primes.size(), ref); // Does the function work with a deque? 
- std::deque d_primes; - boost::math::prime_sieve(1000, std::back_inserter(d_primes)); + std::deque d_primes; + boost::math::prime_sieve(1000, d_primes); BOOST_TEST_EQ(d_primes.size(), ref); } -template +template void test_prime_range() { - std::vector primes; - Z ref {168}; // Calculated with wolfram-alpha + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha // Does the upper and lower bound call work - boost::math::prime_range(2, 1000, std::back_inserter(primes)); + boost::math::prime_range(static_cast(2), static_cast(1000), primes); BOOST_TEST_EQ(primes.size(), ref); // Does the upper bound call work primes.clear(); - boost::math::prime_range(2, 1000, std::back_inserter(primes)); + boost::math::prime_range(static_cast(2), static_cast(1000), primes); BOOST_TEST_EQ(primes.size(), ref); // Does it work with a deque? - std::deque d_primes; - boost::math::prime_range(2, 1000, std::back_inserter(d_primes)); + std::deque d_primes; + boost::math::prime_range(static_cast(2), static_cast(1000), d_primes); BOOST_TEST_EQ(d_primes.size(), ref); // Does it work with a list? - std::list l_primes; - boost::math::prime_range(2, 1000, std::front_inserter(l_primes)); + std::list l_primes; + boost::math::prime_range(static_cast(2), static_cast(1000), l_primes); BOOST_TEST_EQ(l_primes.size(), ref); // Does the lower bound change the results? 
ref = 143; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(100, 1000, std::back_inserter(primes)); + boost::math::prime_range(static_cast(100), static_cast(1000), primes); BOOST_TEST_EQ(primes.size(), ref); // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(2, 1000000, std::back_inserter(primes)); + boost::math::prime_range(static_cast(2), static_cast(1000000), primes); BOOST_TEST_EQ(primes.size(), ref); } -template -void test_sub_linear_prime_sieve() -{ - std::vector primes; - - // Does the function work with a vector - boost::math::detail::sub_linear_wheel_sieve(100, primes); - BOOST_TEST_EQ(primes.size(), 25); - - // 2 - primes.clear(); - boost::math::detail::sub_linear_wheel_sieve(2, primes); - BOOST_TEST_EQ(primes.size(), 1); - - // 1'000 - primes.clear(); - boost::math::detail::sub_linear_wheel_sieve(1'000, primes); - BOOST_TEST_EQ(primes.size(), 168); - - // 10'000 - primes.clear(); - boost::math::detail::sub_linear_wheel_sieve(10'000, primes); - BOOST_TEST_EQ(primes.size(), 1229); -} - -template -void test_linear_segmented_sieve() -{ - std::vector primes; - - // 2 - 20: - boost::math::detail::linear_segmented_wheel_sieve(2, 20, primes); - BOOST_TEST_EQ(primes.size(), 8); - - // 10 - 20: Tests only step 1 - primes.clear(); - boost::math::detail::linear_segmented_wheel_sieve(10, 20, primes); - BOOST_TEST_EQ(primes.size(), 4); - - // 100 - 200: Tests step 2 - primes.clear(); - boost::math::detail::linear_segmented_wheel_sieve(100, 200, primes); - BOOST_TEST_EQ(primes.size(), 21); - - // 100 - 1'000 - primes.clear(); - boost::math::detail::linear_segmented_wheel_sieve(100, 1'000, primes); - BOOST_TEST_EQ(primes.size(), 143); - - // 1'000 - 10'000 - primes.clear(); - boost::math::detail::linear_segmented_wheel_sieve(1'000, 10'000, primes); - BOOST_TEST_EQ(primes.size(), 1061); - - // 2 - 10'000 - primes.clear(); - primes.clear(); - 
boost::math::detail::linear_segmented_wheel_sieve(2, 10'000, primes); - BOOST_TEST_EQ(primes.size(), 1229); -} - -template +template void test_prime_sieve_overflow() { - std::vector primes; + std::vector primes; // Should die with call to BOOST_ASSERT - boost::math::prime_sieve(static_cast(2), static_cast(std::numeric_limits::max()), - std::back_inserter(primes)); + boost::math::prime_sieve(static_cast(2), static_cast(std::numeric_limits::max()), primes); } -#if __cplusplus >= 201703 || _MSVC_LANG >= 201703 -template -void test_par_prime_sieve() -{ - std::vector primes; - Z ref {168}; // Calculated with wolfram-alpha - - // Does the function work with a vector - boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), ref); - - // Tests for correctness - // 100 - primes.clear(); - boost::math::prime_sieve(std::execution::par, 100, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), 25); - - // 10'000 - primes.clear(); - boost::math::prime_sieve(std::execution::par, 10000, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), 1229); - - // 100'000 - primes.clear(); - boost::math::prime_sieve(std::execution::par, 100000, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), 9592); - - // 1'000'000 - primes.clear(); - boost::math::prime_sieve(std::execution::par, 1000000, std::back_inserter(primes)); - BOOST_TEST_EQ(primes.size(), 78498); - - // Does the function work with a list? - std::list l_primes; - boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(l_primes)); - BOOST_TEST_EQ(l_primes.size(), ref); - - // Does the function work with a deque? 
- std::deque d_primes; - boost::math::prime_sieve(std::execution::par, 1000, std::back_inserter(d_primes)); - BOOST_TEST_EQ(d_primes.size(), ref); -} - -template +template void test_par_prime_sieve_large() { - std::vector primes; - Z ref {1077871}; // Calculated with wolfram-alpha + std::vector primes; + Integer ref {1077871}; // Calculated with wolfram-alpha // Force the sieve into the multi-threading section - boost::math::prime_sieve(std::execution::par, 16777217, std::back_inserter(primes)); + boost::math::prime_sieve(static_cast(16777217), primes); BOOST_TEST_EQ(primes.size(), ref); } -#endif //__cplusplus >= 201703 int main() { - test_sub_linear_prime_sieve(); - test_sub_linear_prime_sieve(); - test_sub_linear_prime_sieve(); - test_sub_linear_prime_sieve(); - - test_linear_segmented_sieve(); - test_linear_segmented_sieve(); - test_linear_segmented_sieve(); - test_linear_segmented_sieve(); - - /* test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); - + test_prime_range(); test_prime_range(); test_prime_range(); @@ -251,14 +138,7 @@ int main() test_prime_sieve(); - #if __cplusplus >= 201703 || _MSVC_LANG >= 201703 - test_par_prime_sieve(); - test_par_prime_sieve(); - test_par_prime_sieve(); - test_par_prime_sieve(); - //test_par_prime_sieve_large(); - #endif - */ + boost::report_errors(); } From 8e2e29a32f6f48bed4185959e8547adcad65005c Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 3 Aug 2020 19:03:18 -0500 Subject: [PATCH 39/83] Add segmented bit sieve [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index b571119e2a..975631b992 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -9,7 +9,7 @@ #define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP #include 
-#include +#include #include #include #include @@ -66,6 +66,83 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) } } +// WIP - Not segmented properly between lower bound and upper bound if lower bound > segment_size +template +void segmented_bit_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer &primes, Container &resultant_primes) +{ + constexpr Integer L1D_CACHE_SIZE {32768}; + const Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound))))}; + const Integer segment_size = std::max(limit, L1D_CACHE_SIZE); + + Integer n {lower_bound}; + if(n % 2 == 0) + { + ++n; + } + + boost::dynamic_bitset<> sieve(segment_size); + std::vector current_primes; + std::vector multiples; + auto prime_it {++primes.begin()}; // start at 3 + + for (Integer low {0}; low <= upper_bound; low += segment_size) + { + sieve.set(); + + // current segment = [low, high] + Integer high = low + segment_size - 1; + high = std::min(high, upper_bound); + + while((*prime_it * *prime_it) <= high && prime_it != primes.end()) + { + current_primes.emplace_back(*prime_it); + multiples.emplace_back((*prime_it * *prime_it) - low); + ++prime_it; + } + + // sieve the current segment + for (size_t i {}; i < current_primes.size(); i++) + { + Integer j {multiples[i]}; + for (Integer k {current_primes[i] * 2}; j < segment_size; j += k) + { + sieve[j] = 0; + } + multiples[i] = j - segment_size; + } + + for (; n <= high; n += 2) + { + if (sieve[n - low]) + { + if(n > lower_bound) + { + resultant_primes.emplace_back(n); + } + } + } + } + + if(lower_bound == 2) + { + resultant_primes.insert(resultant_primes.begin(), static_cast(2)); + } +} + + +template +void segmented_bit_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) +{ + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector primes {}; + primes.reserve(limit / std::log(limit)); + + boost::math::detail::linear_sieve(limit, primes); + + 
boost::math::detail::segmented_bit_sieve(lower_bound, upper_bound, primes, resultant_primes); +} + + template void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& primes, Container &resultant_primes) { From 1a24f164d159b8d37a619120b6e4e4331c293c8d Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 5 Aug 2020 20:24:01 -0500 Subject: [PATCH 40/83] Removed segmented bit sieve and excess headers [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 81 +------------------ 1 file changed, 1 insertion(+), 80 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 975631b992..c22fbacd55 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -9,7 +9,6 @@ #define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP #include -#include #include #include #include @@ -17,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -66,83 +64,6 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) } } -// WIP - Not segmented properly between lower bound and upper bound if lower bound > segment_size -template -void segmented_bit_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer &primes, Container &resultant_primes) -{ - constexpr Integer L1D_CACHE_SIZE {32768}; - const Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound))))}; - const Integer segment_size = std::max(limit, L1D_CACHE_SIZE); - - Integer n {lower_bound}; - if(n % 2 == 0) - { - ++n; - } - - boost::dynamic_bitset<> sieve(segment_size); - std::vector current_primes; - std::vector multiples; - auto prime_it {++primes.begin()}; // start at 3 - - for (Integer low {0}; low <= upper_bound; low += segment_size) - { - sieve.set(); - - // current segment = [low, high] - Integer high = low + segment_size - 1; - high = std::min(high, upper_bound); - - while((*prime_it * *prime_it) <= 
high && prime_it != primes.end()) - { - current_primes.emplace_back(*prime_it); - multiples.emplace_back((*prime_it * *prime_it) - low); - ++prime_it; - } - - // sieve the current segment - for (size_t i {}; i < current_primes.size(); i++) - { - Integer j {multiples[i]}; - for (Integer k {current_primes[i] * 2}; j < segment_size; j += k) - { - sieve[j] = 0; - } - multiples[i] = j - segment_size; - } - - for (; n <= high; n += 2) - { - if (sieve[n - low]) - { - if(n > lower_bound) - { - resultant_primes.emplace_back(n); - } - } - } - } - - if(lower_bound == 2) - { - resultant_primes.insert(resultant_primes.begin(), static_cast(2)); - } -} - - -template -void segmented_bit_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) -{ - Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector primes {}; - primes.reserve(limit / std::log(limit)); - - boost::math::detail::linear_sieve(limit, primes); - - boost::math::detail::segmented_bit_sieve(lower_bound, upper_bound, primes, resultant_primes); -} - - template void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& primes, Container &resultant_primes) { @@ -159,7 +80,7 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& const size_t n {static_cast(upper_bound - lower_bound + 1)}; std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); - + // Enable use of thread pool, not SIMD compatible std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime){ for(Integer j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) From c63f1f13bcda962a1d1f40776017aa0d187854bc Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 14 Aug 2020 23:06:08 -0500 Subject: [PATCH 41/83] Added wheel class [WIP][CI SKIP] --- .../math/special_functions/prime_wheel.hpp | 210 ++++++++++++++++++ 1 file 
changed, 210 insertions(+) create mode 100644 include/boost/math/special_functions/prime_wheel.hpp diff --git a/include/boost/math/special_functions/prime_wheel.hpp b/include/boost/math/special_functions/prime_wheel.hpp new file mode 100644 index 0000000000..9cc47d7ddf --- /dev/null +++ b/include/boost/math/special_functions/prime_wheel.hpp @@ -0,0 +1,210 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP + +#include +#include +#include +#include + +namespace boost::math::detail +{ +template +class Wheel +{ +private: + struct Wheelrec + { + std::int_fast32_t rp; + std::int_fast32_t dist; + std::int_fast32_t pos; + std::int_fast32_t inv; + }; + + std::unique_ptr W_; + Integer M_; + Integer k_; + Integer phi_; + + static constexpr std::array P_ {2, 3, 5, 7, 11, 13, 17, 19}; + + void build(Integer korsize); + +public: + Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; + explicit Wheel(Integer korsize) { build(korsize); } + explicit Wheel(const Wheel &x) { build(x.K()); } + ~Wheel() = default; + + constexpr bool operator!() const noexcept { return W_ == nullptr; } + constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } + const Wheel& operator=(const Wheel &x) + { + if(this != &x) + { + build(x.K()); + } + return *this; + } + + constexpr Integer Size() const noexcept { return M_; } + constexpr int K() const noexcept { return k_; } + constexpr Integer Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } + constexpr Integer MakeRP(const Integer i) const noexcept + { + if(W[i % M].rp) + { + return i; + } + return Next(i); + } + constexpr Integer 
Prev(const Integer i) const noexcept { return i - W_[(M_ - (i % M_)) % M_].dist; } + constexpr Integer Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr Integer Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + + void Print(); +}; + +template +void Wheel::build(Integer korsize) +{ + // Calculate k_ and M_ + if(korsize >= 10) + { + --korsize; + for(k_ = 0; korsize > 0; ++k_) + { + korsize /= P_[k_]; + } + } + else + { + k_ = korsize; + } + + Integer i {0}; + Integer dist {0}; + Integer pos {1}; + + for(M_ = 1; i < k_; ++i) + { + M_ *= P_[i]; + } + + W_ = std::make_unique(M_); + + // Compute the RP field + for(i = 0; i < M_; ++i) + { + W_[i].rp = 1; + } + + for(i = 0; i < k_; ++i) + { + for(Integer j {0}; j < M_; j += P_[i]) + { + W_[j].rp = 0; + } + } + + // Compute the dist field + W_[M_- 1].dist = 2; + for(i = M_ - 2; i >= 0; --i) + { + W_[i].dist = ++dist; + if(W_[i].rp) + { + dist = 0; + } + } + + // Copute pos and inv fields + for(i = 0; i < M_; ++i) + { + W_[i].inv = 0; + if(W_[i].rp) + { + W_[pos].inv = i; + W_[i].pos = pos++; + } + else + { + W_[i].pos = 0; + } + + } + + W_[0].inv = -1; + phi_ = W_[M_- 1].pos; +} + +template +void Wheel::Print() +{ + std::int_fast32_t i {}; + std::cout << "Wheel size = " << this->Size() + << "\nk = " << this->K() + << "\nphi(M) = " << this->Phi() << std::endl; + + // Verify size + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << i; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nRP Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].rp; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nDist Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].dist; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nPos Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << 
std::setw(3) << W_[i].pos; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nInv Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << W_[i].inv; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + std::cout << std::endl; +} +} + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP From b7d42564f5a1b37b56aa4fb583d0c7fab40ab497 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 10:59:18 -0500 Subject: [PATCH 42/83] Added fixed mod 210 wheel [WIP][CI SKIP] --- .../math/special_functions/prime_wheel.hpp | 97 +++++++++++++++++-- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/include/boost/math/special_functions/prime_wheel.hpp b/include/boost/math/special_functions/prime_wheel.hpp index 9cc47d7ddf..5c676f5e88 100644 --- a/include/boost/math/special_functions/prime_wheel.hpp +++ b/include/boost/math/special_functions/prime_wheel.hpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace boost::math::detail { @@ -40,7 +41,6 @@ class Wheel Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; explicit Wheel(Integer korsize) { build(korsize); } explicit Wheel(const Wheel &x) { build(x.K()); } - ~Wheel() = default; constexpr bool operator!() const noexcept { return W_ == nullptr; } constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } @@ -54,7 +54,7 @@ class Wheel } constexpr Integer Size() const noexcept { return M_; } - constexpr int K() const noexcept { return k_; } + constexpr Integer K() const noexcept { return k_; } constexpr Integer Phi() const noexcept { return phi_; } constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } @@ -157,7 +157,7 @@ void Wheel::Print() // Verify size for(i = 0; i < this->Size(); ++i) { - std::cout << std::setw(4) << i; + std::cout << std::setw(4) << i << ','; if(i % 25 == 24) { std::cout << std::endl; @@ -167,7 +167,7 @@ void Wheel::Print() std::cout << "\n\nRP Field\n"; 
for(i = 0; i < this->Size(); ++i) { - std::cout << std::setw(3) << W_[i].rp; + std::cout << std::setw(3) << W_[i].rp << ','; if(i % 25 == 24) { std::cout << std::endl; @@ -177,7 +177,7 @@ void Wheel::Print() std::cout << "\n\nDist Field\n"; for(i = 0; i < this->Size(); ++i) { - std::cout << std::setw(3) << W_[i].dist; + std::cout << std::setw(3) << W_[i].dist << ','; if(i % 25 == 24) { std::cout << std::endl; @@ -187,7 +187,7 @@ void Wheel::Print() std::cout << "\n\nPos Field\n"; for(i = 0; i < this->Size(); ++i) { - std::cout << std::setw(3) << W_[i].pos; + std::cout << std::setw(3) << W_[i].pos << ','; if(i % 25 == 24) { std::cout << std::endl; @@ -197,7 +197,7 @@ void Wheel::Print() std::cout << "\n\nInv Field\n"; for(i = 0; i < this->Size(); ++i) { - std::cout << std::setw(4) << W_[i].inv; + std::cout << std::setw(4) << W_[i].inv << ','; if(i % 25 == 24) { std::cout << std::endl; @@ -205,6 +205,89 @@ void Wheel::Print() } std::cout << std::endl; } + +// Pre-computed MOD 210 wheel +template +class MOD210Wheel final +{ +private: + static constexpr auto M_ {210}; + static constexpr auto k_ {4}; + static constexpr auto phi_ {28}; + + static constexpr std::array rp_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 + }; + + static constexpr std::array inv_ + { + -1, 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, + 107, 109, 
113, 121, 127, 131, 137, 139, 143, 149, 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + static constexpr std::array pos_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 5, 0, 0, 0, 6, 0, + 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, 0, 12, 0, 0, + 0, 0, 0, 13, 0, 0, 0, 0, 0, 14, 0, 15, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 23, 0, 24, 0, 0, 0, 25, 0, 26, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 30, 0, 0, 0, 0, 0, 31, 0, 32, 0, 0, 0, 33, 0, 0, 0, 0, 0, 34, + 0, 35, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 0, 0, 38, 0, 39, 0, 0, 0, 40, 0, + 0, 0, 0, 0, 41, 0, 42, 0, 0, 0, 0, 0, 43, 0, 0, 0, 44, 0, 45, 0, 0, 0, 46, 0, 47, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48 + }; + + static constexpr std::array dist_ + { + 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 6, 5, + 4, 3, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, + 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, + 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, + 1, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 
10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 2 + }; + +public: + constexpr MOD210Wheel() = default; + ~MOD210Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr auto Next(const Integer i) const noexcept { return i + dist_[i % M_]; } + constexpr auto MakeRP(const Integer i) const noexcept + { + if(rp_[i % M]) + { + return i; + } + return Next(i); + } + constexpr auto Prev(const Integer i) const noexcept { return i - dist_[(M_ - (i % M_)) % M_]; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } +}; } #endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP From fbc38c8bf6344f0baf6b879ca497ea0ed89bc716 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 11:42:20 -0500 Subject: [PATCH 43/83] New segmented sieve algorithm [WIP][CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 include/boost/math/special_functions/interval_sieve.hpp diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp new file mode 100644 index 0000000000..2f69765b53 --- /dev/null +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -0,0 +1,286 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail +{ +#ifdef __SIZEOF_INT128__ // Defined in GCC 4.6+, clang, intel. MSVC does not define. +using int_128t = __int128; // One machine word smaller than the boost equivalent +#else +using int_128t = boost::multiprecision::int128_t; +#endif + +template +class IntervalSieve +{ +private: + // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) + // This table is from page 421, table 16.3.1, Hugh Williams' book + // Last 8 entries added from Wooding's MS thesis, 2003, pp. 92-93 + struct pssentry + { + static constexpr std::size_t len {49}; + static constexpr std::array prime + { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 67, 71, 79, 83, 101, 103, 107, 113, 131, 149, 157, + 173, 181, 193, 197, 211, 227, 229, 233, 239, 241, 251, 257, 263, 277, 281, 283, 293, 311, 331, 337, 347, 353 + }; + + static constexpr std::array ps + { + 73, 241, 1'009, 2'641, 8'089, 18'001, 53'881, 87'481, 117'049, 515'761, 1'083'289, 3'206'641, 3'818'929, + 9'257'329, 22'000'801, 48'473'881, 175'244'281, 427'733'329, 898'716'289, 2'805'544'681, 10'310'263'441, + 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, + 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, + 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689, + 69'848'288'320'900'186'969, 208'936'365'799'044'975'961, 533'552'663'339'828'203'681, 936'664'079'266'714'697'089, + 2'142'202'860'370'269'916'129, 
13'649'154'491'558'298'803'281, 34'594'858'801'670'127'778'801, + 99'492'945'930'479'213'334'049, 295'363'187'400'900'310'880'401 + }; + }; + + static constexpr pssentry pss_{}; + boost::math::detail::MOD210Wheel w_; + std::size_t tdlimit_; + + Integer delta_; + Integer left_; + Integer right_; + + // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations + boost::dynamic_bitset<> b_; + + const Container& primes_; + std::int_fast64_t plimit_; + + void Settdlimit() noexcept; + void SeiveLength(Integer d) noexcept; + void Sieve() noexcept; + bool Psstest(std::size_t pos) noexcept; + void Psstestall() noexcept; + void WriteOutput(Container &resultant_primes) noexcept; + +public: + IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes); +}; + +template +void IntervalSieve::Settdlimit() noexcept +{ + const double dr = get_double(right_); + const double delta = get_double(delta_); + const double tdest = delta * std::log(dr); + + // Small cases + if(tdest * tdest >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + return; + } + + // First guess + if(tdest <= 1ul<<30) + { + tdlimit_ = static_cast(tdest); + } + + else + { + tdlimit_ = 1ul<<30; + } + + // Find the corresponding prime + std::size_t i; + for(i = pss_.len - 1; i > 0; --i) + { + if(static_cast(pss_.ps[i]) * tdlimit_ < dr) + { + break; + } + } + plimit_ = pss_.prime[i]; + + double tdlimit_guess = 1 + std::fmod(dr, pss_.ps[i]); + if(tdlimit_guess * tdlimit_guess >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + } +} + +template +void IntervalSieve::SeiveLength(Integer d) noexcept +{ + Integer r {left_ % d}; + Integer start {0}; + + if(r != 0) + { + start = d - r; + } + + for(Integer i {start}; i >= 0 && i < b_.size(); i += d) + { + b_[i] = 0; + } +} + +template +void IntervalSieve::Sieve() noexcept +{ + std::int_fast64_t primes_range {}; + if(plimit_ <= 10) + { + primes_range = 
10; + } + + else + { + primes_range = plimit_; + } + + // Sieve with pre-computed primes and then use the wheel for the remainder + std::size_t i {}; + for(; primes_[i] < primes_range; ++i) + { + SeiveLength(primes_[i]); + } + + for(Integer j = w_.Next(primes_[--i]); j <= tdlimit_; j = w_.Next(j)) + { + SeiveLength(j); + } +} + +template +void IntervalSieve::WriteOutput(Container &resultant_primes) noexcept +{ + for(Integer i {0}; i < b_.size(); ++i) + { + if(b_[i]) + { + resultant_primes.emplace_back(left_ + i); + } + } +} + +// Performs the pseduosqaure prime test on n = left + pos +// return 1 if prime or prime power, 0 otherwise +// Begins with a base-2 test +template +bool IntervalSieve::Psstest(const std::size_t pos) noexcept +{ + const Integer n {left_ + pos}; + const Integer exponent {(n - 1) / 2}; + const std::int_fast64_t nmod8 = n % 8; + + std::int_fast64_t negative_one_count {0}; + + for(std::size_t i {}; i < primes_.size(); ++i) + { + Integer temp = primes_[i]; + temp = std::pow(temp, n); + + if(temp == 1) + { + if(i == 0 && nmod8 == 5) + { + return false; + } + } + + else + { + ++temp; + if(temp == n) + { + if(i > 0) + { + ++negative_one_count; + } + } + else + { + return false; + } + } + } + + return (nmod8 != 1 || negative_one_count > 0); +} + +template +void IntervalSieve::Psstestall() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + if(!Psstest(i)) + { + b_[i] = 0; + } + } + } +} + +template +IntervalSieve::IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) : + left_ {left}, right_ {right}, primes_ {primes} +{ + delta_ = right_ - left_; + b_.resize(delta_, 1); + Settdlimit(); + Sieve(); + + if(plimit_ != 0 ) + { + Psstestall(); + } + + WriteOutput(resultant_primes); +} + +#if defined(__MPIR_VERSION) || defined(__GNU_MP_VERSION) +// GNU GMP C or MPIR +inline double get_double(const mpz_t &x) noexcept +{ + return mpz_get_d(x); +} +#endif + +#if 
defined(__GNU_MP_VERSION) +#if __has_include() +// GNU GMP C++ bindings +inline double get_double(const mpz_class &x) noexcept +{ + return x.get_d() +} +#endif +#endif + +// boost::multiprecision and POD +template +inline double get_double(const Integer &x) noexcept +{ + return static_cast(x); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP From 2e46b81a33356e0c651d33e87b7eaa72a47bdacd Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 13:59:24 -0500 Subject: [PATCH 44/83] Added interval sieve to performance test [CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 2 +- .../performance/prime_sieve_performance.cpp | 35 +++++++++++++++---- test/test_prime_sieve.cpp | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 2f69765b53..491266c579 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -65,7 +65,7 @@ class IntervalSieve // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations boost::dynamic_bitset<> b_; - const Container& primes_; + const PrimeContainer& primes_; std::int_fast64_t plimit_; void Settdlimit() noexcept; diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 47223c54fe..f3a61a93fc 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -6,6 +6,7 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) #include +#include #include #include #include @@ -50,6 +51,28 @@ void mask_sieve(benchmark::State& state) state.SetComplexityN(state.range(0)); } +template +inline auto interval_sieve_helper(Integer lower_bound, Integer upper_bound, std::vector primes) -> std::vector +{ + std::vector pre_sieved_primes {2, 3, 5, 7, 11, 13, 17, 
19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71}; + boost::math::detail::IntervalSieve sieve(lower_bound, upper_bound, pre_sieved_primes, primes); + return primes; +} + +template +void interval_sieve(benchmark::State& state) +{ + Integer lower = static_cast(2); + Integer upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(mask_sieve_helper(lower, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +// Complete Implementations template inline auto prime_sieve_helper(ExecuitionPolicy policy, Integer upper, Container primes) { @@ -57,7 +80,6 @@ inline auto prime_sieve_helper(ExecuitionPolicy policy, Integer upper, Container return primes; } -// Complete Implementations template void prime_sieve(benchmark::State& state) { @@ -92,14 +114,15 @@ void kimwalish_primes(benchmark::State& state) // Invidiual Implementations // Linear -BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Segmented -BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); 
-BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); // Complete Implemenations BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 8b3d774f41..5717b33717 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include From 6759ede1b3a1b99f4192164a56c2a2b78de66b87 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 17:06:51 -0500 Subject: [PATCH 45/83] Added unit tests [WIP][CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 27 ++++++++++++++++--- .../performance/prime_sieve_performance.cpp | 10 +++---- test/test_prime_sieve.cpp | 26 ++++++++++++++++++ 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 491266c579..a657594629 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -55,7 +55,7 @@ class IntervalSieve }; static constexpr pssentry pss_{}; - boost::math::detail::MOD210Wheel w_; + static constexpr boost::math::detail::MOD210Wheel w_{}; std::size_t tdlimit_; Integer delta_; @@ -76,7 +76,8 @@ class IntervalSieve void WriteOutput(Container &resultant_primes) noexcept; public: - IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes); + IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) 
noexcept; + void NewRange(const Integer &left, const Integer &right, Container &resultant_primes) noexcept; }; template @@ -241,7 +242,7 @@ void IntervalSieve::Psstestall() noexcept } template -IntervalSieve::IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) : +IntervalSieve::IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) noexcept : left_ {left}, right_ {right}, primes_ {primes} { delta_ = right_ - left_; @@ -257,6 +258,26 @@ IntervalSieve::IntervalSieve(const Integer & WriteOutput(resultant_primes); } +template +void IntervalSieve::NewRange(const Integer &left, const Integer &right, Container &resultant_primes) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(delta_); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + WriteOutput(resultant_primes); +} + #if defined(__MPIR_VERSION) || defined(__GNU_MP_VERSION) // GNU GMP C or MPIR inline double get_double(const mpz_t &x) noexcept diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index f3a61a93fc..fb71316d8d 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -67,7 +67,7 @@ void interval_sieve(benchmark::State& state) for(auto _ : state) { std::vector primes; - benchmark::DoNotOptimize(mask_sieve_helper(lower, upper, primes)); + benchmark::DoNotOptimize(interval_sieve_helper(lower, upper, primes)); } state.SetComplexityN(state.range(0)); } @@ -125,9 +125,9 @@ BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 //BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); // Complete Implemenations -BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 
22)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark -BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +//BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_MAIN(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 5717b33717..31b668807c 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -12,6 +12,7 @@ #include #include #include +#include template void test_prime_sieve() @@ -123,8 +124,27 @@ void test_par_prime_sieve_large() BOOST_TEST_EQ(primes.size(), ref); } +template +void test_interval_sieve() +{ + std::vector pre_sieved_primes {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71}; + std::vector primes; + + boost::math::detail::IntervalSieve sieve(1'000, 10'000, pre_sieved_primes, primes); + BOOST_TEST_EQ(primes.size(), 1'061); + + primes.clear(); + sieve.NewRange(10'000, 100'000, primes); + BOOST_TEST_EQ(primes.size(), 8'363); + + primes.clear(); + sieve.NewRange(100'000, 1'000'000, primes); + BOOST_TEST_EQ(primes.size(), 68'906); +} + int main() { + /* test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); @@ -134,6 +154,12 @@ int main() 
test_prime_range(); test_prime_range(); test_prime_range(); + */ + + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); //test_prime_sieve_overflow(); From 1b404038017074ec4c896edaa732ef2ded034dae Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 18:50:07 -0500 Subject: [PATCH 46/83] Implemented interval sieve [CI SKIP] --- .../boost/math/special_functions/interval_sieve.hpp | 8 +++++--- .../boost/math/special_functions/prime_sieve.hpp | 13 +++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index a657594629..305d3a433d 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -15,18 +15,20 @@ #include #include #include +#include namespace boost::math::detail { +template +class IntervalSieve +{ + #ifdef __SIZEOF_INT128__ // Defined in GCC 4.6+, clang, intel. MSVC does not define. 
using int_128t = __int128; // One machine word smaller than the boost equivalent #else using int_128t = boost::multiprecision::int128_t; #endif -template -class IntervalSieve -{ private: // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) // This table is from page 421, table 16.3.1, Hugh Williams' book diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index c22fbacd55..ceb8934e5c 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -9,6 +9,7 @@ #define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP #include +#include #include #include #include @@ -138,8 +139,8 @@ constexpr void prime_table(Integer upper_bound, Container &resultant_primes) template void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesContainer &primes, Container &resultant_primes) { - const Integer L1_SIZE {32648}; - const Integer interval {L1_SIZE * 4}; + const Integer L1_SIZE {32768}; + const Integer interval {L1_SIZE * 8}; Integer current_lower_bound{lower_bound}; Integer current_upper_bound{current_lower_bound + interval}; @@ -160,8 +161,8 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta { prime_vectors[i].reserve(primes_in_range); - future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ - boost::math::detail::mask_sieve(current_lower_bound, current_upper_bound, primes, prime_vectors[i]); + future_manager.emplace_back(std::async(std::launch::async, [&current_lower_bound, &current_upper_bound, &primes, &prime_vectors, i]{ + boost::math::detail::IntervalSieve sieve(current_lower_bound, current_upper_bound, primes, prime_vectors[i]); })); current_lower_bound = current_upper_bound + 1; @@ -169,8 +170,8 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta } 
prime_vectors[ranges].reserve(primes_in_range); - future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &primes, &prime_vectors]{ - boost::math::detail::mask_sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); + future_manager.emplace_back(std::async(std::launch::async, [&current_lower_bound, &upper_bound, &primes, &prime_vectors]{ + boost::math::detail::IntervalSieve sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); })); for(auto &&future : future_manager) From 97244be0a0eb4096bf03e0022452dbc214a7774c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 23 Aug 2020 22:57:46 -0500 Subject: [PATCH 47/83] Performance improvements and bug fixes [CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 66 ++++++------ .../math/special_functions/prime_sieve.hpp | 101 ++++++------------ .../math/special_functions/prime_wheel.hpp | 8 +- .../performance/prime_sieve_performance.cpp | 8 +- test/test_prime_sieve.cpp | 61 +++++++---- 5 files changed, 117 insertions(+), 127 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 305d3a433d..a514d1591a 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -19,10 +19,35 @@ namespace boost::math::detail { + +#if defined(__MPIR_VERSION) || defined(__GNU_MP_VERSION) +// GNU GMP C or MPIR +inline double get_double(const mpz_t &x) noexcept +{ + return mpz_get_d(x); +} +#endif + +#if defined(__GNU_MP_VERSION) +#if __has_include() +// GNU GMP C++ bindings +inline double get_double(const mpz_class &x) noexcept +{ + return x.get_d() +} +#endif +#endif + +// boost::multiprecision and POD +template +inline double get_double(const Integer &x) noexcept +{ + return static_cast(x); +} + template class IntervalSieve { - #ifdef __SIZEOF_INT128__ // Defined in GCC 4.6+, clang, intel. 
MSVC does not define. using int_128t = __int128; // One machine word smaller than the boost equivalent #else @@ -71,15 +96,15 @@ using int_128t = boost::multiprecision::int128_t; std::int_fast64_t plimit_; void Settdlimit() noexcept; - void SeiveLength(Integer d) noexcept; + void SeiveLength(const Integer d) noexcept; void Sieve() noexcept; - bool Psstest(std::size_t pos) noexcept; + bool Psstest(const std::size_t pos) noexcept; void Psstestall() noexcept; void WriteOutput(Container &resultant_primes) noexcept; public: - IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) noexcept; - void NewRange(const Integer &left, const Integer &right, Container &resultant_primes) noexcept; + IntervalSieve(const Integer left, const Integer right, const PrimeContainer &primes, Container &resultant_primes) noexcept; + void NewRange(const Integer left, const Integer right, Container &resultant_primes) noexcept; }; template @@ -128,7 +153,7 @@ void IntervalSieve::Settdlimit() noexcept } template -void IntervalSieve::SeiveLength(Integer d) noexcept +void IntervalSieve::SeiveLength(const Integer d) noexcept { Integer r {left_ % d}; Integer start {0}; @@ -244,7 +269,7 @@ void IntervalSieve::Psstestall() noexcept } template -IntervalSieve::IntervalSieve(const Integer &left, const Integer &right, const PrimeContainer &primes, Container &resultant_primes) noexcept : +IntervalSieve::IntervalSieve(const Integer left, const Integer right, const PrimeContainer &primes, Container &resultant_primes) noexcept : left_ {left}, right_ {right}, primes_ {primes} { delta_ = right_ - left_; @@ -261,7 +286,7 @@ IntervalSieve::IntervalSieve(const Integer & } template -void IntervalSieve::NewRange(const Integer &left, const Integer &right, Container &resultant_primes) noexcept +void IntervalSieve::NewRange(const Integer left, const Integer right, Container &resultant_primes) noexcept { left_ = left; right_ = right; @@ -279,31 +304,6 @@ void 
IntervalSieve::NewRange(const Integer & WriteOutput(resultant_primes); } - -#if defined(__MPIR_VERSION) || defined(__GNU_MP_VERSION) -// GNU GMP C or MPIR -inline double get_double(const mpz_t &x) noexcept -{ - return mpz_get_d(x); -} -#endif - -#if defined(__GNU_MP_VERSION) -#if __has_include() -// GNU GMP C++ bindings -inline double get_double(const mpz_class &x) noexcept -{ - return x.get_d() -} -#endif -#endif - -// boost::multiprecision and POD -template -inline double get_double(const Integer &x) noexcept -{ - return static_cast(x); -} } #endif // BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index ceb8934e5c..8d18b8853a 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace boost::math { namespace detail { @@ -29,10 +30,10 @@ namespace boost::math { namespace detail template void linear_sieve(Integer upper_bound, Container &resultant_primes) { - size_t least_divisors_size{static_cast(upper_bound + 1)}; + std::size_t least_divisors_size{static_cast(upper_bound + 1)}; std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; - for (Integer i{2}; i <= upper_bound; ++i) + for (std::size_t i{2}; i < upper_bound; ++i) { if (least_divisors[i] == 0) { @@ -40,9 +41,9 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) resultant_primes.emplace_back(i); } - for (size_t j{}; j < least_divisors_size; ++j) + for (std::size_t j{}; j < least_divisors_size; ++j) { - if (j >= resultant_primes.size()) + if (i * resultant_primes[j] > upper_bound) { break; } @@ -52,7 +53,7 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) break; } - else if (i * resultant_primes[j] > upper_bound) + else if (j >= resultant_primes.size()) { break; } @@ -70,7 +71,7 @@ void mask_sieve(Integer 
lower_bound, Integer upper_bound, const PrimeContainer& { Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - size_t primes_size {}; + std::size_t primes_size {}; auto it{primes.begin()}; while(it != primes.end() && *it < limit) { @@ -78,13 +79,13 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& ++it; } - const size_t n {static_cast(upper_bound - lower_bound + 1)}; + const std::size_t n {static_cast(upper_bound - lower_bound + 1)}; std::unique_ptr is_prime {new bool[n]}; memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); // Enable use of thread pool, not SIMD compatible std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime){ - for(Integer j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j <= upper_bound; j += prime) + for(Integer j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j < upper_bound; j += prime) { is_prime[j - lower_bound] = false; } @@ -117,9 +118,9 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_p } template -constexpr void prime_table(size_t min_index, Integer upper_bound, Container &resultant_primes) +constexpr void prime_table(std::size_t min_index, Integer upper_bound, Container &resultant_primes) { - size_t current_index {min_index}; + std::size_t current_index {min_index}; Integer current_prime {2}; while(current_prime < upper_bound) @@ -149,7 +150,7 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta current_upper_bound = upper_bound; } - size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; std::vector> prime_vectors(ranges + 1); std::vector> future_manager(ranges); @@ -157,20 +158,20 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta Integer primes_in_range 
{static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; - for(size_t i {}; i < ranges; ++i) + for(std::size_t i {}; i < ranges; ++i) { prime_vectors[i].reserve(primes_in_range); - future_manager.emplace_back(std::async(std::launch::async, [&current_lower_bound, &current_upper_bound, &primes, &prime_vectors, i]{ + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ boost::math::detail::IntervalSieve sieve(current_lower_bound, current_upper_bound, primes, prime_vectors[i]); })); - current_lower_bound = current_upper_bound + 1; + current_lower_bound = current_upper_bound; current_upper_bound += interval; } prime_vectors[ranges].reserve(primes_in_range); - future_manager.emplace_back(std::async(std::launch::async, [&current_lower_bound, &upper_bound, &primes, &prime_vectors]{ + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &primes, &prime_vectors]{ boost::math::detail::IntervalSieve sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); })); @@ -238,19 +239,17 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime return; } - --upper_bound; // Not inclusive, but several methods in boost::math::detail need to be - + if constexpr (detail::is_vector_v) { primes.reserve(upper_bound / std::log(static_cast(upper_bound))); } - - if(upper_bound <= 32768) + + if(upper_bound <= 4096) { - boost::math::detail::prime_table(upper_bound, primes); + boost::math::detail::linear_sieve(upper_bound, primes); } - else if (std::is_same_v) { boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); @@ -270,35 +269,18 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime small_primes.reserve(1028); // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two 
threads - if(upper_bound < 16777216 || processor_count == 2) + if(upper_bound < 1073741825 || processor_count == 2) { // Split into two vectors and merge after joined to avoid data races - if(upper_bound <= 104729) - { - std::thread t1([&small_primes]{ - boost::math::detail::prime_table(static_cast(32768), small_primes); - }); - - std::thread t2([upper_bound, &primes]{ - boost::math::detail::prime_table(3512, upper_bound, primes); - }); + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(8192), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); + }); - t1.join(); - t2.join(); - } - - else - { - std::thread t1([&small_primes]{ - boost::math::detail::prime_table(static_cast(104729), small_primes); - }); - std::thread t2([upper_bound, &primes]{ - boost::math::detail::segmented_sieve(static_cast(104729), upper_bound, primes); - }); - - t1.join(); - t2.join(); - } + t1.join(); + t2.join(); primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); } @@ -310,33 +292,18 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime std::vector pre_generated_primes {}; pre_generated_primes.reserve(limit / std::log(limit)); - if(limit <= 32768) - { - boost::math::detail::prime_table(limit, pre_generated_primes); - } - - else if(limit <= 104729) + if(limit < 4096) { - std::thread t1([&small_primes]{ - boost::math::detail::prime_table(static_cast(32768), small_primes); - }); - - std::thread t2([limit, &pre_generated_primes]{ - boost::math::detail::prime_table(3512, limit, pre_generated_primes); - }); - - t1.join(); - t2.join(); - pre_generated_primes.insert(pre_generated_primes.begin(), small_primes.begin(), small_primes.end()); + boost::math::detail::linear_sieve(limit, pre_generated_primes); } else { std::thread t1([&small_primes] { - boost::math::detail::prime_table(static_cast(104729), small_primes); + 
boost::math::detail::linear_sieve(static_cast(4096), small_primes); }); std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(104729), limit, pre_generated_primes); + boost::math::detail::segmented_sieve(static_cast(4096), limit, pre_generated_primes); }); t1.join(); @@ -352,7 +319,7 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime Integer primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - current_lower_bound / std::log(static_cast(current_lower_bound)))}; - for(size_t i{}; i < processor_count - 1; ++i) + for(std::size_t i{}; i < processor_count - 1; ++i) { prime_vectors[i].reserve(primes_in_range); diff --git a/include/boost/math/special_functions/prime_wheel.hpp b/include/boost/math/special_functions/prime_wheel.hpp index 5c676f5e88..0a03b5e6e6 100644 --- a/include/boost/math/special_functions/prime_wheel.hpp +++ b/include/boost/math/special_functions/prime_wheel.hpp @@ -60,7 +60,7 @@ class Wheel constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } constexpr Integer MakeRP(const Integer i) const noexcept { - if(W[i % M].rp) + if(W_[i % M_].rp) { return i; } @@ -278,15 +278,15 @@ class MOD210Wheel final constexpr auto Next(const Integer i) const noexcept { return i + dist_[i % M_]; } constexpr auto MakeRP(const Integer i) const noexcept { - if(rp_[i % M]) + if(rp_[i % M_]) { return i; } return Next(i); } constexpr auto Prev(const Integer i) const noexcept { return i - dist_[(M_ - (i % M_)) % M_]; } - constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } - constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[i % M_]; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[i % phi_]; } }; } diff --git 
a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index fb71316d8d..58e63b01f3 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -120,14 +120,14 @@ void kimwalish_primes(benchmark::State& state) // Segmented //BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); //BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); // Complete Implemenations //BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark //BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_MAIN(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 31b668807c..bf47d4eff3 100644 --- 
a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -55,10 +55,12 @@ void test_prime_sieve() boost::math::prime_sieve(1000000, primes); BOOST_TEST_EQ(primes.size(), 78498); + /* // Does the function work with a list? std::list l_primes; boost::math::prime_sieve(1000, l_primes); BOOST_TEST_EQ(l_primes.size(), ref); + */ // Does the function work with a deque? std::deque d_primes; @@ -117,10 +119,10 @@ template void test_par_prime_sieve_large() { std::vector primes; - Integer ref {1077871}; // Calculated with wolfram-alpha + Integer ref {54400028}; // Calculated with wolfram-alpha // Force the sieve into the multi-threading section - boost::math::prime_sieve(static_cast(16777217), primes); + boost::math::prime_sieve(static_cast(1073741824), primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -142,30 +144,51 @@ void test_interval_sieve() BOOST_TEST_EQ(primes.size(), 68'906); } +template +void test_linear_sieve() +{ + std::vector primes; + + boost::math::detail::linear_sieve(1'000, primes); + BOOST_TEST_EQ(primes.size(), 168); + + primes.clear(); + boost::math::detail::linear_sieve(10'000, primes); + BOOST_TEST_EQ(primes.size(), 1229); + + primes.clear(); + boost::math::detail::linear_sieve(100'000, primes); + BOOST_TEST_EQ(primes.size(), 9592); +} + int main() { - /* - test_prime_sieve(); - test_prime_sieve(); - test_prime_sieve(); - test_prime_sieve(); + // Individual Algorithms + //test_linear_sieve(); + //test_linear_sieve(); + test_linear_sieve(); + //test_linear_sieve(); - test_prime_range(); - test_prime_range(); - test_prime_range(); - test_prime_range(); - */ - - test_interval_sieve(); - test_interval_sieve(); + //test_interval_sieve(); + //test_interval_sieve(); test_interval_sieve(); - test_interval_sieve(); + //test_interval_sieve(); - //test_prime_sieve_overflow(); + // Composite + //test_prime_sieve(); + //test_prime_sieve(); + test_prime_sieve(); + //test_prime_sieve(); + //test_prime_sieve(); - test_prime_sieve(); + //test_prime_range(); + 
//test_prime_range(); + //test_prime_range(); + //test_prime_range(); + + //test_prime_sieve_overflow(); - //test_par_prime_sieve_large(); + test_par_prime_sieve_large(); boost::report_errors(); } From fa041335136f8ace61258789090e598ca021bf22 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 24 Aug 2020 22:00:05 -0500 Subject: [PATCH 48/83] Significant refactoring [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 156 ++---------------- 1 file changed, 16 insertions(+), 140 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 8d18b8853a..8ec48ca2e5 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -41,27 +41,9 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) resultant_primes.emplace_back(i); } - for (std::size_t j{}; j < least_divisors_size; ++j) + for (std::size_t j{}; i * resultant_primes[j] <= upper_bound && resultant_primes[j] <= least_divisors[i] && j < least_divisors_size; ++j) { - if (i * resultant_primes[j] > upper_bound) - { - break; - } - - else if (resultant_primes[j] > least_divisors[i]) - { - break; - } - - else if (j >= resultant_primes.size()) - { - break; - } - - else - { - least_divisors[i * resultant_primes[j]] = resultant_primes[j]; - } + least_divisors[i * resultant_primes[j]] = resultant_primes[j]; } } } @@ -209,24 +191,13 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &result boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); } +} // End namespace detail -template -struct IsVector -{ - using type = T; - constexpr static bool value = false; -}; - -template -struct IsVector> +template +constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) { - using type = std::vector; - constexpr static bool value = true; -}; - -template -constexpr bool is_vector_v = 
IsVector::value; -} // End namespace detail + prime_container.reserve(upper_bound / std::log(static_cast(upper_bound))); +} template void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) @@ -239,12 +210,6 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime return; } - - if constexpr (detail::is_vector_v) - { - primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - } - if(upper_bound <= 4096) { boost::math::detail::linear_sieve(upper_bound, primes); @@ -257,103 +222,19 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime else { - unsigned processor_count {std::thread::hardware_concurrency()}; - - // May return 0 when unable to detect - if(processor_count == 0) - { - processor_count = 2; - } - std::vector small_primes {}; small_primes.reserve(1028); - // Threshold for when 2 thread performance begins to be non-linear, or when the system can only support two threads - if(upper_bound < 1073741825 || processor_count == 2) - { - // Split into two vectors and merge after joined to avoid data races - std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(8192), small_primes); - }); - std::thread t2([upper_bound, &primes] { - boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); - }); - - t1.join(); - t2.join(); - primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); - } + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(8192), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); + }); - //If sufficiently large upper bound spawn as many threads as the system has processors for - else - { - //Pre-generate all of the primes so that each thread does not have to - Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; - std::vector pre_generated_primes {}; - 
pre_generated_primes.reserve(limit / std::log(limit)); - - if(limit < 4096) - { - boost::math::detail::linear_sieve(limit, pre_generated_primes); - } - - else - { - std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(4096), small_primes); - }); - std::thread t2([limit, &pre_generated_primes] { - boost::math::detail::segmented_sieve(static_cast(4096), limit, pre_generated_primes); - }); - - t1.join(); - t2.join(); - pre_generated_primes.insert(pre_generated_primes.begin(), small_primes.begin(), small_primes.end()); - } - - std::vector thread_manager {}; - std::vector> prime_vectors(processor_count); - const Integer range_per_thread = upper_bound / (processor_count); - Integer current_lower_bound {limit + 1}; - Integer current_upper_bound {current_lower_bound + range_per_thread}; - Integer primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - - current_lower_bound / std::log(static_cast(current_lower_bound)))}; - - for(std::size_t i{}; i < processor_count - 1; ++i) - { - prime_vectors[i].reserve(primes_in_range); - - std::thread t([current_lower_bound, current_upper_bound, &prime_vectors, i, &pre_generated_primes] { - boost::math::detail::segmented_sieve(current_lower_bound, current_upper_bound, pre_generated_primes, - prime_vectors[i]); - }); - - thread_manager.push_back(std::move(t)); - - current_lower_bound = current_upper_bound; - current_upper_bound += range_per_thread; - primes_in_range = static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - - current_lower_bound / std::log(static_cast(current_lower_bound))); - } - - prime_vectors.back().reserve(primes_in_range); - - std::thread t([current_lower_bound, upper_bound, &prime_vectors, &pre_generated_primes] { - boost::math::detail::segmented_sieve(current_lower_bound, upper_bound, pre_generated_primes, prime_vectors.back()); - }); - thread_manager.push_back(std::move(t)); - - for(auto &thread : thread_manager) - { - 
thread.join(); - } - - primes.insert(primes.begin(), pre_generated_primes.begin(), pre_generated_primes.end()); - for(auto &v : prime_vectors) - { - primes.insert(primes.end(), v.begin(), v.end()); - } - } + t1.join(); + t2.join(); + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); } } @@ -367,11 +248,6 @@ void prime_sieve(Integer upper_bound, Container &primes) template void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) { - if constexpr (detail::is_vector_v) - { - primes.reserve(upper_bound / std::log(static_cast(upper_bound))); - } - boost::math::prime_sieve(policy, upper_bound, primes); auto it{primes.begin()}; From c4a89c86ffe777824a2f9cebd8a1042ff83b512c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 24 Aug 2020 23:12:01 -0500 Subject: [PATCH 49/83] Seq policy actually sequential [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 38 ++++++++++++++++- test/test_prime_sieve.cpp | 42 +++++++++++++++---- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 8ec48ca2e5..5d415b34c0 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -191,6 +191,39 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &result boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); } + +template +void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) +{ + const Integer L1_SIZE {32768}; + const Integer interval {L1_SIZE * 8}; + Integer current_lower_bound{lower_bound}; + Integer current_upper_bound{current_lower_bound + interval}; + + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + 
boost::math::detail::IntervalSieve sieve(current_lower_bound, current_upper_bound, resultant_primes, resultant_primes); + if(ranges == 0) + { + return; + } + + for(std::size_t i {}; i < ranges; ++i) + { + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + sieve.NewRange(current_lower_bound, current_upper_bound, resultant_primes); + } +} } // End namespace detail template @@ -215,9 +248,10 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime boost::math::detail::linear_sieve(upper_bound, primes); } - else if (std::is_same_v) + else if(typeid(policy) == typeid(std::execution::seq)) { - boost::math::detail::segmented_sieve(static_cast(2), upper_bound, primes); + boost::math::detail::linear_sieve(static_cast(4096), primes); + boost::math::detail::sequential_segmented_sieve(static_cast(4096), upper_bound, primes); } else diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index bf47d4eff3..8f0a5ee7db 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -13,6 +13,7 @@ #include #include #include +#include template void test_prime_sieve() @@ -24,11 +25,6 @@ void test_prime_sieve() boost::math::prime_sieve(1000, primes); BOOST_TEST_EQ(primes.size(), ref); - // Does the sequential policy work - primes.clear(); - boost::math::prime_sieve(std::execution::seq, 1000, primes); - BOOST_TEST_EQ(primes.size(), ref); - // Tests for correctness // 2 primes.clear(); @@ -68,6 +64,27 @@ void test_prime_sieve() BOOST_TEST_EQ(d_primes.size(), ref); } +template +void test_sequential_prime_sieve() +{ + std::vector primes; + + // 10'000 + primes.clear(); + boost::math::prime_sieve(std::execution::seq, 10000, primes); + BOOST_TEST_EQ(primes.size(), 1229); + + // 100'000 + primes.clear(); + boost::math::prime_sieve(std::execution::seq, 100000, primes); + BOOST_TEST_EQ(primes.size(), 9592); + + // 1'000'000 + 
primes.clear(); + boost::math::prime_sieve(std::execution::seq, 1000000, primes); + BOOST_TEST_EQ(primes.size(), 78498); +} + template void test_prime_range() { @@ -121,7 +138,8 @@ void test_par_prime_sieve_large() std::vector primes; Integer ref {54400028}; // Calculated with wolfram-alpha - // Force the sieve into the multi-threading section + // Force the sieve into the multi-threading section and test reserve functionality + boost::math::prime_reserve(static_cast(1073741824), primes); boost::math::prime_sieve(static_cast(1073741824), primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -149,15 +167,15 @@ void test_linear_sieve() { std::vector primes; - boost::math::detail::linear_sieve(1'000, primes); + boost::math::detail::linear_sieve(static_cast(1'000), primes); BOOST_TEST_EQ(primes.size(), 168); primes.clear(); - boost::math::detail::linear_sieve(10'000, primes); + boost::math::detail::linear_sieve(static_cast(10'000), primes); BOOST_TEST_EQ(primes.size(), 1229); primes.clear(); - boost::math::detail::linear_sieve(100'000, primes); + boost::math::detail::linear_sieve(static_cast(100'000), primes); BOOST_TEST_EQ(primes.size(), 9592); } @@ -181,6 +199,12 @@ int main() //test_prime_sieve(); //test_prime_sieve(); + //test_sequential_prime_sieve(); + //test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + //test_sequential_prime_sieve(); + //test_sequential_prime_sieve(); + //test_prime_range(); //test_prime_range(); //test_prime_range(); From 91836f62f35f2ed455c6051ab8a6146df5750a71 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 25 Aug 2020 18:44:07 -0500 Subject: [PATCH 50/83] Fixes for multiprecision and policies [CI SKIP] --- include/boost/math/special_functions/prime_sieve.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 5d415b34c0..b346d60c95 100644 --- 
a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -21,7 +21,6 @@ #include #include #include -#include namespace boost::math { namespace detail { @@ -41,9 +40,10 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) resultant_primes.emplace_back(i); } - for (std::size_t j{}; i * resultant_primes[j] <= upper_bound && resultant_primes[j] <= least_divisors[i] && j < least_divisors_size; ++j) + for (std::size_t j{}; j < resultant_primes.size() && i * resultant_primes[j] <= upper_bound && + resultant_primes[j] <= least_divisors[i] && j < least_divisors_size; ++j) { - least_divisors[i * resultant_primes[j]] = resultant_primes[j]; + least_divisors[i * static_cast(resultant_primes[j])] = resultant_primes[j]; } } } @@ -275,7 +275,7 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime template void prime_sieve(Integer upper_bound, Container &primes) { - prime_sieve(std::execution::par, upper_bound, primes); + prime_sieve(std::execution::seq, upper_bound, primes); } @@ -296,7 +296,7 @@ void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bo template inline void prime_range(Integer lower_bound, Integer upper_bound, Container &primes) { - prime_range(std::execution::par, lower_bound, upper_bound, primes); + prime_range(std::execution::seq, lower_bound, upper_bound, primes); } } From 6d6b19fb9e0ed3f964cdf607bd10841e2ee1b516 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 26 Aug 2020 19:23:54 -0500 Subject: [PATCH 51/83] cpp_int now passes all tests [CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 10 ++-- .../math/special_functions/prime_sieve.hpp | 28 ++++----- .../math/special_functions/prime_wheel.hpp | 8 +-- test/test_prime_sieve.cpp | 60 ++++++++++--------- 4 files changed, 55 insertions(+), 51 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp 
b/include/boost/math/special_functions/interval_sieve.hpp index a514d1591a..4c79dc30ac 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -165,7 +165,7 @@ void IntervalSieve::SeiveLength(const Intege for(Integer i {start}; i >= 0 && i < b_.size(); i += d) { - b_[i] = 0; + b_[static_cast(i)] = 0; } } @@ -199,7 +199,7 @@ void IntervalSieve::Sieve() noexcept template void IntervalSieve::WriteOutput(Container &resultant_primes) noexcept { - for(Integer i {0}; i < b_.size(); ++i) + for(std::size_t i {}; i < b_.size(); ++i) { if(b_[i]) { @@ -223,7 +223,7 @@ bool IntervalSieve::Psstest(const std::size_ for(std::size_t i {}; i < primes_.size(); ++i) { Integer temp = primes_[i]; - temp = std::pow(temp, n); + temp = static_cast(std::pow(get_double(temp), get_double(n))); if(temp == 1) { @@ -273,7 +273,7 @@ IntervalSieve::IntervalSieve(const Integer l left_ {left}, right_ {right}, primes_ {primes} { delta_ = right_ - left_; - b_.resize(delta_, 1); + b_.resize(static_cast(delta_), 1); Settdlimit(); Sieve(); @@ -292,7 +292,7 @@ void IntervalSieve::NewRange(const Integer l right_ = right; delta_ = right_ - left_; - b_.resize(delta_); + b_.resize(static_cast(delta_)); b_.set(); Settdlimit(); Sieve(); diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index b346d60c95..c43e3290f9 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -67,9 +67,10 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& // Enable use of thread pool, not SIMD compatible std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime){ - for(Integer j {std::max(prime * prime, (lower_bound + prime - 1) / prime * prime)}; j < upper_bound; j += prime) + for(Integer j {std::max(static_cast(prime * prime), 
static_cast((lower_bound + prime - 1) / prime * prime))}; + j < upper_bound; j += prime) { - is_prime[j - lower_bound] = false; + is_prime[static_cast(j - lower_bound)] = false; } }); @@ -80,7 +81,7 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& for(Integer i{lower_bound}; i <= upper_bound; ++i) { - if(is_prime[i - lower_bound]) + if(is_prime[static_cast(i - lower_bound)]) { resultant_primes.emplace_back(i); } @@ -90,7 +91,7 @@ void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& template void mask_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) { - Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + auto limit{std::floor(std::sqrt(static_cast(upper_bound))) + 1}; std::vector primes {}; primes.reserve(limit / std::log(limit)); @@ -137,8 +138,8 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta std::vector> prime_vectors(ranges + 1); std::vector> future_manager(ranges); - Integer primes_in_range {static_cast(current_upper_bound / std::log(static_cast(current_upper_bound)) - - current_lower_bound / std::log(static_cast(current_lower_bound)))}; + auto primes_in_range {static_cast(static_cast(current_upper_bound) / std::log(static_cast(current_upper_bound)) - + static_cast(current_lower_bound) / std::log(static_cast(current_lower_bound)))}; for(std::size_t i {}; i < ranges; ++i) { @@ -176,17 +177,19 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &result { Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; std::vector primes {}; - primes.reserve(limit / std::log(limit)); + primes.reserve(static_cast(limit) / std::log(static_cast(limit))); // Prepare for max value so you do not have to calculate this again if(limit < 4096) { - boost::math::detail::linear_sieve(limit, primes); + boost::math::detail::linear_sieve(static_cast(limit), primes); } else { - 
boost::math::detail::mask_sieve(static_cast(2), limit, primes); + //boost::math::detail::mask_sieve(static_cast(2), limit, primes); + boost::math::detail::linear_sieve(static_cast(4096), primes); + boost::math::detail::segmented_sieve(static_cast(4096), limit, primes, primes); } boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); @@ -229,15 +232,12 @@ void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Contai template constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) { - prime_container.reserve(upper_bound / std::log(static_cast(upper_bound))); + prime_container.reserve(static_cast(upper_bound) / std::log(static_cast(upper_bound))); } template void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) { - static_assert(std::is_integral::value, "No primes for floating point types"); - BOOST_ASSERT_MSG(upper_bound + 1 < std::numeric_limits::max(), "Type Overflow"); - if(upper_bound == 2) { return; @@ -245,7 +245,7 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime if(upper_bound <= 4096) { - boost::math::detail::linear_sieve(upper_bound, primes); + boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } else if(typeid(policy) == typeid(std::execution::seq)) diff --git a/include/boost/math/special_functions/prime_wheel.hpp b/include/boost/math/special_functions/prime_wheel.hpp index 0a03b5e6e6..0999eac5cf 100644 --- a/include/boost/math/special_functions/prime_wheel.hpp +++ b/include/boost/math/special_functions/prime_wheel.hpp @@ -275,7 +275,7 @@ class MOD210Wheel final constexpr auto K() const noexcept { return k_; } constexpr auto Phi() const noexcept { return phi_; } - constexpr auto Next(const Integer i) const noexcept { return i + dist_[i % M_]; } + constexpr auto Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } constexpr auto MakeRP(const Integer i) const noexcept { 
if(rp_[i % M_]) @@ -284,9 +284,9 @@ class MOD210Wheel final } return Next(i); } - constexpr auto Prev(const Integer i) const noexcept { return i - dist_[(M_ - (i % M_)) % M_]; } - constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[i % M_]; } - constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[i % phi_]; } + constexpr auto Prev(const Integer i) const noexcept { return i - dist_[static_cast((M_ - (i % M_)) % M_)]; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[static_cast(i % M_)]; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[static_cast(i % phi_)]; } }; } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 8f0a5ee7db..fa950a2742 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -22,33 +23,33 @@ void test_prime_sieve() Integer ref {168}; // Calculated with wolfram-alpha // Does the function work with a vector - boost::math::prime_sieve(1000, primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000), primes); BOOST_TEST_EQ(primes.size(), ref); // Tests for correctness // 2 primes.clear(); - boost::math::prime_sieve(2, primes); + boost::math::prime_sieve(std::execution::par, static_cast(2), primes); BOOST_TEST_EQ(primes.size(), 0); // 100 primes.clear(); - boost::math::prime_sieve(100, primes); + boost::math::prime_sieve(std::execution::par, static_cast(100), primes); BOOST_TEST_EQ(primes.size(), 25); // 10'000 primes.clear(); - boost::math::prime_sieve(10000, primes); + boost::math::prime_sieve(std::execution::par, static_cast(10'000), primes); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(100000, primes); + boost::math::prime_sieve(std::execution::par, static_cast(100'000), primes); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 
primes.clear(); - boost::math::prime_sieve(1000000, primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), primes); BOOST_TEST_EQ(primes.size(), 78498); /* @@ -60,7 +61,7 @@ void test_prime_sieve() // Does the function work with a deque? std::deque d_primes; - boost::math::prime_sieve(1000, d_primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000), d_primes); BOOST_TEST_EQ(d_primes.size(), ref); } @@ -71,17 +72,17 @@ void test_sequential_prime_sieve() // 10'000 primes.clear(); - boost::math::prime_sieve(std::execution::seq, 10000, primes); + boost::math::prime_sieve(static_cast(10'000), primes); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(std::execution::seq, 100000, primes); + boost::math::prime_sieve(static_cast(100'000), primes); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 primes.clear(); - boost::math::prime_sieve(std::execution::seq, 1000000, primes); + boost::math::prime_sieve(static_cast(1'000'000), primes); BOOST_TEST_EQ(primes.size(), 78498); } @@ -140,7 +141,7 @@ void test_par_prime_sieve_large() // Force the sieve into the multi-threading section and test reserve functionality boost::math::prime_reserve(static_cast(1073741824), primes); - boost::math::prime_sieve(static_cast(1073741824), primes); + boost::math::prime_sieve(std::execution::par, static_cast(1073741824), primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -150,15 +151,15 @@ void test_interval_sieve() std::vector pre_sieved_primes {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71}; std::vector primes; - boost::math::detail::IntervalSieve sieve(1'000, 10'000, pre_sieved_primes, primes); + boost::math::detail::IntervalSieve sieve(static_cast(1'000), static_cast(10'000), pre_sieved_primes, primes); BOOST_TEST_EQ(primes.size(), 1'061); primes.clear(); - sieve.NewRange(10'000, 100'000, primes); + sieve.NewRange(static_cast(10'000), static_cast(100'000), primes); 
BOOST_TEST_EQ(primes.size(), 8'363); primes.clear(); - sieve.NewRange(100'000, 1'000'000, primes); + sieve.NewRange(static_cast(100'000), static_cast(1'000'000), primes); BOOST_TEST_EQ(primes.size(), 68'906); } @@ -182,28 +183,30 @@ void test_linear_sieve() int main() { // Individual Algorithms - //test_linear_sieve(); - //test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); test_linear_sieve(); - //test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); - //test_interval_sieve(); - //test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); test_interval_sieve(); - //test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); // Composite - //test_prime_sieve(); - //test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); test_prime_sieve(); - //test_prime_sieve(); - //test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); - //test_sequential_prime_sieve(); - //test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); test_sequential_prime_sieve(); - //test_sequential_prime_sieve(); - //test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); //test_prime_range(); //test_prime_range(); @@ -213,6 +216,7 @@ int main() //test_prime_sieve_overflow(); test_par_prime_sieve_large(); + test_par_prime_sieve_large(); boost::report_errors(); } From 81e4a6ca608f3166071c651b81b9431c0fbfc1d2 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 26 Aug 2020 22:29:27 -0500 Subject: [PATCH 52/83] mpz_int passes unit tests [CI SKIP] --- test/test_prime_sieve.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index fa950a2742..a6811bb0cd 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -15,6 +15,7 @@ #include #include #include +#include template void test_prime_sieve() @@ -188,12 +189,14 @@ int main() test_linear_sieve(); 
test_linear_sieve(); test_linear_sieve(); + test_linear_sieve(); test_interval_sieve(); test_interval_sieve(); test_interval_sieve(); test_interval_sieve(); test_interval_sieve(); + test_interval_sieve(); // Composite test_prime_sieve(); @@ -201,12 +204,14 @@ int main() test_prime_sieve(); test_prime_sieve(); test_prime_sieve(); + test_prime_sieve(); test_sequential_prime_sieve(); test_sequential_prime_sieve(); test_sequential_prime_sieve(); test_sequential_prime_sieve(); test_sequential_prime_sieve(); + test_sequential_prime_sieve(); //test_prime_range(); //test_prime_range(); @@ -215,8 +220,24 @@ int main() //test_prime_sieve_overflow(); + //std::cout << "Primes less than 2^30" << std::endl; + //auto int64_time_start {std::chrono::high_resolution_clock::now()}; test_par_prime_sieve_large(); + //auto int64_time_stop {std::chrono::high_resolution_clock::now()}; + //auto int64_duration {std::chrono::duration_cast(int64_time_stop - int64_time_start).count()}; + //std::cout << "int64_t: " << int64_duration << " ms" << std::endl; + + //auto cppint_time_start {std::chrono::high_resolution_clock::now()}; test_par_prime_sieve_large(); + //auto cppint_time_stop {std::chrono::high_resolution_clock::now()}; + //auto cppint_duration{std::chrono::duration_cast(cppint_time_stop - cppint_time_start).count()}; + //std::cout << "cpp_int: " << cppint_duration << " ms" << std::endl; + + //auto mpzint_time_start {std::chrono::high_resolution_clock::now()}; + test_par_prime_sieve_large(); + //auto mpzint_time_stop {std::chrono::high_resolution_clock::now()}; + //auto mpzint_duration{std::chrono::duration_cast(mpzint_time_stop - mpzint_time_start).count()}; + //std::cout << "mpz_int: " << mpzint_duration << " ms" << std::endl; boost::report_errors(); } From 0b8f1d51ed9f21d6e2a561c85b8a872e00293f12 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 30 Aug 2020 18:02:17 -0500 Subject: [PATCH 53/83] Documentation edits [CI SKIP] --- doc/sf/number_series.qbk | 101 ++++++++++++++---- 
.../math/special_functions/interval_sieve.hpp | 2 +- .../performance/prime_sieve_performance.cpp | 23 ++-- 3 files changed, 93 insertions(+), 33 deletions(-) diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index ebdd857d0d..c4d3cefee6 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -271,43 +271,100 @@ This function is `constexpr` only if the compiler supports C++14 constexpr funct namespace boost { namespace math { - template - auto prime_sieve(ExecutionPolicy&& policy, Z upper_bound, OutputIterator output) -> decltype(output) + template + void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &resultant_primes) - template - auto prime_range(ExecutionPolicy&& policy, Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) + template + void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &resultant_primes) - template - auto prime_sieve(Z upper_bound, OutputIterator output) -> decltype(output) + template + void prime_sieve(Integer upper_bound, Container &resultant_primes) - template - auto prime_range(Z lower_bound, Z upper_bound, OutputIterator output) -> decltype(output) + template + void prime_range(Integer lower_bound, Integer upper_bound, Container &resultant_primes) }} // End namespaces [h4 Description] -There are two sets of functions available `prime_sieve` and `prime_range`. `prime_sieve` will return all primes in the -range [2, `upper_bound`). `prime_range` will return all the primes in the range [lower_bound, upper_bound). +There are two sets of functions available: `prime_sieve` and `prime_range`. +`prime_sieve` will return all primes in the range [2, `upper_bound`). +`prime_range` will return all primes in the range [lower_bound, upper_bound). +Both `prime_sieve` and `prime_range` can perform arbitrary precision calculations using `boost::multiprecision::cpp_int` or `boost::multiprecision::mpz_int`. 
-If you have a C++17 compatible compiler you are able to pass an execution policy to both functions. Any -policy besides `std::execution::seq` will enable internal multi-threading. If your compiler is not C++17 compatible the -sequential overloads will be used. - -For upper_bound <= 2[super 24] two threads will be used. For any value larger than 2[super 24] `std::thread::hardware_concurrency()` will be called, -and all available concurrency will be used. Additionally, the memory requirements are `bigo[](sqrt(N))`. - -/Nota bene:/ If `std::thread::hardware_concurrency()` returns 0 the max number of threads will be set to 2. +`prime_sieve` and `prime_range` both have a parameter for an execution policy. +The policies `std::execution::par` or `std::execution::par_unseq` will enable internal multi-threading. +All other policies or no policy will result in the `prime_sieve` and `prime_range` executing sequentially. [h4 Examples] - // To calculate primes 2 - 1,000,000 in parallel + // To calculate primes [2, 1,000,000) in parallel std::vector primes; - boost::math::prime_sieve(std::execution::par, 1000000, std::back_inserter(primes)); + boost::math::prime_sieve(std::execution::par, 1'000'000, primes); - // To calculate primes 100 - 1,000 sequentially + // To calculate primes [100, 1,000) sequentially std::vector primes; - boost::math::prime_range(100, 1000, std::back_inserter(primes)); + boost::math::prime_range(100, 1'000, primes); + +[h4 Complexity] +These functions were tested for complexity using [@https://github.com/google/benchmark google benchmark] on a system with the following specifications: +* CPU: Intel i5-10500 +* OS: Red Hat Enterprise Linux 8.2 +* Compiler: gcc-g++ 10.2.0 (Compiled from source with GMP 6.2.0, MPC 1.2.0, and MPFR 4.1.0 using RHEL gcc-toolset-9) +* Compiler flags: -Ofast -MMD -march=native -g -lbenchmark -lpthread -lgmp + +Range of test upper_bound: 2[super 1] - 2[super 30] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + +
Complexity Calculations `bigo[](N)`
Type | Time Complexity | Time Complexity RMS | CPU Complexity | CPU Complexity RMS
int32_t | 0.72N | 5% | 0.02N | 13%
int64_t | 0.78N | 16% | 0.03N | 6%
uint32_t | 0.71N | 10% | 0.02N | 14%
cpp_int | 6.24N | 44% | 0.35N | 4%
mpz_int | 9.27N | 11% | 0.69N | 8%
+ [h4 References] * Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 4c79dc30ac..2103a13dff 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -216,7 +216,7 @@ bool IntervalSieve::Psstest(const std::size_ { const Integer n {left_ + pos}; const Integer exponent {(n - 1) / 2}; - const std::int_fast64_t nmod8 = n % 8; + const std::int_fast64_t nmod8 = static_cast(n % 8); std::int_fast64_t negative_one_count {0}; diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 58e63b01f3..69aa67509b 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -7,6 +7,8 @@ #include #include +#include +#include #include #include #include @@ -102,7 +104,6 @@ inline auto kimwalish_primes_helper(Integer upper, std::vector primes) template void kimwalish_primes(benchmark::State& state) { - Integer upper = static_cast(state.range(0)); for (auto _ : state) { @@ -114,20 +115,22 @@ void kimwalish_primes(benchmark::State& state) // Invidiual Implementations // Linear -//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve, 
uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Segmented -//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); -//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); -//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); +BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); // Complete Implemenations -//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark -//BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 22)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); 
+BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_MAIN(); From 5ba0a1d1f83f9603976c90fefe2dd9ef0c71d7ec Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 30 Aug 2020 23:15:46 -0500 Subject: [PATCH 54/83] Replace magic number w variable template [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index c43e3290f9..99ad2693b2 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -48,6 +48,10 @@ void linear_sieve(Integer upper_bound, Container &resultant_primes) } } +// 4096 is where benchmarked performance of linear_sieve begins to diverge +template +const Integer linear_sieve_limit = Integer(4096); // Constexpr does not work with boost::multiprecision types + template void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& primes, Container &resultant_primes) { @@ -175,21 +179,22 @@ void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesConta template void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) { + using boost::math::detail::linear_sieve_limit; + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; std::vector primes {}; primes.reserve(static_cast(limit) / std::log(static_cast(limit))); // Prepare for max value so you do not have to calculate this again - if(limit < 4096) + if(limit < linear_sieve_limit) { boost::math::detail::linear_sieve(static_cast(limit), primes); } else { - //boost::math::detail::mask_sieve(static_cast(2), limit, primes); - boost::math::detail::linear_sieve(static_cast(4096), primes); - boost::math::detail::segmented_sieve(static_cast(4096), 
limit, primes, primes); + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::segmented_sieve(linear_sieve_limit, limit, primes, primes); } boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); @@ -238,20 +243,22 @@ constexpr void prime_reserve(Integer upper_bound, std::vector &prime_co template void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) { + using boost::math::detail::linear_sieve_limit; + if(upper_bound == 2) { return; } - if(upper_bound <= 4096) + if(upper_bound <= linear_sieve_limit) { boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } else if(typeid(policy) == typeid(std::execution::seq)) { - boost::math::detail::linear_sieve(static_cast(4096), primes); - boost::math::detail::sequential_segmented_sieve(static_cast(4096), upper_bound, primes); + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, upper_bound, primes); } else @@ -260,10 +267,10 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime small_primes.reserve(1028); std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(8192), small_primes); + boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); }); std::thread t2([upper_bound, &primes] { - boost::math::detail::segmented_sieve(static_cast(8192), upper_bound, primes); + boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), upper_bound, primes); }); t1.join(); From 8e240f7eb7cc5faeb1c6ec3025eacd7ccf875360 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 30 Aug 2020 23:51:19 -0500 Subject: [PATCH 55/83] Better prime_range implementation [WIP][CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 89 ++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp 
b/include/boost/math/special_functions/prime_sieve.hpp index 99ad2693b2..ee57abd338 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -289,7 +289,94 @@ void prime_sieve(Integer upper_bound, Container &primes) template void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) { - boost::math::prime_sieve(policy, upper_bound, primes); + using boost::math::detail::linear_sieve_limit; + Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + + if(upper_bound == 2) + { + return; + } + + if(upper_bound <= linear_sieve_limit) + { + boost::math::detail::linear_sieve(static_cast(upper_bound), primes); + } + + else if(typeid(policy) == typeid(std::execution::seq)) + { + if(limit <= linear_sieve_limit) + { + boost::math::detail::linear_sieve(limit, primes); + + if(lower_bound <= limit) + { + boost::math::detail::sequential_segmented_sieve(limit, upper_bound, primes); + } + else + { + boost::math::detail::sequential_segmented_sieve(lower_bound, upper_bound, primes); + } + + } + + else + { + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, limit, primes); + boost::math::detail::sequential_segmented_sieve(lower_bound, upper_bound, primes); + } + } + + else + { + std::vector small_primes {}; + + if(limit <= static_cast(linear_sieve_limit * 2)) + { + small_primes.reserve(1028); + + std::thread t1([limit, &small_primes] { + boost::math::detail::linear_sieve(limit, small_primes); + }); + + std::thread t2([lower_bound, limit, upper_bound, &primes] { + if(lower_bound <= limit) + { + boost::math::detail::segmented_sieve(limit, upper_bound, primes); + } + else + { + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes); + } + + }); + + t1.join(); + t2.join(); + + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } 
+ + else + { + boost::math::prime_reserve(limit, small_primes); + + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); + }); + + std::thread t2([limit, &primes] { + boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), limit, primes); + }); + + t1.join(); + t2.join(); + + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes); + } + } auto it{primes.begin()}; while(*it < lower_bound && it != primes.end()) From 3d9b77c784b5dfd7e4b10a33549a75dd3ae746a8 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 2 Sep 2020 16:41:41 -0500 Subject: [PATCH 56/83] Minor changes and doc updates --- doc/math.qbk | 4 +- doc/sf/number_series.qbk | 70 ++------ .../math/special_functions/interval_sieve.hpp | 24 +-- .../math/special_functions/prime_sieve.hpp | 1 - .../performance/prime_sieve_performance.cpp | 20 +-- test/Jamfile.v2 | 2 +- test/test_prime_sieve.cpp | 158 +++++++++++++----- 7 files changed, 147 insertions(+), 132 deletions(-) diff --git a/doc/math.qbk b/doc/math.qbk index 984140d840..161193dc4a 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -1,13 +1,13 @@ [book Math Toolkit [quickbook 1.7] - [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan RÃ¥de, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] + [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Matthew Borland, Paul A. 
Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] [/purpose ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22] [license Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at [@http://www.boost.org/LICENSE_1_0.txt]) ] - [authors [Agrawal, Nikhar], [Bikineev, Anton], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] + [authors [Agrawal, Nikhar], [Bikineev, Anton], [Borland, Matthew], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] [/last-revision $Date$] [version 2.12.0] ] diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index c4d3cefee6..98ab09873c 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -283,12 +283,15 @@ namespace boost { namespace math { template void prime_range(Integer lower_bound, Integer upper_bound, Container &resultant_primes) + template + constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) + }} // End namespaces [h4 Description] -There are two sets of functions available: `prime_sieve` and `prime_range`. +There are two sets of prime sieving functions available: `prime_sieve` and `prime_range`. `prime_sieve` will return all primes in the range [2, `upper_bound`).
`prime_range` will return all primes in the range [lower_bound, upper_bound). Both `prime_sieve` and `prime_range` can perform arbitrary precision calculations using `boost::multiprecision::cpp_int` or `boost::multiprecision::mpz_int`. @@ -297,9 +300,12 @@ Both `prime_sieve` and `prime_range` can perform arbitrary precision calculation The policies `std::execution::par` or `std::execution::par_unseq` will enable internal multi-threading. All other policies or no policy will result in the `prime_sieve` and `prime_range` executing sequentially. +`prime_reserve` uses the prime number theorem to reserve for approximately the correct number of primes given `upper_bound`. + [h4 Examples] - // To calculate primes [2, 1,000,000) in parallel + // To reserve space and calculate primes [2, 1,000,000) in parallel std::vector primes; + boost::math::prime_reserve(1'000'000, primes); boost::math::prime_sieve(std::execution::par, 1'000'000, primes); // To calculate primes [100, 1,000) sequentially @@ -315,56 +321,16 @@ These functions were tested for complexity using [@https://github.com/google/ben Range of test upper_bound: 2[super 1] - 2[super 30] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Complexity Calculations `bigo[](N)`
Type | Time Complexity | Time Complexity RMS | CPU Complexity | CPU Complexity RMS
int32_t | 0.72N | 5% | 0.02N | 13%
int64_t | 0.78N | 16% | 0.03N | 6%
uint32_t | 0.71N | 10% | 0.02N | 14%
cpp_int | 6.24N | 44% | 0.35N | 4%
mpz_int | 9.27N | 11% | 0.69N | 8%
- +[pre''' +[table:id Complexity Calculations `bigo[](N)` + [[Type] [Time Complexity] [Time Complexity RMS] [CPU Complexity] [CPU Complexity RMS]] + [[int32_t] [0.72N] [5%] [0.02N] [13%]] + [[int64_t] [0.78N] [16%] [0.03N] [6%]] + [[uint32_t] [0.71N] [10%] [0.02N] [14%]] + [[cpp_int] [6.24N] [44%] [0.35N] [4%]] + [[mpz_int] [9.27N] [11%] [0.69N] [8%]] +] +'''] [h4 References] * Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 2103a13dff..7c021a5c10 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -19,25 +19,6 @@ namespace boost::math::detail { - -#if defined(__MPIR_VERSION) || defined(__GNU_MP_VERSION) -// GNU GMP C or MPIR -inline double get_double(const mpz_t &x) noexcept -{ - return mpz_get_d(x); -} -#endif - -#if defined(__GNU_MP_VERSION) -#if __has_include() -// GNU GMP C++ bindings -inline double get_double(const mpz_class &x) noexcept -{ - return x.get_d() -} -#endif -#endif - // boost::multiprecision and POD template inline double get_double(const Integer &x) noexcept @@ -47,8 +28,9 @@ inline double get_double(const Integer &x) noexcept template class IntervalSieve -{ -#ifdef __SIZEOF_INT128__ // Defined in GCC 4.6+, clang, intel. MSVC does not define. +{ + +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. 
using int_128t = __int128; // One machine word smaller than the boost equivalent #else using int_128t = boost::multiprecision::int128_t; diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index ee57abd338..5e50c32fcb 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 69aa67509b..a806e6efb9 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -115,20 +115,20 @@ void kimwalish_primes(benchmark::State& state) // Invidiual Implementations // Linear -BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Segmented -BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); -BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); -BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); -BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 
22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); // Complete Implemenations -BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 65639306dd..c65753984d 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -970,7 +970,7 @@ test-suite misc : [ run compile_test/catmull_rom_concept_test.cpp 
compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ] [ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] - [ run test_prime_sieve.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run test_prime_sieve.cpp ../../test/build//boost_unit_test_framework : : : -lgmp [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index a6811bb0cd..e49f949f05 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -53,13 +53,6 @@ void test_prime_sieve() boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), primes); BOOST_TEST_EQ(primes.size(), 78498); - /* - // Does the function work with a list? - std::list l_primes; - boost::math::prime_sieve(1000, l_primes); - BOOST_TEST_EQ(l_primes.size(), ref); - */ - // Does the function work with a deque? 
std::deque d_primes; boost::math::prime_sieve(std::execution::par, static_cast(1'000), d_primes); @@ -94,44 +87,109 @@ void test_prime_range() Integer ref {168}; // Calculated with wolfram-alpha // Does the upper and lower bound call work - boost::math::prime_range(static_cast(2), static_cast(1000), primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), primes); BOOST_TEST_EQ(primes.size(), ref); - // Does the upper bound call work + // Does parallel version work primes.clear(); - boost::math::prime_range(static_cast(2), static_cast(1000), primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), primes); BOOST_TEST_EQ(primes.size(), ref); // Does it work with a deque? std::deque d_primes; - boost::math::prime_range(static_cast(2), static_cast(1000), d_primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), d_primes); BOOST_TEST_EQ(d_primes.size(), ref); - // Does it work with a list? - std::list l_primes; - boost::math::prime_range(static_cast(2), static_cast(1000), l_primes); - BOOST_TEST_EQ(l_primes.size(), ref); + // Does the lower bound change the results? 
+ ref = 143; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(100), static_cast(1'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Will it call the sieve for large input + ref = 78498; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000'000), primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_prime_range_large() +{ + std::vector primes; + Integer ref; + + // Larger numbers + ref = 586'081; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(1'000'000), static_cast(10'000'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 5'096'876; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(10'000'000), static_cast(100'000'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 48'638'573; + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(100'000'000), static_cast(1'073'741'824), primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_prime_range_seq() +{ + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha + + // Does the upper and lower bound call work + boost::math::prime_range(static_cast(2), static_cast(1'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does parallel version work + primes.clear(); + boost::math::prime_range(static_cast(2), static_cast(1'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does it work with a deque? + std::deque d_primes; + boost::math::prime_range(static_cast(2), static_cast(1'000), d_primes); + BOOST_TEST_EQ(d_primes.size(), ref); // Does the lower bound change the results? 
ref = 143; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(100), static_cast(1000), primes); + boost::math::prime_range(static_cast(100), static_cast(1'000), primes); BOOST_TEST_EQ(primes.size(), ref); // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(2), static_cast(1000000), primes); + boost::math::prime_range(static_cast(2), static_cast(1'000'000), primes); BOOST_TEST_EQ(primes.size(), ref); } template -void test_prime_sieve_overflow() +void test_prime_range_seq_large() { std::vector primes; + Integer ref; + + // Larger numbers + ref = 586'081; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(1'000'000), static_cast(10'000'000), primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 5'096'876; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(10'000'000), static_cast(100'000'000), primes); + BOOST_TEST_EQ(primes.size(), ref); - // Should die with call to BOOST_ASSERT - boost::math::prime_sieve(static_cast(2), static_cast(std::numeric_limits::max()), primes); + ref = 48'638'573; + primes.clear(); + boost::math::prime_range(static_cast(100'000'000), static_cast(1'073'741'824), primes); + BOOST_TEST_EQ(primes.size(), ref); } template @@ -213,31 +271,41 @@ int main() test_sequential_prime_sieve(); test_sequential_prime_sieve(); - //test_prime_range(); - //test_prime_range(); - //test_prime_range(); - //test_prime_range(); - - //test_prime_sieve_overflow(); - - //std::cout << "Primes less than 2^30" << std::endl; - //auto int64_time_start {std::chrono::high_resolution_clock::now()}; - test_par_prime_sieve_large(); - //auto int64_time_stop {std::chrono::high_resolution_clock::now()}; - //auto int64_duration {std::chrono::duration_cast(int64_time_stop - int64_time_start).count()}; - //std::cout << "int64_t: " << int64_duration << " ms" << std::endl; - - 
//auto cppint_time_start {std::chrono::high_resolution_clock::now()}; - test_par_prime_sieve_large(); - //auto cppint_time_stop {std::chrono::high_resolution_clock::now()}; - //auto cppint_duration{std::chrono::duration_cast(cppint_time_stop - cppint_time_start).count()}; - //std::cout << "cpp_int: " << cppint_duration << " ms" << std::endl; - - //auto mpzint_time_start {std::chrono::high_resolution_clock::now()}; - test_par_prime_sieve_large(); - //auto mpzint_time_stop {std::chrono::high_resolution_clock::now()}; - //auto mpzint_duration{std::chrono::duration_cast(mpzint_time_stop - mpzint_time_start).count()}; - //std::cout << "mpz_int: " << mpzint_duration << " ms" << std::endl; + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + + // Large composite tests (Commented out for CI) + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); boost::report_errors(); } From 9792a23b4ebc163027d34f534af165bd700d460b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 6 Sep 2020 18:33:55 -0500 Subject: [PATCH 57/83] Minor change to policy handling [CI SKIP] --- include/boost/math/special_functions/prime_sieve.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 5e50c32fcb..3e79dea449 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace boost::math { namespace detail { @@ -254,7 +255,8 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } - else if(typeid(policy) == typeid(std::execution::seq)) + else if constexpr (std::is_same_v, decltype(std::execution::seq)> || + std::is_same_v, decltype(std::execution::unseq)>) { boost::math::detail::linear_sieve(linear_sieve_limit, primes); boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, upper_bound, primes); @@ -301,7 +303,8 @@ void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bo boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } - else if(typeid(policy) == typeid(std::execution::seq)) + else if constexpr (std::is_same_v, decltype(std::execution::seq)> || + std::is_same_v, decltype(std::execution::unseq)>) { if(limit <= linear_sieve_limit) { From 84a69f0304b09921a5d55feffcee09457c4f5b79 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 8 Sep 2020 18:02:10 -0500 Subject: [PATCH 58/83] diffs from @jzmaddock [CI SKIP] --- .../math/special_functions/interval_sieve.hpp | 23 ++++++++++++------- .../math/special_functions/prime_sieve.hpp | 10 ++++++-- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp index 7c021a5c10..172f2b4f2f 100644 --- a/include/boost/math/special_functions/interval_sieve.hpp +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -31,9 +31,9 @@ class IntervalSieve { #ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, 
clang, intel. MSVC does not define. -using int_128t = __int128; // One machine word smaller than the boost equivalent +using int_128t = unsigned __int128; // One machine word smaller than the boost equivalent #else -using int_128t = boost::multiprecision::int128_t; +using int_128t = boost::multiprecision::uint128_t; #endif private: @@ -56,10 +56,17 @@ using int_128t = boost::multiprecision::int128_t; 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, - 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689, - 69'848'288'320'900'186'969, 208'936'365'799'044'975'961, 533'552'663'339'828'203'681, 936'664'079'266'714'697'089, - 2'142'202'860'370'269'916'129, 13'649'154'491'558'298'803'281, 34'594'858'801'670'127'778'801, - 99'492'945'930'479'213'334'049, 295'363'187'400'900'310'880'401 + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689u, + + (static_cast(0x3uLL) << 64) | 0xc956f827e0524359uLL, // 69'848'288'320'900'186'969 + (static_cast(0xbuLL) << 64) | 0x539315b3b1268d59uLL, // 208'936'365'799'044'975'961 + (static_cast(0x1cuLL) << 64) | 0xec87d86ca60b50a1uLL, // 533'552'663'339'828'203'681 + (static_cast(0x32uLL) << 64) | 0xc6d3496f20db3d81uLL, // 936'664'079'266'714'697'089 + (static_cast(0x74uLL) << 64) | 0x210967a12ba94be1uLL, // 2'142'202'860'370'269'916'129 + (static_cast(0x2e3uLL) << 64) | 0xec11ddc09fd65c51uLL, // 13'649'154'491'558'298'803'281 + (static_cast(0x753uLL) << 64) | 0x641c14b397c27bf1uLL, // 34'594'858'801'670'127'778'801 + (static_cast(0x1511uLL) << 64) | 0x85fdf38d1fc9ce21uLL, // 99'492'945'930'479'213'334'049 + (static_cast(0x3e8buLL) << 64) | 0xaba417e222ca5091uLL // 
295'363'187'400'900'310'880'401 }; }; @@ -126,7 +133,7 @@ void IntervalSieve::Settdlimit() noexcept } plimit_ = pss_.prime[i]; - double tdlimit_guess = 1 + std::fmod(dr, pss_.ps[i]); + double tdlimit_guess = 1 + std::fmod(dr, static_cast(pss_.ps[i])); if(tdlimit_guess * tdlimit_guess >= dr) { tdlimit_ = static_cast(std::sqrt(dr)); @@ -196,7 +203,7 @@ void IntervalSieve::WriteOutput(Container &r template bool IntervalSieve::Psstest(const std::size_t pos) noexcept { - const Integer n {left_ + pos}; + const Integer n {static_cast(left_ + pos)}; const Integer exponent {(n - 1) / 2}; const std::int_fast64_t nmod8 = static_cast(n % 8); diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 3e79dea449..3102954743 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -256,7 +256,10 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime } else if constexpr (std::is_same_v, decltype(std::execution::seq)> || - std::is_same_v, decltype(std::execution::unseq)>) + #if __cpp_lib_execution > 201900 + std::is_same_v, decltype(std::execution::unseq)> + #endif + ) { boost::math::detail::linear_sieve(linear_sieve_limit, primes); boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, upper_bound, primes); @@ -304,7 +307,10 @@ void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bo } else if constexpr (std::is_same_v, decltype(std::execution::seq)> || - std::is_same_v, decltype(std::execution::unseq)>) + #if __cpp_lib_execution > 201900 + std::is_same_v, decltype(std::execution::unseq)> + #endif + ) { if(limit <= linear_sieve_limit) { From 5dc35236d0f5a618213f799cd7544e79d41d4e59 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 9 Sep 2020 17:59:47 -0500 Subject: [PATCH 59/83] Implemented linear sieve with iterators [WIP][CI SKIP] --- .../detail/linear_prime_sieve.hpp | 44 
+++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/boost/math/special_functions/detail/linear_prime_sieve.hpp diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp new file mode 100644 index 0000000000..7f2847b12a --- /dev/null +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -0,0 +1,44 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP + +#include + +namespace boost::math::detail::prime_sieve +{ +// https://mathworld.wolfram.com/SieveofEratosthenes.html +// https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf +template +void linear_sieve(const Integer upper_bound, const ForwardIterator first, const ForwardIterator last) noexcept +{ + const std::size_t least_divisors_size{static_cast(upper_bound + 1)}; + std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; + auto current {first}; + + for (std::size_t i{2}; i < upper_bound; ++i) + { + if (least_divisors[i] == 0) + { + least_divisors[i] = i; + *current++ = i; + } + + for (std::size_t j{}; i * *(first + j) <= upper_bound && *(first + j) <= least_divisors[i] && j < least_divisors_size; ++j) + { + least_divisors[i * static_cast(*(first + j))] = *(first + j); + } + } +} + +// 4'096 is where benchmarked performance of linear_sieve begins to diverge +template +static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP \ No newline at end of file From 0dbe69cb72584dc2991879f8d17442265f2020d7 Mon Sep 17 00:00:00 2001 
From: Matt Borland Date: Wed, 9 Sep 2020 18:30:13 -0500 Subject: [PATCH 60/83] Sanitize linear prime sieve and add wheel [WIP][CI SKIP] --- .../detail/linear_prime_sieve.hpp | 8 +- .../special_functions/detail/prime_wheel.hpp | 293 ++++++++++++++++++ 2 files changed, 297 insertions(+), 4 deletions(-) create mode 100644 include/boost/math/special_functions/detail/prime_wheel.hpp diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 7f2847b12a..d2cb6a31ae 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -5,8 +5,8 @@ // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP -#define BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP #include @@ -29,7 +29,7 @@ void linear_sieve(const Integer upper_bound, const ForwardIterator first, const *current++ = i; } - for (std::size_t j{}; i * *(first + j) <= upper_bound && *(first + j) <= least_divisors[i] && j < least_divisors_size; ++j) + for (std::size_t j{}; (first + j) < last && i * *(first + j) <= upper_bound && *(first + j) <= least_divisors[i] && j < least_divisors_size; ++j) { least_divisors[i * static_cast(*(first + j))] = *(first + j); } @@ -41,4 +41,4 @@ template static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types } -#endif // BOOST_MATH_SPECIAL_FUNCTIONS_LINEAR_PRIME_SIEVE_HPP \ No newline at end of file +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP diff --git a/include/boost/math/special_functions/detail/prime_wheel.hpp b/include/boost/math/special_functions/detail/prime_wheel.hpp new file 
mode 100644 index 0000000000..1297eb5a96 --- /dev/null +++ b/include/boost/math/special_functions/detail/prime_wheel.hpp @@ -0,0 +1,293 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class Wheel +{ +private: + struct Wheelrec + { + std::int_fast32_t rp; + std::int_fast32_t dist; + std::int_fast32_t pos; + std::int_fast32_t inv; + }; + + std::unique_ptr W_; + Integer M_; + Integer k_; + Integer phi_; + + static constexpr std::array P_ {2, 3, 5, 7, 11, 13, 17, 19}; + + void build(Integer korsize); + +public: + Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; + explicit Wheel(Integer korsize) { build(korsize); } + explicit Wheel(const Wheel &x) { build(x.K()); } + + constexpr bool operator!() const noexcept { return W_ == nullptr; } + constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } + const Wheel& operator=(const Wheel &x) + { + if(this != &x) + { + build(x.K()); + } + return *this; + } + + constexpr Integer Size() const noexcept { return M_; } + constexpr Integer K() const noexcept { return k_; } + constexpr Integer Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } + constexpr Integer MakeRP(const Integer i) const noexcept + { + if(W_[i % M_].rp) + { + return i; + } + return Next(i); + } + constexpr Integer Prev(const Integer i) const noexcept { return i - W_[(M_ - (i % M_)) % M_].dist; } + constexpr Integer Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr Integer 
Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + + void Print(); +}; + +template +void Wheel::build(Integer korsize) +{ + // Calculate k_ and M_ + if(korsize >= 10) + { + --korsize; + for(k_ = 0; korsize > 0; ++k_) + { + korsize /= P_[k_]; + } + } + else + { + k_ = korsize; + } + + Integer i {0}; + Integer dist {0}; + Integer pos {1}; + + for(M_ = 1; i < k_; ++i) + { + M_ *= P_[i]; + } + + W_ = std::make_unique(M_); + + // Compute the RP field + for(i = 0; i < M_; ++i) + { + W_[i].rp = 1; + } + + for(i = 0; i < k_; ++i) + { + for(Integer j {0}; j < M_; j += P_[i]) + { + W_[j].rp = 0; + } + } + + // Compute the dist field + W_[M_- 1].dist = 2; + for(i = M_ - 2; i >= 0; --i) + { + W_[i].dist = ++dist; + if(W_[i].rp) + { + dist = 0; + } + } + + // Copute pos and inv fields + for(i = 0; i < M_; ++i) + { + W_[i].inv = 0; + if(W_[i].rp) + { + W_[pos].inv = i; + W_[i].pos = pos++; + } + else + { + W_[i].pos = 0; + } + + } + + W_[0].inv = -1; + phi_ = W_[M_- 1].pos; +} + +template +void Wheel::Print() +{ + std::int_fast32_t i {}; + std::cout << "Wheel size = " << this->Size() + << "\nk = " << this->K() + << "\nphi(M) = " << this->Phi() << std::endl; + + // Verify size + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << i << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nRP Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].rp << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nDist Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].dist << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nPos Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].pos << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nInv Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << 
std::setw(4) << W_[i].inv << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + std::cout << std::endl; +} + +// Pre-computed MOD 210 wheel +template +class MOD210Wheel final +{ +private: + static constexpr auto M_ {210}; + static constexpr auto k_ {4}; + static constexpr auto phi_ {28}; + + static constexpr std::array rp_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 + }; + + static constexpr std::array inv_ + { + -1, 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, + 107, 109, 113, 121, 127, 131, 137, 139, 143, 149, 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + static constexpr std::array pos_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 5, 0, 0, 0, 6, 0, + 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, 0, 12, 0, 0, + 0, 0, 0, 13, 0, 0, 0, 0, 0, 14, 0, 15, 
0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 23, 0, 24, 0, 0, 0, 25, 0, 26, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 30, 0, 0, 0, 0, 0, 31, 0, 32, 0, 0, 0, 33, 0, 0, 0, 0, 0, 34, + 0, 35, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 0, 0, 38, 0, 39, 0, 0, 0, 40, 0, + 0, 0, 0, 0, 41, 0, 42, 0, 0, 0, 0, 0, 43, 0, 0, 0, 44, 0, 45, 0, 0, 0, 46, 0, 47, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48 + }; + + static constexpr std::array dist_ + { + 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 6, 5, + 4, 3, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, + 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, + 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, + 1, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 2 + }; + +public: + constexpr MOD210Wheel() = default; + ~MOD210Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr auto Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } + constexpr auto MakeRP(const Integer i) const noexcept + { + if(rp_[i % M_]) + { + return i; + } + return Next(i); + } + constexpr auto Prev(const Integer i) const noexcept { return i - dist_[static_cast((M_ - (i % M_)) % M_)]; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[static_cast(i % M_)]; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[static_cast(i % phi_)]; } +}; +} + +#endif // 
BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP \ No newline at end of file From eee2c8627ba82302eb2d0cb4822b2e92d6e600be Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 12:46:15 -0500 Subject: [PATCH 61/83] Added container method --- .../special_functions/detail/linear_prime_sieve.hpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index d2cb6a31ae..68c592171f 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -14,8 +14,8 @@ namespace boost::math::detail::prime_sieve { // https://mathworld.wolfram.com/SieveofEratosthenes.html // https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf -template -void linear_sieve(const Integer upper_bound, const ForwardIterator first, const ForwardIterator last) noexcept +template +void linear_sieve(const Integer upper_bound, ForwardIterator first, ForwardIterator last) noexcept { const std::size_t least_divisors_size{static_cast(upper_bound + 1)}; std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; @@ -36,6 +36,12 @@ void linear_sieve(const Integer upper_bound, const ForwardIterator first, const } } +template +inline void linear_sieve(const Integer upper_bound, Container& c) +{ + linear_sieve(upper_bound, std::begin(c), std::end(c)); +} + // 4'096 is where benchmarked performance of linear_sieve begins to diverge template static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types From f5d789adbc6e33f9746c59059f7622dd3ed6d45c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 20:36:44 -0500 Subject: [PATCH 62/83] Improved Linear Algo and testing --- .../detail/linear_prime_sieve.hpp | 38 ++++++++----------- 
.../performance/prime_sieve_performance.cpp | 18 ++++++++- test/test_prime_sieve.cpp | 29 ++++++++++++++ 3 files changed, 62 insertions(+), 23 deletions(-) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 68c592171f..9f5545e58a 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -12,34 +12,28 @@ namespace boost::math::detail::prime_sieve { -// https://mathworld.wolfram.com/SieveofEratosthenes.html -// https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf -template -void linear_sieve(const Integer upper_bound, ForwardIterator first, ForwardIterator last) noexcept -{ - const std::size_t least_divisors_size{static_cast(upper_bound + 1)}; - std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; - auto current {first}; +template +decltype(auto) linear_sieve(const Integer upper_bound, OutputIterator resultant_primes) +{ + const std::size_t masks_size {static_cast(upper_bound / 2 + 1)}; + std::unique_ptr masks {new bool[masks_size]}; + memset(masks.get(), true, sizeof(*masks.get()) * (masks_size)); - for (std::size_t i{2}; i < upper_bound; ++i) - { - if (least_divisors[i] == 0) - { - least_divisors[i] = i; - *current++ = i; - } + *resultant_primes++ = 2; - for (std::size_t j{}; (first + j) < last && i * *(first + j) <= upper_bound && *(first + j) <= least_divisors[i] && j < least_divisors_size; ++j) + for(std::size_t index {1}; index < masks_size; ++index) + { + if(masks[index]) { - least_divisors[i * static_cast(*(first + j))] = *(first + j); + *resultant_primes++ = static_cast(2 * index + 1); + for(std::size_t clear {index * 3 + 1}; clear < masks_size; clear += index * 2 + 1) + { + masks[clear] = false; + } } } -} -template -inline void linear_sieve(const Integer upper_bound, Container& c) -{ - linear_sieve(upper_bound, std::begin(c), 
std::end(c)); + return resultant_primes; } // 4'096 is where benchmarked performance of linear_sieve begins to diverge diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index a806e6efb9..9c7ce83a66 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,19 @@ void linear_sieve(benchmark::State& state) state.SetComplexityN(state.range(0)); } +template +void linear_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, std::back_inserter(primes))); + } + state.SetComplexityN(state.range(0)); +} + template inline auto mask_sieve_helper(Integer lower_bound, Integer upper_bound, std::vector primes) -> std::vector { @@ -116,9 +130,11 @@ void kimwalish_primes(benchmark::State& state) // Invidiual Implementations // Linear //BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); //BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); + // Segmented //BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); //BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); diff --git 
a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index e49f949f05..2ec19b6004 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -10,12 +10,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include template void test_prime_sieve() @@ -239,6 +241,25 @@ void test_linear_sieve() BOOST_TEST_EQ(primes.size(), 9592); } +template +void test_linear_sieve_iterator() +{ + constexpr std::size_t array_size {10'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::linear_sieve(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); +} + int main() { // Individual Algorithms @@ -256,6 +277,14 @@ int main() test_interval_sieve(); test_interval_sieve(); + // Individual Algorithms with Iterators + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + // Composite test_prime_sieve(); test_prime_sieve(); From c361cdea656d742438df9adadc288514142416b0 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 21:02:28 -0500 Subject: [PATCH 63/83] Linear output iterator and refactoring [CI SKIP] --- .../boost/math/special_functions/detail/linear_prime_sieve.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp 
b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 9f5545e58a..9f67e2e4a1 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -32,7 +32,6 @@ decltype(auto) linear_sieve(const Integer upper_bound, OutputIterator resultant_ } } } - return resultant_primes; } From 66c26426831909c49296d6b95d3b980ba1b952b7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 22:14:35 -0500 Subject: [PATCH 64/83] Added prime approximation function --- include/boost/math/special_functions/prime_sieve.hpp | 7 +++++++ test/test_prime_sieve.cpp | 3 +++ 2 files changed, 10 insertions(+) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 3102954743..cf6a39dc9d 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -234,6 +234,13 @@ void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Contai } } // End namespace detail +template +constexpr Integer prime_approximation(Integer upper_bound) +{ + constexpr auto c = 30 * std::log(113) / 113; // Magic numbers from wikipedia + return static_cast(std::floor(c * upper_bound / std::log(static_cast(upper_bound)))); +} + template constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) { diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 2ec19b6004..674fe70039 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -262,6 +262,9 @@ void test_linear_sieve_iterator() int main() { + // Test prime approximation for constexpr + static_assert(boost::math::prime_approximation(100) != 0, "Not constexpr"); + // Individual Algorithms test_linear_sieve(); test_linear_sieve(); From de1f331ebc93ccf3880c9684e3ba577f87b8b363 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 22:18:54 
-0500 Subject: [PATCH 65/83] prime approximation function for a range --- include/boost/math/special_functions/prime_sieve.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index cf6a39dc9d..e63adfc219 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -235,12 +235,18 @@ void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Contai } // End namespace detail template -constexpr Integer prime_approximation(Integer upper_bound) +constexpr Integer prime_approximation(const Integer upper_bound) { constexpr auto c = 30 * std::log(113) / 113; // Magic numbers from wikipedia return static_cast(std::floor(c * upper_bound / std::log(static_cast(upper_bound)))); } +template +constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) +{ + return prime_approximation(upper_bound) - prime_approximation(lower_bound); +} + template constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) { From eaea5f92fb07433da5ac169f395524310e50e5ff Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 26 Sep 2020 22:54:36 -0500 Subject: [PATCH 66/83] Added interval sieve for output iterators Currently untested --- .../detail/interval_prime_sieve.hpp | 307 ++++++++++++++++++ .../special_functions/prime_approximation.hpp | 36 ++ .../math/special_functions/prime_sieve.hpp | 20 +- test/test_prime_sieve.cpp | 1 + 4 files changed, 345 insertions(+), 19 deletions(-) create mode 100644 include/boost/math/special_functions/detail/interval_prime_sieve.hpp create mode 100644 include/boost/math/special_functions/prime_approximation.hpp diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp new file mode 100644 index 
0000000000..d5407d9a6b --- /dev/null +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -0,0 +1,307 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class IntervalSieve +{ + +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. +using int_128t = unsigned __int128; // One machine word smaller than the boost equivalent +#else +using int_128t = boost::multiprecision::uint128_t; +#endif + +private: + // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) + // This table is from page 421, table 16.3.1, Hugh Williams' book + // Last 8 entries added from Wooding's MS thesis, 2003, pp. 
92-93 + struct pssentry + { + static constexpr std::size_t len {49}; + static constexpr std::array prime + { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 67, 71, 79, 83, 101, 103, 107, 113, 131, 149, 157, + 173, 181, 193, 197, 211, 227, 229, 233, 239, 241, 251, 257, 263, 277, 281, 283, 293, 311, 331, 337, 347, 353 + }; + + static constexpr std::array ps + { + 73, 241, 1'009, 2'641, 8'089, 18'001, 53'881, 87'481, 117'049, 515'761, 1'083'289, 3'206'641, 3'818'929, + 9'257'329, 22'000'801, 48'473'881, 175'244'281, 427'733'329, 898'716'289, 2'805'544'681, 10'310'263'441, + 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, + 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, + 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689u, + + (static_cast(0x3uLL) << 64) | 0xc956f827e0524359uLL, // 69'848'288'320'900'186'969 + (static_cast(0xbuLL) << 64) | 0x539315b3b1268d59uLL, // 208'936'365'799'044'975'961 + (static_cast(0x1cuLL) << 64) | 0xec87d86ca60b50a1uLL, // 533'552'663'339'828'203'681 + (static_cast(0x32uLL) << 64) | 0xc6d3496f20db3d81uLL, // 936'664'079'266'714'697'089 + (static_cast(0x74uLL) << 64) | 0x210967a12ba94be1uLL, // 2'142'202'860'370'269'916'129 + (static_cast(0x2e3uLL) << 64) | 0xec11ddc09fd65c51uLL, // 13'649'154'491'558'298'803'281 + (static_cast(0x753uLL) << 64) | 0x641c14b397c27bf1uLL, // 34'594'858'801'670'127'778'801 + (static_cast(0x1511uLL) << 64) | 0x85fdf38d1fc9ce21uLL, // 99'492'945'930'479'213'334'049 + (static_cast(0x3e8buLL) << 64) | 0xaba417e222ca5091uLL // 295'363'187'400'900'310'880'401 + }; + }; + + static constexpr pssentry pss_{}; + static constexpr boost::math::detail::prime_sieve::MOD210Wheel w_{}; + std::size_t tdlimit_; + + Integer delta_; + 
Integer left_; + Integer right_; + + // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations + boost::dynamic_bitset<> b_; + + std::vector primes_; + std::int_fast64_t plimit_; + + void Settdlimit() noexcept; + void SeiveLength(const Integer d) noexcept; + void Sieve() noexcept; + bool Psstest(const std::size_t pos) noexcept; + void Psstestall() noexcept; + decltype(auto) WriteOutput(OutputIterator resultant_primes) noexcept; + +public: + IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; +}; + +template +void IntervalSieve::Settdlimit() noexcept +{ + const double dr = static_cast(right_); + const double delta = static_cast(delta_); + const double tdest = delta * std::log(dr); + + // Small cases + if(tdest * tdest >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + return; + } + + // First guess + if(tdest <= 1ul<<30) + { + tdlimit_ = static_cast(tdest); + } + + else + { + tdlimit_ = 1ul<<30; + } + + // Find the corresponding prime + std::size_t i; + for(i = pss_.len - 1; i > 0; --i) + { + if(static_cast(pss_.ps[i]) * tdlimit_ < dr) + { + break; + } + } + plimit_ = pss_.prime[i]; + + double tdlimit_guess = 1 + std::fmod(dr, static_cast(pss_.ps[i])); + if(tdlimit_guess * tdlimit_guess >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + } +} + +template +void IntervalSieve::SeiveLength(const Integer d) noexcept +{ + Integer r {left_ % d}; + Integer start {0}; + + if(r != 0) + { + start = d - r; + } + + for(Integer i {start}; i >= 0 && i < b_.size(); i += d) + { + b_[static_cast(i)] = 0; + } +} + +template +void IntervalSieve::Sieve() noexcept +{ + std::int_fast64_t primes_range {}; + if(plimit_ <= 10) + { + primes_range = 10; + } + + else + { + primes_range = plimit_; + } + + // Sieve with pre-computed (or small) primes and then use the 
wheel for the remainder + std::size_t i {}; + if(plimit_ <= pss_.prime.back()) + { + for(; pss_.prime[i] < primes_range; ++i) + { + SeiveLength(pss_.prime[i]); + } + } + else + { + prime_reserve(primes_range, primes_); + linear_sieve(primes_range, primes_.begin()); + primes_.shrink_to_fit(); + + for(; primes_[i] < primes_range; ++i) + { + SeiveLength(primes_[i]); + } + } + + for(Integer j = w_.Next(primes_[--i]); j <= tdlimit_; j = w_.Next(j)) + { + SeiveLength(j); + } +} + +template +decltype(auto) IntervalSieve::WriteOutput(OutputIterator resultant_primes) noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + *resultant_primes++ = left_ + i; + } + } + return resultant_primes; +} + +// Performs the pseduosqaure prime test on n = left + pos +// return 1 if prime or prime power, 0 otherwise +// Begins with a base-2 test +template +bool IntervalSieve::Psstest(const std::size_t pos) noexcept +{ + const Integer n {static_cast(left_ + pos)}; + const Integer exponent {(n - 1) / 2}; + const std::int_fast64_t nmod8 = static_cast(n % 8); + + std::int_fast64_t negative_one_count {0}; + + for(std::size_t i {}; i < primes_.size(); ++i) + { + Integer temp = primes_[i]; + temp = static_cast(std::pow(static_cast(temp), static_cast(n))); + + if(temp == 1) + { + if(i == 0 && nmod8 == 5) + { + return false; + } + } + + else + { + ++temp; + if(temp == n) + { + if(i > 0) + { + ++negative_one_count; + } + } + else + { + return false; + } + } + } + + return (nmod8 != 1 || negative_one_count > 0); +} + +template +void IntervalSieve::Psstestall() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + if(!Psstest(i)) + { + b_[i] = 0; + } + } + } +} + +template +IntervalSieve::IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept : + left_ {left}, right_ {right} +{ + delta_ = right_ - left_; + b_.resize(static_cast(delta_), 1); + Settdlimit(); + Sieve(); + + if(plimit_ != 0 ) + { + Psstestall(); + } + 
+ WriteOutput(resultant_primes); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + return WriteOutput(resultant_primes); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp new file mode 100644 index 0000000000..8060423c2f --- /dev/null +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -0,0 +1,36 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP + +namespace boost::math +{ +template +constexpr Integer prime_approximation(const Integer upper_bound) +{ + constexpr auto c = 30 * std::log(113) / 113; // Magic numbers from wikipedia + return static_cast(std::floor(c * upper_bound / std::log(static_cast(upper_bound)))); +} + +template +constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) +{ + return prime_approximation(upper_bound) - prime_approximation(lower_bound); +} + +template +inline void prime_reserve(Integer upper_bound, std::vector& prime_container) +{ + prime_container.reserve(static_cast(upper_bound) / std::log(static_cast(upper_bound))); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 
e63adfc219..6cff6bcb50 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -234,25 +235,6 @@ void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Contai } } // End namespace detail -template -constexpr Integer prime_approximation(const Integer upper_bound) -{ - constexpr auto c = 30 * std::log(113) / 113; // Magic numbers from wikipedia - return static_cast(std::floor(c * upper_bound / std::log(static_cast(upper_bound)))); -} - -template -constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) -{ - return prime_approximation(upper_bound) - prime_approximation(lower_bound); -} - -template -constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) -{ - prime_container.reserve(static_cast(upper_bound) / std::log(static_cast(upper_bound))); -} - template void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) { diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 674fe70039..40d4c519b6 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -6,6 +6,7 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) #include +#include #include #include #include From 44ae862ce9bcaa57635cb6e65c3aac55aa5dcbf1 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Sun, 27 Sep 2020 10:57:45 +0100 Subject: [PATCH 67/83] Experimental prime_sieve. 
--- .../math/special_functions/prime_sieve.hpp | 7 +- .../math/special_functions/prime_sieve_jm.hpp | 397 ++++++++++++++++++ .../prime_sieve_performance_jm.cpp | 171 ++++++++ 3 files changed, 572 insertions(+), 3 deletions(-) create mode 100644 include/boost/math/special_functions/prime_sieve_jm.hpp create mode 100644 reporting/performance/prime_sieve_performance_jm.cpp diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 3102954743..2aea6235a2 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -255,8 +255,9 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } - else if constexpr (std::is_same_v, decltype(std::execution::seq)> || + else if constexpr (std::is_same_v, decltype(std::execution::seq)> #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> #endif ) @@ -306,9 +307,9 @@ void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bo boost::math::detail::linear_sieve(static_cast(upper_bound), primes); } - else if constexpr (std::is_same_v, decltype(std::execution::seq)> || + else if constexpr (std::is_same_v, decltype(std::execution::seq)> #if __cpp_lib_execution > 201900 - std::is_same_v, decltype(std::execution::unseq)> + || std::is_same_v, decltype(std::execution::unseq)> #endif ) { diff --git a/include/boost/math/special_functions/prime_sieve_jm.hpp b/include/boost/math/special_functions/prime_sieve_jm.hpp new file mode 100644 index 0000000000..1a2b005b78 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve_jm.hpp @@ -0,0 +1,397 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_JM_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_JM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace jm { namespace detail +{ +// https://mathworld.wolfram.com/SieveofEratosthenes.html +template +struct simple_bitset +{ + std::unique_ptr bits; + std::size_t m_size; + simple_bitset(std::size_t n) : bits(new I[n / (sizeof(I) * CHAR_BIT) + (n % (sizeof(I) * CHAR_BIT) ? 1 : 0)]), m_size(n) + { + std::memset(bits.get(), 0xff, n / CHAR_BIT + (n % CHAR_BIT ? 1 : 0)); + } + static constexpr std::size_t ln2(std::size_t n) + { + return n <= 1 ? 0 : 1 + ln2(n >> 1); + } + I test(std::size_t n)const + { + constexpr I masks[] = { static_cast(1uLL), static_cast(2uLL), static_cast(4uLL), static_cast(8uLL), static_cast(16uLL), static_cast(32uLL), static_cast(64uLL), static_cast(128uLL), static_cast(256uLL), + static_cast(1uLL << 9), static_cast(1uLL << 10), static_cast(1uLL << 11), static_cast(1uLL << 12), static_cast(1uLL << 13), static_cast(1uLL << 14), static_cast(1uLL << 15), static_cast(1uLL << 16), + static_cast(1uLL << 17), static_cast(1uLL << 18), static_cast(1uLL << 19), static_cast(1uLL << 20), static_cast(1uLL << 21), static_cast(1uLL << 22), static_cast(1uLL << 23), static_cast(1uLL << 24), + static_cast(1uLL << 25), static_cast(1uLL << 26), static_cast(1uLL << 27), static_cast(1uLL << 28), static_cast(1uLL << 29), static_cast(1uLL << 30), static_cast(1uLL << 31), static_cast(1uLL << 32), + static_cast(1uLL << 33), static_cast(1uLL << 34), static_cast(1uLL << 35), static_cast(1uLL << 36), static_cast(1uLL << 37), static_cast(1uLL << 38), static_cast(1uLL << 39), static_cast(1uLL << 40), + static_cast(1uLL << 41), static_cast(1uLL << 42), static_cast(1uLL << 43), static_cast(1uLL << 44), static_cast(1uLL << 45), 
static_cast(1uLL << 46), static_cast(1uLL << 47), static_cast(1uLL << 48), + static_cast(1uLL << 49), static_cast(1uLL << 50), static_cast(1uLL << 51), static_cast(1uLL << 52), static_cast(1uLL << 53), static_cast(1uLL << 54), static_cast(1uLL << 55), static_cast(1uLL << 56), + static_cast(1uLL << 57), static_cast(1uLL << 58), static_cast(1uLL << 59), static_cast(1uLL << 60), static_cast(1uLL << 61), static_cast(1uLL << 62), static_cast(1uLL << 63), + }; + I mask = (sizeof(I) * CHAR_BIT) - 1; + std::size_t shift = ln2(sizeof(I) * CHAR_BIT); + BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + return bits[n >> shift] & masks[n & mask]; + } + void clear(std::size_t n) + { + constexpr I masks[] = { static_cast(~1uLL), static_cast(~2uLL), static_cast(~4uLL), static_cast(~8uLL), static_cast(~16uLL), static_cast(~32uLL), static_cast(~64uLL), static_cast(~128uLL), static_cast(~256uLL), + static_cast(~(1uLL << 9)), static_cast(~(1uLL << 10)), static_cast(~(1uLL << 11)), static_cast(~(1uLL << 12)), static_cast(~(1uLL << 13)), static_cast(~(1uLL << 14)), static_cast(~(1uLL << 15)), static_cast(~(1uLL << 16)), + static_cast(~(1uLL << 17)), static_cast(~(1uLL << 18)), static_cast(~(1uLL << 19)), static_cast(~(1uLL << 20)), static_cast(~(1uLL << 21)), static_cast(~(1uLL << 22)), static_cast(~(1uLL << 23)), static_cast(~(1uLL << 24)), + static_cast(~(1uLL << 25)), static_cast(~(1uLL << 26)), static_cast(~(1uLL << 27)), static_cast(~(1uLL << 28)), static_cast(~(1uLL << 29)), static_cast(~(1uLL << 30)), static_cast(~(1uLL << 31)), static_cast(~(1uLL << 32)), + static_cast(~(1uLL << 33)), static_cast(~(1uLL << 34)), static_cast(~(1uLL << 35)), static_cast(~(1uLL << 36)), static_cast(~(1uLL << 37)), static_cast(~(1uLL << 38)), static_cast(~(1uLL << 39)), static_cast(~(1uLL << 40)), + static_cast(~(1uLL << 41)), static_cast(~(1uLL << 42)), static_cast(~(1uLL << 43)), static_cast(~(1uLL << 44)), static_cast(~(1uLL << 45)), 
static_cast(~(1uLL << 46)), static_cast(~(1uLL << 47)), static_cast(~(1uLL << 48)), + static_cast(~(1uLL << 49)), static_cast(~(1uLL << 50)), static_cast(~(1uLL << 51)), static_cast(~(1uLL << 52)), static_cast(~(1uLL << 53)), static_cast(~(1uLL << 54)), static_cast(~(1uLL << 55)), static_cast(~(1uLL << 56)), + static_cast(~(1uLL << 57)), static_cast(~(1uLL << 58)), static_cast(~(1uLL << 59)), static_cast(~(1uLL << 60)), static_cast(~(1uLL << 61)), static_cast(~(1uLL << 62)), static_cast(~(1uLL << 63)), + }; + constexpr I mask = (sizeof(I) * CHAR_BIT) - 1; + constexpr std::size_t shift = ln2(sizeof(I) * CHAR_BIT); + BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + bits[n >> shift] &= masks[n & mask]; + } + std::size_t size()const { return m_size; } + void reset(){ std::memset(bits.get(), 0xff, m_size / CHAR_BIT + (m_size % CHAR_BIT ? 1 : 0)); } +}; + +template +constexpr bool has_output_iterator_terminated(const T&) +{ + return false; +} + +template +struct dual_output_iterator +{ + OutputIterator out; + Container& container; + + dual_output_iterator(OutputIterator o, Container& c) : out(o), container(c) {} + + struct proxy + { + dual_output_iterator& ref; + proxy(dual_output_iterator& o) : ref(o) {} + ~proxy() { ++ref.out; } + dual_output_iterator& operator*() { return ref; } + }; + dual_output_iterator& operator++() { ++out; return *this; } + proxy operator++(int) { return proxy(*this); } + dual_output_iterator& operator*() { return *this; } + template + dual_output_iterator& operator=(const Value& val) + { + *out = val; + container.push_back(val); + return *this; + } +}; + +template +inline bool linear_sieve_classical(Sieve& masks, OutputIterator out) +{ + Integer max_sqr = Integer(1) << (std::numeric_limits::digits / 2 - 1); + *out++ = 2u; + for (Integer index = 1; index < masks.size(); ++index) + { + if (masks.test(index)) + { + *out++ = 2 * index + 1; + if (has_output_iterator_terminated(out)) + 
return false; + if(index < max_sqr) + for (Integer clear = index * (2 * index + 2); clear < masks.size(); clear += index * 2 + 1) + masks.clear(clear); + } + } + return true; +} + +template +inline bool linear_sieve_classical_segment(Container& primes, Sieve& masks, Integer start_offset, Integer max_points, OutputIterator out, bool output_to_container) +{ + masks.reset(); + Integer limit = static_cast(std::sqrt(static_cast(start_offset + max_points)) + 1); + // Begin by striking out odd numbered multiples of all the primes we have so far. + // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < primes.size(); ++i) + { + Integer prime = primes[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + // + // Now walk through the sieve outputting primes. + // + for (Integer index = 0; index < max_points / 2; ++index) + { + if (masks.test(index)) + { + *out++ = start_offset + 2 * index + 1; + if (output_to_container) + primes.push_back(start_offset + 2 * index + 1); + if (has_output_iterator_terminated(out)) + return false; + } + } + return true; +} + +// 4096 is where benchmarked performance of linear_sieve begins to diverge +template +const Integer linear_sieve_limit = Integer(524288); // Constexpr does not work with boost::multiprecision types + +template +void prime_sieve_imp(ExecutionPolicy&& policy, Integer upper_bound, Container& primes, OutputIterator out, bool output_to_container) +{ + if (upper_bound <= 2) + { + return; + } + + simple_bitset sieve((upper_bound <= linear_sieve_limit ? 
upper_bound : linear_sieve_limit) / 2); + + if (output_to_container && (upper_bound > linear_sieve_limit)) + { + if (!jm::detail::linear_sieve_classical(sieve, dual_output_iterator(out, primes))) + return; + } + else + { + if (!jm::detail::linear_sieve_classical(sieve, out)) + return; + } + + if (upper_bound <= linear_sieve_limit) + return; + + if constexpr (std::is_same_v>, std::execution::sequenced_policy> +#if __cpp_lib_execution > 201900 + || std::is_same_v>, std::execution::unsequenced_policy> +#endif + ) + { + for (Integer offset = linear_sieve_limit; offset < upper_bound; offset += linear_sieve_limit) + { + if (!linear_sieve_classical_segment(primes, sieve, offset, (std::min)(linear_sieve_limit, upper_bound - offset), out, output_to_container)) + return; + } + } +#if 0 + + else + { + std::vector small_primes{}; + small_primes.reserve(1028); + + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), upper_bound, primes); + }); + + t1.join(); + t2.join(); + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } +#endif +} + +template +struct counted_output_iterator +{ + std::shared_ptr target; + OutputIterator out; + counted_output_iterator(std::size_t n, OutputIterator o) : target(new std::size_t()), out(o) + { + *target = n; + } + + struct counted_output_iterator_proxy + { + counted_output_iterator& out; + counted_output_iterator_proxy(counted_output_iterator& o) : out(o) {} + counted_output_iterator& operator*()const { return out; } + }; + + counted_output_iterator_proxy operator++(int){ return counted_output_iterator_proxy(*this); } + counted_output_iterator& operator++() { return *this; } + counted_output_iterator& operator*() { return *this; } + + template + counted_output_iterator& operator=(const T& value) + { + *out++ = value; + --*target; + 
return *this; + } +}; + +template +constexpr bool has_output_iterator_terminated(const counted_output_iterator& out) +{ + return *(out.target) == 0; +} + +template +struct prime_factors_output_iterator +{ + Integer number; + Container& out; + + prime_factors_output_iterator(Integer n, Container& o) : number(n), out(o) {} + + prime_factors_output_iterator& operator = (Integer prime) + { + std::size_t count = 0; + while (number % prime == 0) + { + ++count; + number /= prime; + out.push_back(std::make_pair(prime, count)); + } + return *this; + } + prime_factors_output_iterator& operator*() { return *this; } + prime_factors_output_iterator& operator++() { return *this; } + prime_factors_output_iterator& operator++(int) { return *this; } + + bool complete()const { return number == 1; } +}; + +template +constexpr bool has_output_iterator_terminated(const prime_factors_output_iterator& out) +{ + return out.complete(); +} + + +} // End namespace detail + +template +void prime_reserve(Integer upper_bound, Container &prime_container) +{ + typedef typename Container::size_type size_type; + // + // How big does the container have to be to hold all the primes in the range [2, upper_bound]? + // + // The prime number theorem (https://en.wikipedia.org/wiki/Prime_number_theorem) gives an asymptotic + // estimate of upper_bound / log(upper_bound), but this only really holds as upper_bound -> infinity. + // + // Non asymptotic limits follow: + // + double limit; + if (upper_bound >= 60184) + // Dusart, Pierre (2010). "Estimates of Some Functions Over Primes without R.H" + limit = upper_bound / (std::log(static_cast(upper_bound)) - 1.1); + else + { + // There are other loose relations available, but it's easy enough to search + // all x in [3, 60184] and note how many primes there are and by how much + // it differs from x / log(x). 
The fudge factor of 1.25 is sufficient + // always provide enough space: + // + limit = 1.25 * upper_bound / std::log(static_cast(upper_bound)); + } + prime_container.reserve(static_cast(limit)); +} + +template +inline std::enable_if_t::value> prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) +{ + typedef typename Container::value_type integer_type; + prime_reserve(upper_bound, primes); + return detail::prime_sieve_imp(policy, static_cast(upper_bound), primes, std::back_inserter(primes), false); +} + +template +inline std::enable_if_t::value> prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator out) +{ + std::vector primes; + if (upper_bound > detail::linear_sieve_limit) + prime_reserve(upper_bound, primes); + return detail::prime_sieve_imp(policy, upper_bound, primes, out, true); +} + +template +inline std::enable_if_t::value> prime_sieve(Integer upper_bound, Container &primes) +{ + prime_sieve(std::execution::seq, upper_bound, primes); +} +template +inline std::enable_if_t::value> prime_sieve(Integer upper_bound, OutputIterator primes) +{ + prime_sieve(std::execution::seq, upper_bound, primes); +} + +template +inline std::enable_if_t::value> prime_sieve_n(ExecutionPolicy&& policy, Integer n, Container &primes) +{ + typedef typename Container::value_type integer_type; + prime_reserve(n, primes); + return detail::prime_sieve_imp(policy, (std::numeric_limits::max)(), primes, detail::counted_output_iterator >(n, std::back_inserter(primes)), false); +} + +template +inline std::enable_if_t::value> prime_sieve_n(ExecutionPolicy&& policy, Integer n, OutputIterator out) +{ + std::vector primes; + return detail::prime_sieve_imp(policy, (std::numeric_limits::max)(), primes, detail::counted_output_iterator(n, out), true); +} + +template +inline std::enable_if_t::value> prime_sieve_n(Integer upper_bound, Container &primes) +{ + prime_sieve_n(std::execution::seq, upper_bound, primes); +} +template +inline 
std::enable_if_t::value> prime_sieve_n(Integer upper_bound, OutputIterator primes) +{ + prime_sieve_n(std::execution::seq, upper_bound, primes); +} + +template +inline std::vector > factorize(ExecutionPolicy&& policy, Integer n) +{ + std::vector primes; + std::vector > result; + detail::prime_sieve_imp(policy, n / 2, primes, detail::prime_factors_output_iterator(n, result), true); + return result; +} +template +inline std::vector > factorize(Integer n) +{ + return factorize(std::execution::seq, n); +} + +} // namespace boost::math + + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP diff --git a/reporting/performance/prime_sieve_performance_jm.cpp b/reporting/performance/prime_sieve_performance_jm.cpp new file mode 100644 index 0000000000..a0d1697b77 --- /dev/null +++ b/reporting/performance/prime_sieve_performance_jm.cpp @@ -0,0 +1,171 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +template +void linear_sieve(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::detail::linear_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} + +template +void linear_sieve_jm_helper(Integer upper, std::vector& primes) +{ + jm::detail::simple_bitset masks((upper + 3) / 2); + jm::detail::linear_sieve_classical(masks, std::back_inserter(primes)); +} + +template +void linear_sieve_jm(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + linear_sieve_jm_helper(upper, primes); + } + 
state.SetComplexityN(state.range(0)); +} + +// Complete Implementations +template +void prime_sieve_seq(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::prime_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_seq_jm(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_seq_jm_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(upper, std::back_inserter(primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +void kimwalish_primes(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + for (auto _ : state) + { + primes.clear(); + primesieve::generate_primes(upper, &primes); + } + state.SetComplexityN(state.range(0)); +} + +template +inline Integer kimwalish_prime_factorizer_helper(Integer upper, I2 value) +{ + std::vector primes; + primesieve::generate_primes(upper, &primes); + for (unsigned i = 0; i < primes.size(); ++i) + while (value % primes[i] == 0) + value /= primes[i]; + return value; +} + +template +void kimwalish_prime_factorizer(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + for (auto _ : state) + { + benchmark::DoNotOptimize(kimwalish_prime_factorizer_helper(upper, std::numeric_limits::max())); + } + state.SetComplexityN(state.range(0)); +} + +template +void prime_sieve_seq_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + 
boost::math::prime_reserve(upper, primes); + for (auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, std::back_inserter(primes))); + } + state.SetComplexityN(state.range(0)); +} + +constexpr uint64_t low_range = 4; +constexpr uint64_t high_range = uint64_t(1) << 32; + +// Invidiual Implementations +// Linear +//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); + +//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve_jm, uint32_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); + +// Segmented +//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); + +// Complete Implemenations +//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(prime_sieve_seq, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(prime_sieve_seq_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); 
+BENCHMARK_TEMPLATE(prime_sieve_seq_jm, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(prime_sieve_seq_jm_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); + +BENCHMARK_MAIN(); From cb5d978c3ced4c9acab7b9d4b3745c1cf44c3e6e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 27 Sep 2020 11:38:01 -0500 Subject: [PATCH 68/83] Interval sieve with OI passes unit tests [CI SKIP] --- .../detail/interval_prime_sieve.hpp | 24 +++++++++------ .../special_functions/detail/prime_wheel.hpp | 2 +- .../special_functions/prime_approximation.hpp | 6 ++-- test/test_prime_sieve.cpp | 29 ++++++++++++++++++- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp index d5407d9a6b..f9354fd07e 100644 --- a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -24,7 +24,7 @@ template class IntervalSieve { -#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. 
using int_128t = unsigned __int128; // One machine word smaller than the boost equivalent #else using int_128t = boost::multiprecision::uint128_t; @@ -93,9 +93,9 @@ using int_128t = boost::multiprecision::uint128_t; template void IntervalSieve::Settdlimit() noexcept { - const double dr = static_cast(right_); - const double delta = static_cast(delta_); - const double tdest = delta * std::log(dr); + const double dr {static_cast(right_)}; + const double delta {static_cast(delta_)}; + const double tdest {delta * std::log(dr)}; // Small cases if(tdest * tdest >= dr) @@ -168,26 +168,32 @@ void IntervalSieve::Sieve() noexcept // Sieve with pre-computed (or small) primes and then use the wheel for the remainder std::size_t i {}; + Integer j; if(plimit_ <= pss_.prime.back()) { + SeiveLength(static_cast(2)); for(; pss_.prime[i] < primes_range; ++i) { SeiveLength(pss_.prime[i]); } + + j = w_.Next(pss_.prime[--i]); } + else { - prime_reserve(primes_range, primes_); + prime_reserve(right_, primes_); linear_sieve(primes_range, primes_.begin()); - primes_.shrink_to_fit(); for(; primes_[i] < primes_range; ++i) { SeiveLength(primes_[i]); } + + j = w_.Next(primes_[--i]); } - for(Integer j = w_.Next(primes_[--i]); j <= tdlimit_; j = w_.Next(j)) + for(; j <= tdlimit_; j = w_.Next(j)) { SeiveLength(j); } @@ -216,7 +222,7 @@ bool IntervalSieve::Psstest(const std::size_t pos) noex const Integer exponent {(n - 1) / 2}; const std::int_fast64_t nmod8 = static_cast(n % 8); - std::int_fast64_t negative_one_count {0}; + std::int_fast64_t negative_one_count {}; for(std::size_t i {}; i < primes_.size(); ++i) { @@ -275,7 +281,7 @@ IntervalSieve::IntervalSieve(const Integer left, const Settdlimit(); Sieve(); - if(plimit_ != 0 ) + if(plimit_ != 0) { Psstestall(); } diff --git a/include/boost/math/special_functions/detail/prime_wheel.hpp b/include/boost/math/special_functions/detail/prime_wheel.hpp index 1297eb5a96..3b8ec9a70d 100644 --- a/include/boost/math/special_functions/detail/prime_wheel.hpp 
+++ b/include/boost/math/special_functions/detail/prime_wheel.hpp @@ -290,4 +290,4 @@ class MOD210Wheel final }; } -#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP \ No newline at end of file +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp index 8060423c2f..c15ae4989c 100644 --- a/include/boost/math/special_functions/prime_approximation.hpp +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -16,8 +16,8 @@ namespace boost::math template constexpr Integer prime_approximation(const Integer upper_bound) { - constexpr auto c = 30 * std::log(113) / 113; // Magic numbers from wikipedia - return static_cast(std::floor(c * upper_bound / std::log(static_cast(upper_bound)))); + constexpr auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia + return static_cast(::floor(c * static_cast(upper_bound) / ::log(static_cast(upper_bound)))); } template @@ -29,7 +29,7 @@ constexpr Integer prime_approximation(const Integer lower_bound, const Integer u template inline void prime_reserve(Integer upper_bound, std::vector& prime_container) { - prime_container.reserve(static_cast(upper_bound) / std::log(static_cast(upper_bound))); + prime_container.reserve(static_cast(prime_approximation(upper_bound))); } } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 40d4c519b6..1d6e14ee68 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -225,6 +226,25 @@ void test_interval_sieve() BOOST_TEST_EQ(primes.size(), 68'906); } +template +void test_interval_sieve_iterator() +{ + const std::size_t array_size {70'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::IntervalSieve sieve(static_cast(1'000), static_cast(10'000), 
primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'061); + + std::fill(primes.begin(), primes.end(), 0); + sieve.NewRange(static_cast(10'000), static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 8'363); + + std::fill(primes.begin(), primes.end(), 0); + sieve.NewRange(static_cast(100'000), static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 68'906); +} + template void test_linear_sieve() { @@ -264,7 +284,7 @@ void test_linear_sieve_iterator() int main() { // Test prime approximation for constexpr - static_assert(boost::math::prime_approximation(100) != 0, "Not constexpr"); + static_assert(boost::math::prime_approximation(100) != 0, "log and/or floor is/are not constexpr"); // Individual Algorithms test_linear_sieve(); @@ -289,6 +309,13 @@ int main() test_linear_sieve_iterator(); test_linear_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + // Composite test_prime_sieve(); test_prime_sieve(); From 4dc3eb204c95df1a58c73bb383148b0e19250a4d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 29 Sep 2020 22:17:25 -0500 Subject: [PATCH 69/83] Add sequential composite sieve with OI [CI SKIP] --- .../detail/interval_prime_sieve.hpp | 40 ++++++-- .../special_functions/prime_sieve_iter.hpp | 95 +++++++++++++++++++ test/test_prime_sieve.cpp | 37 ++++++++ 3 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 include/boost/math/special_functions/prime_sieve_iter.hpp diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp index f9354fd07e..fa1b4a5595 100644 --- 
a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -72,6 +72,8 @@ using int_128t = boost::multiprecision::uint128_t; Integer left_; Integer right_; + OutputIterator resultant_primes_; + // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations boost::dynamic_bitset<> b_; @@ -83,10 +85,11 @@ using int_128t = boost::multiprecision::uint128_t; void Sieve() noexcept; bool Psstest(const std::size_t pos) noexcept; void Psstestall() noexcept; - decltype(auto) WriteOutput(OutputIterator resultant_primes) noexcept; + decltype(auto) WriteOutput() noexcept; public: IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right) noexcept; decltype(auto) NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; }; @@ -200,16 +203,16 @@ void IntervalSieve::Sieve() noexcept } template -decltype(auto) IntervalSieve::WriteOutput(OutputIterator resultant_primes) noexcept +decltype(auto) IntervalSieve::WriteOutput() noexcept { for(std::size_t i {}; i < b_.size(); ++i) { if(b_[i]) { - *resultant_primes++ = left_ + i; + *resultant_primes_++ = left_ + i; } } - return resultant_primes; + return resultant_primes_; } // Performs the pseduosqaure prime test on n = left + pos @@ -273,8 +276,8 @@ void IntervalSieve::Psstestall() noexcept } template -IntervalSieve::IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept : - left_ {left}, right_ {right} +IntervalSieve::IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept : + left_ {left}, right_ {right}, resultant_primes_ {resultant_primes} { delta_ = right_ - left_; b_.resize(static_cast(delta_), 1); @@ -286,12 +289,33 @@ IntervalSieve::IntervalSieve(const Integer left, const Psstestall(); } 
- WriteOutput(resultant_primes); + WriteOutput(); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + return WriteOutput(); } template decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept { + resultant_primes_ = resultant_primes; left_ = left; right_ = right; delta_ = right_ - left_; @@ -306,7 +330,7 @@ decltype(auto) IntervalSieve::NewRange(const Integer le Psstestall(); } - return WriteOutput(resultant_primes); + return WriteOutput(); } } diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp new file mode 100644 index 0000000000..9324a61e78 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -0,0 +1,95 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +// TODO(mborland): Allow this value to be changed once cache functions are integerated +inline std::size_t L1_SIZE {32768}; + +template +decltype(auto) sequential_segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const Integer interval {static_cast(L1_SIZE * 8)}; + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + interval}; + + if (current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + IntervalSieve sieve(current_lower_bound, current_upper_bound, resultant_primes); + + for(std::size_t i {}; i < ranges; ++i) + { + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + resultant_primes = sieve.NewRange(current_lower_bound, current_upper_bound); + } + + return resultant_primes; +} +} + +namespace boost::math +{ +template +decltype(auto) prime_sieve_iter(ExecutionPolicy&& policy, const Integer upper_bound, OutputIterator resultant_primes) +{ + if (upper_bound == 2) + { + return resultant_primes; + } + + else if (upper_bound <= detail::prime_sieve::linear_sieve_limit) + { + detail::prime_sieve::linear_sieve(upper_bound, resultant_primes); + } + + else if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + resultant_primes = detail::prime_sieve::linear_sieve(detail::prime_sieve::linear_sieve_limit, resultant_primes); + 
detail::prime_sieve::sequential_segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); + } + + else + { + //TODO(mborland): The threaded part + } + + + return resultant_primes; +} + +template +inline decltype(auto) prime_sieve_iter(const Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_sieve_iter(std::execution::seq, upper_bound, resultant_primes); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 1d6e14ee68..ed830c66ae 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -6,6 +6,7 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) #include +#include #include #include #include @@ -84,6 +85,34 @@ void test_sequential_prime_sieve() BOOST_TEST_EQ(primes.size(), 78498); } + +template +void test_sequential_prime_sieve_iter() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + boost::math::prime_sieve_iter(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + template void test_prime_range() { @@ -345,6 +374,14 @@ int main() test_prime_range_seq(); test_prime_range_seq(); + // Composite Algorithms with Iterators + 
test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + // Large composite tests (Commented out for CI) //test_par_prime_sieve_large(); //test_par_prime_sieve_large(); From 90be1009662b65a06bddc726daf214830efc454b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 30 Sep 2020 16:13:00 -0500 Subject: [PATCH 70/83] Add threaded method. Currently INOP [CI SKIP] --- .../special_functions/prime_approximation.hpp | 4 +- .../special_functions/prime_sieve_iter.hpp | 68 ++++++++++++++++--- test/test_prime_sieve.cpp | 35 +++++++++- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp index c15ae4989c..399d59c593 100644 --- a/include/boost/math/special_functions/prime_approximation.hpp +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -14,14 +14,14 @@ namespace boost::math { template -constexpr Integer prime_approximation(const Integer upper_bound) +constexpr Integer prime_approximation(const Integer upper_bound) noexcept { constexpr auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia return static_cast(::floor(c * static_cast(upper_bound) / ::log(static_cast(upper_bound)))); } template -constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) +constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) noexcept { return prime_approximation(upper_bound) - prime_approximation(lower_bound); } diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index 9324a61e78..467dd7bc34 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ 
b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include namespace boost::math::detail::prime_sieve { @@ -49,6 +52,52 @@ decltype(auto) sequential_segmented_sieve(const Integer lower_bound, const Integ return resultant_primes; } + +template +decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const auto num_threads {std::thread::hardware_concurrency() > 0 ? std::thread::hardware_concurrency() : 2u}; + const Integer thread_range {(upper_bound - lower_bound) / static_cast(num_threads)}; + + std::vector> prime_vectors(num_threads); + std::vector> future_manager; + future_manager.reserve(num_threads); + + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + thread_range}; + + for(std::size_t i {}; i < num_threads - 1; ++i) + { + prime_vectors[i].reserve(static_cast(prime_approximation(current_lower_bound, current_upper_bound))); + + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &prime_vectors, i]{ + sequential_segmented_sieve(current_lower_bound, current_upper_bound, prime_vectors[i].begin()); + })); + + current_lower_bound = current_upper_bound; + current_upper_bound += thread_range; + } + + prime_vectors.back().reserve(static_cast(prime_approximation(current_lower_bound, upper_bound))); + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &prime_vectors]{ + sequential_segmented_sieve(current_lower_bound, upper_bound, prime_vectors.back().begin()); + })); + + std::size_t i {}; + for(auto&& future : future_manager) + { + future.get(); // Blocks to maintain proper sorting + + for(auto val : prime_vectors[i]) + { + *resultant_primes++ = val; + } + + ++i; + } + + return resultant_primes; +} } namespace boost::math @@ -64,24 +113,25 @@ decltype(auto) prime_sieve_iter(ExecutionPolicy&& 
policy, const Integer upper_bo else if (upper_bound <= detail::prime_sieve::linear_sieve_limit) { detail::prime_sieve::linear_sieve(upper_bound, resultant_primes); + return resultant_primes; } - else if constexpr (std::is_same_v, decltype(std::execution::seq)> - #if __cpp_lib_execution > 201900 - || std::is_same_v, decltype(std::execution::unseq)> - #endif - ) + resultant_primes = detail::prime_sieve::linear_sieve(detail::prime_sieve::linear_sieve_limit, resultant_primes); + + if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) { - resultant_primes = detail::prime_sieve::linear_sieve(detail::prime_sieve::linear_sieve_limit, resultant_primes); - detail::prime_sieve::sequential_segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); + resultant_primes = detail::prime_sieve::sequential_segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); } else { - //TODO(mborland): The threaded part + resultant_primes = detail::prime_sieve::segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); } - return resultant_primes; } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index ed830c66ae..2181afa9f0 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -21,6 +21,7 @@ #include #include #include +#include template void test_prime_sieve() @@ -85,7 +86,6 @@ void test_sequential_prime_sieve() BOOST_TEST_EQ(primes.size(), 78498); } - template void test_sequential_prime_sieve_iter() { @@ -113,6 +113,37 @@ void test_sequential_prime_sieve_iter() BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); } +template +void test_prime_sieve_iter() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + std::cout << "1'000" << std::endl; + 
boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::cout << "10'000" << std::endl; + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::cout << "100'000" << std::endl; + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::cout << "1'000'000" << std::endl; + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + template void test_prime_range() { @@ -382,6 +413,8 @@ int main() test_sequential_prime_sieve_iter(); test_sequential_prime_sieve_iter(); + test_prime_sieve_iter(); + // Large composite tests (Commented out for CI) //test_par_prime_sieve_large(); //test_par_prime_sieve_large(); From a77278211f7be5b78ae789fb0cd1fd309249dde4 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 30 Sep 2020 19:20:36 -0500 Subject: [PATCH 71/83] Threaded method completed and validated [CI SKIP] --- .../boost/math/special_functions/prime_sieve_iter.hpp | 7 +++---- test/test_prime_sieve.cpp | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index 467dd7bc34..678268fb67 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -13,7 +13,6 @@ #include #include #include -#include 
#include #include #include @@ -68,7 +67,7 @@ decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bo for(std::size_t i {}; i < num_threads - 1; ++i) { - prime_vectors[i].reserve(static_cast(prime_approximation(current_lower_bound, current_upper_bound))); + prime_vectors[i].resize(static_cast(prime_approximation(current_lower_bound, current_upper_bound))); future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &prime_vectors, i]{ sequential_segmented_sieve(current_lower_bound, current_upper_bound, prime_vectors[i].begin()); @@ -78,7 +77,7 @@ decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bo current_upper_bound += thread_range; } - prime_vectors.back().reserve(static_cast(prime_approximation(current_lower_bound, upper_bound))); + prime_vectors.back().resize(static_cast(prime_approximation(current_lower_bound, upper_bound))); future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &prime_vectors]{ sequential_segmented_sieve(current_lower_bound, upper_bound, prime_vectors.back().begin()); })); @@ -88,7 +87,7 @@ decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bo { future.get(); // Blocks to maintain proper sorting - for(auto val : prime_vectors[i]) + for(auto& val : prime_vectors[i]) { *resultant_primes++ = val; } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 2181afa9f0..a58ad52d37 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -121,24 +121,20 @@ void test_prime_sieve_iter() std::fill(primes.begin(), primes.end(), 0); // 1'000 - std::cout << "1'000" << std::endl; boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); // 10'000 - std::cout << "10'000" << std::endl; std::fill(primes.begin(), primes.end(), 0); 
boost::math::prime_sieve_iter(std::execution::par, static_cast(10'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); // 100'000 - std::cout << "100'000" << std::endl; std::fill(primes.begin(), primes.end(), 0); boost::math::prime_sieve_iter(std::execution::par, static_cast(100'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); // 1'000'000 - std::cout << "1'000'000" << std::endl; std::fill(primes.begin(), primes.end(), 0); boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); @@ -414,6 +410,11 @@ int main() test_sequential_prime_sieve_iter(); test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); // Large composite tests (Commented out for CI) //test_par_prime_sieve_large(); From 2d1461fdd18de6adf90ae4655c43c1792ed0ad10 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 1 Oct 2020 18:04:42 -0500 Subject: [PATCH 72/83] Resolves issue #439 [CI SKIP] Sets default L1D value but offers interface for user to change --- .../boost/math/special_functions/prime_sieve_iter.hpp | 11 ++++++++--- test/test_prime_sieve.cpp | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index 678268fb67..ef8206fe12 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -19,13 +19,12 @@ namespace boost::math::detail::prime_sieve { -// TODO(mborland): Allow this value to be changed once cache functions are integerated -inline std::size_t L1_SIZE {32768}; +inline std::size_t L1D_SIZE {32'768}; template decltype(auto) 
sequential_segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) { - const Integer interval {static_cast(L1_SIZE * 8)}; + const Integer interval {static_cast(L1D_SIZE * 8)}; Integer current_lower_bound {lower_bound}; Integer current_upper_bound {current_lower_bound + interval}; @@ -139,6 +138,12 @@ inline decltype(auto) prime_sieve_iter(const Integer upper_bound, OutputIterator { return prime_sieve_iter(std::execution::seq, upper_bound, resultant_primes); } + +template +inline void set_l1d_size(const Integer size) +{ + detail::prime_sieve::L1D_SIZE = static_cast(size); +} } #endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index a58ad52d37..4a68ea2ea4 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -437,6 +437,9 @@ int main() //test_prime_range_seq_large(); //test_prime_range_seq_large(); //test_prime_range_seq_large(); + + boost::math::set_l1d_size(100'000); + BOOST_ASSERT_MSG(boost::math::detail::prime_sieve::L1D_SIZE == 100'000, "L1 Size not set"); boost::report_errors(); } From e7cdb3248342c2acdad0a7ee47b4efc211ae5d14 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 1 Oct 2020 20:49:33 -0500 Subject: [PATCH 73/83] Updated benchmarks [CI SKIP] --- .../special_functions/prime_sieve_iter.hpp | 2 +- .../performance/prime_sieve_performance.cpp | 35 +++++++++++++++---- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index ef8206fe12..ec84aa9ebd 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -88,7 +88,7 @@ decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bo for(auto& val : prime_vectors[i]) { - *resultant_primes++ = val; + *resultant_primes++ = std::move(val); } 
++i; diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 9c7ce83a66..33072f2060 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -6,6 +6,7 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) #include +#include #include #include #include @@ -108,6 +109,26 @@ void prime_sieve(benchmark::State& state) state.SetComplexityN(state.range(0)); } +template +inline decltype(auto) prime_sieve_oi_helper(ExecutionPolicy policy, Integer upper, OutputIterator resultant_primes) +{ + return boost::math::prime_sieve_iter(policy, upper, resultant_primes); +} + +template +void prime_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + + for(auto _ : state) + { + benchmark::DoNotOptimize(prime_sieve_oi_helper(std::execution::par, upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + template inline auto kimwalish_primes_helper(Integer upper, std::vector primes) -> std::vector { @@ -130,10 +151,11 @@ void kimwalish_primes(benchmark::State& state) // Invidiual Implementations // Linear //BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); //BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +// Linear output iterator +//BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Segmented 
//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); @@ -143,10 +165,11 @@ BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, // Complete Implemenations //BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark -BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_MAIN(); From 263c7a2ff89c6f3bea3b764bb18121dc0a6eebb7 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Fri, 2 Oct 2020 18:10:53 +0100 Subject: [PATCH 74/83] Add experimental threaded support. 
--- .../math/special_functions/prime_sieve_jm.hpp | 124 +++++++++++++++--- .../prime_sieve_performance_jm.cpp | 38 +++++- 2 files changed, 141 insertions(+), 21 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve_jm.hpp b/include/boost/math/special_functions/prime_sieve_jm.hpp index 1a2b005b78..a83e5f1689 100644 --- a/include/boost/math/special_functions/prime_sieve_jm.hpp +++ b/include/boost/math/special_functions/prime_sieve_jm.hpp @@ -163,10 +163,93 @@ inline bool linear_sieve_classical_segment(Container& primes, Sieve& masks, Inte return true; } + // 4096 is where benchmarked performance of linear_sieve begins to diverge template const Integer linear_sieve_limit = Integer(524288); // Constexpr does not work with boost::multiprecision types +template +inline bool linear_sieve_classical_segment_threaded(std::atomic* current_max_processed_value, std::mutex* lock, Container* primes, Integer start_offset, Integer end_offset, Integer stride, OutputIterator out, bool output_to_container) +{ + simple_bitset masks(linear_sieve_limit / 2); + + std::unique_lock l(*lock); + std::size_t prime_count = primes->size(); + l.unlock(); + + while (start_offset < end_offset) + { + Integer max_points = end_offset - start_offset > linear_sieve_limit ? linear_sieve_limit : end_offset - start_offset; + Integer limit = static_cast(std::sqrt(static_cast(start_offset + max_points)) + 1); + // Begin by striking out odd numbered multiples of all the primes we have so far. 
+ // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < prime_count; ++i) + { + Integer prime = (*primes)[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + // + // Now we must wait until the previous thread has completed the segment before this one: + // + while (current_max_processed_value->load() != start_offset) + std::this_thread::yield(); + // + // Maybe process all the primes we didn't have access to in the loop above: + // + if ((*primes)[prime_count - 1] < limit) + { + l.lock(); + prime_count = primes->size(); + l.unlock(); + // Begin by striking out odd numbered multiples of all the primes we have so far. + // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < prime_count; ++i) + { + Integer prime = (*primes)[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + } + // + // Now walk through the sieve outputting primes. 
+ // + l.lock(); + for (Integer index = 0; index < max_points / 2; ++index) + { + if (masks.test(index)) + { + *out++ = start_offset + 2 * index + 1; + if (output_to_container) + primes->push_back(start_offset + 2 * index + 1); + if (has_output_iterator_terminated(out)) + return false; + } + } + prime_count = primes->size(); + l.unlock(); + // + // We're done on this segment, signal the next thread: + // + masks.reset(); + *current_max_processed_value = start_offset + max_points; + start_offset += stride; + } + return true; +} + template void prime_sieve_imp(ExecutionPolicy&& policy, Integer upper_bound, Container& primes, OutputIterator out, bool output_to_container) { @@ -203,25 +286,34 @@ void prime_sieve_imp(ExecutionPolicy&& policy, Integer upper_bound, Container& p return; } } -#if 0 - else { - std::vector small_primes{}; - small_primes.reserve(1028); - - std::thread t1([&small_primes] { - boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); - }); - std::thread t2([upper_bound, &primes] { - boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), upper_bound, primes); - }); - - t1.join(); - t2.join(); - primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + unsigned hardware_concurrency = std::thread::hardware_concurrency(); + if ((hardware_concurrency == 0) || (upper_bound <= linear_sieve_limit * 2)) + { + // + // No point in using threads as there's only one more segment to process: + // + linear_sieve_classical_segment(primes, sieve, linear_sieve_limit, upper_bound - linear_sieve_limit, out, output_to_container); + } + else + { + unsigned n_threads = (upper_bound - linear_sieve_limit) / linear_sieve_limit +((upper_bound - linear_sieve_limit) % linear_sieve_limit ? 
1 : 0); + n_threads = (std::min)(n_threads, hardware_concurrency / 2); + + std::atomic current_max_processed_value = linear_sieve_limit; + std::vector> threads(n_threads); + std::mutex mutex; + + for (unsigned i = 0; i < n_threads; ++i) + threads[i].reset(new std::thread(linear_sieve_classical_segment_threaded, + ¤t_max_processed_value, &mutex, &primes, linear_sieve_limit * (i + 1), upper_bound, + linear_sieve_limit * n_threads, out, output_to_container)); + + for (unsigned i = 0; i < n_threads; ++i) + threads[i]->join(); + } } -#endif } template diff --git a/reporting/performance/prime_sieve_performance_jm.cpp b/reporting/performance/prime_sieve_performance_jm.cpp index a0d1697b77..6fcee3b6c8 100644 --- a/reporting/performance/prime_sieve_performance_jm.cpp +++ b/reporting/performance/prime_sieve_performance_jm.cpp @@ -66,6 +66,19 @@ void prime_sieve_seq(benchmark::State& state) state.SetComplexityN(state.range(0)); } template +void prime_sieve_par(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::prime_sieve(std::execution::par, upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template void prime_sieve_seq_jm(benchmark::State& state) { Integer upper = static_cast(state.range(0)); @@ -79,6 +92,19 @@ void prime_sieve_seq_jm(benchmark::State& state) state.SetComplexityN(state.range(0)); } template +void prime_sieve_seq_jm_par(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(std::execution::par, upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template void prime_sieve_seq_jm_oi(benchmark::State& state) { Integer upper = static_cast(state.range(0)); @@ -160,11 +186,13 @@ constexpr uint64_t high_range = uint64_t(1) << 32; // Complete 
Implemenations //BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(prime_sieve_seq, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(prime_sieve_seq_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(prime_sieve_seq_jm, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); -BENCHMARK_TEMPLATE(prime_sieve_seq_jm_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_seq, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_par, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve_seq_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(prime_sieve_seq_jm, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_seq_jm_par, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve_seq_jm_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 
30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); From 29eef8879ab04c2d4b6fd41efee51bc570e82315 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 2 Oct 2020 21:20:36 -0500 Subject: [PATCH 75/83] Correct benchmark memory allocation [CI SKIP] --- reporting/performance/prime_sieve_performance.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 33072f2060..1f34538003 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -119,8 +119,7 @@ template void prime_sieve_oi(benchmark::State& state) { Integer upper = static_cast(state.range(0)); - std::vector primes; - boost::math::prime_reserve(upper, primes); + std::vector primes(boost::math::prime_approximation(upper)); for(auto _ : state) { From b5a28b5b05d7a7a425070a93e079adac16d744cc Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 3 Oct 2020 21:44:32 -0500 Subject: [PATCH 76/83] Add linear sieve direct from stepanov [CI SKIP] --- .../detail/interval_prime_sieve.hpp | 2 +- .../detail/linear_prime_sieve.hpp | 48 +++++++++++++++++++ .../performance/prime_sieve_performance.cpp | 26 +++++++--- test/test_prime_sieve.cpp | 19 ++++++++ 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp index fa1b4a5595..65ba43086b 100644 --- a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -185,7 +185,7 @@ void IntervalSieve::Sieve() noexcept else { - prime_reserve(right_, primes_); + primes_.resize(prime_approximation(right_)); 
linear_sieve(primes_range, primes_.begin()); for(; primes_[i] < primes_range; ++i) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 9f67e2e4a1..461d9c8dca 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -9,6 +9,8 @@ #define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP #include +#include +#include namespace boost::math::detail::prime_sieve { @@ -38,6 +40,52 @@ decltype(auto) linear_sieve(const Integer upper_bound, OutputIterator resultant_ // 4'096 is where benchmarked performance of linear_sieve begins to diverge template static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types + +// Stepanov Sieve - From Mathematics to Generic Programming Chap 3 +template +void mark_sieve(RandomAccessIterator first, RandomAccessIterator last, Integer factor) +{ + *first = false; + while(last - first > factor) + { + first = first + factor; + *first = false; + } +} + +template +void sift(RandomAccessIterator first, Integer n) +{ + const auto last {std::next(first, static_cast(n))}; + std::fill(first, last, true); + Integer i {0}; + Integer index_square {3}; + Integer factor {3}; + + for(; index_square < n; index_square += factor + factor - 2) + { + if(first[i]) + { + mark_sieve(first + index_square, last, factor); + } + + ++i; + factor += 2; + } +} + +// TODO(mborland): Pass in a more efficient data structure (likely dynamic_bitset) to sift and post-process +template +inline decltype(auto) stepanov_sieve(Integer upper_bound, OutputIterator resultant_primes) +{ + if(upper_bound == 2) + { + return resultant_primes; + } + + sift(resultant_primes, upper_bound); + return resultant_primes; +} } #endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP diff --git 
a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 1f34538003..98c2b23600 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -39,11 +39,24 @@ template void linear_sieve_oi(benchmark::State& state) { Integer upper = static_cast(state.range(0)); - std::vector primes; - boost::math::prime_reserve(upper, primes); + std::vector primes(boost::math::prime_approximation(upper)); + + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + +template +void stepanov_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)) / 2 - 1; + std::vector primes(upper, 1); + for(auto _ : state) { - benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, std::back_inserter(primes))); + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::stepanov_sieve(upper, primes.begin())); } state.SetComplexityN(state.range(0)); } @@ -154,7 +167,8 @@ void kimwalish_primes(benchmark::State& state) //BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Linear output iterator -//BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(stepanov_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); // Segmented //BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); @@ -164,8 +178,8 @@ void kimwalish_primes(benchmark::State& state) // Complete Implemenations //BENCHMARK_TEMPLATE(prime_sieve, 
int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark //BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 4a68ea2ea4..193621df70 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -335,6 +335,23 @@ void test_linear_sieve_iterator() std::fill(primes.begin(), primes.end(), 0); boost::math::detail::prime_sieve::linear_sieve(static_cast(100'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + std::vector primes_v; + //boost::math::prime_reserve(static_cast(100'000), primes_v); Prime reserve does not work with OI. Need to use resize. 
+ primes_v.resize(10'000, 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(100'000), primes_v.begin()); + BOOST_TEST_EQ(array_size - std::count(primes_v.cbegin(), primes_v.cend(), 0), 9'592); + +} + +template +void test_stepanov_sieve() +{ + constexpr std::size_t array_size {500}; + std::array primes; + + boost::math::detail::prime_sieve::stepanov_sieve(static_cast(500), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 167); // Skips 2 } int main() @@ -372,6 +389,8 @@ int main() test_interval_sieve_iterator(); test_interval_sieve_iterator(); + test_stepanov_sieve(); + // Composite test_prime_sieve(); test_prime_sieve(); From 6c26b53c51569ce0051d77cd2ddbfd3c412496ca Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sat, 3 Oct 2020 23:07:30 -0500 Subject: [PATCH 77/83] First draft of eratosthenes w/ wheel [CI SKIP] --- .../detail/interval_prime_sieve.hpp | 2 +- .../detail/linear_prime_sieve.hpp | 53 +++++++++++++++++++ test/test_prime_sieve.cpp | 13 +++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp index 65ba43086b..ee815fe7d6 100644 --- a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -185,7 +185,7 @@ void IntervalSieve::Sieve() noexcept else { - primes_.resize(prime_approximation(right_)); + primes_.resize(static_cast(prime_approximation(right_))); linear_sieve(primes_range, primes_.begin()); for(; primes_[i] < primes_range; ++i) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 461d9c8dca..1e1061a5f3 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ 
b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -8,9 +8,11 @@ #ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP #define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP +#include #include #include #include +#include namespace boost::math::detail::prime_sieve { @@ -53,6 +55,15 @@ void mark_sieve(RandomAccessIterator first, RandomAccessIterator last, Integer f } } +template +inline void mark_sieve(Bitset& bits, Integer factor) +{ + for(Integer i {factor * factor}; i < bits.size(); i += factor) + { + bits[static_cast(i)] = 0; + } +} + template void sift(RandomAccessIterator first, Integer n) { @@ -86,6 +97,48 @@ inline decltype(auto) stepanov_sieve(Integer upper_bound, OutputIterator resulta sift(resultant_primes, upper_bound); return resultant_primes; } + +// TODO(mborland): Pass in execution policy. mark_sieve can readily be converted to std::for_each, but dynamic_bitset would need replaced with something +// that has iterators +template +decltype(auto) wheel_sieve_of_eratosthenes(Integer upper_bound, OutputIterator resultant_primes) +{ + const Integer sqrt_upper_bound {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + boost::dynamic_bitset<> trial(static_cast(upper_bound)); + trial.set(); + std::array primes {2, 3, 5}; // Wheel basis + std::array wheel {7, 11, 13, 17, 19, 23, 29, 31}; // MOD 30 wheel + + for(auto& prime : primes) + { + mark_sieve(trial, prime); + *resultant_primes++ = prime; + } + + for(auto& co_prime : wheel) + { + mark_sieve(trial, co_prime); + *resultant_primes++ = co_prime; + } + + for(Integer i {wheel.back()}; i < static_cast(sqrt_upper_bound); i += wheel.back() - 1) + { + if(trial[static_cast(i)] == true) + { + mark_sieve(trial, i * i); + } + } + + for(Integer i {wheel.back() + 2}; i < upper_bound; i += 2) + { + if(trial[static_cast(i)]) + { + *resultant_primes++ = i; + } + } + + return resultant_primes; +} } #endif // 
BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 193621df70..14a4ca81ef 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -354,6 +354,17 @@ void test_stepanov_sieve() BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 167); // Skips 2 } +template +void test_wheel_sieve_of_eratosthenes() +{ + constexpr std::size_t array_size {10'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); +} + int main() { // Test prime approximation for constexpr @@ -391,6 +402,8 @@ int main() test_stepanov_sieve(); + test_wheel_sieve_of_eratosthenes(); + // Composite test_prime_sieve(); test_prime_sieve(); From 980bfe71a5d263d96fbdca3c4585e7ee044f316a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 4 Oct 2020 10:28:08 -0500 Subject: [PATCH 78/83] Fixes for multiprecision types [CI SKIP] Primes up to sqrt upper_bound are now captured as found --- .../detail/linear_prime_sieve.hpp | 30 ++++++++++++------- test/test_prime_sieve.cpp | 5 ++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index 1e1061a5f3..b48918a75e 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -56,7 +56,7 @@ void mark_sieve(RandomAccessIterator first, RandomAccessIterator last, Integer f } template -inline void mark_sieve(Bitset& bits, Integer factor) +inline void mark_sieve(Bitset& bits, const Integer factor) { for(Integer i {factor * factor}; i < bits.size(); i += factor) { @@ -101,35 +101,43 @@ inline decltype(auto) 
stepanov_sieve(Integer upper_bound, OutputIterator resulta // TODO(mborland): Pass in execution policy. mark_sieve can readily be converted to std::for_each, but dynamic_bitset would need replaced with something // that has iterators template -decltype(auto) wheel_sieve_of_eratosthenes(Integer upper_bound, OutputIterator resultant_primes) +decltype(auto) wheel_sieve_of_eratosthenes(const Integer upper_bound, OutputIterator resultant_primes) { + if(upper_bound == 2) + { + *resultant_primes++ = static_cast(2); + return resultant_primes; + } + const Integer sqrt_upper_bound {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; boost::dynamic_bitset<> trial(static_cast(upper_bound)); trial.set(); std::array primes {2, 3, 5}; // Wheel basis std::array wheel {7, 11, 13, 17, 19, 23, 29, 31}; // MOD 30 wheel - for(auto& prime : primes) + for(std::size_t i {}; i < primes.size(); ++i) { - mark_sieve(trial, prime); - *resultant_primes++ = prime; + mark_sieve(trial, primes[i]); + *resultant_primes++ = primes[i]; } - for(auto& co_prime : wheel) + // Last value in the wheel is the starting point for the next step + for(std::size_t i {}; i < wheel.size() - 1; ++i) { - mark_sieve(trial, co_prime); - *resultant_primes++ = co_prime; + mark_sieve(trial, wheel[i]); + *resultant_primes++ = wheel[i]; } - for(Integer i {wheel.back()}; i < static_cast(sqrt_upper_bound); i += wheel.back() - 1) + for(Integer i {wheel.back()}; i < sqrt_upper_bound; i += wheel.back() - 1) { if(trial[static_cast(i)] == true) { - mark_sieve(trial, i * i); + mark_sieve(trial, i); + *resultant_primes++ = i; } } - for(Integer i {wheel.back() + 2}; i < upper_bound; i += 2) + for(Integer i {sqrt_upper_bound % 2 == 0 ? 
sqrt_upper_bound + 1 : sqrt_upper_bound + 2}; i < upper_bound; i += 2) { if(trial[static_cast(i)]) { diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 14a4ca81ef..0363d06158 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -403,6 +403,11 @@ int main() test_stepanov_sieve(); test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); // Composite test_prime_sieve(); From 07e6f58ba19ae7ef7971296d9df334eb75a47bc1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 4 Oct 2020 16:34:41 -0500 Subject: [PATCH 79/83] wheel sieve passes standard battery [CI SKIP] --- .../detail/linear_prime_sieve.hpp | 26 +++++++++++++------ test/test_prime_sieve.cpp | 8 ++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp index b48918a75e..96176309c0 100644 --- a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -114,6 +114,7 @@ decltype(auto) wheel_sieve_of_eratosthenes(const Integer upper_bound, OutputIter trial.set(); std::array primes {2, 3, 5}; // Wheel basis std::array wheel {7, 11, 13, 17, 19, 23, 29, 31}; // MOD 30 wheel + const Integer wheel_mod {30}; for(std::size_t i {}; i < primes.size(); ++i) { @@ -122,26 +123,35 @@ decltype(auto) wheel_sieve_of_eratosthenes(const Integer upper_bound, OutputIter } // Last value in the wheel is the starting point for the next step - for(std::size_t i {}; i < wheel.size() - 1; ++i) + for(std::size_t i {}; i < wheel.size(); ++i) { mark_sieve(trial, wheel[i]); *resultant_primes++ = wheel[i]; } - for(Integer i {wheel.back()}; i < sqrt_upper_bound; i += wheel.back() - 1) + Integer i {wheel_mod}; 
+ for(; (i + wheel.front()) < sqrt_upper_bound; i += wheel_mod) { - if(trial[static_cast(i)] == true) + for(std::size_t j {}; j < wheel.size(); ++j) { - mark_sieve(trial, i); - *resultant_primes++ = i; + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + mark_sieve(trial, spoke); + *resultant_primes++ = std::move(spoke); + } } } - for(Integer i {sqrt_upper_bound % 2 == 0 ? sqrt_upper_bound + 1 : sqrt_upper_bound + 2}; i < upper_bound; i += 2) + for(; (i + wheel.front()) < upper_bound; i += wheel_mod) { - if(trial[static_cast(i)]) + for(std::size_t j {}; j < wheel.size(); ++j) { - *resultant_primes++ = i; + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + *resultant_primes++ = std::move(spoke); + } } } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 0363d06158..2201f52644 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -363,6 +363,14 @@ void test_wheel_sieve_of_eratosthenes() boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(1'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); } int main() From 86b9e5af4b9d4c9b42e1a0dfda03f5fbe76a439a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 4 Oct 2020 21:49:06 -0500 Subject: [PATCH 80/83] Add prime sieve wrapper [CI SKIP] --- .../special_functions/prime_sieve_iter.hpp | 12 +++++++ .../performance/prime_sieve_performance.cpp | 20 +++++++++-- test/test_prime_sieve.cpp | 34 +++++++++++++++++++ 3 files 
changed, 63 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index ec84aa9ebd..0da126717d 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -139,6 +140,17 @@ inline decltype(auto) prime_sieve_iter(const Integer upper_bound, OutputIterator return prime_sieve_iter(std::execution::seq, upper_bound, resultant_primes); } + +template +inline decltype(auto) prime_sieve_wrapper(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator resultant_primes) +{ + std::vector primes; + prime_reserve(upper_bound, primes); + prime_sieve(policy, upper_bound, primes); + + return std::move(primes.begin(), primes.end(), resultant_primes); +} + template inline void set_l1d_size(const Integer size) { diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 98c2b23600..b4cc6c1122 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -141,6 +141,19 @@ void prime_sieve_oi(benchmark::State& state) state.SetComplexityN(state.range(0)); } +template +void prime_sieve_wrapper(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes(boost::math::prime_approximation(upper)); + + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::prime_sieve_wrapper(std::execution::par, upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + template inline auto kimwalish_primes_helper(Integer upper, std::vector primes) -> std::vector { @@ -167,8 +180,8 @@ void kimwalish_primes(benchmark::State& state) //BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); // Linear output 
iterator -BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); -BENCHMARK_TEMPLATE(stepanov_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(stepanov_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); // Segmented //BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); @@ -179,7 +192,8 @@ BENCHMARK_TEMPLATE(stepanov_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1 // Complete Implemenations //BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_wrapper, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark //BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index 2201f52644..af734fc2bf 
100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -140,6 +140,33 @@ void test_prime_sieve_iter() BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); } +template +void test_prime_sieve_wrapper() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + boost::math::prime_sieve_wrapper(std::execution::par, static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_wrapper(std::execution::par, static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_wrapper(std::execution::par, static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_wrapper(std::execution::par, static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + template void test_prime_range() { @@ -461,6 +488,13 @@ int main() test_prime_sieve_iter(); test_prime_sieve_iter(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + // Large composite tests (Commented out for CI) //test_par_prime_sieve_large(); //test_par_prime_sieve_large(); From f19149eb6a9f2101bb8d44d62496f1109fc7f18e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 6 Oct 2020 22:07:21 -0500 Subject: [PATCH 81/83] Implement dual interface for prime sieve [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 63 +++++++++++++++++-- 
.../special_functions/prime_sieve_iter.hpp | 11 ---- .../performance/prime_sieve_performance.cpp | 8 +-- test/test_prime_sieve.cpp | 35 ++++++----- 4 files changed, 83 insertions(+), 34 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 6cff6bcb50..8e84b85353 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -233,10 +233,23 @@ void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Contai sieve.NewRange(current_lower_bound, current_upper_bound, resultant_primes); } } -} // End namespace detail + +// SFINAE for dual interface +template +class is_container +{ + typedef char yes; + struct no { char x[2]; }; + + template static yes test( decltype(&U::size) ); + template static no test(...); + +public: + enum { value = sizeof(test(0)) == sizeof(char) }; +}; template -void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) +void prime_sieve_impl(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) { using boost::math::detail::linear_sieve_limit; @@ -279,11 +292,53 @@ void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &prime } template -void prime_sieve(Integer upper_bound, Container &primes) +void prime_sieve_impl(Integer upper_bound, Container &primes) { - prime_sieve(std::execution::seq, upper_bound, primes); + prime_sieve_impl(std::execution::seq, upper_bound, primes); } +template +inline decltype(auto) prime_sieve_wrapper(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator resultant_primes) +{ + std::vector primes; + prime_reserve(upper_bound, primes); + prime_sieve_impl(policy, upper_bound, primes); + + return std::move(primes.begin(), primes.end(), resultant_primes); +} + +template +inline decltype(auto) prime_sieve_wrapper(Integer upper_bound, OutputIterator resultant_primes) +{ + return 
test_prime_sieve_wrapper(std::execution::seq, upper_bound, resultant_primes); +} +} // End namespace detail + + + + + +template +inline decltype(auto) prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, T output) +{ + if constexpr (detail::is_container>::value) + { + detail::prime_sieve_impl(policy, upper_bound, *output); + return; + } + + else + { + detail::prime_sieve_wrapper(policy, upper_bound, output); + return output; + } +} + +template +inline decltype(auto) prime_sieve(Integer upper_bound, T output) +{ + return prime_sieve(std::execution::seq, upper_bound, output); +} template void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp index 0da126717d..9b3a1691f6 100644 --- a/include/boost/math/special_functions/prime_sieve_iter.hpp +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -140,17 +140,6 @@ inline decltype(auto) prime_sieve_iter(const Integer upper_bound, OutputIterator return prime_sieve_iter(std::execution::seq, upper_bound, resultant_primes); } - -template -inline decltype(auto) prime_sieve_wrapper(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator resultant_primes) -{ - std::vector primes; - prime_reserve(upper_bound, primes); - prime_sieve(policy, upper_bound, primes); - - return std::move(primes.begin(), primes.end(), resultant_primes); -} - template inline void set_l1d_size(const Integer size) { diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index b4cc6c1122..258c6ac010 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -106,7 +106,7 @@ void interval_sieve(benchmark::State& state) template inline auto prime_sieve_helper(ExecuitionPolicy policy, Integer upper, Container primes) { - 
boost::math::prime_sieve(policy, upper, primes); + boost::math::prime_sieve(policy, upper, &primes); return primes; } @@ -149,7 +149,7 @@ void prime_sieve_wrapper(benchmark::State& state) for(auto _ : state) { - benchmark::DoNotOptimize(boost::math::prime_sieve_wrapper(std::execution::par, upper, primes.begin())); + benchmark::DoNotOptimize(boost::math::prime_sieve(std::execution::par, upper, primes.begin())); } state.SetComplexityN(state.range(0)); } @@ -191,10 +191,10 @@ void kimwalish_primes(benchmark::State& state) // Complete Implemenations //BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_wrapper, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); -//BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark //BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); //BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); diff --git a/test/test_prime_sieve.cpp 
b/test/test_prime_sieve.cpp index af734fc2bf..e864eeaa46 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -30,38 +30,38 @@ void test_prime_sieve() Integer ref {168}; // Calculated with wolfram-alpha // Does the function work with a vector - boost::math::prime_sieve(std::execution::par, static_cast(1'000), primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Tests for correctness // 2 primes.clear(); - boost::math::prime_sieve(std::execution::par, static_cast(2), primes); + boost::math::prime_sieve(std::execution::par, static_cast(2), &primes); BOOST_TEST_EQ(primes.size(), 0); // 100 primes.clear(); - boost::math::prime_sieve(std::execution::par, static_cast(100), primes); + boost::math::prime_sieve(std::execution::par, static_cast(100), &primes); BOOST_TEST_EQ(primes.size(), 25); // 10'000 primes.clear(); - boost::math::prime_sieve(std::execution::par, static_cast(10'000), primes); + boost::math::prime_sieve(std::execution::par, static_cast(10'000), &primes); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(std::execution::par, static_cast(100'000), primes); + boost::math::prime_sieve(std::execution::par, static_cast(100'000), &primes); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 primes.clear(); - boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), &primes); BOOST_TEST_EQ(primes.size(), 78498); // Does the function work with a deque? 
std::deque d_primes; - boost::math::prime_sieve(std::execution::par, static_cast(1'000), d_primes); + boost::math::prime_sieve(std::execution::par, static_cast(1'000), &d_primes); BOOST_TEST_EQ(d_primes.size(), ref); } @@ -72,17 +72,17 @@ void test_sequential_prime_sieve() // 10'000 primes.clear(); - boost::math::prime_sieve(static_cast(10'000), primes); + boost::math::prime_sieve(static_cast(10'000), &primes); BOOST_TEST_EQ(primes.size(), 1229); // 100'000 primes.clear(); - boost::math::prime_sieve(static_cast(100'000), primes); + boost::math::prime_sieve(static_cast(100'000), &primes); BOOST_TEST_EQ(primes.size(), 9592); // 1'000'000 primes.clear(); - boost::math::prime_sieve(static_cast(1'000'000), primes); + boost::math::prime_sieve(static_cast(1'000'000), &primes); BOOST_TEST_EQ(primes.size(), 78498); } @@ -148,22 +148,22 @@ void test_prime_sieve_wrapper() std::fill(primes.begin(), primes.end(), 0); // 1'000 - boost::math::prime_sieve_wrapper(std::execution::par, static_cast(1'000), primes.begin()); + boost::math::prime_sieve(std::execution::par, static_cast(1'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); // 10'000 std::fill(primes.begin(), primes.end(), 0); - boost::math::prime_sieve_wrapper(std::execution::par, static_cast(10'000), primes.begin()); + boost::math::prime_sieve(std::execution::par, static_cast(10'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); // 100'000 std::fill(primes.begin(), primes.end(), 0); - boost::math::prime_sieve_wrapper(std::execution::par, static_cast(100'000), primes.begin()); + boost::math::prime_sieve(std::execution::par, static_cast(100'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); // 1'000'000 std::fill(primes.begin(), primes.end(), 0); - boost::math::prime_sieve_wrapper(std::execution::par, static_cast(1'000'000), primes.begin()); + 
boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), primes.begin()); BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); } @@ -287,7 +287,7 @@ void test_par_prime_sieve_large() // Force the sieve into the multi-threading section and test reserve functionality boost::math::prime_reserve(static_cast(1073741824), primes); - boost::math::prime_sieve(std::execution::par, static_cast(1073741824), primes); + boost::math::prime_sieve(std::execution::par, static_cast(1073741824), &primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -404,6 +404,11 @@ int main() { // Test prime approximation for constexpr static_assert(boost::math::prime_approximation(100) != 0, "log and/or floor is/are not constexpr"); + + // Test SFINAE + std::vector test; + auto test_ref = &test; + static_assert(boost::math::detail::is_container>::value == 1, "INOP"); // Individual Algorithms test_linear_sieve(); From 1ad0d51b8ee40d5d66e5b00cb75a9827d73b4c0c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 6 Oct 2020 22:21:43 -0500 Subject: [PATCH 82/83] Implement dual interface for prime_range [CI SKIP] --- .../math/special_functions/prime_sieve.hpp | 101 ++++++++++++------ test/test_prime_sieve.cpp | 32 +++--- 2 files changed, 86 insertions(+), 47 deletions(-) diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp index 8e84b85353..0a839cee17 100644 --- a/include/boost/math/special_functions/prime_sieve.hpp +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -310,38 +310,11 @@ inline decltype(auto) prime_sieve_wrapper(ExecutionPolicy&& policy, Integer uppe template inline decltype(auto) prime_sieve_wrapper(Integer upper_bound, OutputIterator resultant_primes) { - return test_prime_sieve_wrapper(std::execution::seq, upper_bound, resultant_primes); -} -} // End namespace detail - - - - - -template -inline decltype(auto) prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, T 
output) -{ - if constexpr (detail::is_container>::value) - { - detail::prime_sieve_impl(policy, upper_bound, *output); - return; - } - - else - { - detail::prime_sieve_wrapper(policy, upper_bound, output); - return output; - } -} - -template -inline decltype(auto) prime_sieve(Integer upper_bound, T output) -{ - return prime_sieve(std::execution::seq, upper_bound, output); + return prime_sieve_wrapper(std::execution::seq, upper_bound, resultant_primes); } template -void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) +void prime_range_impl(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) { using boost::math::detail::linear_sieve_limit; Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; @@ -446,9 +419,75 @@ void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bo } template -inline void prime_range(Integer lower_bound, Integer upper_bound, Container &primes) +inline void prime_range_impl(Integer lower_bound, Integer upper_bound, Container &primes) +{ + prime_range_impl(std::execution::seq, lower_bound, upper_bound, primes); +} + +template +inline decltype(auto) prime_range_wrapper(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, OutputIterator resultant_primes) +{ + std::vector primes; + prime_reserve(lower_bound, upper_bound, primes); + prime_range_impl(policy, lower_bound, upper_bound, primes); + + return std::move(primes.begin(), primes.end(), resultant_primes); +} + +template +inline decltype(auto) prime_range_wrapper(Integer lower_bound, Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_range_wrapper(std::execution::seq, lower_bound, upper_bound, resultant_primes); +} + +} // End namespace detail + + + + + +template +inline decltype(auto) prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, T output) +{ + if constexpr (detail::is_container>::value) + { + 
detail::prime_sieve_impl(policy, upper_bound, *output); + return; + } + + else + { + detail::prime_sieve_wrapper(policy, upper_bound, output); + return output; + } +} + +template +inline decltype(auto) prime_sieve(Integer upper_bound, T output) +{ + return prime_sieve(std::execution::seq, upper_bound, output); +} + +template +inline decltype(auto) prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, T output) +{ + if constexpr (detail::is_container>::value) + { + detail::prime_range_impl(policy, lower_bound, upper_bound, *output); + return; + } + + else + { + detail::prime_range_wrapper(policy, lower_bound, upper_bound, output); + return output; + } +} + +template +inline decltype(auto) prime_range(Integer lower_bound, Integer upper_bound, T output) { - prime_range(std::execution::seq, lower_bound, upper_bound, primes); + return prime_range(std::execution::seq, lower_bound, upper_bound, output); } } diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp index e864eeaa46..dedd23b100 100644 --- a/test/test_prime_sieve.cpp +++ b/test/test_prime_sieve.cpp @@ -174,29 +174,29 @@ void test_prime_range() Integer ref {168}; // Calculated with wolfram-alpha // Does the upper and lower bound call work - boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Does parallel version work primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Does it work with a deque? 
std::deque d_primes; - boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), d_primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &d_primes); BOOST_TEST_EQ(d_primes.size(), ref); // Does the lower bound change the results? ref = 143; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(100), static_cast(1'000), primes); + boost::math::prime_range(std::execution::par, static_cast(100), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000'000), primes); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -209,17 +209,17 @@ void test_prime_range_large() // Larger numbers ref = 586'081; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(1'000'000), static_cast(10'000'000), primes); + boost::math::prime_range(std::execution::par, static_cast(1'000'000), static_cast(10'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); ref = 5'096'876; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(10'000'000), static_cast(100'000'000), primes); + boost::math::prime_range(std::execution::par, static_cast(10'000'000), static_cast(100'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); ref = 48'638'573; primes.clear(); - boost::math::prime_range(std::execution::par, static_cast(100'000'000), static_cast(1'073'741'824), primes); + boost::math::prime_range(std::execution::par, static_cast(100'000'000), static_cast(1'073'741'824), &primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -230,29 +230,29 @@ void test_prime_range_seq() Integer ref {168}; // Calculated with 
wolfram-alpha // Does the upper and lower bound call work - boost::math::prime_range(static_cast(2), static_cast(1'000), primes); + boost::math::prime_range(static_cast(2), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Does parallel version work primes.clear(); - boost::math::prime_range(static_cast(2), static_cast(1'000), primes); + boost::math::prime_range(static_cast(2), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Does it work with a deque? std::deque d_primes; - boost::math::prime_range(static_cast(2), static_cast(1'000), d_primes); + boost::math::prime_range(static_cast(2), static_cast(1'000), &d_primes); BOOST_TEST_EQ(d_primes.size(), ref); // Does the lower bound change the results? ref = 143; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(100), static_cast(1'000), primes); + boost::math::prime_range(static_cast(100), static_cast(1'000), &primes); BOOST_TEST_EQ(primes.size(), ref); // Will it call the sieve for large input ref = 78498; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(2), static_cast(1'000'000), primes); + boost::math::prime_range(static_cast(2), static_cast(1'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); } @@ -265,17 +265,17 @@ void test_prime_range_seq_large() // Larger numbers ref = 586'081; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(1'000'000), static_cast(10'000'000), primes); + boost::math::prime_range(static_cast(1'000'000), static_cast(10'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); ref = 5'096'876; // Calculated with wolfram-alpha primes.clear(); - boost::math::prime_range(static_cast(10'000'000), static_cast(100'000'000), primes); + boost::math::prime_range(static_cast(10'000'000), static_cast(100'000'000), &primes); BOOST_TEST_EQ(primes.size(), ref); ref = 48'638'573; primes.clear(); - boost::math::prime_range(static_cast(100'000'000), 
static_cast(1'073'741'824), primes); + boost::math::prime_range(static_cast(100'000'000), static_cast(1'073'741'824), &primes); BOOST_TEST_EQ(primes.size(), ref); } From 157a5a8d0fa8c76dcbd0936b071958518986ccdc Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Mon, 19 Oct 2020 17:12:50 +0100 Subject: [PATCH 83/83] Fix some GCC performance issues in jm::prime_sieve. Get MB's code compiling with msvc. [CI SKIP] --- .../special_functions/prime_approximation.hpp | 2 +- .../math/special_functions/prime_sieve_jm.hpp | 57 +++++++++++++++++-- .../performance/prime_sieve_performance.cpp | 2 +- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp index 399d59c593..5da12819a2 100644 --- a/include/boost/math/special_functions/prime_approximation.hpp +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -16,7 +16,7 @@ namespace boost::math template constexpr Integer prime_approximation(const Integer upper_bound) noexcept { - constexpr auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia + auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia return static_cast(::floor(c * static_cast(upper_bound) / ::log(static_cast(upper_bound)))); } diff --git a/include/boost/math/special_functions/prime_sieve_jm.hpp b/include/boost/math/special_functions/prime_sieve_jm.hpp index a83e5f1689..7b5232f5a2 100644 --- a/include/boost/math/special_functions/prime_sieve_jm.hpp +++ b/include/boost/math/special_functions/prime_sieve_jm.hpp @@ -21,6 +21,9 @@ #include #include #include +#include + +#include namespace jm { namespace detail { @@ -49,9 +52,9 @@ struct simple_bitset static_cast(1uLL << 49), static_cast(1uLL << 50), static_cast(1uLL << 51), static_cast(1uLL << 52), static_cast(1uLL << 53), static_cast(1uLL << 54), static_cast(1uLL << 55), static_cast(1uLL << 56), static_cast(1uLL << 57), static_cast(1uLL << 58), 
static_cast(1uLL << 59), static_cast(1uLL << 60), static_cast(1uLL << 61), static_cast(1uLL << 62), static_cast(1uLL << 63), }; - I mask = (sizeof(I) * CHAR_BIT) - 1; - std::size_t shift = ln2(sizeof(I) * CHAR_BIT); - BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + constexpr I mask = (sizeof(I) * CHAR_BIT) - 1; + constexpr std::size_t shift = ln2(sizeof(I) * CHAR_BIT); + //BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); return bits[n >> shift] & masks[n & mask]; } void clear(std::size_t n) @@ -67,13 +70,55 @@ struct simple_bitset }; constexpr I mask = (sizeof(I) * CHAR_BIT) - 1; constexpr std::size_t shift = ln2(sizeof(I) * CHAR_BIT); - BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + //BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); bits[n >> shift] &= masks[n & mask]; } std::size_t size()const { return m_size; } void reset(){ std::memset(bits.get(), 0xff, m_size / CHAR_BIT + (m_size % CHAR_BIT ? 
1 : 0)); } }; +template <> +inline std::uint64_t simple_bitset::test(std::size_t n)const +{ + constexpr std::uint64_t mask = 63; + constexpr std::size_t shift = 6; + return _bittest64(reinterpret_cast(bits.get()) + (n >> shift), n & mask); +} +template <> +inline void simple_bitset::clear(std::size_t n) +{ + constexpr std::uint64_t mask = 63; + constexpr std::size_t shift = 6; + _bittestandreset64(reinterpret_cast(bits.get()) + (n >> shift), n & mask); +} + +struct dynamic_bitset_wrapper +{ + boost::dynamic_bitset<> data; + + bool test(std::size_t n)const + { + return data.test(n); + } + void clear(std::size_t n) + { + data.set(n, false); + } + + dynamic_bitset_wrapper(std::size_t n) + { + data.resize(n, true); + } + std::size_t size()const { return data.size(); } + void reset() + { + data.set(0, data.size(), true); + } +}; + +typedef simple_bitset bitmask_type; +//typedef dynamic_bitset_wrapper bitmask_type; + template constexpr bool has_output_iterator_terminated(const T&) { @@ -171,7 +216,7 @@ const Integer linear_sieve_limit = Integer(524288); // Constexpr does not work w template inline bool linear_sieve_classical_segment_threaded(std::atomic* current_max_processed_value, std::mutex* lock, Container* primes, Integer start_offset, Integer end_offset, Integer stride, OutputIterator out, bool output_to_container) { - simple_bitset masks(linear_sieve_limit / 2); + bitmask_type masks(linear_sieve_limit / 2); std::unique_lock l(*lock); std::size_t prime_count = primes->size(); @@ -258,7 +303,7 @@ void prime_sieve_imp(ExecutionPolicy&& policy, Integer upper_bound, Container& p return; } - simple_bitset sieve((upper_bound <= linear_sieve_limit ? upper_bound : linear_sieve_limit) / 2); + bitmask_type sieve((upper_bound <= linear_sieve_limit ? 
upper_bound : linear_sieve_limit) / 2); if (output_to_container && (upper_bound > linear_sieve_limit)) { diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp index 258c6ac010..e9a49c1bd3 100644 --- a/reporting/performance/prime_sieve_performance.cpp +++ b/reporting/performance/prime_sieve_performance.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +//#include #include #include #include