diff --git a/doc/generator_defs.qbk b/doc/generator_defs.qbk index 7e635eef1..73f6d5892 100644 --- a/doc/generator_defs.qbk +++ b/doc/generator_defs.qbk @@ -36,6 +36,9 @@ [template ranlux64_4_01_speed[] 2%] [template mt19937ar_c_speed[] 97%] [template splitmix64_speed[] 155%] -[template xoshiro256pp_speed[] 90%] -[template xoshiro256d_speed[] 101%] -[template xoshiro256mm_speed[] 82%] +[template xoshiro256pp_speed[] 132%] +[template xoshiro256d_speed[] 139%] +[template xoshiro256mm_speed[] 116%] +[template xoshiro512pp_speed[] 133%] +[template xoshiro512d_speed[] 144%] +[template xoshiro512mm_speed[] 113%] diff --git a/doc/generators.qbk b/doc/generators.qbk index 3eba2841b..72d6df051 100644 --- a/doc/generators.qbk +++ b/doc/generators.qbk @@ -87,6 +87,9 @@ numbers mean faster random number generation. [[__xoshiro256pp] [2[sup 256] [`4*sizeof(uint64_t)`] [[xoshiro256pp_speed]] [[xoshiro256++ from https://prng.di.unimi.it]] [[__xoshiro256d] [2[sup 256] [`4*sizeof(uint64_t)`] [[xoshiro256d_speed]] [[This generator returns doubles instead of uint64_t. It is modified xoshiro256+ from https://prng.di.unimi.it]] [[__xoshiro256mm] [2[sup 256] [`4*sizeof(uint64_t)`] [[xoshiro256mm_speed]] [[xoshiro256** from https://prng.di.unimi.it]] + [[__xoshiro512pp] [2[sup 512] [`8*sizeof(uint64_t)`] [[xoshiro512pp_speed]] [[xoshiro512++ from https://prng.di.unimi.it]] + [[__xoshiro512d] [2[sup 512] [`8*sizeof(uint64_t)`] [[xoshiro512d_speed]] [[This generator returns doubles instead of uint64_t. It is modified xoshiro512+ from https://prng.di.unimi.it]] + [[__xoshiro512mm] [2[sup 512] [`8*sizeof(uint64_t)`] [[xoshiro512mm_speed]] [[xoshiro512** from https://prng.di.unimi.it]] ] As observable from the table, there is generally a quality/performance/memory @@ -98,8 +101,8 @@ generators for a given application of Monte Carlo simulation will improve the confidence in the results. If the names of the generators don't ring any bell and you have no idea -which generator to use, it is reasonable to employ __mt19937 for a start: It -is fast and has acceptable quality. +which generator to use, it is reasonable to employ __xoshiro256pp for a start: It +is fast and of high quality. [note These random number generators are not intended for use in applications where non-deterministic random numbers are required. See __random_device diff --git a/include/boost/random/detail/xoshiro_base.hpp b/include/boost/random/detail/xoshiro_base.hpp index 2ccacd7c6..0fab8e1ea 100644 --- a/include/boost/random/detail/xoshiro_base.hpp +++ b/include/boost/random/detail/xoshiro_base.hpp @@ -27,7 +27,6 @@ #include #include #include -#include namespace boost { namespace random { @@ -79,6 +78,34 @@ class xoshiro_base state_[3] = s3; } + inline void jump_impl(const std::integral_constant&) noexcept + { + constexpr std::array jump = {{ UINT64_C(0x33ed89b6e7a353f9), UINT64_C(0x760083d7955323be), + UINT64_C(0x2837f2fbb5f22fae), UINT64_C(0x4b8c5674d309511c), + UINT64_C(0xb11ac47a7ba28c25), UINT64_C(0xf1be7667092bcc1c), + UINT64_C(0x53851efdb6df0aaf), UINT64_C(0x1ebbc8b23eaf25db) }}; + + std::array t = {{ 0, 0, 0, 0, 0, 0, 0, 0 }}; + + for (std::size_t i = 0; i < jump.size(); ++i) + { + for (std::size_t b = 0; b < 64U; ++b) + { + if (jump[i] & UINT64_C(1) << b) + { + for (std::size_t w = 0; w < state_.size(); ++w) + { + t[w] ^= state_[w]; + } + } + + next(); + } + } + + state_ = t; + } + inline void long_jump_impl(const std::integral_constant&) noexcept { constexpr std::array long_jump = {{ UINT64_C(0x76e15d3efefdcbbf), UINT64_C(0xc5004e441c522fb3), @@ -110,6 +137,34 @@ class xoshiro_base state_[2] = s2; state_[3] = s3; } + + inline void long_jump_impl(const std::integral_constant&) noexcept + { + constexpr std::array long_jump = {{ UINT64_C(0x11467fef8f921d28), UINT64_C(0xa2a819f2e79c8ea8), + UINT64_C(0xa8299fc284b3959a), UINT64_C(0xb4d347340ca63ee1), + UINT64_C(0x1cb0940bedbff6ce), UINT64_C(0xd956c5c4fa1f8e17), + UINT64_C(0x915e38fd4eda93bc), UINT64_C(0x5b3ccdfa5d7daca5) }}; + + std::array t = {{ 0, 0, 0, 0, 0, 0, 0, 0 }}; + + for (std::size_t i = 0; i < long_jump.size(); ++i) + { + for (std::size_t b = 0; b < 64U; ++b) + { + if (long_jump[i] & UINT64_C(1) << b) + { + for (std::size_t w = 0; w < state_.size(); ++w) + { + t[w] ^= state_[w]; + } + } + + next(); + } + } + + state_ = t; + } public: diff --git a/include/boost/random/xoshiro.hpp b/include/boost/random/xoshiro.hpp index eb6e5c454..cd9af496c 100644 --- a/include/boost/random/xoshiro.hpp +++ b/include/boost/random/xoshiro.hpp @@ -93,7 +93,7 @@ class xoshiro256d final : public detail::xoshiro_base return result; } - inline double next() noexcept + inline result_type next() noexcept { #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L return static_cast((next_int() >> 11)) * 0x1.0p-53; @@ -101,6 +101,24 @@ class xoshiro256d final : public detail::xoshiro_base return static_cast((next_int() >> 11)) * 1.11022302462515654e-16; #endif } + + static constexpr result_type (min)() noexcept + { + #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L + return static_cast((std::numeric_limits::min)()) * 0x1.0p-53; + #else + return static_cast((std::numeric_limits::min)()) * 1.11022302462515654e-16; + #endif + } + + static constexpr result_type (max)() noexcept + { + #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L + return static_cast((std::numeric_limits::max)()) * 0x1.0p-53; + #else + return static_cast((std::numeric_limits::max)()) * 1.11022302462515654e-16; + #endif + } }; /** @@ -140,6 +158,173 @@ class xoshiro256mm final : public detail::xoshiro_base } }; +/** + * This is xoshiro512++ 1.0, one of our all-purpose, rock-solid + * generators. It has excellent (about 1ns) speed, a state (512 bits) that + * is large enough for any parallel application, and it passes all tests + * we are aware of. + * + * For generating just floating-point numbers, xoshiro512+ is even faster. + * + * The state must be seeded so that it is not everywhere zero. If you have + * a 64-bit seed, we suggest to seed a splitmix64 generator and use its + * output to fill s. + */ + +class xoshiro512pp final : public detail::xoshiro_base +{ +private: + + using Base = detail::xoshiro_base; + +public: + + using Base::Base; + + inline result_type next() noexcept + { + const std::uint64_t result = boost::core::rotl(state_[0] + state_[2], 17) + state_[2]; + + const std::uint64_t t = state_[1] << 11; + + state_[2] ^= state_[0]; + state_[5] ^= state_[1]; + state_[1] ^= state_[2]; + state_[7] ^= state_[3]; + state_[3] ^= state_[4]; + state_[4] ^= state_[5]; + state_[0] ^= state_[6]; + state_[6] ^= state_[7]; + + state_[6] ^= t; + + state_[7] = boost::core::rotl(state_[7], 21); + + return result; + } +}; + +/* This is xoshiro512** 1.0, one of our all-purpose, rock-solid generators + * with increased state size. It has excellent (about 1ns) speed, a state + * (512 bits) that is large enough for any parallel application, and it + * passes all tests we are aware of. + * + * For generating just floating-point numbers, xoshiro512+ is even faster. + * + * The state must be seeded so that it is not everywhere zero. If you have + * a 64-bit seed, we suggest to seed a splitmix64 generator and use its + * output to fill s. + */ + +class xoshiro512mm final : public detail::xoshiro_base +{ +private: + + using Base = detail::xoshiro_base; + +public: + + using Base::Base; + + inline result_type next() noexcept + { + const std::uint64_t result = boost::core::rotl(state_[1] * 5, 7) * 9; + + const std::uint64_t t = state_[1] << 11; + + state_[2] ^= state_[0]; + state_[5] ^= state_[1]; + state_[1] ^= state_[2]; + state_[7] ^= state_[3]; + state_[3] ^= state_[4]; + state_[4] ^= state_[5]; + state_[0] ^= state_[6]; + state_[6] ^= state_[7]; + + state_[6] ^= t; + + state_[7] = boost::core::rotl(state_[7], 21); + + return result; + } +}; + +/* This is xoshiro512+ 1.0, our generator for floating-point numbers with + * increased state size. We suggest to use its upper bits for + * floating-point generation, as it is slightly faster than xoshiro512**. + * It passes all tests we are aware of except for the lowest three bits, + * which might fail linearity tests (and just those), so if low linear + * complexity is not considered an issue (as it is usually the case) it + * can be used to generate 64-bit outputs, too. + * + * We suggest to use a sign test to extract a random Boolean value, and + * right shifts to extract subsets of bits. + * + * The state must be seeded so that it is not everywhere zero. If you have + * a 64-bit seed, we suggest to seed a splitmix64 generator and use its + * output to fill s. + */ + +class xoshiro512d final : public detail::xoshiro_base +{ +private: + + using Base = detail::xoshiro_base; + +public: + + using Base::Base; + + inline std::uint64_t next_int() noexcept + { + const std::uint64_t result = state_[0] + state_[2]; + + const std::uint64_t t = state_[1] << 11; + + state_[2] ^= state_[0]; + state_[5] ^= state_[1]; + state_[1] ^= state_[2]; + state_[7] ^= state_[3]; + state_[3] ^= state_[4]; + state_[4] ^= state_[5]; + state_[0] ^= state_[6]; + state_[6] ^= state_[7]; + + state_[6] ^= t; + + state_[7] = boost::core::rotl(state_[7], 21); + + return result; + } + + inline result_type next() noexcept + { + #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L + return static_cast((next_int() >> 11)) * 0x1.0p-53; + #else + return static_cast((next_int() >> 11)) * 1.11022302462515654e-16; + #endif + } + + static constexpr result_type (min)() noexcept + { + #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L + return static_cast((std::numeric_limits::min)()) * 0x1.0p-53; + #else + return static_cast((std::numeric_limits::min)()) * 1.11022302462515654e-16; + #endif + } + + static constexpr result_type (max)() noexcept + { + #if (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) && defined(__cpp_hex_float) && __cpp_hex_float >= 201603L + return static_cast((std::numeric_limits::max)()) * 0x1.0p-53; + #else + return static_cast((std::numeric_limits::max)()) * 1.11022302462515654e-16; + #endif + } +}; + } // namespace random } // namespace boost diff --git a/performance/random_speed.cpp b/performance/random_speed.cpp index 0371a35d8..103860560 100644 --- a/performance/random_speed.cpp +++ b/performance/random_speed.cpp @@ -385,6 +385,9 @@ int main(int argc, char*argv[]) run(iter, "xoshiro256pp", boost::random::xoshiro256pp()); run(iter, "xoshiro256d", boost::random::xoshiro256d()); run(iter, "xoshiro256mm", boost::random::xoshiro256mm()); + run(iter, "xoshiro512pp", boost::random::xoshiro512pp()); + run(iter, "xoshiro512d", boost::random::xoshiro512d()); + run(iter, "xoshiro512mm", boost::random::xoshiro512mm()); #ifdef HAVE_MT19937INT_C // requires the original mt19937int.c diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index f7fa529cd..e8352854e 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -71,6 +71,12 @@ run test_xoshiro256d.cpp /boost/test//boost_unit_test_framework ; run test_comp_xoshiro256d.cpp ; run test_xoshiro256mm.cpp /boost/test//boost_unit_test_framework ; run test_comp_xoshiro256mm.cpp ; +run test_xoshiro512pp.cpp /boost/test//boost_unit_test_framework ; +run test_comp_xoshiro512pp.cpp ; +run test_xoshiro512mm.cpp /boost/test//boost_unit_test_framework ; +run test_comp_xoshiro512mm.cpp ; +run test_xoshiro512d.cpp /boost/test//boost_unit_test_framework ; +run test_comp_xoshiro512d.cpp ; run niederreiter_base2_validate.cpp /boost/test//boost_unit_test_framework ; run sobol_validate.cpp /boost/test//boost_unit_test_framework ; diff --git a/test/test_comp_xoshiro512d.cpp b/test/test_comp_xoshiro512d.cpp new file mode 100644 index 000000000..e5cbbb86f --- /dev/null +++ b/test/test_comp_xoshiro512d.cpp @@ -0,0 +1,295 @@ +/* + * Copyright Matt Borland 2025. + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * This file copies and pastes the original code for comparison under the following license + * + * Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org) + * + * To the extent possible under law, the author has dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +using std::uint64_t; + + +/* This is xoshiro512+ 1.0, our generator for floating-point numbers with + increased state size. We suggest to use its upper bits for + floating-point generation, as it is slightly faster than xoshiro512**. + It passes all tests we are aware of except for the lowest three bits, + which might fail linearity tests (and just those), so if low linear + complexity is not considered an issue (as it is usually the case) it + can be used to generate 64-bit outputs, too. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +static uint64_t s[8]; + +uint64_t next(void) { + const uint64_t result = s[0] + s[2]; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + + +/* This is the long-jump function for the generator. It is equivalent to + 2^384 calls to next(); it can be used to generate 2^128 starting points, + from each of which jump() will generate 2^128 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + +void test_no_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512d boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_basic_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512d boost_rng(42ULL); + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen(42ULL); + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512d boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.jump(); + jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_long_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512d boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.long_jump(); + long_jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +#if !defined(_MSVC_LANG) || _MSVC_LANG >= 202002L + +static inline double to_double(uint64_t x) { + const union { uint64_t i; double d; } u = { .i = UINT64_C(0x3FF) << 52 | x >> 12 }; + return u.d - 1.0; +} + +void test_double() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512d boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } + + const auto boost_double = boost_rng(); + const auto ref_double = to_double(next()); + + BOOST_TEST(std::fabs(boost_double - ref_double) < std::numeric_limits::epsilon()); +} + +#endif + +int main() +{ + test_no_seed(); + test_basic_seed(); + test_jump(); + test_long_jump(); + + #if !defined(_MSVC_LANG) || _MSVC_LANG >= 202002L + test_double(); + #endif + + return boost::report_errors(); +} diff --git a/test/test_comp_xoshiro512mm.cpp b/test/test_comp_xoshiro512mm.cpp new file mode 100644 index 000000000..94b0349aa --- /dev/null +++ b/test/test_comp_xoshiro512mm.cpp @@ -0,0 +1,245 @@ +/* + * Copyright Matt Borland 2025. + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * This file copies and pastes the original code for comparison under the following license + * + * Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org) + * + * To the extent possible under law, the author has dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include + +using std::uint64_t; +using std::memset; +using std::memcpy; + +/* This is xoshiro512** 1.0, one of our all-purpose, rock-solid generators + with increased state size. It has excellent (about 1ns) speed, a state + (512 bits) that is large enough for any parallel application, and it + passes all tests we are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +static uint64_t s[8]; + +uint64_t next(void) { + const uint64_t result = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + + +/* This is the long-jump function for the generator. It is equivalent to + 2^384 calls to next(); it can be used to generate 2^128 starting points, + from each of which jump() will generate 2^128 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + +void test_no_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512mm boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_basic_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512mm boost_rng(42ULL); + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen(42ULL); + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512mm boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.jump(); + jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_long_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512mm boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.long_jump(); + long_jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +int main() +{ + test_no_seed(); + test_basic_seed(); + test_jump(); + test_long_jump(); + + return boost::report_errors(); +} diff --git a/test/test_comp_xoshiro512pp.cpp b/test/test_comp_xoshiro512pp.cpp new file mode 100644 index 000000000..05aaa2f82 --- /dev/null +++ b/test/test_comp_xoshiro512pp.cpp @@ -0,0 +1,245 @@ +/* + * Copyright Matt Borland 2025. + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * This file copies and pastes the original code for comparison under the following license + * + * Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org) + * + * To the extent possible under law, the author has dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include + +using std::uint64_t; +using std::memset; +using std::memcpy; + +/* This is xoshiro512++ 1.0, one of our all-purpose, rock-solid + generators. It has excellent (about 1ns) speed, a state (512 bits) that + is large enough for any parallel application, and it passes all tests + we are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +static uint64_t s[8]; + +uint64_t next(void) { + const uint64_t result = rotl(s[0] + s[2], 17) + s[2]; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + + +/* This is the long-jump function for the generator. It is equivalent to + 2^384 calls to next(); it can be used to generate 2^128 starting points, + from each of which jump() will generate 2^128 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(std::size_t i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) + for(std::size_t w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +} + +void test_no_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512pp boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_basic_seed() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512pp boost_rng(42ULL); + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen(42ULL); + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512pp boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.jump(); + jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +void test_long_jump() +{ + // Default initialized to contain splitmix64 values + boost::random::xoshiro512pp boost_rng; + for (int i {}; i < 10000; ++i) + { + boost_rng(); + } + + boost::random::splitmix64 gen; + for (auto& i : s) + { + i = gen(); + } + + for (int i {}; i < 10000; ++i) + { + next(); + } + + boost_rng.long_jump(); + long_jump(); + + const auto final_state = boost_rng.state(); + + for (std::size_t i {}; i < final_state.size(); ++i) + { + BOOST_TEST_EQ(final_state[i], s[i]); + } +} + +int main() +{ + test_no_seed(); + test_basic_seed(); + test_jump(); + test_long_jump(); + + return boost::report_errors(); +} diff --git a/test/test_xoshiro512d.cpp b/test/test_xoshiro512d.cpp new file mode 100644 index 000000000..31988a316 --- /dev/null +++ b/test/test_xoshiro512d.cpp @@ -0,0 +1,28 @@ +/* test_xoshiro256d.cpp + * + * Copyright Steven Watanabe 2011 + * Copyright Matt Borland 2022 + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * $Id$ + * + */ + +#include +#include +#include + +#define BOOST_RANDOM_URNG boost::random::xoshiro512d +#define BOOST_RANDOM_CPP11_URNG + +// principal operation validated with CLHEP, values by experiment +#define BOOST_RANDOM_VALIDATION_VALUE 0.85594919700533156 +#define BOOST_RANDOM_SEED_SEQ_VALIDATION_VALUE 0.25120475433393952 + +// Since we are using splitmix64 we need to allow 64 bit seeds +// The test harness only allows for 32 bit seeds +#define BOOST_RANDOM_PROVIDED_SEED_TYPE std::uint64_t + +#include "test_generator.ipp" diff --git a/test/test_xoshiro512mm.cpp b/test/test_xoshiro512mm.cpp new file mode 100644 index 000000000..abe2c1b01 --- /dev/null +++ b/test/test_xoshiro512mm.cpp @@ -0,0 +1,24 @@ +/* test_xoshiro256_plus.cpp + * + * Copyright Steven Watanabe 2011 + * Copyright Matt Borland 2022 + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * $Id$ + * + */ + +#include +#include +#include + +#define BOOST_RANDOM_URNG boost::random::xoshiro512mm +#define BOOST_RANDOM_CPP11_URNG + +// principal operation validated with CLHEP, values by experiment +#define BOOST_RANDOM_VALIDATION_VALUE UINT64_C(9446215307655316885) +#define BOOST_RANDOM_SEED_SEQ_VALIDATION_VALUE UINT64_C(13183137209047681026) + +#include "test_generator.ipp" diff --git a/test/test_xoshiro512pp.cpp b/test/test_xoshiro512pp.cpp new file mode 100644 index 000000000..da3003e8a --- /dev/null +++ b/test/test_xoshiro512pp.cpp @@ -0,0 +1,24 @@ +/* test_xoshiro256_plus.cpp + * + * Copyright Steven Watanabe 2011 + * Copyright Matt Borland 2022 + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * $Id$ + * + */ + +#include +#include +#include + +#define BOOST_RANDOM_URNG boost::random::xoshiro512pp +#define BOOST_RANDOM_CPP11_URNG + +// principal operation validated with CLHEP, values by experiment +#define BOOST_RANDOM_VALIDATION_VALUE UINT64_C(11685388408145467864) +#define BOOST_RANDOM_SEED_SEQ_VALIDATION_VALUE UINT64_C(15400500895396352743) + +#include "test_generator.ipp"