From 6bd1d9c970bff30c0c361145022d08d45f7d562c Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 4 Aug 2023 20:14:18 -0400 Subject: [PATCH 01/22] initial commit --- include/boost/math/tools/roots.hpp | 253 ++++++++++++++++++++++++++++- test/test_roots.cpp | 85 ++++++++++ 2 files changed, 334 insertions(+), 4 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 791ec1a7e5..df62c77400 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -213,6 +213,245 @@ inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_ } +////// Motivation for the Bisection namespace. ////// +// +// What's the best way to bisect between a lower bound (lb) and an upper +// bound (ub) during root finding? Let's consider options... +// +// Arithmetic bisection: +// - The natural choice, but it doesn't always work well. For example, if +// lb = 1.0 and ub = std::numeric_limits::max(), many bisections +// may be needed to converge if the root is near 1. +// +// Geometric bisection: +// - This approach performs much better for the example above, but it +// too has issues. For example, if lb = 0.0, it gets stuck at 0.0. +// It also fails if lb and ub have different signs. +// +// In addition to the limitations outlined above, neither of these approaches +// works if ub is infinity. We want a more robust way to handle bisection +// for general root finding problems. That's what this namespace is for. +// +namespace detail { +namespace Bisection { + + ////// The Midpoint754 class ////// + // + // On a conceptual level, this class is designed to solve the following root + // finding problem. + // - A function f(x) has a single root x_solution somewhere in the interval + // [-infinity, +infinity]. For all values below x_solution f(x) is -1. + // For all values above x_solution f(x) is +1. The best way to root find + // this problem is to bisect in bit space. + // + // Efficient bit space bisection is possible because of the IEEE 754 standard. + // According to the standard, the bits in floating point numbers are partitioned + // into three parts: sign, exponent, and mantissa. As long as the sign of the + // of the number stays the same, increasing numbers in bit space have increasing + // floating point values starting at zero, and ending at infinity! The table + // below shows select numbers for float (single precision). + // + // 0 | 0 00000000 00000000000000000000000 | positive zero + // 1.4013e-45 | 0 00000000 00000000000000000000001 | std::numeric_limits::denorm_min() + // 1.17549e-38 | 0 00000001 00000000000000000000000 | std::numeric_limits::min() + // 1.19209e-07 | 0 01101000 00000000000000000000000 | std::numeric_limits::epsilon() + // 1 | 0 01111111 00000000000000000000000 | positive one + // 3.40282e+38 | 0 11111110 11111111111111111111111 | std::numeric_limits::max() + // inf | 0 11111111 00000000000000000000000 | std::numeric_limits::infinity() + // nan | 0 11111111 10000000000000000000000 | std::numeric_limits::quiet_NaN() + // + // Negative values are similar, but the sign bit is set to 1. My keeping track of the possible + // sign flip, it can bisect numbers with different signs. + // + template + class Midpoint754 { + private: + // Does the bisection in bit space for IEEE 754 floating point numbers. + // Infinities are allowed. It's assumed that neither x nor X is NaN. + static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); + static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); + static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); + + // Convert float to bits + static U float_to_uint(T x) { + U bits; + std::memcpy(&bits, &x, sizeof(U)); + return bits; + } + + // Convert bits to float + static T uint_to_float(U bits) { + T x; + std::memcpy(&x, &bits, sizeof(T)); + return x; + } + + public: + static T solve(T x, T X) { + using std::fabs; + + // Sort so that X has the larger magnitude + if (fabs(X) < fabs(x)) { + std::swap(x, X); + } + + const T x_mag = std::fabs(x); + const T X_mag = std::fabs(X); + const T sign_x = sign(x); + const T sign_X = sign(X); + + // Convert the magnitudes to bits + U bits_mag_x = float_to_uint(x_mag); + U bits_mag_X = float_to_uint(X_mag); + + // Calculate the average magnitude in bits + U bits_mag = (sign_x == sign_X) ? (bits_mag_X + bits_mag_x) : (bits_mag_X - bits_mag_x); + bits_mag = bits_mag >> 1; // Divide by 2 + + // Reconstruct upl_mean from average magnitude and sign of X + return uint_to_float(bits_mag) * sign_X; + } + }; // class Midpoint754 + + + template + class MidpointNon754 { + private: + static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is float"); + static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is double"); + + public: + static T solve(T x, T X) { + const T sx = sign(x); + const T sX = sign(X); + + // Sign flip return zero + if (sx * sX == -1) { return T(0.0); } + + // At least one is positive + if (0 < sx + sX) { return do_solve(x, X); } + + // At least one is negative + return -do_solve(-x, -X); + } + + private: + struct EqZero { + EqZero(T x) { BOOST_MATH_ASSERT(x == 0 && "x must be zero."); } + }; + + struct EqInf { + EqInf(T x) { BOOST_MATH_ASSERT(x == static_cast(std::numeric_limits::infinity()) && "x must be infinity."); } + }; + + class PosFinite { + public: + PosFinite(T x) : x_(x) { + BOOST_MATH_ASSERT(0 < x && "x must be positive."); + BOOST_MATH_ASSERT(x < std::numeric_limits::infinity() && "x must be less than infinity."); + } + + T value() const { return x_; } + + private: + T x_; + }; + + // Two unknowns + static T do_solve(T x, T X) { + if (X < x) { + return do_solve(X, x); + } + + if (x == 0) { + return do_solve(EqZero(x), X); + } else if (x == static_cast(std::numeric_limits::infinity())) { + return static_cast(std::numeric_limits::infinity()); + } else { + return do_solve(PosFinite(x), X); + } + } + + // One unknowns + static T do_solve(EqZero x, T X) { + if (X == 0) { + return T(0.0); + } else if (X == static_cast(std::numeric_limits::infinity())) { + return T(1.0); + } else { + return do_solve(x, PosFinite(X)); + } + } + static T do_solve(PosFinite x, T X) { + if (X == static_cast(std::numeric_limits::infinity())) { + return do_solve(x, EqInf(X)); + } else { + return do_solve(x, PosFinite(X)); + } + } + + // Zero unknowns + template + static typename std::enable_if::is_specialized, T>::type + do_solve(PosFinite x, EqInf X) { + return do_solve(x, PosFinite((std::numeric_limits::max)())); + } + template + static typename std::enable_if::is_specialized, T>::type + do_solve(PosFinite x, EqInf X) { + BOOST_MATH_ASSERT(false && "infinite bounds support requires specialization."); + return static_cast(std::numeric_limits::signaling_NaN()); + } + + template + static typename std::enable_if::is_specialized, U>::type + do_solve(EqZero x, PosFinite X) { + const auto get_smallest_value = []() { + const U denorm_min = std::numeric_limits::denorm_min(); + if (denorm_min != 0) { return denorm_min; } + + const U min = (std::numeric_limits::min)(); + if (min != 0) { return min; } + + BOOST_MATH_ASSERT(false && "denorm_min and min are both zero."); + return static_cast(std::numeric_limits::signaling_NaN()); + }; + + return do_solve(PosFinite(get_smallest_value()), X); + } + template + static typename std::enable_if::is_specialized, U>::type + do_solve(EqZero x, PosFinite X) { return X.value() / U(2); } + + static T do_solve(PosFinite x, PosFinite X) { + BOOST_MATH_ASSERT(x.value() <= X.value() && "x must be less than or equal to X."); + + const T xv = x.value(); + const T Xv = X.value(); + + // Take arithmetic mean if they are close enough + if (Xv < xv * 8) { return (Xv - xv) / 2 + xv; } // NOTE: avoids overflow + + // Take geometric mean if they are far apart + using std::sqrt; + return sqrt(xv) * sqrt(Xv); // NOTE: avoids overflow + } + }; // class MidpointNon754 + + template + static T calc_midpoint(T x, T X) { + return MidpointNon754::solve(x, X); + } + static float calc_midpoint(float x, float X) { + return Midpoint754::solve(x, X); + } + static double calc_midpoint(double x, double X) { + return Midpoint754::solve(x, X); + } + +} // namespace Bisection +} // namespace detail + template T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) { @@ -257,7 +496,12 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& delta2 = delta1; delta1 = delta; detail::unpack_tuple(f(result), f0, f1); - --count; + if (count == 0) { + return policies::raise_evaluation_error(function, "Ran out of iterations in boost::math::tools::newton_raphson_iterate, perhaps we have a local minima near current best guess of %1%", guess, boost::math::policies::policy<>()); + } else { + --count; + } + if (0 == f0) break; if (f1 == 0) @@ -275,7 +519,8 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& if (fabs(delta * 2) > fabs(delta2)) { // Last two steps haven't converged. - delta = (delta > 0) ? (result - min) / 2 : (result - max) / 2; + const T x_other = (delta > 0) ? min : max; + delta = result - detail::Bisection::calc_midpoint(result, x_other); // reset delta1/2 so we don't take this branch next time round: delta1 = 3 * delta; delta2 = 3 * delta; @@ -302,7 +547,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max = guess; max_range_f = f0; } - else + else if (delta < 0) // Cannot have "else" here, as delta being zero is not indicative of failure { min = guess; min_range_f = f0; @@ -314,7 +559,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& { return policies::raise_evaluation_error(function, "There appears to be no root to be found in boost::math::tools::newton_raphson_iterate, perhaps we have a local minima near current best guess of %1%", guess, boost::math::policies::policy<>()); } - }while(count && (fabs(result * factor) < fabs(delta))); + } while (fabs(result * factor) < fabs(delta) || result == 0); max_iter -= count; diff --git a/test/test_roots.cpp b/test/test_roots.cpp index e4e68e24b8..26d4a2f606 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -23,7 +23,9 @@ #include #include +#include #include +#include #define BOOST_CHECK_CLOSE_EX(a, b, prec, i) \ {\ @@ -649,11 +651,93 @@ void test_failures() #endif } +template +void test_bisect() { + // Creates a vector of test values that include: zero, denorm_min, min, epsilon, max, + // and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3. + const auto fn_create_test_ladder = [](){ + std::vector v; + + const auto fn_push_back_x_scaled = [&v](const T& x) { + const auto fn_add_if_non_zero_and_finite = [&v](const T& x) { + if (x != 0 && boost::math::isfinite(x)) { // Avoids most duplications + v.push_back(x); + } + }; + + const T two_thirds = T(2) / T(3); + const T four_thirds = T(4) / T(3); + + v.push_back(x); + fn_add_if_non_zero_and_finite(x * T(0.5)); + fn_add_if_non_zero_and_finite(x * two_thirds); + fn_add_if_non_zero_and_finite(x * four_thirds); + fn_add_if_non_zero_and_finite(x * T(2.0)); + }; + + const auto fn_push_back_pm = [&](std::vector& v, const T& x) { + fn_push_back_x_scaled( x); + fn_push_back_x_scaled(-x); + }; + + fn_push_back_pm(v, T(0.0)); + fn_push_back_pm(v, std::numeric_limits::denorm_min()); + fn_push_back_pm(v, (std::numeric_limits::min)()); + fn_push_back_pm(v, std::numeric_limits::epsilon()); + const int test_exp_range = 5; + for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) { + const int exponent = i * 3; + const T x = std::ldexp(1.0, exponent); + fn_push_back_pm(v, x); + } + fn_push_back_pm(v, (std::numeric_limits::max)()); + + // Real_concept doesn't have infinity + if (std::numeric_limits::has_infinity) { + fn_push_back_pm(v, std::numeric_limits::infinity()); + } + + std::sort(v.begin(), v.end()); + + const int num_zeros = std::count(v.begin(), v.end(), T(0.0)); + BOOST_CHECK_LE(2, num_zeros); // Positive and negative zero + + return v; + }; + + // Test for all combinations from the ladder + auto v = fn_create_test_ladder(); + for (const T& x_i: v) { + for (const T& x_j: v) { + const T x_mid_ij = boost::math::tools::detail::Bisection::calc_midpoint(x_i, x_j); + + const T x_lo = (std::min)(x_i, x_j); + const T x_hi = (std::max)(x_i, x_j); + + BOOST_CHECK_LE(x_lo, x_mid_ij); + BOOST_CHECK_LE(x_mid_ij, x_hi); + } + } +} + +void test_bisect_all_cases() { + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); +} + BOOST_AUTO_TEST_CASE( test_main ) { test_beta(0.1, "double"); + test_bisect_all_cases(); +#if 0 // bug reports: boost::math::skew_normal_distribution<> dist(2.0, 1.0, -2.5); BOOST_CHECK(boost::math::isfinite(quantile(dist, 0.075))); @@ -675,4 +759,5 @@ BOOST_AUTO_TEST_CASE( test_main ) test_solve_complex_quadratic>(); #endif test_failures(); +#endif } From eef5fe01f520f55647563d4174c3a29547554b1a Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 4 Aug 2023 20:39:56 -0400 Subject: [PATCH 02/22] fix eval order issue with count --- include/boost/math/tools/roots.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index df62c77400..4bf5a8ced5 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -495,12 +495,12 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& last_f0 = f0; delta2 = delta1; delta1 = delta; - detail::unpack_tuple(f(result), f0, f1); if (count == 0) { return policies::raise_evaluation_error(function, "Ran out of iterations in boost::math::tools::newton_raphson_iterate, perhaps we have a local minima near current best guess of %1%", guess, boost::math::policies::policy<>()); } else { --count; } + detail::unpack_tuple(f(result), f0, f1); if (0 == f0) break; From af08a6c84b0ccb656009f35d6434b7ee4faaa359 Mon Sep 17 00:00:00 2001 From: Ryan Date: Sat, 5 Aug 2023 13:49:44 -0400 Subject: [PATCH 03/22] add unit test for 1008 failure case --- include/boost/math/tools/roots.hpp | 2 +- test/test_roots.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 4bf5a8ced5..e5ee561808 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -496,7 +496,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& delta2 = delta1; delta1 = delta; if (count == 0) { - return policies::raise_evaluation_error(function, "Ran out of iterations in boost::math::tools::newton_raphson_iterate, perhaps we have a local minima near current best guess of %1%", guess, boost::math::policies::policy<>()); + return policies::raise_evaluation_error(function, "Ran out of iterations in boost::math::tools::newton_raphson_iterate, guess: %1%", guess, boost::math::policies::policy<>()); } else { --count; } diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 26d4a2f606..ce284e7a20 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -731,11 +731,39 @@ void test_bisect_all_cases() { test_bisect(); } +void test_count() { + const auto fn_newton = [](std::uintmax_t& i_max) { + return boost::math::tools::newton_raphson_iterate( + [](double x) { return std::make_pair(x * x - 3, 2 * x); }, + 10.0 /* x_initial */, + 0.0 /* bound lower*/, + 100.0 /* bound upper */, 52, i_max); + }; + + // Find the minimum number of iterations to find the solution + std::uintmax_t iter_find_solution = 100; + fn_newton(iter_find_solution); + BOOST_CHECK_EQUAL(iter_find_solution, std::uintmax_t(8)); // Confirm is 8 + + for (std::uintmax_t count = 0; count < (iter_find_solution * 2); count++) { + std::uintmax_t iters = count; + if (iter_find_solution <= count) { // Finds correct answer + using std::sqrt; + BOOST_CHECK_EQUAL(fn_newton(iters), sqrt(3.0)); + BOOST_CHECK_EQUAL(iters, iter_find_solution); + } + else { // Throws error for running out of iterations + BOOST_CHECK_THROW(fn_newton(iters), boost::math::evaluation_error); + } + } +} + BOOST_AUTO_TEST_CASE( test_main ) { test_beta(0.1, "double"); + test_count(); test_bisect_all_cases(); #if 0 // bug reports: From 16b1033c71387a5c5809ca1ad0025deb3f4889a0 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 7 Aug 2023 15:56:16 -0400 Subject: [PATCH 04/22] address @mborland feedback --- include/boost/math/tools/roots.hpp | 150 +++++++++++++++++------------ test/test_roots.cpp | 16 ++- 2 files changed, 103 insertions(+), 63 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index e5ee561808..f4522f278d 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -260,14 +260,14 @@ namespace Bisection { // inf | 0 11111111 00000000000000000000000 | std::numeric_limits::infinity() // nan | 0 11111111 10000000000000000000000 | std::numeric_limits::quiet_NaN() // - // Negative values are similar, but the sign bit is set to 1. My keeping track of the possible + // Negative values are similar, but the sign bit is set to 1. By keeping track of the possible // sign flip, it can bisect numbers with different signs. // template class Midpoint754 { private: // Does the bisection in bit space for IEEE 754 floating point numbers. - // Infinities are allowed. It's assumed that neither x nor X is NaN. + // Infinities are allowed. It's assumed that neither x nor y is NaN. static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); @@ -287,29 +287,29 @@ namespace Bisection { } public: - static T solve(T x, T X) { + static T solve(T x, T y) { using std::fabs; - // Sort so that X has the larger magnitude - if (fabs(X) < fabs(x)) { - std::swap(x, X); + // Sort so that y has the larger magnitude + if (fabs(y) < fabs(x)) { + std::swap(x, y); } const T x_mag = std::fabs(x); - const T X_mag = std::fabs(X); + const T y_mag = std::fabs(y); const T sign_x = sign(x); - const T sign_X = sign(X); + const T sign_y = sign(y); // Convert the magnitudes to bits U bits_mag_x = float_to_uint(x_mag); - U bits_mag_X = float_to_uint(X_mag); + U bits_mag_y = float_to_uint(y_mag); // Calculate the average magnitude in bits - U bits_mag = (sign_x == sign_X) ? (bits_mag_X + bits_mag_x) : (bits_mag_X - bits_mag_x); + U bits_mag = (sign_x == sign_y) ? (bits_mag_y + bits_mag_x) : (bits_mag_y - bits_mag_x); bits_mag = bits_mag >> 1; // Divide by 2 - // Reconstruct upl_mean from average magnitude and sign of X - return uint_to_float(bits_mag) * sign_X; + // Reconstruct upl_mean from average magnitude and sign of y + return uint_to_float(bits_mag) * sign_y; } }; // class Midpoint754 @@ -317,38 +317,49 @@ namespace Bisection { template class MidpointNon754 { private: + // NOTE: The Midpoint754 solver is faster than this solver and should be used when possible. + // To use the Midpoint754 solver, two criteria must be satisfied: + // 1. The type T must conform to the IEEE 754 standard and + // 2. There must exist a corresponding unsigned integer type with the same number of bits. + // Currently, this limits the use of the Midpoint754 solver to `float` and `double`. The + // lack of a 128 bit unsigned integer type prevents the Midpoint754 solver being used for + // `long double`. static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is float"); static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is double"); + // static_assert(!std::is_same::value -- (See NOTE above) public: - static T solve(T x, T X) { - const T sx = sign(x); - const T sX = sign(X); + static T solve(T x, T y) { + const T sign_x = sign(x); + const T sign_y = sign(y); // Sign flip return zero - if (sx * sX == -1) { return T(0.0); } + if (sign_x * sign_y == -1) { return T(0.0); } // At least one is positive - if (0 < sx + sX) { return do_solve(x, X); } + if (0 < sign_x + sign_y) { return do_solve(x, y); } // At least one is negative - return -do_solve(-x, -X); + return -do_solve(-x, -y); } private: struct EqZero { - EqZero(T x) { BOOST_MATH_ASSERT(x == 0 && "x must be zero."); } + EqZero(T x) { BOOST_MATH_ASSERT_MSG(x == 0, "x must be zero.");} }; struct EqInf { - EqInf(T x) { BOOST_MATH_ASSERT(x == static_cast(std::numeric_limits::infinity()) && "x must be infinity."); } + // NOTE: (x == infinity) is checked this way to support types (e.g., boost::math::concepts::real_concept) + // that have infinity, but for which this the query std::numeric_limits::infinity() returns + // 0 due to the std::numeric_limits interface not being implemented. + EqInf(T x) { BOOST_MATH_ASSERT_MSG(0 < x && !(boost::math::isfinite)(x), "x must be positive infinity."); } }; class PosFinite { public: PosFinite(T x) : x_(x) { - BOOST_MATH_ASSERT(0 < x && "x must be positive."); - BOOST_MATH_ASSERT(x < std::numeric_limits::infinity() && "x must be less than infinity."); + BOOST_MATH_ASSERT_MSG(0 < x, "x must be positive."); + BOOST_MATH_ASSERT_MSG((boost::math::isfinite)(x), "x must be finite."); } T value() const { return x_; } @@ -358,54 +369,54 @@ namespace Bisection { }; // Two unknowns - static T do_solve(T x, T X) { - if (X < x) { - return do_solve(X, x); + static T do_solve(T x, T y) { + if (y < x) { + return do_solve(y, x); } if (x == 0) { - return do_solve(EqZero(x), X); - } else if (x == static_cast(std::numeric_limits::infinity())) { - return static_cast(std::numeric_limits::infinity()); + return do_solve(EqZero(x), y); // Zero and ??? + } else if ((boost::math::isfinite)(x)) { + return do_solve(PosFinite(x), y); // Finite and ??? } else { - return do_solve(PosFinite(x), X); + return x; // Infinity and infinity } } // One unknowns - static T do_solve(EqZero x, T X) { - if (X == 0) { - return T(0.0); - } else if (X == static_cast(std::numeric_limits::infinity())) { - return T(1.0); + static T do_solve(EqZero x, T y) { + if (y == 0) { + return T(0.0); // Zero and zero + } else if ((boost::math::isfinite)(y)) { + return do_solve(x, PosFinite(y)); // Zero and finite } else { - return do_solve(x, PosFinite(X)); + return T(1.5); // Zero and infinity } } - static T do_solve(PosFinite x, T X) { - if (X == static_cast(std::numeric_limits::infinity())) { - return do_solve(x, EqInf(X)); + static T do_solve(PosFinite x, T y) { + if ((boost::math::isfinite)(y)) { + return do_solve(x, PosFinite(y)); // Finite and finite } else { - return do_solve(x, PosFinite(X)); + return do_solve(x, EqInf(y)); // Finite and infinity } } // Zero unknowns template static typename std::enable_if::is_specialized, T>::type - do_solve(PosFinite x, EqInf X) { + do_solve(PosFinite x, EqInf y) { return do_solve(x, PosFinite((std::numeric_limits::max)())); } template static typename std::enable_if::is_specialized, T>::type - do_solve(PosFinite x, EqInf X) { - BOOST_MATH_ASSERT(false && "infinite bounds support requires specialization."); - return static_cast(std::numeric_limits::signaling_NaN()); + do_solve(PosFinite x, EqInf y) { + BOOST_MATH_THROW_EXCEPTION(std::runtime_error("infinite bounds support requires specialization")); + return T(0); // Unreachable, but suppresses warnings } template static typename std::enable_if::is_specialized, U>::type - do_solve(EqZero x, PosFinite X) { + do_solve(EqZero x, PosFinite y) { const auto get_smallest_value = []() { const U denorm_min = std::numeric_limits::denorm_min(); if (denorm_min != 0) { return denorm_min; } @@ -413,40 +424,57 @@ namespace Bisection { const U min = (std::numeric_limits::min)(); if (min != 0) { return min; } - BOOST_MATH_ASSERT(false && "denorm_min and min are both zero."); - return static_cast(std::numeric_limits::signaling_NaN()); + BOOST_MATH_THROW_EXCEPTION(std::runtime_error("This type probably isn't actually specialized.")); + return T(0); // Unreachable, but suppresses warnings }; - return do_solve(PosFinite(get_smallest_value()), X); + return do_solve(PosFinite(get_smallest_value()), y); } template static typename std::enable_if::is_specialized, U>::type - do_solve(EqZero x, PosFinite X) { return X.value() / U(2); } - - static T do_solve(PosFinite x, PosFinite X) { - BOOST_MATH_ASSERT(x.value() <= X.value() && "x must be less than or equal to X."); + do_solve(EqZero x, PosFinite y) { + // This function quickly gets a value that is small relative to y. + const auto fn_appx_denorm_min = [](U y){ + T accumulator = T(0.5); + for (int i = 1; i < 100; ++i) { + if (y * accumulator == 0) { + return y; + } else { + y = y * accumulator; + accumulator = accumulator * T(0.5); + } + } + return y; + }; + + const U denorm_min = fn_appx_denorm_min(y.value()); + return do_solve(PosFinite(denorm_min), y); + } + + static T do_solve(PosFinite x, PosFinite y) { + BOOST_MATH_ASSERT_MSG(x.value() <= y.value(), "x must be less than or equal to y."); - const T xv = x.value(); - const T Xv = X.value(); + const T value_x = x.value(); + const T value_y = y.value(); // Take arithmetic mean if they are close enough - if (Xv < xv * 8) { return (Xv - xv) / 2 + xv; } // NOTE: avoids overflow + if (value_y < value_x * 8) { return (value_y - value_x) / 2 + value_x; } // NOTE: avoids overflow // Take geometric mean if they are far apart using std::sqrt; - return sqrt(xv) * sqrt(Xv); // NOTE: avoids overflow + return sqrt(value_x) * sqrt(value_y); // NOTE: avoids overflow } }; // class MidpointNon754 template - static T calc_midpoint(T x, T X) { - return MidpointNon754::solve(x, X); + static T calc_midpoint(T x, T y) { + return MidpointNon754::solve(x, y); } - static float calc_midpoint(float x, float X) { - return Midpoint754::solve(x, X); + static float calc_midpoint(float x, float y) { + return Midpoint754::solve(x, y); } - static double calc_midpoint(double x, double X) { - return Midpoint754::solve(x, X); + static double calc_midpoint(double x, double y) { + return Midpoint754::solve(x, y); } } // namespace Bisection diff --git a/test/test_roots.cpp b/test/test_roots.cpp index ce284e7a20..a9b5cf9590 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -27,6 +27,10 @@ #include #include +#if __has_include() +# include +#endif + #define BOOST_CHECK_CLOSE_EX(a, b, prec, i) \ {\ unsigned int failures = boost::unit_test::results_collector.results( boost::unit_test::framework::current_test_case().p_id ).p_assertions_failed;\ @@ -729,6 +733,15 @@ void test_bisect_all_cases() { test_bisect(); test_bisect(); test_bisect(); + +#if __has_include() +#ifdef __STDCPP_FLOAT32_T__ + test_bisect(); +#endif +#ifdef __STDCPP_FLOAT64_T__ + test_bisect(); +#endif +#endif } void test_count() { @@ -765,7 +778,7 @@ BOOST_AUTO_TEST_CASE( test_main ) test_count(); test_bisect_all_cases(); -#if 0 + // bug reports: boost::math::skew_normal_distribution<> dist(2.0, 1.0, -2.5); BOOST_CHECK(boost::math::isfinite(quantile(dist, 0.075))); @@ -787,5 +800,4 @@ BOOST_AUTO_TEST_CASE( test_main ) test_solve_complex_quadratic>(); #endif test_failures(); -#endif } From c2d2b767176129d78bacafa25fdbcf60b2952e74 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 7 Aug 2023 16:07:30 -0400 Subject: [PATCH 05/22] added missing header file --- include/boost/math/tools/roots.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index f4522f278d..fd30eaee6f 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include From 21dc82bdc678b1d857c18794c8c3b7ed60374d30 Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 18 Aug 2023 15:33:12 -0400 Subject: [PATCH 06/22] add __float128 support for Midpoint754 --- include/boost/math/tools/roots.hpp | 119 +++++++++++++++------ test/test_roots.cpp | 159 ++++++++++++++++++----------- 2 files changed, 183 insertions(+), 95 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index fd30eaee6f..2556881141 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -25,6 +25,11 @@ #include #include +#ifdef BOOST_HAS_FLOAT128 +#include +using boost::multiprecision::float128; +#endif + namespace boost { namespace math { namespace tools { @@ -273,6 +278,9 @@ namespace Bisection { static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); + // The sign bit is 1, all other bits are 0 + static constexpr U sign_mask() { return U(1) << (sizeof(U) * 8 - 1); } + // Convert float to bits static U float_to_uint(T x) { U bits; @@ -287,47 +295,73 @@ namespace Bisection { return x; } + // Avoids linking against libquadmath for __float128 + static T fabs_imp(T x) { + U bits_x = float_to_uint(x); + bits_x = bits_x & ~sign_mask(); // Zero the sign bit + return uint_to_float(bits_x); + } + public: static T solve(T x, T y) { - using std::fabs; - // Sort so that y has the larger magnitude - if (fabs(y) < fabs(x)) { + if (fabs_imp(y) < fabs_imp(x)) { std::swap(x, y); } - - const T x_mag = std::fabs(x); - const T y_mag = std::fabs(y); - const T sign_x = sign(x); - const T sign_y = sign(y); - // Convert the magnitudes to bits - U bits_mag_x = float_to_uint(x_mag); - U bits_mag_y = float_to_uint(y_mag); + const U bits_x = float_to_uint(x); + const U bits_y = float_to_uint(y); + + const U sign_x = bits_x & sign_mask(); + const U sign_y = bits_y & sign_mask(); + + const U mag_x = bits_x & ~sign_mask(); + const U mag_y = bits_y & ~sign_mask(); // Calculate the average magnitude in bits - U bits_mag = (sign_x == sign_y) ? (bits_mag_y + bits_mag_x) : (bits_mag_y - bits_mag_x); + U bits_mag = (sign_x == sign_y) ? (mag_y + mag_x) : (mag_y - mag_x); bits_mag = bits_mag >> 1; // Divide by 2 - // Reconstruct upl_mean from average magnitude and sign of y - return uint_to_float(bits_mag) * sign_y; + bits_mag = bits_mag | sign_y; // Make the sign of bits_mag match the sign of y + + return uint_to_float(bits_mag); + } + + // NOTE: boost::multiprecision::float128 is cast to __float128 + template + static V solve(V x, V y) { return solve(static_cast(x), static_cast(y)); } + + // Must evaluate to true in order to bisect correctly with infinity + // Ideally this should be a static assert. + static bool is_one_plus_max_bits_inf() { + const U bits_max = float_to_uint((std::numeric_limits::max)()); + const U bits_one_plus_max = bits_max + 1; + return uint_to_float(bits_one_plus_max) == std::numeric_limits::infinity(); } + + using type_float = T; // Used for unit tests }; // class Midpoint754 template class MidpointNon754 { private: - // NOTE: The Midpoint754 solver is faster than this solver and should be used when possible. - // To use the Midpoint754 solver, two criteria must be satisfied: - // 1. The type T must conform to the IEEE 754 standard and - // 2. There must exist a corresponding unsigned integer type with the same number of bits. - // Currently, this limits the use of the Midpoint754 solver to `float` and `double`. The - // lack of a 128 bit unsigned integer type prevents the Midpoint754 solver being used for - // `long double`. + // NOTE: The Midpoint754 solver should be used when possible because it is faster + // than this solver. The two criteria below must be satifsied to use the Midpoint754 + // solver: + // 1. The type T must conform to the IEEE 754 standard and + // 2. The following sequence of steps must produce `numeric_limits::infinity`. + // Start with `numeric_limits::max()`. Then reinterpret this value as an + // unsigned integer. Next add one to this unsigned integer. Finally reinterpret + // the result as type T. This result must equal infinity. + // The above two criteria are true for the following datatypes: `float`, `double`, + // and `__float128`. The 80 bit variation of `long double` does not satisfy the + // second criteria. static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is float"); static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is double"); - // static_assert(!std::is_same::value -- (See NOTE above) +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) + static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is __float128"); +#endif public: static T solve(T x, T y) { @@ -422,6 +456,9 @@ namespace Bisection { const U denorm_min = std::numeric_limits::denorm_min(); if (denorm_min != 0) { return denorm_min; } + // NOTE: the two lines below can be removed when + // https://github.com/boostorg/multiprecision/pull/562 + // gets merged. const U min = (std::numeric_limits::min)(); if (min != 0) { return min; } @@ -467,16 +504,32 @@ namespace Bisection { } }; // class MidpointNon754 - template - static T calc_midpoint(T x, T y) { - return MidpointNon754::solve(x, y); - } - static float calc_midpoint(float x, float y) { - return Midpoint754::solve(x, y); - } - static double calc_midpoint(double x, double y) { - return Midpoint754::solve(x, y); - } + // The purposes of this class is to not cause compiler warnings from unused functions. + class CalcMidpoint { + public: + template + static MidpointNon754 get_solver(T) { + return MidpointNon754(); + } + static Midpoint754 get_solver(float) { + return Midpoint754(); + } + static Midpoint754 get_solver(double) { + return Midpoint754(); + } +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) + static Midpoint754<__float128, boost::uint128_type> get_solver(__float128) { + return Midpoint754<__float128, boost::uint128_type>(); + } + static Midpoint754<__float128, boost::uint128_type> get_solver(boost::multiprecision::float128) { + return Midpoint754<__float128, boost::uint128_type>(); + } +#endif + template + static T calc_midpoint(T x, T y) { + return get_solver(x).solve(x, y); + } + }; } // namespace Bisection } // namespace detail @@ -549,7 +602,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& { // Last two steps haven't converged. const T x_other = (delta > 0) ? min : max; - delta = result - detail::Bisection::calc_midpoint(result, x_other); + delta = result - detail::Bisection::CalcMidpoint::calc_midpoint(result, x_other); // reset delta1/2 so we don't take this branch next time round: delta1 = 3 * delta; delta2 = 3 * delta; diff --git a/test/test_roots.cpp b/test/test_roots.cpp index a9b5cf9590..b5ef338820 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -27,6 +27,10 @@ #include #include +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + #if __has_include() # include #endif @@ -655,93 +659,124 @@ void test_failures() #endif } +// Creates a vector of test values that include: zero, denorm_min, min, epsilon, max, +// and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3. template -void test_bisect() { - // Creates a vector of test values that include: zero, denorm_min, min, epsilon, max, - // and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3. - const auto fn_create_test_ladder = [](){ - std::vector v; - - const auto fn_push_back_x_scaled = [&v](const T& x) { - const auto fn_add_if_non_zero_and_finite = [&v](const T& x) { - if (x != 0 && boost::math::isfinite(x)) { // Avoids most duplications - v.push_back(x); - } - }; - - const T two_thirds = T(2) / T(3); - const T four_thirds = T(4) / T(3); - - v.push_back(x); - fn_add_if_non_zero_and_finite(x * T(0.5)); - fn_add_if_non_zero_and_finite(x * two_thirds); - fn_add_if_non_zero_and_finite(x * four_thirds); - fn_add_if_non_zero_and_finite(x * T(2.0)); - }; - - const auto fn_push_back_pm = [&](std::vector& v, const T& x) { - fn_push_back_x_scaled( x); - fn_push_back_x_scaled(-x); - }; - - fn_push_back_pm(v, T(0.0)); - fn_push_back_pm(v, std::numeric_limits::denorm_min()); - fn_push_back_pm(v, (std::numeric_limits::min)()); - fn_push_back_pm(v, std::numeric_limits::epsilon()); - const int test_exp_range = 5; - for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) { - const int exponent = i * 3; - const T x = std::ldexp(1.0, exponent); - fn_push_back_pm(v, x); - } - fn_push_back_pm(v, (std::numeric_limits::max)()); +std::vector create_test_ladder() { + std::vector v; + + const auto fn_push_back_x_scaled = [&v](const T& x) { + const T two_thirds = T(2) / T(3); + const T four_thirds = T(4) / T(3); + + v.push_back(x); + v.push_back(x * T(0.5)); + v.push_back(x * two_thirds); + v.push_back(x * four_thirds); + v.push_back(x * T(2.0)); + }; - // Real_concept doesn't have infinity - if (std::numeric_limits::has_infinity) { - fn_push_back_pm(v, std::numeric_limits::infinity()); - } + const auto fn_push_back_pm = [&](std::vector& v, const T& x) { + fn_push_back_x_scaled( x); + fn_push_back_x_scaled(-x); + }; - std::sort(v.begin(), v.end()); + fn_push_back_pm(v, std::numeric_limits::denorm_min()); + fn_push_back_pm(v, (std::numeric_limits::min)()); + fn_push_back_pm(v, std::numeric_limits::epsilon()); - const int num_zeros = std::count(v.begin(), v.end(), T(0.0)); - BOOST_CHECK_LE(2, num_zeros); // Positive and negative zero + const int test_exp_range = 5; + for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) { + const int exponent = i * 3; + const T x = std::ldexp(1.0, exponent); + fn_push_back_pm(v, x); + } + fn_push_back_pm(v, (std::numeric_limits::max)()); + fn_push_back_pm(v, std::numeric_limits::infinity()); - return v; - }; + // Take unique elements of v + std::sort(v.begin(), v.end()); + v.erase(std::unique(v.begin(), v.end()), v.end()); + + // Add both positive and negative zero + v.push_back(T(-0.0)); + v.push_back(T(+0.0)); + + const int num_zeros = std::count(v.begin(), v.end(), T(0.0)); + BOOST_CHECK_LE(2, num_zeros); // Check for both positive and negative zero + return v; +}; + +template +void test_bisect_non(S solver) { // Test for all combinations from the ladder - auto v = fn_create_test_ladder(); + auto v = create_test_ladder(); + for (const T& x_i: v) { for (const T& x_j: v) { - const T x_mid_ij = boost::math::tools::detail::Bisection::calc_midpoint(x_i, x_j); + const T x_mid_ij = solver.solve(x_i, x_j); const T x_lo = (std::min)(x_i, x_j); const T x_hi = (std::max)(x_i, x_j); - BOOST_CHECK_LE(x_lo, x_mid_ij); - BOOST_CHECK_LE(x_mid_ij, x_hi); + BOOST_CHECK(x_lo <= x_mid_ij); // NOTE: BOOST_CHECK_LE(x_lo, x_mid_ij) fails to link + BOOST_CHECK(x_mid_ij <= x_hi); // NOTE: BOOST_CHECK_LE(x_mid_ij, x_hi) fails to link } } } +template +void test_bisect_non() { + const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused + test_bisect_non(solver); +} + +template +void test_bisect_754(S solver) { + BOOST_MATH_ASSERT(solver.is_one_plus_max_bits_inf()); // Catch infinity misbehavior for 80 bit `long double` + // before it causes downstream failures + test_bisect_non(solver); +} + +template +void test_bisect_754() { + const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused + test_bisect_754(solver); +} + +void test_long_double_fail() { + const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); + BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); +} + void test_bisect_all_cases() { - test_bisect(); - test_bisect(); - test_bisect(); - test_bisect(); - test_bisect(); - test_bisect(); - test_bisect(); - test_bisect(); + test_bisect_754(); + test_bisect_754(); + test_bisect_non(); + test_bisect_non(); + test_bisect_non(); + test_bisect_non(); + test_bisect_non(); + test_bisect_non(); + +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) + test_bisect_754(); + test_bisect_754<__float128>(); +#endif #if __has_include() #ifdef __STDCPP_FLOAT32_T__ - test_bisect(); + test_bisect_754(); #endif #ifdef __STDCPP_FLOAT64_T__ - test_bisect(); + test_bisect_754(); #endif #endif + +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) + test_long_double_fail(); +#endif } void test_count() { From 21d93748db59086a9a8fd197fe9fb8be7ac82fae Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 18 Aug 2023 16:01:18 -0400 Subject: [PATCH 07/22] fixed compile issue due incorrect macro usage --- test/test_roots.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/test_roots.cpp b/test/test_roots.cpp index b5ef338820..202263fe4a 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -745,11 +745,6 @@ void test_bisect_754() { test_bisect_754(solver); } -void test_long_double_fail() { - const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); - BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); -} - void test_bisect_all_cases() { test_bisect_754(); test_bisect_754(); @@ -775,7 +770,8 @@ void test_bisect_all_cases() { #endif #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) - test_long_double_fail(); + const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); + BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); #endif } From 2f60168b7b745ea36d98604b48dc6eea3e96a2ad Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 18 Aug 2023 17:15:19 -0400 Subject: [PATCH 08/22] fix unsigned __int128 isn't unsigend config issue --- test/test_roots.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 202263fe4a..3d40785527 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -770,8 +770,10 @@ void test_bisect_all_cases() { #endif #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) - const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); - BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); + if (std::is_unsigned::value) { + const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); + BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); + } #endif } From a803be222e29b56eaf74bf98176c8d36272534d4 Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 18 Aug 2023 21:34:14 -0400 Subject: [PATCH 09/22] compile issue fix attempt 3 --- test/test_roots.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 3d40785527..0b2742606d 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -21,6 +21,7 @@ #include "table_type.hpp" #include #include +#include #include #include @@ -745,6 +746,19 @@ void test_bisect_754() { test_bisect_754(solver); } +template +struct CompileTimeLongDoubleChecker { + static void check() {} +}; + +template <> +struct CompileTimeLongDoubleChecker { + static void check() { + const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); + BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); + } +}; + void test_bisect_all_cases() { test_bisect_754(); test_bisect_754(); @@ -770,10 +784,9 @@ void test_bisect_all_cases() { #endif #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) - if (std::is_unsigned::value) { - const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); - BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); - } + // NOTE: this is necessary because BOOST_HAS_INT128 deing befined does not + // guarantee that std::is_unsigned::value is true + CompileTimeLongDoubleChecker::value>::check(); #endif } From 7acf90cd95da7631c4fb9cc38edf993f4f129129 Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 18 Aug 2023 23:55:57 -0400 Subject: [PATCH 10/22] pound if zero out compiler problem --- include/boost/math/tools/roots.hpp | 3 +++ test/test_roots.cpp | 16 ++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 2556881141..f2682ae0d2 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -309,12 +309,15 @@ namespace Bisection { std::swap(x, y); } + // Recast as unsigned integers const U bits_x = float_to_uint(x); const U bits_y = float_to_uint(y); + // Get just the sign bit const U sign_x = bits_x & sign_mask(); const U sign_y = bits_y & sign_mask(); + // Get everything but the sign bit const U mag_x = bits_x & ~sign_mask(); const U mag_y = bits_y & ~sign_mask(); diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 0b2742606d..50298ff7d5 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -784,9 +784,21 @@ void test_bisect_all_cases() { #endif #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) - // NOTE: this is necessary because BOOST_HAS_INT128 deing befined does not - // guarantee that std::is_unsigned::value is true + // CI says this fails to compile sometimes because the static_assert + // in `Midpoint754` that `std::is_unsigned::value is + // true is not met. This occurs despite the preprocessor assertion that + // `BOOST_HAS_INT128` is defined and being compile time dispatched based + // on `std::is_unsigned::value` being true. I'm not + // sure why this occurs. + // + // That being said, this test is mostly pedogogical. It shows that + // `long double` does not possess the property that increasing values in + // bitspace result in increasing float values. It is not required for + // correctness, only for completeness. If you can compile this locally + // this test should pass. +#if 0 CompileTimeLongDoubleChecker::value>::check(); +#endif #endif } From 0708745819346e00e34c771fc14b04e6a27131a4 Mon Sep 17 00:00:00 2001 From: Ryan Date: Sat, 19 Aug 2023 00:36:20 -0400 Subject: [PATCH 11/22] preprocessor out again --- test/test_roots.cpp | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 50298ff7d5..900e527367 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -746,19 +746,6 @@ void test_bisect_754() { test_bisect_754(solver); } -template -struct CompileTimeLongDoubleChecker { - static void check() {} -}; - -template <> -struct CompileTimeLongDoubleChecker { - static void check() { - const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); - BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); - } -}; - void test_bisect_all_cases() { test_bisect_754(); test_bisect_754(); @@ -787,9 +774,7 @@ void test_bisect_all_cases() { // CI says this fails to compile sometimes because the static_assert // in `Midpoint754` that `std::is_unsigned::value is // true is not met. This occurs despite the preprocessor assertion that - // `BOOST_HAS_INT128` is defined and being compile time dispatched based - // on `std::is_unsigned::value` being true. I'm not - // sure why this occurs. + // `BOOST_HAS_INT128` is defined. I'm not sure why this occurs. // // That being said, this test is mostly pedogogical. It shows that // `long double` does not possess the property that increasing values in @@ -797,7 +782,8 @@ void test_bisect_all_cases() { // correctness, only for completeness. If you can compile this locally // this test should pass. #if 0 - CompileTimeLongDoubleChecker::value>::check(); + const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); + BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); #endif #endif } From 19bbf76456952332768d2f0398ef926277294795 Mon Sep 17 00:00:00 2001 From: Ryan Date: Sat, 19 Aug 2023 15:35:05 -0400 Subject: [PATCH 12/22] duck type dispatch to Midpoint754 --- include/boost/math/tools/roots.hpp | 157 ++++++++++++++++++++++------- test/test_roots.cpp | 21 ++-- 2 files changed, 130 insertions(+), 48 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index f2682ae0d2..f6c6615cc5 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -240,6 +240,33 @@ inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_ // namespace detail { namespace Bisection { + // Check if T has a member function named "value" + template + class has_value_member + { + template + static auto test(int) -> decltype(std::declval().value(), std::true_type()); + + template + static std::false_type test(...); + + public: + static constexpr bool value = decltype(test(0))::value; + }; + + // + class StaticCast { + public: + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x.value()); + } + + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x); + } + }; ////// The Midpoint754 class ////// // @@ -250,12 +277,12 @@ namespace Bisection { // For all values above x_solution f(x) is +1. The best way to root find // this problem is to bisect in bit space. // - // Efficient bit space bisection is possible because of the IEEE 754 standard. - // According to the standard, the bits in floating point numbers are partitioned - // into three parts: sign, exponent, and mantissa. As long as the sign of the - // of the number stays the same, increasing numbers in bit space have increasing - // floating point values starting at zero, and ending at infinity! The table - // below shows select numbers for float (single precision). + // Efficient bit space bisection is possible for floating point types whose + // representation in memory is partitioned into three parts: sign, exponent, + // and mantissa. As long as the sign of the of the number stays the same, + // increasing numbers in bit space have increasing floating point values + // starting at zero, and ending at infinity! The table below shows select + // numbers for float (single precision). // // 0 | 0 00000000 00000000000000000000000 | positive zero // 1.4013e-45 | 0 00000000 00000000000000000000001 | std::numeric_limits::denorm_min() @@ -269,6 +296,11 @@ namespace Bisection { // Negative values are similar, but the sign bit is set to 1. By keeping track of the possible // sign flip, it can bisect numbers with different signs. // + // Other floating point types that share this memory representation include 64 + // and 128 bit floating point types. The 80 bit variation of `long double` does + // not share this memory representation see: + // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format + // template class Midpoint754 { private: @@ -330,16 +362,31 @@ namespace Bisection { return uint_to_float(bits_mag); } - // NOTE: boost::multiprecision::float128 is cast to __float128 + template + static V solve(V x, V y) { + return solve(StaticCast::value(x), StaticCast::value(y)); + } + +#if 0 + // NOTE: needed to cast boost::multiprecision::float128 to __float128 template static V solve(V x, V y) { return solve(static_cast(x), static_cast(y)); } - // Must evaluate to true in order to bisect correctly with infinity - // Ideally this should be a static assert. + // NOTE: needed to cast boost::math::concepts::real_concept to T for real_concept + static boost::math::concepts::real_concept solve(boost::math::concepts::real_concept x, + boost::math::concepts::real_concept y) { + return solve(x.value(), y.value()); + } +#endif + + // In order to bisect correctly with infinity, this function must return true. + // + // NOTE: Ideally this should be a static assert, but I don't see a way to + // emulate the memcpy operation at compile time. static bool is_one_plus_max_bits_inf() { const U bits_max = float_to_uint((std::numeric_limits::max)()); - const U bits_one_plus_max = bits_max + 1; - return uint_to_float(bits_one_plus_max) == std::numeric_limits::infinity(); + const U bits_inf = float_to_uint(std::numeric_limits::infinity()); + return bits_max + 1 == bits_inf; } using type_float = T; // Used for unit tests @@ -350,16 +397,8 @@ namespace Bisection { class MidpointNon754 { private: // NOTE: The Midpoint754 solver should be used when possible because it is faster - // than this solver. The two criteria below must be satifsied to use the Midpoint754 - // solver: - // 1. The type T must conform to the IEEE 754 standard and - // 2. The following sequence of steps must produce `numeric_limits::infinity`. - // Start with `numeric_limits::max()`. Then reinterpret this value as an - // unsigned integer. Next add one to this unsigned integer. Finally reinterpret - // the result as type T. This result must equal infinity. - // The above two criteria are true for the following datatypes: `float`, `double`, - // and `__float128`. The 80 bit variation of `long double` does not satisfy the - // second criteria. + // than this solver. The Midpoint754 solver supports 32, 64, and 128 bit floats. + // The 80 bit `long double` type is not supported for the reasons described above. static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is float"); static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is double"); #if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) @@ -507,30 +546,76 @@ namespace Bisection { } }; // class MidpointNon754 - // The purposes of this class is to not cause compiler warnings from unused functions. - class CalcMidpoint { + // NOTE: `float` and `_Float32` are not type aliases + class IsFloat32 { public: template - static MidpointNon754 get_solver(T) { - return MidpointNon754(); + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 24 && // Mantissa has 23 bits + 1 implicit bit + sizeof(T) == 4; } - static Midpoint754 get_solver(float) { - return Midpoint754(); + }; + + // NOTE: `double` and `_Float64` are not type aliases + class IsFloat64 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 53 && // Mantissa has 52 bits + 1 implicit bit + sizeof(T) == 8; } - static Midpoint754 get_solver(double) { - return Midpoint754(); + }; + + // NOTE: 128 bit `long double` and `_Float128` are not type aliases + class IsFloat128 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 113 && // Mantissa has 112 bits + 1 implicit bit + sizeof(T) == 16; } + }; + + // This class prevents compiler warnings from unused functions. + class CalcMidpoint { + public: + // IsFloat32 --> Midpoint754 + template + static typename std::enable_if(), Midpoint754>::type + get_solver() { return Midpoint754(); } + + // IsFloat64 --> Midpoint754 + template + static typename std::enable_if(), Midpoint754>::type + get_solver() { return Midpoint754(); } + + // IsFloat128 --> Midpoint754 #if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) - static Midpoint754<__float128, boost::uint128_type> get_solver(__float128) { - return Midpoint754<__float128, boost::uint128_type>(); - } - static Midpoint754<__float128, boost::uint128_type> get_solver(boost::multiprecision::float128) { - return Midpoint754<__float128, boost::uint128_type>(); - } + // NOTE: returns Midpoint754<__float128, ...> instead of Midpoint754 because + // in order to work with boost::multiprecision::float128, which is a wrapper + // around __float128. + template + static typename std::enable_if(), Midpoint754<__float128, boost::uint128_type>>::type + get_solver() { return Midpoint754<__float128, boost::uint128_type>(); } #endif + + // Default --> *** MidpointNon754 *** + template + static typename std::enable_if() && + !IsFloat64::value() && + !IsFloat128::value(), MidpointNon754>::type + get_solver() { return MidpointNon754(); } + + // NOTE: template static T calc_midpoint(T x, T y) { - return get_solver(x).solve(x, y); + return get_solver().solve(x, y); } }; diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 900e527367..f6041fc68e 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -710,7 +710,7 @@ std::vector create_test_ladder() { }; template -void test_bisect_non(S solver) { +void test_bisect(S solver) { // Test for all combinations from the ladder auto v = create_test_ladder(); @@ -729,21 +729,18 @@ void test_bisect_non(S solver) { template void test_bisect_non() { - const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused - test_bisect_non(solver); -} - -template -void test_bisect_754(S solver) { - BOOST_MATH_ASSERT(solver.is_one_plus_max_bits_inf()); // Catch infinity misbehavior for 80 bit `long double` - // before it causes downstream failures - test_bisect_non(solver); + const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(); + test_bisect(solver); } template void test_bisect_754() { - const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused - test_bisect_754(solver); + const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(); + BOOST_MATH_ASSERT(solver.is_one_plus_max_bits_inf()); // Catch infinity misbehavior for 80 bit `long double` + // before it causes downstream failures + // We need to use the float type associated with the solver to avoid needing to link with libquadmath + using S = decltype(solver); + test_bisect(solver); } void test_bisect_all_cases() { From 752a1c5b561056b2d5be518651e0aff73ababd92 Mon Sep 17 00:00:00 2001 From: Ryan Date: Sat, 19 Aug 2023 16:42:16 -0400 Subject: [PATCH 13/22] fix SFINAE type oversight --- include/boost/math/tools/roots.hpp | 29 ++++++++++++++--------------- test/test_roots.cpp | 1 - 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index f6c6615cc5..103af6421e 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -367,18 +367,6 @@ namespace Bisection { return solve(StaticCast::value(x), StaticCast::value(y)); } -#if 0 - // NOTE: needed to cast boost::multiprecision::float128 to __float128 - template - static V solve(V x, V y) { return solve(static_cast(x), static_cast(y)); } - - // NOTE: needed to cast boost::math::concepts::real_concept to T for real_concept - static boost::math::concepts::real_concept solve(boost::math::concepts::real_concept x, - boost::math::concepts::real_concept y) { - return solve(x.value(), y.value()); - } -#endif - // In order to bisect correctly with infinity, this function must return true. // // NOTE: Ideally this should be a static assert, but I don't see a way to @@ -603,13 +591,24 @@ namespace Bisection { template static typename std::enable_if(), Midpoint754<__float128, boost::uint128_type>>::type get_solver() { return Midpoint754<__float128, boost::uint128_type>(); } + + template + static constexpr bool use_default_solver() { + return !IsFloat32::value() && + !IsFloat64::value() && + !IsFloat128::value(); + } +#else + template + static constexpr bool use_default_solver() { + return !IsFloat32::value() && + !IsFloat64::value(); + } #endif // Default --> *** MidpointNon754 *** template - static typename std::enable_if() && - !IsFloat64::value() && - !IsFloat128::value(), MidpointNon754>::type + static typename std::enable_if(), MidpointNon754>::type get_solver() { return MidpointNon754(); } // NOTE: diff --git a/test/test_roots.cpp b/test/test_roots.cpp index f6041fc68e..2458bdd978 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -19,7 +19,6 @@ #include #include #include "table_type.hpp" -#include #include #include From 777d64adebdf427acd58cdcd546cedf98b99619b Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 18:21:20 -0400 Subject: [PATCH 14/22] refactored ieee754_linear tools into new file --- include/boost/math/tools/ieee754_linear.hpp | 366 ++++++++++++++++++++ include/boost/math/tools/roots.hpp | 320 +++++------------ test/Jamfile.v2 | 1 + test/ieee754_linear_test.cpp | 81 +++++ test/test_roots.cpp | 96 +++-- 5 files changed, 584 insertions(+), 280 deletions(-) create mode 100644 include/boost/math/tools/ieee754_linear.hpp create mode 100644 test/ieee754_linear_test.cpp diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp new file mode 100644 index 0000000000..5e1a8efab2 --- /dev/null +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -0,0 +1,366 @@ +#ifndef BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP +#define BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + + +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + + +namespace boost { +namespace math { +namespace tools { +namespace detail { +namespace ieee754_linear { + + // The `IsFloat32` class contains a static constexpr method `value()` that + // returns true if the type T is a 32 bit floating point type. This duck type + // test for 32 bit float types returns true for the following types: + // - `float` + // - `_Float32` (NOTE: not a type alias for `float`) + // - `std_real_concept` when emulating a 32 bit float with EMULATE32 + // - other types that seem to be 32 bit floats + class IsFloat32 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 24 && // Mantissa has 23 bits + 1 implicit bit + sizeof(T) == 4; + } + }; + + // The `IsFloat64` class contains a static constexpr method `value()` that + // returns true if the type T is a 64 bit floating point type. This duck type + // test for 64 bit float types returns true for the following types: + // - `double` + // - `_Float64` (NOTE: not a type alias for `double`) + // - `std_real_concept` when emulating a 64 bit float with EMULATE64 + // - other types that seem to be 64 bit floats + class IsFloat64 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 53 && // Mantissa has 52 bits + 1 implicit bit + sizeof(T) == 8; + } + }; + + // The `IsFloat128` class contains a static constexpr method `value()` that + // returns true if the type T is a 128 bit floating point type. This duck type + // test for 128 bit float types returns true for the following types: + // - `__float128` + // - `_Float128` (NOTE: not a type alias for `__float128`) + // - `std_real_concept` when emulating a 128 bit float with EMULATE128 + // - other types that seem to be 128 bit floats + class IsFloat128 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 113 && // Mantissa has 112 bits + 1 implicit bit + sizeof(T) == 16; + } + }; + + // The `Layout_IEEE754Linear` class represents IEEE 754 floating point types + // for which increasing float values (with the same sign) have increasing + // values in bit space. And for which there is a corresponding unsigned integer + // type U that has the same number of bits as T. Types that satisfy these + // conditions include 32, 64, and 128 bit floats. The table below shows select + // numbers for `float` (single precision). + // + // 0 | 0 00000000 00000000000000000000000 | positive zero + // 1.4013e-45 | 0 00000000 00000000000000000000001 | std::numeric_limits::denorm_min() + // 1.17549e-38 | 0 00000001 00000000000000000000000 | std::numeric_limits::min() + // 1.19209e-07 | 0 01101000 00000000000000000000000 | std::numeric_limits::epsilon() + // 1 | 0 01111111 00000000000000000000000 | positive one + // 3.40282e+38 | 0 11111110 11111111111111111111111 | std::numeric_limits::max() + // inf | 0 11111111 00000000000000000000000 | std::numeric_limits::infinity() + // nan | 0 11111111 10000000000000000000000 | std::numeric_limits::quiet_NaN() + // + // NOTE: the 80 bit `long double` is not "linear" due to the "integer part". See: + // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format + // + template + class Layout_IEEE754Linear { + static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); + static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); + static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); + + public: + using type_float = T; // The linear floating point type + }; + + // The `Layout_Unspecified` class represents floating point types for which + // are not supported by the `Layout_IEEE754Linear` class. + template + class Layout_Unspecified { + public: + using type_float = T; // The linear floating point type + }; + + // The `LayoutIdentifier` class identifies the layout type for a floating + // point type T. The layout type is one of the following: + // - `Layout_IEEE754Linear` for 32, 64, and 128 bit floats + // - `Layout_Unspecified` for other types + class LayoutIdentifier { + public: + // Layout: 32 bit linear + template + static typename std::enable_if(), Layout_IEEE754Linear>::type + get_layout() { return Layout_IEEE754Linear(); } + + // Layout: 64 bit linear + template + static typename std::enable_if(), Layout_IEEE754Linear>::type + get_layout() { return Layout_IEEE754Linear(); } + + // Layout: 128 bit linear +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) + // NOTE: returns Layout_IEEE754Linear<__float128, ...> instead of Layout_IEEE754Linear + // in order to work with boost::multiprecision::float128, which is a wrapper + // around __float128. + template + static typename std::enable_if(), Layout_IEEE754Linear<__float128, boost::uint128_type>>::type + get_layout() { return Layout_IEEE754Linear<__float128, boost::uint128_type>(); } + + template + static constexpr bool is_layout_nonlinear() { + return !IsFloat32::value() && + !IsFloat64::value() && + !IsFloat128::value(); + } +#else + template + static constexpr bool is_layout_nonlinear() { + return !IsFloat32::value() && + !IsFloat64::value(); + } +#endif + + // Layout: unspecified + template + static typename std::enable_if(), Layout_Unspecified>::type + get_layout() { return Layout_Unspecified(); } + }; + + // Base class for the `BitsInflated` and `BitsDeflated` classes. + // + // This class stores the bit values of a floating point type `T` as a + // sign bit as a unsigned integer magnitude. For example, a floating + // point type with 50 discrete values (zero is counted twice) between + // -Inf and +Inf is shown below. + // + // -24 -20 -16 -12 -8 -4 0 4 8 12 16 20 24 + // . . . . . . . . . . . . . + // |-----------------------|-----------------------| + // -Inf 0 +Inf + // + template + class BitsFromZero: public Layout_IEEE754Linear { + public: + bool sign_bit() const { return sign_bit_; } + const U& mag() const { return mag_; } + U& mag() { return mag_; } + + protected: + BitsFromZero(const bool sign, const U mag) : sign_bit_(sign), mag_(mag) {} + + BitsFromZero(const T x) { + // The sign bit is 1, all other bits are 0 + constexpr U bits_sign_mask = U(1) << (sizeof(U) * 8 - 1); + + U bits_; + std::memcpy(&bits_, &x, sizeof(U)); + sign_bit_ = bits_ & bits_sign_mask; + mag_ = bits_ & ~bits_sign_mask; + } + + void flip_sign_bit() { sign_bit_ = !sign_bit_; } + + private: + bool sign_bit_; + U mag_; + }; + + // The inflated bitspace representation of a floating point type `T`. + // + // This representation always includes denormal numbers, even if denorm + // support is turned off. For example, a floating point type with 50 + // discrete values (zero is counted twice) between -Inf and +Inf is shown + // below with *** marking denormal numbers. + // + // -24 -20 -16 -12 -8 -4 0 4 8 12 16 20 24 + // . . . . . . . . . . . . . + // |-------------------|***|***|-------------------| + // -Inf 0 +Inf + // + template + class BitsInflated : public BitsFromZero { + public: + BitsInflated(const T x) : BitsFromZero(x) {} + BitsInflated(const bool sign, const U mag) : BitsFromZero(sign, mag) {} + + T reinterpret_as_float() const { + T f_out; + std::memcpy(&f_out, &this->mag(), sizeof(T)); + return this->sign_bit() ? -f_out : f_out; + } + + void divide_by_2() { this->mag() >>= 1; } + }; + + // The deflated bitspace representation of a floating point type `T`. + // + // Denorm Support ON: + // This representation is identical to the inflated representation. + // + // Denorm Support OFF: + // This representation shifts the bitspace representation toward `0` + // to remove gaps in the bitspace caused by denormal numbers. For + // example, consider a floating point type with 50 discrete values + // (zero is counted twice) between -Inf and +Inf is shown below with + // *** marking denormal numbers shown below. + // + // Inflated representation: + // -24 -20 -16 -12 -8 -4 0 4 8 12 16 20 24 + // . . . . . . . . . . . . . + // |-------------------|***|***|-------------------| + // -Inf 0 +Inf + // _________________________________________________________________ + // + // Deflated representation: + // -24 -20 -16 -12 -8 -4 0 4 8 12 16 20 24 + // . . . . . . . . . . . . . + // |-------------------|||-------------------| + // -Inf 0 +Inf + // + template + class BitsDeflated : public BitsFromZero { + public: + BitsDeflated(const bool sign, const U mag) : BitsFromZero(sign, mag) {} + + T static_cast_int_value_to_float() const { + T mag_float = static_cast(this->mag()); + return this->sign_bit() ? -mag_float : mag_float; + } + + // Move over `n` in bitspace + BitsDeflated operator+(int n) const { + return BitsDeflated(this->sign_bit(), this->mag() + n); + } + + BitsDeflated operator-(const BitsDeflated& y) const { + auto y_copy = y; + y_copy.flip_sign_bit(); + return *this + y_copy; + } + + BitsDeflated operator+(const BitsDeflated& y) const { + // Gaurantee that y has the larger magnitude + if (y.mag() < this->mag()) { return y + *this; } + + // Call *this x + const BitsDeflated& x = *this; + + const U mag_x = x.mag(); + const U mag_y = y.mag(); + + // Calculate the deflated sum in bits (always positive) + U bits_sum = (x.sign_bit() == y.sign_bit()) ? (mag_y + mag_x) + : (mag_y - mag_x); + + // Sum always has the sign of the bigger magnitude (y) + return BitsDeflated(y.sign_bit(), bits_sum); + } + + BitsDeflated& operator>>(const int n) { + this->mag() >>= n; + return *this; + } + }; + + // The `Denormflator` converts between inflated and deflated bitspace representations + // to compensate for gaps in the bitspace caused by denormal numbers. The `deflate` + // operation shifts the bitspace representation toward `0`. The `inflate` operation + // shifts the bitspace representation away from `0`. These operations only have + // an effect if denorm support is turned off. If denorm support is turned on, then + // the shift is zero. The effect of these operations is illustrated in various + // cartoons below. + // + template + class Denormflator : public Layout_IEEE754Linear { + public: + Denormflator() : has_denorm_(calc_has_denorm()) {} + + BitsDeflated deflate(const BitsInflated& bit_view) const { + const U penalty = bits_denorm_shift(); + const U mag_un = bit_view.mag(); + const U mag_sh = (has_denorm_ | (mag_un < penalty)) ? mag_un : mag_un - penalty; + return BitsDeflated(bit_view.sign_bit(), mag_sh); + } + + BitsInflated inflate(const BitsDeflated& b) const { + const U mag = b.mag(); + const U mag_inflated = (has_denorm_ | (mag == 0)) ? mag : mag + bits_denorm_shift(); + return BitsInflated(b.sign_bit(), mag_inflated); + } + + private: + // Denorm penalty + static constexpr U bits_denorm_shift() { return (U(1) << (std::numeric_limits::digits - 1)) - 1; } + + static bool calc_has_denorm() { + return boost::math::detail::get_smallest_value() != (std::numeric_limits::min)(); + } + + bool has_denorm_; + }; + + // Check if T has a member function named "value". + template + class has_value_member + { + template + static auto test(int) -> decltype(std::declval().value(), std::true_type()); + + template + static std::false_type test(...); + + public: + static constexpr bool value = decltype(test(0))::value; + }; + + // Allows one to static_cast from ‘boost::math::concepts::std_real_concept’ to + // type ‘__float128’ + class StaticCast { + public: + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x.value()); + } + + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x); + } + }; + +} // namespace ieee754_linear +} // namespace detail +} // namespace tools +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP \ No newline at end of file diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 103af6421e..5e77306b32 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -240,159 +241,9 @@ inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_ // namespace detail { namespace Bisection { - // Check if T has a member function named "value" + // Midpoint calculation for floating point types that are not `Layout_IEEE754Linear` template - class has_value_member - { - template - static auto test(int) -> decltype(std::declval().value(), std::true_type()); - - template - static std::false_type test(...); - - public: - static constexpr bool value = decltype(test(0))::value; - }; - - // - class StaticCast { - public: - template - static typename std::enable_if::value, T>::type value(V x) { - return static_cast(x.value()); - } - - template - static typename std::enable_if::value, T>::type value(V x) { - return static_cast(x); - } - }; - - ////// The Midpoint754 class ////// - // - // On a conceptual level, this class is designed to solve the following root - // finding problem. - // - A function f(x) has a single root x_solution somewhere in the interval - // [-infinity, +infinity]. For all values below x_solution f(x) is -1. - // For all values above x_solution f(x) is +1. The best way to root find - // this problem is to bisect in bit space. - // - // Efficient bit space bisection is possible for floating point types whose - // representation in memory is partitioned into three parts: sign, exponent, - // and mantissa. As long as the sign of the of the number stays the same, - // increasing numbers in bit space have increasing floating point values - // starting at zero, and ending at infinity! The table below shows select - // numbers for float (single precision). - // - // 0 | 0 00000000 00000000000000000000000 | positive zero - // 1.4013e-45 | 0 00000000 00000000000000000000001 | std::numeric_limits::denorm_min() - // 1.17549e-38 | 0 00000001 00000000000000000000000 | std::numeric_limits::min() - // 1.19209e-07 | 0 01101000 00000000000000000000000 | std::numeric_limits::epsilon() - // 1 | 0 01111111 00000000000000000000000 | positive one - // 3.40282e+38 | 0 11111110 11111111111111111111111 | std::numeric_limits::max() - // inf | 0 11111111 00000000000000000000000 | std::numeric_limits::infinity() - // nan | 0 11111111 10000000000000000000000 | std::numeric_limits::quiet_NaN() - // - // Negative values are similar, but the sign bit is set to 1. By keeping track of the possible - // sign flip, it can bisect numbers with different signs. - // - // Other floating point types that share this memory representation include 64 - // and 128 bit floating point types. The 80 bit variation of `long double` does - // not share this memory representation see: - // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format - // - template - class Midpoint754 { - private: - // Does the bisection in bit space for IEEE 754 floating point numbers. - // Infinities are allowed. It's assumed that neither x nor y is NaN. - static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); - static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); - static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); - - // The sign bit is 1, all other bits are 0 - static constexpr U sign_mask() { return U(1) << (sizeof(U) * 8 - 1); } - - // Convert float to bits - static U float_to_uint(T x) { - U bits; - std::memcpy(&bits, &x, sizeof(U)); - return bits; - } - - // Convert bits to float - static T uint_to_float(U bits) { - T x; - std::memcpy(&x, &bits, sizeof(T)); - return x; - } - - // Avoids linking against libquadmath for __float128 - static T fabs_imp(T x) { - U bits_x = float_to_uint(x); - bits_x = bits_x & ~sign_mask(); // Zero the sign bit - return uint_to_float(bits_x); - } - - public: - static T solve(T x, T y) { - // Sort so that y has the larger magnitude - if (fabs_imp(y) < fabs_imp(x)) { - std::swap(x, y); - } - - // Recast as unsigned integers - const U bits_x = float_to_uint(x); - const U bits_y = float_to_uint(y); - - // Get just the sign bit - const U sign_x = bits_x & sign_mask(); - const U sign_y = bits_y & sign_mask(); - - // Get everything but the sign bit - const U mag_x = bits_x & ~sign_mask(); - const U mag_y = bits_y & ~sign_mask(); - - // Calculate the average magnitude in bits - U bits_mag = (sign_x == sign_y) ? (mag_y + mag_x) : (mag_y - mag_x); - bits_mag = bits_mag >> 1; // Divide by 2 - - bits_mag = bits_mag | sign_y; // Make the sign of bits_mag match the sign of y - - return uint_to_float(bits_mag); - } - - template - static V solve(V x, V y) { - return solve(StaticCast::value(x), StaticCast::value(y)); - } - - // In order to bisect correctly with infinity, this function must return true. - // - // NOTE: Ideally this should be a static assert, but I don't see a way to - // emulate the memcpy operation at compile time. - static bool is_one_plus_max_bits_inf() { - const U bits_max = float_to_uint((std::numeric_limits::max)()); - const U bits_inf = float_to_uint(std::numeric_limits::infinity()); - return bits_max + 1 == bits_inf; - } - - using type_float = T; // Used for unit tests - }; // class Midpoint754 - - - template - class MidpointNon754 { - private: - // NOTE: The Midpoint754 solver should be used when possible because it is faster - // than this solver. The Midpoint754 solver supports 32, 64, and 128 bit floats. - // The 80 bit `long double` type is not supported for the reasons described above. - static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is float"); - static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is double"); -#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) - static_assert(!std::is_same::value, "Need to use Midpoint754 solver when T is __float128"); -#endif - + class MidpointFallback { public: static T solve(T x, T y) { const T sign_x = sign(x); @@ -532,92 +383,113 @@ namespace Bisection { using std::sqrt; return sqrt(value_x) * sqrt(value_y); // NOTE: avoids overflow } - }; // class MidpointNon754 + }; // class MidpointFallback - // NOTE: `float` and `_Float32` are not type aliases - class IsFloat32 { + // Calculates the midpoint in bitspace for numbers `x` and `y`. + class CalcMidpoint { public: template - static constexpr bool value() { - return std::numeric_limits::is_iec559 && - std::numeric_limits::radix == 2 && - std::numeric_limits::digits == 24 && // Mantissa has 23 bits + 1 implicit bit - sizeof(T) == 4; + static T solve(T x, T y) { + return solve(x, y, boost::math::tools::detail::ieee754_linear::LayoutIdentifier::get_layout()); } - }; + + private: + // For types `T` associated with the layout type `Layout_IEEE754Linear`, increasing + // values in bit space result in increasing values in floating point space. The lowest + // representable value is -Inf and the highest is +Inf as shown below. + // + // |--------------------|---|---|--------------------| + // -Inf -min 0 min +Inf + // + // When denorm support is enabled, bisecting values in bitspace is straightforward + // because the bit values of the numbers can be averaged. When denorm support is + // disabled, bisecting in bitspace is more complicated. The locations of denormal + // numbers are shown below with ***. + // + // |--------------------|***|***|--------------------| + // -Inf -min 0 min +Inf + // + // The tools in the `ieee754_linear.hpp` file allow us to calculate the midpoint + // even when denorm support is disabled. To illustrate this algorithm, consider + // a cartoon floating point type with 17 possible discrete values from -Inf to +Inf + // and denorms marked by *** as shown below: + // + // -Inf | -8 + // -2.0 | -6 + // -1.0 | -4 + // -min | -2 + // *** | -1 + // 0.0 | 0 + // *** | +1 + // +min | +2 + // 1.0 | +4 + // 2.0 | +6 + // +Inf | +8 + // + // Goal: find the midpoint of `x=-min` and `y=+Inf` in bitspace. + // + // Step 1 -- Calculate inflated representations + // negative | positive + // -8-6-4-2 0 2 4 6 8 | float value | inflated bit value + // |-----|*|*|-----| x | -min | -2 + // x y y | +Inf | +8 + // + // Step 2 -- Calculate deflated representations. + // negative | positive + // -8-6-4-2 0 2 4 6 8 | deflated bit value + // |-----|||-----| x | -1 + // x y y | +7 + // + // Step 3 -- Calculate the midpoint in deflated representation + // negative | positive + // -8-6-4-2 0 2 4 6 8 | deflated bit value + // |-----|||-----| x | -1 + // x m y y | +7 + // m | +3 <-- (x + y) / 2 where x = -1 and y = +7 + // + // Step 4 -- Calculate the midpoint in inflated representation + // negative | positive + // -8-6-4-2 0 2 4 6 8 | inflated bit value + // |-----|*|*|-----| m | +4 + // m + // + // Step 5 -- Convert the midpoint from bits to float giving the floating point answer + // + template + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear) { + using boost::math::tools::detail::ieee754_linear::BitsInflated; + using boost::math::tools::detail::ieee754_linear::Denormflator; + const auto df = Denormflator(); // Calculates if `has_denorm` is true at this instant - // NOTE: `double` and `_Float64` are not type aliases - class IsFloat64 { - public: - template - static constexpr bool value() { - return std::numeric_limits::is_iec559 && - std::numeric_limits::radix == 2 && - std::numeric_limits::digits == 53 && // Mantissa has 52 bits + 1 implicit bit - sizeof(T) == 8; - } - }; + // Step 1 -- Calculate inflated representations + const BitsInflated y_inflated(y); + const BitsInflated x_inflated(x); - // NOTE: 128 bit `long double` and `_Float128` are not type aliases - class IsFloat128 { - public: - template - static constexpr bool value() { - return std::numeric_limits::is_iec559 && - std::numeric_limits::radix == 2 && - std::numeric_limits::digits == 113 && // Mantissa has 112 bits + 1 implicit bit - sizeof(T) == 16; - } - }; + // Step 2 -- Calculate deflated representations + const auto y_def = df.deflate(y_inflated); + const auto x_def = df.deflate(x_inflated); - // This class prevents compiler warnings from unused functions. - class CalcMidpoint { - public: - // IsFloat32 --> Midpoint754 - template - static typename std::enable_if(), Midpoint754>::type - get_solver() { return Midpoint754(); } + // Step 3 -- Calculate the midpoint in deflated representation + const auto xy_def_midpoint = (y_def + x_def) >> 1; // Add then divide by 2 - // IsFloat64 --> Midpoint754 - template - static typename std::enable_if(), Midpoint754>::type - get_solver() { return Midpoint754(); } - - // IsFloat128 --> Midpoint754 -#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) - // NOTE: returns Midpoint754<__float128, ...> instead of Midpoint754 because - // in order to work with boost::multiprecision::float128, which is a wrapper - // around __float128. - template - static typename std::enable_if(), Midpoint754<__float128, boost::uint128_type>>::type - get_solver() { return Midpoint754<__float128, boost::uint128_type>(); } + // Step 4 -- Calculate the midpoint in inflated representation + const auto xy_midpoint = df.inflate(xy_def_midpoint); - template - static constexpr bool use_default_solver() { - return !IsFloat32::value() && - !IsFloat64::value() && - !IsFloat128::value(); + // Step 5 -- Convert the midpoint from bits to float + return xy_midpoint.reinterpret_as_float(); } -#else - template - static constexpr bool use_default_solver() { - return !IsFloat32::value() && - !IsFloat64::value(); + + template + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear layout) { + using boost::math::tools::detail::ieee754_linear::StaticCast; + return solve(StaticCast::value(x), StaticCast::value(y), layout); } -#endif - - // Default --> *** MidpointNon754 *** - template - static typename std::enable_if(), MidpointNon754>::type - get_solver() { return MidpointNon754(); } - // NOTE: template - static T calc_midpoint(T x, T y) { - return get_solver().solve(x, y); + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_Unspecified) { + return MidpointFallback::solve(x, y); } }; - } // namespace Bisection } // namespace detail @@ -689,7 +561,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& { // Last two steps haven't converged. const T x_other = (delta > 0) ? min : max; - delta = result - detail::Bisection::CalcMidpoint::calc_midpoint(result, x_other); + delta = result - detail::Bisection::CalcMidpoint::solve(result, x_other); // reset delta1/2 so we don't take this branch next time round: delta1 = 3 * delta; delta2 = 3 * delta; diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 7885649f71..913a12d6ff 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -1048,6 +1048,7 @@ test-suite misc : [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] + [ run ieee754_linear_test.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_root_iterations.cpp pch ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ] [ run test_root_finding_concepts.cpp ../../test/build//boost_unit_test_framework ] [ run test_toms748_solve.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/ieee754_linear_test.cpp b/test/ieee754_linear_test.cpp new file mode 100644 index 0000000000..28800616de --- /dev/null +++ b/test/ieee754_linear_test.cpp @@ -0,0 +1,81 @@ +// (C) Copyright Ryan Elandt 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_TEST_MAIN +#include +#include +#include + +#include +#include +#include +#include + +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + +#if __has_include() +# include +#endif + +using boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear; +using boost::math::tools::detail::ieee754_linear::Layout_Unspecified; + + +template +bool is_layout_equal(L, L) { return true; } + +template +bool is_layout_equal(L1, L2) { return false; } + +template +bool is_unspeci(Layout_Unspecified) { return true; } + +void test_layout() { + using boost::math::tools::detail::ieee754_linear::LayoutIdentifier; + + const auto layout_f32_u32 = Layout_IEEE754Linear(); + const auto layout_f64_u64 = Layout_IEEE754Linear(); +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) + const auto layout_f128_u128 = Layout_IEEE754Linear<__float128, boost::uint128_type>(); +#endif + + // Check layout for float types + BOOST_CHECK(is_layout_equal(layout_f32_u32, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_layout_equal(layout_f64_u64, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + +// Float128 +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) + BOOST_CHECK(is_layout_equal(layout_f128_u128, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_layout_equal(layout_f128_u128, LayoutIdentifier::get_layout<__float128>())); +#endif + +// stdfloat +#if __has_include() +#ifdef __STDCPP_FLOAT32_T__ + BOOST_CHECK(is_layout_equal(layout_f32_u32, LayoutIdentifier::get_layout())); +#endif +#ifdef __STDCPP_FLOAT64_T__ + BOOST_CHECK(is_layout_equal(layout_f64_u64, LayoutIdentifier::get_layout())); +#endif +#endif + +// 80 bit long double +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); +#endif +} + +BOOST_AUTO_TEST_CASE( test_main ) +{ + test_layout(); + +} diff --git a/test/test_roots.cpp b/test/test_roots.cpp index 2458bdd978..f6336a67ec 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -708,80 +708,64 @@ std::vector create_test_ladder() { return v; }; -template -void test_bisect(S solver) { - // Test for all combinations from the ladder - auto v = create_test_ladder(); - - for (const T& x_i: v) { - for (const T& x_j: v) { - const T x_mid_ij = solver.solve(x_i, x_j); +template +class TestBisect { +public: + static void run() { + auto v = create_test_ladder(); + + for (const W& x_i: v) { + for (const W& x_j: v) { + const W x_mid_ij = boost::math::tools::detail::Bisection::CalcMidpoint::solve(x_i, x_j); + test_order(static_cast(x_i), static_cast(x_mid_ij), static_cast(x_j)); + } + } + } - const T x_lo = (std::min)(x_i, x_j); - const T x_hi = (std::max)(x_i, x_j); + static void test_order(const T& x_i, const T& x_mid_ij, const T& x_j) { + const T x_lo = (std::min)(x_i, x_j); + const T x_hi = (std::max)(x_i, x_j); - BOOST_CHECK(x_lo <= x_mid_ij); // NOTE: BOOST_CHECK_LE(x_lo, x_mid_ij) fails to link - BOOST_CHECK(x_mid_ij <= x_hi); // NOTE: BOOST_CHECK_LE(x_mid_ij, x_hi) fails to link - } + BOOST_CHECK(x_lo <= x_mid_ij); // NOTE: BOOST_CHECK_LE(x_lo, x_mid_ij) fails to link + BOOST_CHECK(x_mid_ij <= x_hi); // NOTE: BOOST_CHECK_LE(x_mid_ij, x_hi) fails to link } -} +}; -template -void test_bisect_non() { - const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(); - test_bisect(solver); -} +template +void test_bisect() { + // Get layout + const auto layout = boost::math::tools::detail::ieee754_linear::LayoutIdentifier::get_layout(); -template -void test_bisect_754() { - const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(); - BOOST_MATH_ASSERT(solver.is_one_plus_max_bits_inf()); // Catch infinity misbehavior for 80 bit `long double` - // before it causes downstream failures - // We need to use the float type associated with the solver to avoid needing to link with libquadmath - using S = decltype(solver); - test_bisect(solver); + // `T` and `W` are usually the same. In the case where `W` is + // `boost::multiprecision::float128`, then `W` is a wrapper and around + // the datatype `T = __float128`. + using T = typename decltype(layout)::type_float; // Get layout float type + TestBisect::run(); } void test_bisect_all_cases() { - test_bisect_754(); - test_bisect_754(); - test_bisect_non(); - test_bisect_non(); - test_bisect_non(); - test_bisect_non(); - test_bisect_non(); - test_bisect_non(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); #if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) - test_bisect_754(); - test_bisect_754<__float128>(); + test_bisect(); + test_bisect<__float128>(); #endif #if __has_include() #ifdef __STDCPP_FLOAT32_T__ - test_bisect_754(); + test_bisect(); #endif #ifdef __STDCPP_FLOAT64_T__ - test_bisect_754(); + test_bisect(); #endif #endif - -#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128) - // CI says this fails to compile sometimes because the static_assert - // in `Midpoint754` that `std::is_unsigned::value is - // true is not met. This occurs despite the preprocessor assertion that - // `BOOST_HAS_INT128` is defined. I'm not sure why this occurs. - // - // That being said, this test is mostly pedogogical. It shows that - // `long double` does not possess the property that increasing values in - // bitspace result in increasing float values. It is not required for - // correctness, only for completeness. If you can compile this locally - // this test should pass. -#if 0 - const auto solver = boost::math::tools::detail::Bisection::Midpoint754(); - BOOST_CHECK(!solver.is_one_plus_max_bits_inf()); -#endif -#endif } void test_count() { From 8c46b6da0ac5724319ac01685ad38a06df5ffa72 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 18:35:16 -0400 Subject: [PATCH 15/22] add_newline to end of file --- include/boost/math/tools/ieee754_linear.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 5e1a8efab2..61d1624cfb 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -363,4 +363,4 @@ namespace ieee754_linear { } // namespace math } // namespace boost -#endif // BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP \ No newline at end of file +#endif // BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP From f28048d216caf2629fbfd49e9562eaad4a1e4249 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 18:56:22 -0400 Subject: [PATCH 16/22] non-ascii character fix --- include/boost/math/tools/ieee754_linear.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 61d1624cfb..8abe3ccb85 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -342,8 +342,8 @@ namespace ieee754_linear { static constexpr bool value = decltype(test(0))::value; }; - // Allows one to static_cast from ‘boost::math::concepts::std_real_concept’ to - // type ‘__float128’ + // Allows one to static_cast from `boost::math::concepts::std_real_concept` to + // type `__float` class StaticCast { public: template From a14567f52ca6d7a350dbd4026d00f3e157dd4d56 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 19:37:04 -0400 Subject: [PATCH 17/22] fix lint fails: C and LIC --- include/boost/math/tools/ieee754_linear.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 8abe3ccb85..797ff99c89 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -1,5 +1,10 @@ -#ifndef BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP -#define BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP +// (C) Copyright Ryan Elandt 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP +#define BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP #ifdef _MSC_VER #pragma once @@ -363,4 +368,4 @@ namespace ieee754_linear { } // namespace math } // namespace boost -#endif // BOOST_MATH_TOOLS_IEEE754_BITSPACE_HPP +#endif // BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP From 0a64a5fece5dc8c570dec1f3455631c13cec9ecc Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 21:43:19 -0400 Subject: [PATCH 18/22] fix issue type issue for std::float32_t + 64_t --- test/ieee754_linear_test.cpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/test/ieee754_linear_test.cpp b/test/ieee754_linear_test.cpp index 28800616de..cd3ec8e99d 100644 --- a/test/ieee754_linear_test.cpp +++ b/test/ieee754_linear_test.cpp @@ -25,27 +25,24 @@ using boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear; using boost::math::tools::detail::ieee754_linear::Layout_Unspecified; -template -bool is_layout_equal(L, L) { return true; } +template +bool is_unspeci(Layout_Unspecified) { return true; } -template -bool is_layout_equal(L1, L2) { return false; } +template +bool is_paired_with_uint32_t(Layout_IEEE754Linear, T) { return true; } template -bool is_unspeci(Layout_Unspecified) { return true; } +bool is_paired_with_uint64_t(Layout_IEEE754Linear, T) { return true; } + +template +bool is_paired_with_uint128_t(Layout_IEEE754Linear, T) { return true; } void test_layout() { using boost::math::tools::detail::ieee754_linear::LayoutIdentifier; - const auto layout_f32_u32 = Layout_IEEE754Linear(); - const auto layout_f64_u64 = Layout_IEEE754Linear(); -#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) - const auto layout_f128_u128 = Layout_IEEE754Linear<__float128, boost::uint128_type>(); -#endif - // Check layout for float types - BOOST_CHECK(is_layout_equal(layout_f32_u32, LayoutIdentifier::get_layout())); - BOOST_CHECK(is_layout_equal(layout_f64_u64, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_paired_with_uint32_t(LayoutIdentifier::get_layout(), float())); + BOOST_CHECK(is_paired_with_uint64_t(LayoutIdentifier::get_layout(), double())); BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); @@ -54,17 +51,17 @@ void test_layout() { // Float128 #if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) - BOOST_CHECK(is_layout_equal(layout_f128_u128, LayoutIdentifier::get_layout())); - BOOST_CHECK(is_layout_equal(layout_f128_u128, LayoutIdentifier::get_layout<__float128>())); + BOOST_CHECK(is_paired_with_uint128_t(LayoutIdentifier::get_layout<__float128>(), __float128())); + BOOST_CHECK(is_paired_with_uint128_t(LayoutIdentifier::get_layout(), __float128())); #endif // stdfloat #if __has_include() #ifdef __STDCPP_FLOAT32_T__ - BOOST_CHECK(is_layout_equal(layout_f32_u32, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_paired_with_uint32_t(LayoutIdentifier::get_layout(), std::float32_t())); #endif #ifdef __STDCPP_FLOAT64_T__ - BOOST_CHECK(is_layout_equal(layout_f64_u64, LayoutIdentifier::get_layout())); + BOOST_CHECK(is_paired_with_uint64_t(LayoutIdentifier::get_layout(), std::float64_t())); #endif #endif From 57c55127c9c03ffadbd1527f7ba5a7310587c49d Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 22 Aug 2023 22:45:28 -0400 Subject: [PATCH 19/22] added missing if defined guards ofr uint128 --- test/ieee754_linear_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/ieee754_linear_test.cpp b/test/ieee754_linear_test.cpp index cd3ec8e99d..29dd79a898 100644 --- a/test/ieee754_linear_test.cpp +++ b/test/ieee754_linear_test.cpp @@ -34,8 +34,10 @@ bool is_paired_with_uint32_t(Layout_IEEE754Linear, T) { return true template bool is_paired_with_uint64_t(Layout_IEEE754Linear, T) { return true; } +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) template bool is_paired_with_uint128_t(Layout_IEEE754Linear, T) { return true; } +#endif void test_layout() { using boost::math::tools::detail::ieee754_linear::LayoutIdentifier; From 395709371b5279c96730f9c673c36df057402448 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 31 Aug 2023 13:19:19 -0400 Subject: [PATCH 20/22] remove multiprecision header from math files --- include/boost/math/tools/ieee754_linear.hpp | 20 +++++++++++--------- include/boost/math/tools/roots.hpp | 4 ---- test/compile_test/std_real_concept_check.cpp | 4 ++++ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 797ff99c89..0d0ceb9744 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -12,10 +12,7 @@ #include - -#ifdef BOOST_HAS_FLOAT128 -#include -#endif +#include // For BOOST_MATH_FLOAT128_TYPE namespace boost { @@ -99,6 +96,12 @@ namespace ieee754_linear { // template class Layout_IEEE754Linear { +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) + static_assert(std::numeric_limits::is_iec559 || std::is_same::value, + "Attempting to compile Layout_IEEE754Linear with 128 bit floating point type without IEEE 754 " + "support for this type. Consider including a header file containing the required " + "`std::numeric_limits` specializations such as: #include "); +#endif static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); @@ -133,12 +136,11 @@ namespace ieee754_linear { // Layout: 128 bit linear #if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) - // NOTE: returns Layout_IEEE754Linear<__float128, ...> instead of Layout_IEEE754Linear - // in order to work with boost::multiprecision::float128, which is a wrapper - // around __float128. + // NOTE: returns Layout_IEEE754Linear instead of Layout_IEEE754Linear + // in order to work with non-trivial types that wrap trivial 128 bit floating point types. template - static typename std::enable_if(), Layout_IEEE754Linear<__float128, boost::uint128_type>>::type - get_layout() { return Layout_IEEE754Linear<__float128, boost::uint128_type>(); } + static typename std::enable_if(), Layout_IEEE754Linear>::type + get_layout() { return Layout_IEEE754Linear(); } template static constexpr bool is_layout_nonlinear() { diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 5e77306b32..f496a3979d 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -26,10 +26,6 @@ #include #include -#ifdef BOOST_HAS_FLOAT128 -#include -using boost::multiprecision::float128; -#endif namespace boost { namespace math { diff --git a/test/compile_test/std_real_concept_check.cpp b/test/compile_test/std_real_concept_check.cpp index ee4362374b..fe71493a53 100644 --- a/test/compile_test/std_real_concept_check.cpp +++ b/test/compile_test/std_real_concept_check.cpp @@ -16,6 +16,10 @@ #include "instantiate.hpp" +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + // // The purpose of this test is to verify that our code compiles // cleanly with a type whose std lib functions are in namespace From 6d27f3ae5f7e6f2f145b7d7f12075aede2b4dc0f Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 31 Aug 2023 13:52:30 -0400 Subject: [PATCH 21/22] include boost/cstdfloat.hpp --- include/boost/math/tools/ieee754_linear.hpp | 8 +------- test/compile_test/std_real_concept_check.cpp | 4 ---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 0d0ceb9744..7ef03d21c2 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -13,7 +13,7 @@ #include #include // For BOOST_MATH_FLOAT128_TYPE - +#include // For numeric_limits support for 128 bit floats namespace boost { namespace math { @@ -96,12 +96,6 @@ namespace ieee754_linear { // template class Layout_IEEE754Linear { -#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) - static_assert(std::numeric_limits::is_iec559 || std::is_same::value, - "Attempting to compile Layout_IEEE754Linear with 128 bit floating point type without IEEE 754 " - "support for this type. Consider including a header file containing the required " - "`std::numeric_limits` specializations such as: #include "); -#endif static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point."); static_assert(std::is_unsigned::value, "U must be an unsigned integer type."); static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same."); diff --git a/test/compile_test/std_real_concept_check.cpp b/test/compile_test/std_real_concept_check.cpp index fe71493a53..ee4362374b 100644 --- a/test/compile_test/std_real_concept_check.cpp +++ b/test/compile_test/std_real_concept_check.cpp @@ -16,10 +16,6 @@ #include "instantiate.hpp" -#ifdef BOOST_HAS_FLOAT128 -#include -#endif - // // The purpose of this test is to verify that our code compiles // cleanly with a type whose std lib functions are in namespace From 66a61a8eeaec5dd9d80a2e186d2a776717333616 Mon Sep 17 00:00:00 2001 From: Ryan Date: Fri, 1 Sep 2023 12:35:49 -0400 Subject: [PATCH 22/22] new include --- include/boost/math/tools/ieee754_linear.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp index 7ef03d21c2..f1f1317dfc 100644 --- a/include/boost/math/tools/ieee754_linear.hpp +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -13,7 +13,14 @@ #include #include // For BOOST_MATH_FLOAT128_TYPE -#include // For numeric_limits support for 128 bit floats + +// Support a specialization of std::numeric_limits<> for the wrapped quadmath library +// (if available) and required for the 128 bit version to function. +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) +#if !defined(BOOST_CSTDFLOAT_NO_LIBQUADMATH_LIMITS) + #include +#endif +#endif namespace boost { namespace math {