diff --git a/include/boost/math/tools/ieee754_linear.hpp b/include/boost/math/tools/ieee754_linear.hpp new file mode 100644 index 0000000000..f1f1317dfc --- /dev/null +++ b/include/boost/math/tools/ieee754_linear.hpp @@ -0,0 +1,374 @@ +// (C) Copyright Ryan Elandt 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP +#define BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +#include // For BOOST_MATH_FLOAT128_TYPE + +// Support a specialization of std::numeric_limits<> for the wrapped quadmath library +// (if available) and required for the 128 bit version to function. +#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128) +#if !defined(BOOST_CSTDFLOAT_NO_LIBQUADMATH_LIMITS) + #include +#endif +#endif + +namespace boost { +namespace math { +namespace tools { +namespace detail { +namespace ieee754_linear { + + // The `IsFloat32` class contains a static constexpr method `value()` that + // returns true if the type T is a 32 bit floating point type. This duck type + // test for 32 bit float types returns true for the following types: + // - `float` + // - `_Float32` (NOTE: not a type alias for `float`) + // - `std_real_concept` when emulating a 32 bit float with EMULATE32 + // - other types that seem to be 32 bit floats + class IsFloat32 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 24 && // Mantissa has 23 bits + 1 implicit bit + sizeof(T) == 4; + } + }; + + // The `IsFloat64` class contains a static constexpr method `value()` that + // returns true if the type T is a 64 bit floating point type. This duck type + // test for 64 bit float types returns true for the following types: + // - `double` + // - `_Float64` (NOTE: not a type alias for `double`) + // - `std_real_concept` when emulating a 64 bit float with EMULATE64 + // - other types that seem to be 64 bit floats + class IsFloat64 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 53 && // Mantissa has 52 bits + 1 implicit bit + sizeof(T) == 8; + } + }; + + // The `IsFloat128` class contains a static constexpr method `value()` that + // returns true if the type T is a 128 bit floating point type. This duck type + // test for 128 bit float types returns true for the following types: + // - `__float128` + // - `_Float128` (NOTE: not a type alias for `__float128`) + // - `std_real_concept` when emulating a 128 bit float with EMULATE128 + // - other types that seem to be 128 bit floats + class IsFloat128 { + public: + template + static constexpr bool value() { + return std::numeric_limits::is_iec559 && + std::numeric_limits::radix == 2 && + std::numeric_limits::digits == 113 && // Mantissa has 112 bits + 1 implicit bit + sizeof(T) == 16; + } + }; + + // The `Layout_IEEE754Linear` class represents IEEE 754 floating point types + // for which increasing float values (with the same sign) have increasing + // values in bit space. And for which there is a corresponding unsigned integer + // type U that has the same number of bits as T. Types that satisfy these + // conditions include 32, 64, and 128 bit floats. 
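The "increasing float values have increasing bit patterns" property described above can be checked directly. Below is a minimal standalone sketch (illustrative only, not part of this patch; the helper name `to_bits` is invented here) assuming `float` is IEEE binary32:

   #include <cassert>
   #include <cmath>
   #include <cstdint>
   #include <cstring>

   // Copy the bits of a binary32 float into a 32 bit unsigned integer.
   std::uint32_t to_bits(float x) {
      std::uint32_t u;
      std::memcpy(&u, &x, sizeof u);  // bit copy without violating aliasing rules
      return u;
   }

   int main() {
      // Same-sign floats are ordered the same way as their bit patterns...
      assert(to_bits(1.0f) < to_bits(1.5f));
      // ...and adjacent floats differ by exactly one in bit space.
      assert(to_bits(1.5f) + 1 == to_bits(std::nextafter(1.5f, 2.0f)));
   }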
+   // The table below shows select numbers for `float` (single precision).
+   //
+   //    0           | 0 00000000 00000000000000000000000 | positive zero
+   //    1.4013e-45  | 0 00000000 00000000000000000000001 | std::numeric_limits::denorm_min()
+   //    1.17549e-38 | 0 00000001 00000000000000000000000 | std::numeric_limits::min()
+   //    1.19209e-07 | 0 01101000 00000000000000000000000 | std::numeric_limits::epsilon()
+   //    1           | 0 01111111 00000000000000000000000 | positive one
+   //    3.40282e+38 | 0 11111110 11111111111111111111111 | std::numeric_limits::max()
+   //    inf         | 0 11111111 00000000000000000000000 | std::numeric_limits::infinity()
+   //    nan         | 0 11111111 10000000000000000000000 | std::numeric_limits::quiet_NaN()
+   //
+   // NOTE: the 80 bit `long double` is not "linear" due to the "integer part". See:
+   //       https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
+   //
+   template
+   class Layout_IEEE754Linear {
+      static_assert(std::numeric_limits::is_iec559, "Type must be IEEE 754 floating point.");
+      static_assert(std::is_unsigned::value, "U must be an unsigned integer type.");
+      static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same.");
+
+    public:
+      using type_float = T;  // The linear floating point type
+   };
+
+   // The `Layout_Unspecified` class represents floating point types that are
+   // not supported by the `Layout_IEEE754Linear` class.
+   template
+   class Layout_Unspecified {
+    public:
+      using type_float = T;  // The linear floating point type
+   };
+
+   // The `LayoutIdentifier` class identifies the layout type for a floating
+   // point type T. The layout type is one of the following:
+   //   - `Layout_IEEE754Linear` for 32, 64, and 128 bit floats
+   //   - `Layout_Unspecified` for other types
+   class LayoutIdentifier {
+    public:
+      // Layout: 32 bit linear
+      template
+      static typename std::enable_if(), Layout_IEEE754Linear>::type
+      get_layout() { return Layout_IEEE754Linear(); }
+
+      // Layout: 64 bit linear
+      template
+      static typename std::enable_if(), Layout_IEEE754Linear>::type
+      get_layout() { return Layout_IEEE754Linear(); }
+
+      // Layout: 128 bit linear
+#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128)
+      // NOTE: returns Layout_IEEE754Linear instead of Layout_IEEE754Linear
+      //       in order to work with non-trivial types that wrap trivial 128 bit floating point types.
+      template
+      static typename std::enable_if(), Layout_IEEE754Linear>::type
+      get_layout() { return Layout_IEEE754Linear(); }
+
+      template
+      static constexpr bool is_layout_nonlinear() {
+         return !IsFloat32::value() &&
+                !IsFloat64::value() &&
+                !IsFloat128::value();
+      }
+#else
+      template
+      static constexpr bool is_layout_nonlinear() {
+         return !IsFloat32::value() &&
+                !IsFloat64::value();
+      }
+#endif
+
+      // Layout: unspecified
+      template
+      static typename std::enable_if(), Layout_Unspecified>::type
+      get_layout() { return Layout_Unspecified(); }
+   };
+
+   // Base class for the `BitsInflated` and `BitsDeflated` classes.
+   //
+   // This class stores the bit values of a floating point type `T` as a
+   // sign bit and an unsigned integer magnitude. For example, a floating
+   // point type with 50 discrete values (zero is counted twice) between
+   // -Inf and +Inf is shown below.
+   //
+   //   -24  -20  -16  -12  -8   -4    0    4    8    12   16   20   24
+   //    .    .    .    .    .    .    .    .    .    .    .    .    .
+   //    |-----------------------|-----------------------|
+   //  -Inf                      0                      +Inf
+   //
+   template
+   class BitsFromZero: public Layout_IEEE754Linear {
+    public:
+      bool sign_bit() const { return sign_bit_; }
+      const U& mag() const { return mag_; }
+      U& mag() { return mag_; }
+
+    protected:
+      BitsFromZero(const bool sign, const U mag) : sign_bit_(sign), mag_(mag) {}
+
+      BitsFromZero(const T x) {
+         // The sign bit is 1, all other bits are 0
+         constexpr U bits_sign_mask = U(1) << (sizeof(U) * 8 - 1);
+
+         U bits_;
+         std::memcpy(&bits_, &x, sizeof(U));
+         sign_bit_ = bits_ & bits_sign_mask;
+         mag_ = bits_ & ~bits_sign_mask;
+      }
+
+      void flip_sign_bit() { sign_bit_ = !sign_bit_; }
+
+    private:
+      bool sign_bit_;
+      U mag_;
+   };
+
+   // The inflated bitspace representation of a floating point type `T`.
+   //
+   // This representation always includes denormal numbers, even if denorm
+   // support is turned off. For example, a floating point type with 50
+   // discrete values (zero is counted twice) between -Inf and +Inf is shown
+   // below with *** marking denormal numbers.
+   //
+   //   -24  -20  -16  -12  -8   -4    0    4    8    12   16   20   24
+   //    .    .    .    .    .    .    .    .    .    .    .    .    .
+   //    |-------------------|***|***|-------------------|
+   //  -Inf                      0                      +Inf
+   //
+   template
+   class BitsInflated : public BitsFromZero {
+    public:
+      BitsInflated(const T x) : BitsFromZero(x) {}
+      BitsInflated(const bool sign, const U mag) : BitsFromZero(sign, mag) {}
+
+      T reinterpret_as_float() const {
+         T f_out;
+         std::memcpy(&f_out, &this->mag(), sizeof(T));
+         return this->sign_bit() ? -f_out : f_out;
+      }
+
+      void divide_by_2() { this->mag() >>= 1; }
+   };
+
+   // The deflated bitspace representation of a floating point type `T`.
+   //
+   // Denorm Support ON:
+   //    This representation is identical to the inflated representation.
+   //
+   // Denorm Support OFF:
+   //    This representation shifts the bitspace representation toward `0`
+   //    to remove gaps in the bitspace caused by denormal numbers. For
+   //    example, consider a floating point type with 50 discrete values
+   //    (zero is counted twice) between -Inf and +Inf, shown below with
+   //    *** marking denormal numbers.
+   //
+   // Inflated representation:
+   //   -24  -20  -16  -12  -8   -4    0    4    8    12   16   20   24
+   //    .    .    .    .    .    .    .    .    .    .    .    .    .
+   //    |-------------------|***|***|-------------------|
+   //  -Inf                      0                      +Inf
+   // _________________________________________________________________
+   //
+   // Deflated representation:
+   //   -24  -20  -16  -12  -8   -4    0    4    8    12   16   20   24
+   //    .    .    .    .    .    .    .    .    .    .    .    .    .
+   //    |-------------------|||-------------------|
+   //  -Inf                  0                  +Inf
+   //
+   template
+   class BitsDeflated : public BitsFromZero {
+    public:
+      BitsDeflated(const bool sign, const U mag) : BitsFromZero(sign, mag) {}
+
+      T static_cast_int_value_to_float() const {
+         T mag_float = static_cast(this->mag());
+         return this->sign_bit() ? -mag_float : mag_float;
+      }
+
+      // Move over `n` in bitspace
+      BitsDeflated operator+(int n) const {
+         return BitsDeflated(this->sign_bit(), this->mag() + n);
+      }
+
+      BitsDeflated operator-(const BitsDeflated& y) const {
+         auto y_copy = y;
+         y_copy.flip_sign_bit();
+         return *this + y_copy;
+      }
+
+      BitsDeflated operator+(const BitsDeflated& y) const {
+         // Guarantee that y has the larger magnitude
+         if (y.mag() < this->mag()) { return y + *this; }
+
+         // Call *this x
+         const BitsDeflated& x = *this;
+
+         const U mag_x = x.mag();
+         const U mag_y = y.mag();
+
+         // Calculate the deflated sum in bits (always positive)
+         U bits_sum = (x.sign_bit() == y.sign_bit()) ?
(mag_y + mag_x) + : (mag_y - mag_x); + + // Sum always has the sign of the bigger magnitude (y) + return BitsDeflated(y.sign_bit(), bits_sum); + } + + BitsDeflated& operator>>(const int n) { + this->mag() >>= n; + return *this; + } + }; + + // The `Denormflator` converts between inflated and deflated bitspace representations + // to compensate for gaps in the bitspace caused by denormal numbers. The `deflate` + // operation shifts the bitspace representation toward `0`. The `inflate` operation + // shifts the bitspace representation away from `0`. These operations only have + // an effect if denorm support is turned off. If denorm support is turned on, then + // the shift is zero. The effect of these operations is illustrated in various + // cartoons below. + // + template + class Denormflator : public Layout_IEEE754Linear { + public: + Denormflator() : has_denorm_(calc_has_denorm()) {} + + BitsDeflated deflate(const BitsInflated& bit_view) const { + const U penalty = bits_denorm_shift(); + const U mag_un = bit_view.mag(); + const U mag_sh = (has_denorm_ | (mag_un < penalty)) ? mag_un : mag_un - penalty; + return BitsDeflated(bit_view.sign_bit(), mag_sh); + } + + BitsInflated inflate(const BitsDeflated& b) const { + const U mag = b.mag(); + const U mag_inflated = (has_denorm_ | (mag == 0)) ? mag : mag + bits_denorm_shift(); + return BitsInflated(b.sign_bit(), mag_inflated); + } + + private: + // Denorm penalty + static constexpr U bits_denorm_shift() { return (U(1) << (std::numeric_limits::digits - 1)) - 1; } + + static bool calc_has_denorm() { + return boost::math::detail::get_smallest_value() != (std::numeric_limits::min)(); + } + + bool has_denorm_; + }; + + // Check if T has a member function named "value". + template + class has_value_member + { + template + static auto test(int) -> decltype(std::declval().value(), std::true_type()); + + template + static std::false_type test(...); + + public: + static constexpr bool value = decltype(test(0))::value; + }; + + // Allows one to static_cast from `boost::math::concepts::std_real_concept` to + // type `__float` + class StaticCast { + public: + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x.value()); + } + + template + static typename std::enable_if::value, T>::type value(V x) { + return static_cast(x); + } + }; + +} // namespace ieee754_linear +} // namespace detail +} // namespace tools +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_TOOLS_IEEE754_LINEAR_HPP diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 791ec1a7e5..f496a3979d 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -18,12 +18,15 @@ #include #include +#include #include #include +#include #include #include + namespace boost { namespace math { namespace tools { @@ -213,6 +216,279 @@ inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_ } +////// Motivation for the Bisection namespace. ////// +// +// What's the best way to bisect between a lower bound (lb) and an upper +// bound (ub) during root finding? Let's consider options... +// +// Arithmetic bisection: +// - The natural choice, but it doesn't always work well. For example, if +// lb = 1.0 and ub = std::numeric_limits::max(), many bisections +// may be needed to converge if the root is near 1. +// +// Geometric bisection: +// - This approach performs much better for the example above, but it +// too has issues. 
+//      For example, if lb = 0.0, it gets stuck at 0.0.
+//      It also fails if lb and ub have different signs.
+//
+// In addition to the limitations outlined above, neither of these approaches
+// works if ub is infinity. We want a more robust way to handle bisection
+// for general root finding problems. That's what this namespace is for.
+//
+namespace detail {
+namespace Bisection {
+   // Midpoint calculation for floating point types that are not `Layout_IEEE754Linear`
+   template
+   class MidpointFallback {
+    public:
+      static T solve(T x, T y) {
+         const T sign_x = sign(x);
+         const T sign_y = sign(y);
+
+         // Signs differ, so the interval straddles zero: return zero
+         if (sign_x * sign_y == -1) { return T(0.0); }
+
+         // At least one is positive
+         if (0 < sign_x + sign_y) { return do_solve(x, y); }
+
+         // At least one is negative
+         return -do_solve(-x, -y);
+      }
+
+    private:
+      struct EqZero {
+         EqZero(T x) { BOOST_MATH_ASSERT_MSG(x == 0, "x must be zero."); }
+      };
+
+      struct EqInf {
+         // NOTE: (x == infinity) is checked this way to support types (e.g., boost::math::concepts::real_concept)
+         //       that have infinity, but for which the query std::numeric_limits::infinity() returns
+         //       0 because the std::numeric_limits interface is not implemented.
+         EqInf(T x) { BOOST_MATH_ASSERT_MSG(0 < x && !(boost::math::isfinite)(x), "x must be positive infinity."); }
+      };
+
+      class PosFinite {
+       public:
+         PosFinite(T x) : x_(x) {
+            BOOST_MATH_ASSERT_MSG(0 < x, "x must be positive.");
+            BOOST_MATH_ASSERT_MSG((boost::math::isfinite)(x), "x must be finite.");
+         }
+
+         T value() const { return x_; }
+
+       private:
+         T x_;
+      };
+
+      // Two unknowns
+      static T do_solve(T x, T y) {
+         if (y < x) {
+            return do_solve(y, x);
+         }
+
+         if (x == 0) {
+            return do_solve(EqZero(x), y);     // Zero and ???
+         } else if ((boost::math::isfinite)(x)) {
+            return do_solve(PosFinite(x), y);  // Finite and ???
+         } else {
+            return x;                          // Infinity and infinity
+         }
+      }
+
+      // One unknown
+      static T do_solve(EqZero x, T y) {
+         if (y == 0) {
+            return T(0.0);                     // Zero and zero
+         } else if ((boost::math::isfinite)(y)) {
+            return do_solve(x, PosFinite(y));  // Zero and finite
+         } else {
+            return T(1.5);                     // Zero and infinity
+         }
+      }
+      static T do_solve(PosFinite x, T y) {
+         if ((boost::math::isfinite)(y)) {
+            return do_solve(x, PosFinite(y));  // Finite and finite
+         } else {
+            return do_solve(x, EqInf(y));      // Finite and infinity
+         }
+      }
+
+      // Zero unknowns
+      template
+      static typename std::enable_if::is_specialized, T>::type
+      do_solve(PosFinite x, EqInf y) {
+         return do_solve(x, PosFinite((std::numeric_limits::max)()));
+      }
+      template
+      static typename std::enable_if::is_specialized, T>::type
+      do_solve(PosFinite x, EqInf y) {
+         BOOST_MATH_THROW_EXCEPTION(std::runtime_error("infinite bounds support requires specialization"));
+         return T(0);  // Unreachable, but suppresses warnings
+      }
+
+      template
+      static typename std::enable_if::is_specialized, U>::type
+      do_solve(EqZero x, PosFinite y) {
+         const auto get_smallest_value = []() {
+            const U denorm_min = std::numeric_limits::denorm_min();
+            if (denorm_min != 0) { return denorm_min; }
+
+            // NOTE: the two lines below can be removed when
+            //       https://github.com/boostorg/multiprecision/pull/562
+            //       gets merged.
+ const U min = (std::numeric_limits::min)(); + if (min != 0) { return min; } + + BOOST_MATH_THROW_EXCEPTION(std::runtime_error("This type probably isn't actually specialized.")); + return T(0); // Unreachable, but suppresses warnings + }; + + return do_solve(PosFinite(get_smallest_value()), y); + } + template + static typename std::enable_if::is_specialized, U>::type + do_solve(EqZero x, PosFinite y) { + // This function quickly gets a value that is small relative to y. + const auto fn_appx_denorm_min = [](U y){ + T accumulator = T(0.5); + for (int i = 1; i < 100; ++i) { + if (y * accumulator == 0) { + return y; + } else { + y = y * accumulator; + accumulator = accumulator * T(0.5); + } + } + return y; + }; + + const U denorm_min = fn_appx_denorm_min(y.value()); + return do_solve(PosFinite(denorm_min), y); + } + + static T do_solve(PosFinite x, PosFinite y) { + BOOST_MATH_ASSERT_MSG(x.value() <= y.value(), "x must be less than or equal to y."); + + const T value_x = x.value(); + const T value_y = y.value(); + + // Take arithmetic mean if they are close enough + if (value_y < value_x * 8) { return (value_y - value_x) / 2 + value_x; } // NOTE: avoids overflow + + // Take geometric mean if they are far apart + using std::sqrt; + return sqrt(value_x) * sqrt(value_y); // NOTE: avoids overflow + } + }; // class MidpointFallback + + // Calculates the midpoint in bitspace for numbers `x` and `y`. + class CalcMidpoint { + public: + template + static T solve(T x, T y) { + return solve(x, y, boost::math::tools::detail::ieee754_linear::LayoutIdentifier::get_layout()); + } + + private: + // For types `T` associated with the layout type `Layout_IEEE754Linear`, increasing + // values in bit space result in increasing values in floating point space. The lowest + // representable value is -Inf and the highest is +Inf as shown below. + // + // |--------------------|---|---|--------------------| + // -Inf -min 0 min +Inf + // + // When denorm support is enabled, bisecting values in bitspace is straightforward + // because the bit values of the numbers can be averaged. When denorm support is + // disabled, bisecting in bitspace is more complicated. The locations of denormal + // numbers are shown below with ***. + // + // |--------------------|***|***|--------------------| + // -Inf -min 0 min +Inf + // + // The tools in the `ieee754_linear.hpp` file allow us to calculate the midpoint + // even when denorm support is disabled. To illustrate this algorithm, consider + // a cartoon floating point type with 17 possible discrete values from -Inf to +Inf + // and denorms marked by *** as shown below: + // + // -Inf | -8 + // -2.0 | -6 + // -1.0 | -4 + // -min | -2 + // *** | -1 + // 0.0 | 0 + // *** | +1 + // +min | +2 + // 1.0 | +4 + // 2.0 | +6 + // +Inf | +8 + // + // Goal: find the midpoint of `x=-min` and `y=+Inf` in bitspace. + // + // Step 1 -- Calculate inflated representations + // negative | positive + // -8-6-4-2 0 2 4 6 8 | float value | inflated bit value + // |-----|*|*|-----| x | -min | -2 + // x y y | +Inf | +8 + // + // Step 2 -- Calculate deflated representations. 
+ // negative | positive + // -8-6-4-2 0 2 4 6 8 | deflated bit value + // |-----|||-----| x | -1 + // x y y | +7 + // + // Step 3 -- Calculate the midpoint in deflated representation + // negative | positive + // -8-6-4-2 0 2 4 6 8 | deflated bit value + // |-----|||-----| x | -1 + // x m y y | +7 + // m | +3 <-- (x + y) / 2 where x = -1 and y = +7 + // + // Step 4 -- Calculate the midpoint in inflated representation + // negative | positive + // -8-6-4-2 0 2 4 6 8 | inflated bit value + // |-----|*|*|-----| m | +4 + // m + // + // Step 5 -- Convert the midpoint from bits to float giving the floating point answer + // + template + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear) { + using boost::math::tools::detail::ieee754_linear::BitsInflated; + using boost::math::tools::detail::ieee754_linear::Denormflator; + const auto df = Denormflator(); // Calculates if `has_denorm` is true at this instant + + // Step 1 -- Calculate inflated representations + const BitsInflated y_inflated(y); + const BitsInflated x_inflated(x); + + // Step 2 -- Calculate deflated representations + const auto y_def = df.deflate(y_inflated); + const auto x_def = df.deflate(x_inflated); + + // Step 3 -- Calculate the midpoint in deflated representation + const auto xy_def_midpoint = (y_def + x_def) >> 1; // Add then divide by 2 + + // Step 4 -- Calculate the midpoint in inflated representation + const auto xy_midpoint = df.inflate(xy_def_midpoint); + + // Step 5 -- Convert the midpoint from bits to float + return xy_midpoint.reinterpret_as_float(); + } + + template + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear layout) { + using boost::math::tools::detail::ieee754_linear::StaticCast; + return solve(StaticCast::value(x), StaticCast::value(y), layout); + } + + template + static T solve(T x, T y, boost::math::tools::detail::ieee754_linear::Layout_Unspecified) { + return MidpointFallback::solve(x, y); + } + }; +} // namespace Bisection +} // namespace detail + template T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) { @@ -256,8 +532,13 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& last_f0 = f0; delta2 = delta1; delta1 = delta; + if (count == 0) { + return policies::raise_evaluation_error(function, "Ran out of iterations in boost::math::tools::newton_raphson_iterate, guess: %1%", guess, boost::math::policies::policy<>()); + } else { + --count; + } detail::unpack_tuple(f(result), f0, f1); - --count; + if (0 == f0) break; if (f1 == 0) @@ -275,7 +556,8 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& if (fabs(delta * 2) > fabs(delta2)) { // Last two steps haven't converged. - delta = (delta > 0) ? (result - min) / 2 : (result - max) / 2; + const T x_other = (delta > 0) ? 
min : max; + delta = result - detail::Bisection::CalcMidpoint::solve(result, x_other); // reset delta1/2 so we don't take this branch next time round: delta1 = 3 * delta; delta2 = 3 * delta; @@ -302,7 +584,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max = guess; max_range_f = f0; } - else + else if (delta < 0) // Cannot have "else" here, as delta being zero is not indicative of failure { min = guess; min_range_f = f0; @@ -314,7 +596,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& { return policies::raise_evaluation_error(function, "There appears to be no root to be found in boost::math::tools::newton_raphson_iterate, perhaps we have a local minima near current best guess of %1%", guess, boost::math::policies::policy<>()); } - }while(count && (fabs(result * factor) < fabs(delta))); + } while (fabs(result * factor) < fabs(delta) || result == 0); max_iter -= count; diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 7885649f71..913a12d6ff 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -1048,6 +1048,7 @@ test-suite misc : [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ] [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ] + [ run ieee754_linear_test.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_root_iterations.cpp pch ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ] [ run test_root_finding_concepts.cpp ../../test/build//boost_unit_test_framework ] [ run test_toms748_solve.cpp pch ../../test/build//boost_unit_test_framework ] diff --git a/test/ieee754_linear_test.cpp b/test/ieee754_linear_test.cpp new file mode 100644 index 0000000000..29dd79a898 --- /dev/null +++ b/test/ieee754_linear_test.cpp @@ -0,0 +1,80 @@ +// (C) Copyright Ryan Elandt 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_TEST_MAIN +#include +#include +#include + +#include +#include +#include +#include + +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + +#if __has_include() +# include +#endif + +using boost::math::tools::detail::ieee754_linear::Layout_IEEE754Linear; +using boost::math::tools::detail::ieee754_linear::Layout_Unspecified; + + +template +bool is_unspeci(Layout_Unspecified) { return true; } + +template +bool is_paired_with_uint32_t(Layout_IEEE754Linear, T) { return true; } + +template +bool is_paired_with_uint64_t(Layout_IEEE754Linear, T) { return true; } + +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) +template +bool is_paired_with_uint128_t(Layout_IEEE754Linear, T) { return true; } +#endif + +void test_layout() { + using boost::math::tools::detail::ieee754_linear::LayoutIdentifier; + + // Check layout for float types + BOOST_CHECK(is_paired_with_uint32_t(LayoutIdentifier::get_layout(), float())); + BOOST_CHECK(is_paired_with_uint64_t(LayoutIdentifier::get_layout(), double())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); + +// Float128 +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) + BOOST_CHECK(is_paired_with_uint128_t(LayoutIdentifier::get_layout<__float128>(), __float128())); + BOOST_CHECK(is_paired_with_uint128_t(LayoutIdentifier::get_layout(), __float128())); +#endif + +// stdfloat +#if __has_include() +#ifdef __STDCPP_FLOAT32_T__ + BOOST_CHECK(is_paired_with_uint32_t(LayoutIdentifier::get_layout(), std::float32_t())); +#endif +#ifdef __STDCPP_FLOAT64_T__ + BOOST_CHECK(is_paired_with_uint64_t(LayoutIdentifier::get_layout(), std::float64_t())); +#endif +#endif + +// 80 bit long double +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 + BOOST_CHECK(is_unspeci(LayoutIdentifier::get_layout())); +#endif +} + +BOOST_AUTO_TEST_CASE( test_main ) +{ + test_layout(); + +} diff --git a/test/test_roots.cpp b/test/test_roots.cpp index e4e68e24b8..f6336a67ec 100644 --- a/test/test_roots.cpp +++ b/test/test_roots.cpp @@ -19,11 +19,21 @@ #include #include #include "table_type.hpp" -#include #include +#include #include +#include #include +#include + +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + +#if __has_include() +# include +#endif #define BOOST_CHECK_CLOSE_EX(a, b, prec, i) \ {\ @@ -649,11 +659,150 @@ void test_failures() #endif } +// Creates a vector of test values that include: zero, denorm_min, min, epsilon, max, +// and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3. 
+template +std::vector create_test_ladder() { + std::vector v; + + const auto fn_push_back_x_scaled = [&v](const T& x) { + const T two_thirds = T(2) / T(3); + const T four_thirds = T(4) / T(3); + + v.push_back(x); + v.push_back(x * T(0.5)); + v.push_back(x * two_thirds); + v.push_back(x * four_thirds); + v.push_back(x * T(2.0)); + }; + + const auto fn_push_back_pm = [&](std::vector& v, const T& x) { + fn_push_back_x_scaled( x); + fn_push_back_x_scaled(-x); + }; + + fn_push_back_pm(v, std::numeric_limits::denorm_min()); + fn_push_back_pm(v, (std::numeric_limits::min)()); + fn_push_back_pm(v, std::numeric_limits::epsilon()); + + const int test_exp_range = 5; + for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) { + const int exponent = i * 3; + const T x = std::ldexp(1.0, exponent); + fn_push_back_pm(v, x); + } + fn_push_back_pm(v, (std::numeric_limits::max)()); + fn_push_back_pm(v, std::numeric_limits::infinity()); + + // Take unique elements of v + std::sort(v.begin(), v.end()); + v.erase(std::unique(v.begin(), v.end()), v.end()); + + // Add both positive and negative zero + v.push_back(T(-0.0)); + v.push_back(T(+0.0)); + + const int num_zeros = std::count(v.begin(), v.end(), T(0.0)); + BOOST_CHECK_LE(2, num_zeros); // Check for both positive and negative zero + + return v; +}; + +template +class TestBisect { +public: + static void run() { + auto v = create_test_ladder(); + + for (const W& x_i: v) { + for (const W& x_j: v) { + const W x_mid_ij = boost::math::tools::detail::Bisection::CalcMidpoint::solve(x_i, x_j); + test_order(static_cast(x_i), static_cast(x_mid_ij), static_cast(x_j)); + } + } + } + + static void test_order(const T& x_i, const T& x_mid_ij, const T& x_j) { + const T x_lo = (std::min)(x_i, x_j); + const T x_hi = (std::max)(x_i, x_j); + + BOOST_CHECK(x_lo <= x_mid_ij); // NOTE: BOOST_CHECK_LE(x_lo, x_mid_ij) fails to link + BOOST_CHECK(x_mid_ij <= x_hi); // NOTE: BOOST_CHECK_LE(x_mid_ij, x_hi) fails to link + } +}; + +template +void test_bisect() { + // Get layout + const auto layout = boost::math::tools::detail::ieee754_linear::LayoutIdentifier::get_layout(); + + // `T` and `W` are usually the same. In the case where `W` is + // `boost::multiprecision::float128`, then `W` is a wrapper and around + // the datatype `T = __float128`. 
+ using T = typename decltype(layout)::type_float; // Get layout float type + TestBisect::run(); +} + +void test_bisect_all_cases() { + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + test_bisect(); + +#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128) + test_bisect(); + test_bisect<__float128>(); +#endif + +#if __has_include() +#ifdef __STDCPP_FLOAT32_T__ + test_bisect(); +#endif +#ifdef __STDCPP_FLOAT64_T__ + test_bisect(); +#endif +#endif +} + +void test_count() { + const auto fn_newton = [](std::uintmax_t& i_max) { + return boost::math::tools::newton_raphson_iterate( + [](double x) { return std::make_pair(x * x - 3, 2 * x); }, + 10.0 /* x_initial */, + 0.0 /* bound lower*/, + 100.0 /* bound upper */, 52, i_max); + }; + + // Find the minimum number of iterations to find the solution + std::uintmax_t iter_find_solution = 100; + fn_newton(iter_find_solution); + BOOST_CHECK_EQUAL(iter_find_solution, std::uintmax_t(8)); // Confirm is 8 + + for (std::uintmax_t count = 0; count < (iter_find_solution * 2); count++) { + std::uintmax_t iters = count; + if (iter_find_solution <= count) { // Finds correct answer + using std::sqrt; + BOOST_CHECK_EQUAL(fn_newton(iters), sqrt(3.0)); + BOOST_CHECK_EQUAL(iters, iter_find_solution); + } + else { // Throws error for running out of iterations + BOOST_CHECK_THROW(fn_newton(iters), boost::math::evaluation_error); + } + } +} + BOOST_AUTO_TEST_CASE( test_main ) { test_beta(0.1, "double"); + test_count(); + test_bisect_all_cases(); + // bug reports: boost::math::skew_normal_distribution<> dist(2.0, 1.0, -2.5); BOOST_CHECK(boost::math::isfinite(quantile(dist, 0.075)));
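For intuition about why this patch bisects in bit space, the sketch below mirrors the idea behind `CalcMidpoint` for the simplest case only. It is illustrative and not part of the patch; the helper name `bit_midpoint` is invented here, and it assumes positive, finite, IEEE binary64 doubles with denormal support enabled (the real implementation also handles signs, zeros, infinities, and flush-to-zero):

   #include <cfloat>
   #include <cstdint>
   #include <cstring>
   #include <iostream>

   // Average two positive, finite doubles in bit space.
   double bit_midpoint(double a, double b) {
      std::uint64_t ua, ub;
      std::memcpy(&ua, &a, sizeof ua);
      std::memcpy(&ub, &b, sizeof ub);
      const std::uint64_t um = ua / 2 + ub / 2 + (ua & ub & 1u);  // overflow-free average
      double m;
      std::memcpy(&m, &um, sizeof m);
      return m;
   }

   int main() {
      // The arithmetic midpoint of 1.0 and DBL_MAX is ~9e307, which is useless for
      // bracketing a root near 1.  The bit-space midpoint is ~1.9e154, the same order
      // of magnitude as the geometric mean, so repeated bisection crosses the full
      // exponent range in a few dozen steps.
      std::cout << bit_midpoint(1.0, DBL_MAX) << "\n";
   }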