diff --git a/include/dragonbox/dragonbox.h b/include/dragonbox/dragonbox.h index 63da7f1..c6b160f 100644 --- a/include/dragonbox/dragonbox.h +++ b/include/dragonbox/dragonbox.h @@ -217,6 +217,7 @@ namespace jkj { // using JKJ_STD_REPLACEMENT_NAMESPACE::int_least32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least16_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least32_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least64_t; // We need UINT32_C and UINT64_C macros too, but again there is nothing to do here. @@ -1782,6 +1783,102 @@ namespace jkj { } }; + template + struct compressed_cache_holder { + using cache_entry_type = cache_holder::cache_entry_type; + static constexpr int cache_bits = cache_holder::cache_bits; + static constexpr int min_k = cache_holder::min_k; + static constexpr int max_k = cache_holder::max_k; + static constexpr int compression_ratio = 13; + static constexpr detail::stdr::size_t compressed_table_size = + detail::stdr::size_t((max_k - min_k + compression_ratio) / compression_ratio); + static constexpr detail::stdr::size_t pow5_table_size = + detail::stdr::size_t((compression_ratio + 1) / 2); + + struct cache_holder_t { + cache_entry_type table[compressed_table_size]; + }; + struct pow5_holder_t { + detail::stdr::uint_least16_t table[pow5_table_size]; + }; + +#if JKJ_HAS_CONSTEXPR17 + static constexpr cache_holder_t cache JKJ_STATIC_DATA_SECTION = [] { + cache_holder_t res{}; + for (detail::stdr::size_t i = 0; i < compressed_table_size; ++i) { + res.table[i] = cache_holder::cache[i * compression_ratio]; + } + return res; + }(); + static constexpr pow5_holder_t pow5_table JKJ_STATIC_DATA_SECTION = [] { + pow5_holder_t res{}; + detail::stdr::uint_least16_t p = 1; + for (detail::stdr::size_t i = 0; i < pow5_table_size; ++i) { + res.table[i] = p; + p *= 5; + } + return res; + }(); +#else + template + static constexpr cache_holder_t make_cache(detail::index_sequence) { + return {cache_holder::cache[indices * compression_ratio]...}; + } + static constexpr cache_holder_t cache JKJ_STATIC_DATA_SECTION = + make_cache(detail::make_index_sequence{}); + + template + static constexpr pow5_holder_t make_pow5_table(detail::index_sequence) { + return {detail::compute_power(detail::stdr::uint_least16_t(5))...}; + } + static constexpr pow5_holder_t pow5_table JKJ_STATIC_DATA_SECTION = + make_pow5_table(detail::make_index_sequence{}); +#endif + + static JKJ_CONSTEXPR20 cache_entry_type get_cache(int k) noexcept { + // Compute the base index. + auto const cache_index = + int(detail::stdr::uint_least32_t(k - min_k) / compression_ratio); + auto const kb = cache_index * compression_ratio + min_k; + auto const offset = k - kb; + + // Get the base cache. + auto const base_cache = cache.table[cache_index]; + + if (offset == 0) { + return base_cache; + } + else { + // Compute the required amount of bit-shift. + auto const alpha = + detail::log::floor_log2_pow10(k) - detail::log::floor_log2_pow10(kb) - offset; + assert(alpha > 0 && alpha < 64); + + // Try to recover the real cache. + constexpr auto pow5_7 = detail::compute_power<7>(detail::stdr::uint_least32_t(5)); + auto const pow5 = + offset >= 7 + ? detail::stdr::uint_least32_t(pow5_7 * pow5_table.table[offset - 7]) + : detail::stdr::uint_least32_t(pow5_table.table[offset]); + auto mul_result = detail::wuint::umul128(base_cache, pow5); + auto const recovered_cache = cache_entry_type( + (((mul_result.high() << (64 - alpha)) | (mul_result.low() >> alpha)) + 1) & + UINT64_C(0xffffffffffffffff)); + assert(recovered_cache != 0); + + return recovered_cache; + } + } + }; +#if !JKJ_HAS_INLINE_VARIABLE + template + constexpr typename compressed_cache_holder::cache_holder_t + compressed_cache_holder::cache; + template + constexpr typename compressed_cache_holder::pow5_holder_t + compressed_cache_holder::pow5_table; +#endif + template struct compressed_cache_holder { using cache_entry_type = cache_holder::cache_entry_type; @@ -1790,13 +1887,15 @@ namespace jkj { static constexpr int max_k = cache_holder::max_k; static constexpr int compression_ratio = 27; static constexpr detail::stdr::size_t compressed_table_size = - (max_k - min_k + compression_ratio) / compression_ratio; + detail::stdr::size_t((max_k - min_k + compression_ratio) / compression_ratio); + static constexpr detail::stdr::size_t pow5_table_size = + detail::stdr::size_t(compression_ratio); struct cache_holder_t { cache_entry_type table[compressed_table_size]; }; struct pow5_holder_t { - detail::stdr::uint_least64_t table[compression_ratio]; + detail::stdr::uint_least64_t table[pow5_table_size]; }; #if JKJ_HAS_CONSTEXPR17 @@ -1810,7 +1909,7 @@ namespace jkj { static constexpr pow5_holder_t pow5_table JKJ_STATIC_DATA_SECTION = [] { pow5_holder_t res{}; detail::stdr::uint_least64_t p = 1; - for (detail::stdr::size_t i = 0; i < compression_ratio; ++i) { + for (detail::stdr::size_t i = 0; i < pow5_table_size; ++i) { res.table[i] = p; p *= 5; } @@ -1829,7 +1928,7 @@ namespace jkj { return {detail::compute_power(detail::stdr::uint_least64_t(5))...}; } static constexpr pow5_holder_t pow5_table JKJ_STATIC_DATA_SECTION = - make_pow5_table(detail::make_index_sequence{}); + make_pow5_table(detail::make_index_sequence{}); #endif static JKJ_CONSTEXPR20 cache_entry_type get_cache(int k) noexcept { @@ -3088,6 +3187,18 @@ namespace jkj { // 10^-18) and 2f_c = 29711482, e = -80 // (1.2288529832819387448703332688104694625508273020386695861816... * // 10^-17). + // For the case of compressed cache for binary32, there is another + // exceptional case 2f_c = 33554430, e = -10 (16383.9990234375). In this + // case, the recovered cache is two large to make compute_mul_parity + // mistakenly conclude that z is not an integer, but actually z = 16384 is + // an integer. + JKJ_IF_CONSTEXPR( + stdr::is_same>::value) { + if (two_fc == 33554430 && binary_exponent == -10) { + break; + } + } auto const z_result = multiplication_traits_::compute_mul_parity(two_fc + 2, cache, beta); if (z_result.parity || z_result.is_integer) { diff --git a/subproject/test/source/verify_compressed_cache.cpp b/subproject/test/source/verify_compressed_cache.cpp index 372691b..82919fa 100644 --- a/subproject/test/source/verify_compressed_cache.cpp +++ b/subproject/test/source/verify_compressed_cache.cpp @@ -24,134 +24,202 @@ #include #include -int main() { - // We are trying to verify that an appropriate right-shift of phi_k * 5^a plus one - // can be used instead of phi_(a+k). (Here, phi_k and phi_(a+k) are supposed to be the "tilde" ones; - // tilde is omitted for simplicity.) Since phi_k is defined in terms of ceiling, what we get from - // phi_k * 5^a will be phi_(a+k) + (error) for some nonnegative (error). - // - // For correct multiplication, the margin for binary32 is at least - // 2^64 * 5091154818982829 / 12349290596248284087255008291061760 = 7.60..., - // so we are safe if the error is up to 7. - // The margin for binary64 is at least - // 2^128 * 723173431431821867556830303887 / - // 18550103527668669801949286474444582643081334006759269899933694558208 - // = 13.26..., so we are safe if the error is up to 13. - // - // For correct integer checks, the case b > n_max is fine because the only condition on the - // recovered cache is a lower bound which must be already true for phi_(a+k). - // For the case b <= n_max, we only need to check the upper bound - // (recovered_cache) < 2^(Q-beta) * a/b + 2^(q-beta)/(floor(nmax/b) * b), - // so we check it manually for each e. - - using namespace jkj::dragonbox::detail::log; - using namespace jkj::dragonbox::detail::wuint; - using info = jkj::dragonbox::compressed_cache_holder; - using impl = jkj::dragonbox::detail::impl< - jkj::dragonbox::ieee754_binary_traits>; - - std::cout << "[Verifying cache recovery for compressed cache...]\n"; +// We are trying to verify that an appropriate right-shift of phi_k * 5^a plus one +// can be used instead of phi_(a+k). (Here, phi_k and phi_(a+k) are supposed to be the "tilde" ones; +// tilde is omitted for simplicity.) Since phi_k is defined in terms of ceiling, what we get from +// phi_k * 5^a will be phi_(a+k) + (error) for some nonnegative (error). +// +// For correct multiplication, the margin for binary32 is at least +// 2^64 * 5091154818982829 / 12349290596248284087255008291061760 = 7.60..., +// so we are safe if the error is up to 7. +// The margin for binary64 is at least +// 2^128 * 723173431431821867556830303887 / +// 18550103527668669801949286474444582643081334006759269899933694558208 +// = 13.26..., so we are safe if the error is up to 13. +// +// For correct integer checks, the case b > n_max is fine because the only condition on the +// recovered cache is a lower bound which must be already true for phi_(a+k). +// For the case b <= n_max, we only need to check the upper bound +// (recovered_cache) < 2^(Q-beta) * a/b + 2^(q-beta)/(floor(nmax/b) * b), +// so we check it manually for each e. + +template +struct recovered_cache_t { + CacheEntryType value; + bool success; +}; + +template +bool verify_compressed_cache(GetCache&& get_cache, ConvertToBigUInt&& convert_to_big_uint, + std::size_t max_diff_for_multiplication) { + using format = typename FormatTraits::format; + using cache_holder_type = jkj::dragonbox::compressed_cache_holder; + using impl = jkj::dragonbox::detail::impl; jkj::unsigned_rational unit; - auto n_max = jkj::big_uint::power_of_2(impl::significand_bits + 2); - for (int e = impl::min_exponent - impl::significand_bits; - e <= impl::max_exponent - impl::significand_bits; ++e) { - int const k = impl::kappa - floor_log10_pow2(e); - - using jkj::dragonbox::policy::cache::full; - auto const real_cache = full.get_cache(k); - - // Compute the base index. - int const kb = - ((k - impl::min_k) / info::compression_ratio) * info::compression_ratio + impl::min_k; - - // Get the base cache. - auto const base_cache = full.get_cache(kb); - - // Get the index offset. - auto const offset = k - kb; - - if (offset != 0) { - // Obtain the corresponding power of 5. - auto const pow5 = info::pow5_table.table[offset]; - - // Compute the required amount of bit-shifts. - auto const alpha = floor_log2_pow10(k) - floor_log2_pow10(kb) - offset; - assert(alpha > 0 && alpha < 64); + auto n_max = jkj::big_uint::power_of_2(format::significand_bits + 2); + for (int e = format::min_exponent - format::significand_bits; + e <= format::max_exponent - format::significand_bits; ++e) { + int const k = impl::kappa - jkj::dragonbox::detail::log::floor_log10_pow2(e); - // Try to recover the real cache. - auto recovered_cache = umul128(base_cache.high(), pow5); - auto const middle_low = umul128(base_cache.low(), pow5); + auto const real_cache = jkj::dragonbox::policy::cache::full.get_cache(k); - recovered_cache += middle_low.high(); + auto const recovered_cache = get_cache(k); + if (!recovered_cache.success) { + std::cout << " (e = " << e << ")\n"; + return false; + } - auto const high_to_middle = recovered_cache.high() << (64 - alpha); - auto const middle_to_low = recovered_cache.low() << (64 - alpha); + auto const rc = convert_to_big_uint(recovered_cache.value); + auto const diff = rc - convert_to_big_uint(real_cache); + if (diff != 0) { + if (diff > max_diff_for_multiplication) { + std::cout << "Multiplication is no longer valid. (e = " << e << ")\n"; + return false; + } - recovered_cache = uint128{(recovered_cache.low() >> alpha) | high_to_middle, - ((middle_low.low() >> alpha) | middle_to_low)}; + // For the case b <= n_max, integer check might be no longer valid. + int const beta = e + jkj::dragonbox::detail::log::floor_log2_pow10(k); - if (recovered_cache.low() + 1 == 0) { - std::cout << "Overflow detected - taking the ceil requires addition-with-carry (e = " - << e << ")\n "; - return -1; + // unit = 2^(e + k - 1) * 5^k = a/b. + unit.numerator = 1; + unit.denominator = 1; + if (k >= 0) { + unit.numerator = jkj::big_uint::pow(5, k); } else { - recovered_cache = {recovered_cache.high(), recovered_cache.low() + 1}; + unit.denominator = jkj::big_uint::pow(5, -k); } - - // Measure the difference - if (real_cache.high() != recovered_cache.high() || - real_cache.low() > recovered_cache.low()) { - std::cout << "Overflow detected.\n"; - return -1; + if (e + k - 1 >= 0) { + unit.numerator *= jkj::big_uint::power_of_2(e + k - 1); + } + else { + unit.denominator *= jkj::big_uint::power_of_2(-e - k + 1); } - auto const diff = std::uint32_t(recovered_cache.low() - real_cache.low()); - if (diff != 0) { - if (diff > 13) { - // Multiplication might be no longer valid. - std::cout << "Overflow detected.\n"; - return -1; + if (unit.denominator <= n_max) { + // Check (recovered_cache) < 2^(Q-beta) * a/b + 2^(q-beta)/(floor(nmax/b) * b), + // or equivalently, + // b * (recovered_cache) - 2^(Q-beta) * a < 2^(q-beta) / floor(nmax/b). + auto const left_hand_side = + unit.denominator * rc - + jkj::big_uint::power_of_2(cache_holder_type::cache_bits - beta) * unit.numerator; + + if (left_hand_side * (n_max / unit.denominator) >= + jkj::big_uint::power_of_2(FormatTraits::carrier_bits - beta)) { + // This exceptional case is carefully examined, so okay. + if (std::is_same::value && e == -10) { + // The exceptional case only occurs when n is exactly n_max. + if (left_hand_side * ((n_max - 1) / unit.denominator) >= + jkj::big_uint::power_of_2(FormatTraits::carrier_bits - beta)) { + std::cout << "Integer check is no longer valid. (e = " << e << ")\n"; + return false; + } + } + else { + std::cout << "Integer check is no longer valid. (e = " << e << ")\n"; + return false; + } } + } + } + } - // For the case b <= n_max, integer check might be no longer valid. - int const beta = e + floor_log2_pow10(k); + return true; +} - // unit = 2^(e + k - 1) * 5^k = a/b. - unit.numerator = 1; - unit.denominator = 1; - if (k >= 0) { - unit.numerator = jkj::big_uint::pow(5, k); - } - else { - unit.denominator = jkj::big_uint::pow(5, -k); - } - if (e + k - 1 >= 0) { - unit.numerator *= jkj::big_uint::power_of_2(e + k - 1); - } - else { - unit.denominator *= jkj::big_uint::power_of_2(-e - k + 1); - } +int main() { + bool success = true; + + std::cout << "[Verifying compressed cache for binary32...]\n"; + { + using cache_holder_type = + jkj::dragonbox::compressed_cache_holder; + + if (verify_compressed_cache>( + [](int k) { + return recovered_cache_t{ + cache_holder_type::get_cache(k), true}; + }, + [](cache_holder_type::cache_entry_type value) { return jkj::big_uint{value}; }, 7)) { + std::cout << "Verification succeeded. No error detected.\n\n"; + } + else { + std::cout << "\n"; + success = false; + } + } - if (unit.denominator <= n_max) { - // Check (recovered_cache) < 2^(Q-beta) * a/b + 2^(q-beta)/(floor(nmax/b) * b), - // or equivalently, - // b * (recovered_cache) - 2^(Q-beta) * a < 2^(q-beta) / floor(nmax/b). - auto const rc = jkj::big_uint{recovered_cache.low(), recovered_cache.high()}; - auto const left_hand_side = - unit.denominator * rc - - jkj::big_uint::power_of_2(info::cache_bits - beta) * unit.numerator; - - if (left_hand_side * (n_max / unit.denominator) >= - jkj::big_uint::power_of_2(impl::carrier_bits - beta)) { - std::cout << "Overflow detected.\n"; - return -1; + std::cout << "[Verifying compressed cache for binary64...]\n"; + { + using cache_holder_type = + jkj::dragonbox::compressed_cache_holder; + + if (verify_compressed_cache>( + [](int k) { + // Compute the base index. + auto const cache_index = int(std::uint_least32_t(k - cache_holder_type::min_k) / + cache_holder_type::compression_ratio); + auto const kb = + cache_index * cache_holder_type::compression_ratio + cache_holder_type::min_k; + auto const offset = k - kb; + + // Get the base cache. + auto const base_cache = cache_holder_type::cache.table[cache_index]; + + if (offset != 0) { + // Obtain the corresponding power of 5. + auto const pow5 = cache_holder_type::pow5_table.table[offset]; + + // Compute the required amount of bit-shifts. + using jkj::dragonbox::detail::log::floor_log2_pow10; + auto const alpha = floor_log2_pow10(k) - floor_log2_pow10(kb) - offset; + assert(alpha > 0 && alpha < 64); + + // Try to recover the real cache. + using jkj::dragonbox::detail::wuint::umul128; + auto recovered_cache = umul128(base_cache.high(), pow5); + auto const middle_low = umul128(base_cache.low(), pow5); + + recovered_cache += middle_low.high(); + + auto const high_to_middle = recovered_cache.high() << (64 - alpha); + auto const middle_to_low = recovered_cache.low() << (64 - alpha); + + recovered_cache = jkj::dragonbox::detail::wuint::uint128{ + (recovered_cache.low() >> alpha) | high_to_middle, + ((middle_low.low() >> alpha) | middle_to_low)}; + recovered_cache = {recovered_cache.high(), recovered_cache.low() + 1}; + + if (recovered_cache.low() == 0) { + std::cout + << "Overflow detected - taking the ceil requires addition-with-carry"; + return recovered_cache_t{ + recovered_cache, false}; + } + else { + return recovered_cache_t{ + recovered_cache, true}; + } } - } - } + else { + return recovered_cache_t{base_cache, true}; + } + }, + [](cache_holder_type::cache_entry_type value) { + return jkj::big_uint{value.low(), value.high()}; + }, + 13)) { + std::cout << "Verification succeeded. No error detected.\n\n"; + } + else { + std::cout << "\n"; + success = false; } } - std::cout << "Verification succeeded. No error detected.\n"; + std::cout << "Done.\n\n\n"; + return success ? 0 : -1; } diff --git a/subproject/test/source/verify_fast_multiplication.cpp b/subproject/test/source/verify_fast_multiplication.cpp index cae6229..d8680a3 100644 --- a/subproject/test/source/verify_fast_multiplication.cpp +++ b/subproject/test/source/verify_fast_multiplication.cpp @@ -149,6 +149,13 @@ int main() { jkj::dragonbox::policy::cache::full); std::cout << "Done.\n\n\n"; + std::cout << "[Verifying fast computation of xi and zi for the shorter interval case " + "with compressed cache (binary32)...]\n"; + success &= verify_fast_multiplication_xz< + jkj::dragonbox::ieee754_binary_traits>( + jkj::dragonbox::policy::cache::compact); + std::cout << "Done.\n\n\n"; + std::cout << "[Verifying fast computation of yru for the shorter interval case " "with full cache (binary32)...]\n"; success &= verify_fast_multiplication_yru< @@ -156,6 +163,13 @@ int main() { jkj::dragonbox::policy::cache::full); std::cout << "Done.\n\n\n"; + std::cout << "[Verifying fast computation of yru for the shorter interval case " + "with compressed cache (binary32)...]\n"; + success &= verify_fast_multiplication_yru< + jkj::dragonbox::ieee754_binary_traits>( + jkj::dragonbox::policy::cache::compact); + std::cout << "Done.\n\n\n"; + std::cout << "[Verifying fast computation of xi and zi for the shorter interval case " "with full cache (binary64)...]\n"; success &= verify_fast_multiplication_xz<