Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove intrinsics portability functions. #1480

Merged
merged 1 commit into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 8 additions & 52 deletions include/bitcoin/system/intrinsics/haves.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ BC_PUSH_WARNING(USE_CONSTEXPR_FOR_FUNCTION)
constexpr auto with_neon = false;
#endif

/// Runtime checks for Intel SIMD and ARM Neon availability.
/// Runtime checks for Intel SIMD and ARM SIMD (Neon) availability.
/// ---------------------------------------------------------------------------

namespace cpu1_0
Expand Down Expand Up @@ -154,51 +154,7 @@ constexpr bool try_neon() NOEXCEPT
return false;
}

/// Runtime tests for Intel SIMD, and ARM SIMD (Neon) availability.
/// ---------------------------------------------------------------------------
/// These keep the binary portable; otherwise one can rely on the "with" symbols.
/// TODO: evaluate performance impact of removing the thread statics.

/// True when SHA-NI is compiled in (with_shani) and try_shani() succeeds.
inline bool have_shani() NOEXCEPT
{
    // Without the build option there is nothing to probe.
    if constexpr (!with_shani)
        return false;
    else
        return try_shani();
}

/// True when AVX512 is compiled in (with_avx512) and try_avx512() succeeds.
inline bool have_avx512() NOEXCEPT
{
    // Without the build option there is nothing to probe.
    if constexpr (!with_avx512)
        return false;
    else
        return try_avx512();
}

/// True when AVX2 is compiled in (with_avx2) and try_avx2() succeeds.
inline bool have_avx2() NOEXCEPT
{
    // Without the build option there is nothing to probe.
    if constexpr (!with_avx2)
        return false;
    else
        return try_avx2();
}

/// True when SSE4.1 is compiled in (with_sse41) and try_sse41() succeeds.
inline bool have_sse41() NOEXCEPT
{
    // Without the build option there is nothing to probe.
    if constexpr (!with_sse41)
        return false;
    else
        return try_sse41();
}

/// True when ARM Neon is compiled in (with_neon) and try_neon() succeeds.
inline bool have_neon() NOEXCEPT
{
    // Gate on the Neon build constant and the Neon probe; SHA-NI is an
    // unrelated Intel extension and must not decide Neon availability.
    if constexpr (with_neon)
        return try_neon();
    else
        return false;
}

/// Type system helpers.
/// ---------------------------------------------------------------------------
/// xint types are always defined, though are mocked when not compiled.
/// Use with_ constants to check for compiled option and have_ functions to
Expand Down Expand Up @@ -232,11 +188,11 @@ template <typename Extended, if_extended<Extended> = true>
inline bool have() NOEXCEPT
{
if constexpr (is_same_type<Extended, xint512_t>)
return have_avx512();
return with_avx512;
else if constexpr (is_same_type<Extended, xint256_t>)
return have_avx2();
return with_avx2;
else if constexpr (is_same_type<Extended, xint128_t>)
return have_sse41();
return with_sse41;
else return false;
}

Expand All @@ -246,11 +202,11 @@ template <typename Integral, size_t Lanes,
inline bool have_lanes() NOEXCEPT
{
if constexpr (capacity<xint512_t, Integral> == Lanes)
return have_avx512();
return with_avx512;
else if constexpr (capacity<xint256_t, Integral> == Lanes)
return have_avx2();
return with_avx2;
else if constexpr (capacity<xint128_t, Integral> == Lanes)
return have_sse41();
return with_sse41;
else return false;
}

Expand Down
90 changes: 16 additions & 74 deletions test/intrinsics/haves.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@

BOOST_AUTO_TEST_SUITE(intrinsics_haves_tests)

// helper
template <typename>
constexpr bool is_defined = true;

// Build symbols to constexpr.
// ----------------------------------------------------------------------------

Expand Down Expand Up @@ -56,10 +52,6 @@ constexpr bool is_defined = true;
// try()
// ----------------------------------------------------------------------------

////test/intrinsics/haves.cpp(99): error: in "intrinsics_haves_tests/intrinsics_haves__try_avx2__always__match": check tryit == with_avx2 has failed [true != false]
////test/intrinsics/haves.cpp(113): error: in "intrinsics_haves_tests/intrinsics_haves__try_sse41__always__match": check tryit == with_sse41 has failed [true != false]
////test/intrinsics/haves.cpp(129): error: in "intrinsics_haves_tests/intrinsics_haves__try_shani__always__match": check tryit == with_shani has failed [true != false]

BOOST_AUTO_TEST_CASE(intrinsics_haves__try_avx512__always__match)
{
uint64_t extended{};
Expand Down Expand Up @@ -124,17 +116,16 @@ BOOST_AUTO_TEST_CASE(intrinsics_haves__try_shani__always__match)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx512__expected)
{
const auto have512 = have_avx512();
auto have = false;

have = have_lanes<uint64_t, 8>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint32_t, 16>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint16_t, 32>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint8_t, 64>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);

have = have_lanes<uint64_t, 7>();
BOOST_CHECK(!have);
Expand All @@ -148,17 +139,16 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx512__expected)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx2__expected)
{
const auto have256 = have_avx2();
auto have = false;

have = have_lanes<uint64_t, 4>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint32_t, 8>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint16_t, 16>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint8_t, 32>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);

have = have_lanes<uint64_t, 3>();
BOOST_CHECK(!have);
Expand All @@ -172,17 +162,16 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx2__expected)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__sse41__expected)
{
const auto have128 = have_sse41();
auto have = false;

have = have_lanes<uint64_t, 2>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint32_t, 4>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint16_t, 8>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint8_t, 16>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);

have = have_lanes<uint64_t, 1>();
BOOST_CHECK(!have);
Expand All @@ -194,60 +183,13 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__sse41__expected)
BOOST_CHECK(!have);
}

// have() [CI matrix platform assumptions]
// ----------------------------------------------------------------------------
// These use BOOST_WARN to let us know if vectorization did not execute due to
// CI platform processor configuration. Currently all CI platforms have SSE41
// and AVX2, while about 50% have AVX512BW. Windows platforms now have SHANI.

BOOST_AUTO_TEST_CASE(intrinsics_haves__have_avx512__when_defined__true)
{
#if defined(HAVE_AVX512)
BOOST_WARN(have_avx512());
#else
BOOST_REQUIRE(!have_avx512());
#endif
}

BOOST_AUTO_TEST_CASE(intrinsics_haves__have_avx2__when_defined__true)
{
#if defined(HAVE_AVX2)
BOOST_WARN(have_avx2());
#else
BOOST_REQUIRE(!have_avx2());
#endif
}

BOOST_AUTO_TEST_CASE(intrinsics_haves__have_sse41__when_defined__true)
{
#if defined(HAVE_SSE41)
BOOST_WARN(have_sse41());
#else
BOOST_REQUIRE(!have_sse41());
#endif
}

BOOST_AUTO_TEST_CASE(intrinsics_haves__have_shani__when_defined__true)
{
#if defined(HAVE_SHANI)
BOOST_WARN(have_shani());
#else
BOOST_REQUIRE(!have_shani());
#endif
}

BOOST_AUTO_TEST_CASE(intrinsics_haves__have_neon__always__when_defined__true)
{
#if defined(HAVE_NEON)
BOOST_WARN(have_neon());
#else
BOOST_REQUIRE(!have_neon());
#endif
}

// is_extended
// ----------------------------------------------------------------------------

// helper
template <typename>
constexpr bool is_defined = true;

// is_extended is true even with mock type.
static_assert(!is_extended<uint32_t>);
static_assert(is_extended<xint128_t>);
Expand Down
24 changes: 12 additions & 12 deletions test/intrinsics/xcpu/functional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ BOOST_AUTO_TEST_SUITE(functional_tests)
#if defined(HAVE_SSE41)
BOOST_AUTO_TEST_CASE(functional__sse4__set32__get_expected)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = set<xint128_t>(0, 1, 2, 3);
const auto word0 = get<uint32_t, 0>(xword);
Expand All @@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = set<xint128_t>(0, 1);
const auto word0 = get<uint64_t, 0>(xword);
Expand All @@ -58,7 +58,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__set64__get_expected)
#if defined(HAVE_AVX2)
BOOST_AUTO_TEST_CASE(functional__avx2__set32__get_expected)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = set<xint256_t>(0, 1, 2, 3, 4, 5, 6, 7);
const auto word0 = get<uint32_t, 0>(xword);
Expand All @@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = set<xint256_t>(0, 1, 2, 3);
const auto word0 = get<uint64_t, 0>(xword);
Expand All @@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__set64__get_expected)
#if defined(HAVE_AVX512)
BOOST_AUTO_TEST_CASE(functional__avx512__set32__get_expected)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = set<xint512_t>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const auto word0 = get<uint32_t, 0>(xword);
Expand Down Expand Up @@ -143,7 +143,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = set<xint512_t>(0, 1, 2, 3, 4, 5, 6, 7);
const auto word0 = get<uint64_t, 0>(xword);
Expand Down Expand Up @@ -174,7 +174,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__set64__get_expected)
#if defined(HAVE_SSE41)
BOOST_AUTO_TEST_CASE(functional__sse4__byteswap32__expected)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = byteswap<uint32_t>(set<xint128_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004));
Expand All @@ -192,7 +192,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__byteswap64__expected)
{
if constexpr (!build_x32)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = byteswap<uint64_t>(set<xint128_t>(
0x0000000000000001, 0x0000000000000002));
Expand All @@ -208,7 +208,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__byteswap64__expected)
#if defined(HAVE_AVX2)
BOOST_AUTO_TEST_CASE(functional__avx2__byteswap32__expected)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = byteswap<uint32_t>(set<xint256_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004,
Expand All @@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__byteswap64__expected)
{
if constexpr (!build_x32)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = byteswap<uint64_t>(set<xint256_t>(
0x0000000000000001, 0x0000000000000002,
Expand All @@ -256,7 +256,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__byteswap64__expected)
#if defined(HAVE_AVX512)
BOOST_AUTO_TEST_CASE(functional__avx512__byteswap32__get_expected)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = byteswap<uint32_t>(set<xint512_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004,
Expand Down Expand Up @@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__byteswap64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = byteswap<uint64_t>(set<xint512_t>(
0x0000000000000001,
Expand Down
Loading