Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize sha intrinsics. #1565

Merged
merged 7 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ build
/configure
/libtool
.dirstamp
/.vs
15 changes: 6 additions & 9 deletions include/bitcoin/system/hash/sha/algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ class algorithm
INLINE static constexpr void input(buffer_t& buffer, const block_t& block) NOEXCEPT;
INLINE static constexpr void input_left(auto& buffer, const half_t& half) NOEXCEPT;
INLINE static constexpr void input_right(auto& buffer, const half_t& half) NOEXCEPT;
INLINE static constexpr void reinput_left(auto& buffer, const auto& left) NOEXCEPT;
INLINE static constexpr void reinput_right(auto& buffer, const auto& right) NOEXCEPT;
INLINE static constexpr digest_t output(const state_t& state) NOEXCEPT;

/// Padding.
Expand All @@ -257,12 +259,6 @@ class algorithm
static constexpr void pad_half(auto& buffer) NOEXCEPT;
static constexpr void pad_n(auto& buffer, count_t blocks) NOEXCEPT;

/// Double hashing.
/// -----------------------------------------------------------------------

static constexpr void reinput_left(auto& buffer, const auto& left) NOEXCEPT;
static constexpr void reinput_right(auto& buffer, const auto& right) NOEXCEPT;

/// Iteration (message scheduling vectorized for multiple blocks).
/// -----------------------------------------------------------------------

Expand Down Expand Up @@ -386,9 +382,12 @@ class algorithm
xint128_t message) NOEXCEPT;

template <bool Swap>
static void native_rounds(xint128_t& lo, xint128_t& hi,
INLINE static void native_rounds(xint128_t& lo, xint128_t& hi,
const block_t& block) NOEXCEPT;

INLINE static void native_rounds(xint128_t& lo, xint128_t& hi,
const half_t& left, const chunk_t& pad) NOEXCEPT;

template <bool Swap>
static void native_transform(state_t& state, const auto& block) NOEXCEPT;
static void native_transform(state_t& state, iblocks_t& blocks) NOEXCEPT;
Expand All @@ -409,8 +408,6 @@ class algorithm
static digest_t native_double_hash(const half_t& half) NOEXCEPT;
static digest_t native_double_hash(const half_t& left, const half_t& right) NOEXCEPT;



public:
/// Summary public values.
/// -----------------------------------------------------------------------
Expand Down
21 changes: 2 additions & 19 deletions include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -243,25 +243,8 @@ template <size_t Lane>
constexpr void CLASS::
compress(state_t& state, const buffer_t& buffer) NOEXCEPT
{
if (std::is_constant_evaluated())
{
compress_<Lane>(state, buffer);
}
////else if constexpr (native)
////{
//// // Single block shani compression optimization.
//// compress_native<Lane>(state, buffer);
////}
////else if constexpr (vector)
////{
//// // Compression is not vectorized within a block, however this is
//// // feasible but may not be optimal (see round() comments).
//// compress_vector(buffer);
////}
else
{
compress_<Lane>(state, buffer);
}
// block-internal vectorization is suboptimal.
compress_<Lane>(state, buffer);
}

} // namespace sha
Expand Down
53 changes: 0 additions & 53 deletions include/bitcoin/system/impl/hash/sha/algorithm_double.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -28,59 +28,6 @@ namespace libbitcoin {
namespace system {
namespace sha {

// protected
// ----------------------------------------------------------------------------

TEMPLATE
INLINE constexpr void CLASS::
reinput_left(auto& buffer, const auto& left) NOEXCEPT
{
    // Writes the eight words of 'left' into the leading words of 'buffer'.
    // The buffer must hold at least SHA::state_words elements.
    using destination = decltype(buffer);
    static_assert(array_count<destination> >= SHA::state_words);

    // Runtime evaluation: a single assignment through array_cast.
    // NOTE(review): array_cast is presumably a view over the first
    // SHA::state_words elements of the buffer - confirm against its definition.
    if (!std::is_constant_evaluated())
    {
        using element = array_element<destination>;
        array_cast<element, SHA::state_words>(buffer) = left;
        return;
    }

    // Constant evaluation: element-wise copy, one word at a time.
    buffer.at(0) = left.at(0);
    buffer.at(1) = left.at(1);
    buffer.at(2) = left.at(2);
    buffer.at(3) = left.at(3);
    buffer.at(4) = left.at(4);
    buffer.at(5) = left.at(5);
    buffer.at(6) = left.at(6);
    buffer.at(7) = left.at(7);
}

TEMPLATE
INLINE constexpr void CLASS::
reinput_right(auto& buffer, const auto& right) NOEXCEPT
{
    // Writes the eight words of 'right' into 'buffer' words 8 through 15.
    // The static check mirrors reinput_left (at least SHA::state_words words).
    using destination = decltype(buffer);
    static_assert(array_count<destination> >= SHA::state_words);

    // Runtime evaluation: a single assignment through array_cast.
    // NOTE(review): the template arguments are presumably (element, count,
    // offset), selecting the second run of SHA::state_words elements - confirm
    // against the array_cast definition.
    if (!std::is_constant_evaluated())
    {
        using element = array_element<destination>;
        array_cast<element, SHA::state_words, SHA::state_words>(buffer) = right;
        return;
    }

    // Constant evaluation: element-wise copy, one word at a time.
    buffer.at(8) = right.at(0);
    buffer.at(9) = right.at(1);
    buffer.at(10) = right.at(2);
    buffer.at(11) = right.at(3);
    buffer.at(12) = right.at(4);
    buffer.at(13) = right.at(5);
    buffer.at(14) = right.at(6);
    buffer.at(15) = right.at(7);
}

// public
// ----------------------------------------------------------------------------
// These benefit from avoiding state endian transition and reusing buffer.
Expand Down
13 changes: 4 additions & 9 deletions include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -401,18 +401,13 @@ merkle_hash_vector(digests_t& digests) NOEXCEPT
auto idigests = idigests_t{ to_half(size), data };
const auto start = iblocks.size();

// Merkle hash vector dispatch.
// Always use if available.
if constexpr (use_x512)
merkle_hash_vector<xint512_t>(idigests, iblocks);

// Use if shani is not available or at least 32 blocks.
if constexpr (use_x256)
{
if constexpr (!native)
merkle_hash_vector<xint256_t>(idigests, iblocks);
else if (start >= 32_size)
merkle_hash_vector<xint256_t>(idigests, iblocks);
}
// Only use if shani is not available.
if constexpr (use_x256 && !native)
merkle_hash_vector<xint256_t>(idigests, iblocks);

// Only use if shani is not available.
if constexpr (use_x128 && !native)
Expand Down
9 changes: 8 additions & 1 deletion include/bitcoin/system/impl/hash/sha/algorithm_native.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ round_4(xint128_t& state0, xint128_t& state1, xint128_t message) NOEXCEPT

TEMPLATE
template <bool Swap>
void CLASS::
INLINE void CLASS::
native_rounds(xint128_t& lo, xint128_t& hi, const block_t& block) NOEXCEPT
{
const auto& wblock = array_cast<xint128_t>(block);
Expand Down Expand Up @@ -186,11 +186,13 @@ TEMPLATE
void CLASS::
native_transform(state_t& state, iblocks_t& blocks) NOEXCEPT
{
// Individual state vars are used vs. array to ensure register persistence.
auto& wstate = array_cast<xint128_t>(state);
auto lo = load(wstate[0]);
auto hi = load(wstate[1]);
shuffle(lo, hi);

// native_rounds must be inlined here (register boundary).
for (auto& block: blocks)
native_rounds<true>(lo, hi, block);

Expand All @@ -208,7 +210,10 @@ native_transform(state_t& state, const auto& block) NOEXCEPT
auto lo = load(wstate[0]);
auto hi = load(wstate[1]);
shuffle(lo, hi);

// native_rounds must be inlined here (register boundary).
native_rounds<Swap>(lo, hi, array_cast<byte_t>(block));

unshuffle(lo, hi);
store(wstate[0], lo);
store(wstate[1], hi);
Expand All @@ -228,6 +233,8 @@ native_finalize(state_t& state, const words_t& pad) NOEXCEPT
auto lo = load(wstate[0]);
auto hi = load(wstate[1]);
shuffle(lo, hi);

// native_rounds must be inlined here (register boundary).
native_rounds<false>(lo, hi, array_cast<byte_t>(pad));
unshuffle(lo, hi);

Expand Down
Loading
Loading