Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor sha algorithm, fix perf test drift, comments. #1555

Merged
merged 4 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,6 @@ src_libbitcoin_system_la_SOURCES = \
src/hash/accumulator.cpp \
src/hash/checksum.cpp \
src/hash/siphash.cpp \
src/hash/vectorization/sha256_1_native.cpp \
src/hash/vectorization/sha256_2_shani.cpp \
src/hash/vectorization/sha256_4_neon.cpp \
src/hash/vectorization/sha256_4_sse4.cpp \
src/hash/vectorization/sha256_4_sse41.cpp \
src/hash/vectorization/sha256_8_avx2.cpp \
src/math/math.cpp \
src/radix/base_10.cpp \
src/radix/base_2048.cpp \
Expand Down Expand Up @@ -622,6 +616,7 @@ include_bitcoin_system_impl_hash_sha_HEADERS = \
include/bitcoin/system/impl/hash/sha/algorithm_double.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_functions.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_native.ipp \
include/bitcoin/system/impl/hash/sha/algorithm_padding.ipp \
Expand Down
6 changes: 0 additions & 6 deletions builds/cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -529,12 +529,6 @@ add_library( ${CANONICAL_LIB_NAME}
"../../src/hash/accumulator.cpp"
"../../src/hash/checksum.cpp"
"../../src/hash/siphash.cpp"
"../../src/hash/vectorization/sha256_1_native.cpp"
"../../src/hash/vectorization/sha256_2_shani.cpp"
"../../src/hash/vectorization/sha256_4_neon.cpp"
"../../src/hash/vectorization/sha256_4_sse4.cpp"
"../../src/hash/vectorization/sha256_4_sse41.cpp"
"../../src/hash/vectorization/sha256_8_avx2.cpp"
"../../src/math/math.cpp"
"../../src/radix/base_10.cpp"
"../../src/radix/base_2048.cpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,6 @@
<ClCompile Include="..\..\..\..\src\hash\accumulator.cpp" />
<ClCompile Include="..\..\..\..\src\hash\checksum.cpp" />
<ClCompile Include="..\..\..\..\src\hash\siphash.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_1_native.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_2_shani.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_4_neon.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_4_sse4.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_4_sse41.cpp" />
<ClCompile Include="..\..\..\..\src\hash\vectorization\sha256_8_avx2.cpp" />
<ClCompile Include="..\..\..\..\src\math\math.cpp" />
<ClCompile Include="..\..\..\..\src\radix\base_10.cpp" />
<ClCompile Include="..\..\..\..\src\radix\base_2048.cpp" />
Expand Down Expand Up @@ -548,6 +542,7 @@
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_double.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_functions.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_iterate.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_konstant.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_merkle.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_native.ipp" />
<None Include="..\..\..\..\include\bitcoin\system\impl\hash\sha\algorithm_padding.ipp" />
Expand Down
140 changes: 61 additions & 79 deletions builds/msvc/vs2022/libbitcoin-system/libbitcoin-system.vcxproj.filters

Large diffs are not rendered by default.

61 changes: 48 additions & 13 deletions include/bitcoin/system/hash/sha/algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ class algorithm
/// Intrinsics types.
/// -----------------------------------------------------------------------

/// Extended integer capacity for uint32_t/uint64_t is 2/4/8/16 only.
/// Expanded types are multiples of buffer/state, for Lanes concurrent blocks.
/// Multiple blocks are "striped" across the expanded buffer in xWords.
template <size_t Lanes, bool_if<is_valid_lanes<Lanes>> = true>
using xblock_t = std_array<words_t, Lanes>;

Expand All @@ -157,6 +158,17 @@ class algorithm
template <typename xWord, if_extended<xWord> = true>
using xchunk_t = std_array<xWord, SHA::state_words>;

/// Wide types cast buffer_t/state_t to xWord for single-block concurrency.
/// This is not multi-block or block striping, just larger words.
template <typename xWord, if_extended<xWord> = true>
using wbuffer_t = std_array<xWord, sizeof(buffer_t) / sizeof(xWord)>;

template <typename xWord, if_extended<xWord> = true>
using wstate_t = std_array<xWord, sizeof(state_t) / sizeof(xWord)>;

/// Other types.
/// -----------------------------------------------------------------------

using uint = unsigned int;
using idigests_t = mutable_iterable<digest_t>;
using pad_t = std_array<word_t, subtract(SHA::block_words,
Expand Down Expand Up @@ -210,7 +222,6 @@ class algorithm

template <size_t Round>
INLINE static constexpr void prepare(auto& buffer) NOEXCEPT;
INLINE static constexpr void add_k(auto& buffer) NOEXCEPT;
static constexpr void schedule_(auto& buffer) NOEXCEPT;
static constexpr void schedule(buffer_t& buffer) NOEXCEPT;

Expand Down Expand Up @@ -242,7 +253,7 @@ class algorithm

static constexpr void reinput(auto& buffer, const auto& state) NOEXCEPT;

/// Iteration.
/// Iteration (message scheduling vectorized for multiple blocks).
/// -----------------------------------------------------------------------

template <size_t Word, size_t Lanes>
Expand Down Expand Up @@ -280,7 +291,7 @@ class algorithm
const ablocks_t<Size>& blocks) NOEXCEPT;
INLINE static void iterate(state_t& state, iblocks_t& blocks) NOEXCEPT;

/// Merkle hashing.
/// Merkle hashing (fully vectorized for multiple blocks).
/// -----------------------------------------------------------------------

template <typename xWord>
Expand Down Expand Up @@ -311,7 +322,7 @@ class algorithm
VCONSTEXPR static void merkle_hash_(digests_t& digests,
size_t offset=zero) NOEXCEPT;

/// sigma0 vectorization.
/// sigma0 vectorization (single blocks).
/// -----------------------------------------------------------------------

template <typename xWord, if_extended<xWord> = true>
Expand All @@ -328,22 +339,45 @@ class algorithm
INLINE static void schedule_sigma(xbuffer_t<xWord>& xbuffer) NOEXCEPT;
INLINE static void schedule_sigma(buffer_t& buffer) NOEXCEPT;

/// Native.
/// [K]onstant vectorization (single and multiple blocks).
/// -----------------------------------------------------------------------

template <size_t Round>
INLINE static constexpr void konstant(auto& buffer) NOEXCEPT;

template<size_t Round, typename xWord>
INLINE static void vector_konstant(wbuffer_t<xWord>& wbuffer) NOEXCEPT;
INLINE static void vector_konstant(buffer_t& buffer) NOEXCEPT;

template <typename xWord>
static constexpr void konstant(xbuffer_t<xWord>& xbuffer) NOEXCEPT;
static constexpr void konstant(buffer_t& buffer) NOEXCEPT;
static constexpr void konstant_(auto& buffer) NOEXCEPT;

/// Native SHA optimizations (single blocks).
/// -----------------------------------------------------------------------
static constexpr auto native_lanes = capacity<xint128_t, word_t>;
static constexpr auto native_rounds = SHA::rounds / native_lanes;
using cbuffer_t = std_array<xint128_t, native_rounds>;
using cstate_t = std_array<xint128_t, two>;

template<size_t Round>
INLINE static void prepare(cbuffer_t& buffer) NOEXCEPT;
INLINE static void add_k(cbuffer_t& buffer) NOEXCEPT;
static void schedule(cbuffer_t& buffer) NOEXCEPT;
INLINE static void prepare_native(wbuffer_t<xint128_t>& wbuffer) NOEXCEPT;
static void schedule(wbuffer_t<xint128_t>& wbuffer) NOEXCEPT;

template <typename xWord>
INLINE static void schedule_native(xbuffer_t<xWord>& xbuffer) NOEXCEPT;
INLINE static void schedule_native(buffer_t& buffer) NOEXCEPT;

template<size_t Round, size_t Lane>
INLINE static void round_native(wstate_t<xint128_t>& state,
const wbuffer_t<xint128_t>& wk) NOEXCEPT;

INLINE static void shuffle(wstate_t<xint128_t>& wstate) NOEXCEPT;
INLINE static void unshuffle(wstate_t<xint128_t>& wstate) NOEXCEPT;
INLINE static void summarize_native(wstate_t<xint128_t>& out,
const wstate_t<xint128_t>& in) NOEXCEPT;

template <size_t Lane>
INLINE static void compress_native(wstate_t<xint128_t>& state,
const wbuffer_t<xint128_t>& wbuffer) NOEXCEPT;

template <typename xWord, size_t Lane>
INLINE static void compress_native(xstate_t<xWord>& xstate,
const xbuffer_t<xWord>& xbuffer) NOEXCEPT;
Expand Down Expand Up @@ -381,6 +415,7 @@ BC_PUSH_WARNING(NO_POINTER_ARITHMETIC)
BC_PUSH_WARNING(NO_ARRAY_INDEXING)

#include <bitcoin/system/impl/hash/sha/algorithm_compress.ipp>
#include <bitcoin/system/impl/hash/sha/algorithm_konstant.ipp>
#include <bitcoin/system/impl/hash/sha/algorithm_double.ipp>
#include <bitcoin/system/impl/hash/sha/algorithm_functions.ipp>
#include <bitcoin/system/impl/hash/sha/algorithm_iterate.ipp>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ template <size_t Lane>
constexpr void CLASS::
compress_(auto& state, const auto& buffer) NOEXCEPT
{
// SHA-NI/256: 64/4 = 16 quad rounds, 8/4 = 2 state elements.
// This is a copy (state type varies due to vectorization).
const auto start = state;

Expand Down
Loading
Loading