diff --git a/include/bitcoin/system/hash/sha/algorithm.hpp b/include/bitcoin/system/hash/sha/algorithm.hpp index ceaa903b62..3903148da0 100644 --- a/include/bitcoin/system/hash/sha/algorithm.hpp +++ b/include/bitcoin/system/hash/sha/algorithm.hpp @@ -76,16 +76,15 @@ class algorithm using iblocks_t = iterable; using digests_t = std::vector; - /// Constants (and count_t). + /// Constants. /// ----------------------------------------------------------------------- + /// count_t is uint64_t (sha160/256) or uint128_t (sha512). /// All extended integer intrinsics currently have a "64 on 32" limit. - static constexpr auto count_bits = SHA::block_words * SHA::word_bytes; static constexpr auto count_bytes = bytes; using count_t = unsigned_exact_type>; - static constexpr auto caching = Cached; static constexpr auto limit_bits = maximum - count_bits; static constexpr auto limit_bytes = to_floored_bytes(limit_bits); static constexpr auto big_end_count = true; @@ -102,8 +101,6 @@ class algorithm /// Double hashing (sha256/512). /// ----------------------------------------------------------------------- - static constexpr void reinput(auto& buffer, const auto& state) NOEXCEPT; - template static constexpr digest_t double_hash(const ablocks_t& blocks) NOEXCEPT; static constexpr digest_t double_hash(const block_t& block) NOEXCEPT; @@ -111,11 +108,6 @@ class algorithm static constexpr digest_t double_hash(const half_t& left, const half_t& right) NOEXCEPT; static digest_t double_hash(iblocks_t&& blocks) NOEXCEPT; - /// Merkle hashing (sha256/512). - /// ----------------------------------------------------------------------- - static VCONSTEXPR digests_t& merkle_hash(digests_t& digests) NOEXCEPT; - static VCONSTEXPR digest_t merkle_root(digests_t&& digests) NOEXCEPT; - /// Streamed hashing (explicitly finalized). 
/// ----------------------------------------------------------------------- static void accumulate(state_t& state, iblocks_t&& blocks) NOEXCEPT; @@ -125,10 +117,53 @@ class algorithm static constexpr digest_t finalize_second(const state_t& state) NOEXCEPT; static constexpr digest_t finalize_double(state_t& state, size_t blocks) NOEXCEPT; + /// Merkle hashing (sha256/512). + /// ----------------------------------------------------------------------- + static VCONSTEXPR digests_t& merkle_hash(digests_t& digests) NOEXCEPT; + static VCONSTEXPR digest_t merkle_root(digests_t&& digests) NOEXCEPT; + protected: - /// Functions + /// Intrinsics constants. + /// ----------------------------------------------------------------------- + + static constexpr auto use_shani = Native && system::with_shani; + static constexpr auto use_neon = Native && system::with_neon; + static constexpr auto use_x128 = Vector && system::with_sse41; + static constexpr auto use_x256 = Vector && system::with_avx2; + static constexpr auto use_x512 = Vector && system::with_avx512; + + template + static constexpr auto is_valid_lanes = + (Lanes == 16u || Lanes == 8u || Lanes == 4u || Lanes == 2u); + + static constexpr auto min_lanes = + (use_x128 ? bytes<128> : + (use_x256 ? bytes<256> : + (use_x512 ? bytes<512> : 0))) / SHA::word_bytes; + + /// Intrinsics types. /// ----------------------------------------------------------------------- + + /// Extended integer capacity for uint32_t/uint64_t is 2/4/8/16 only. + template > = true> + using xblock_t = std_array; + + template = true> + using xbuffer_t = std_array; + + template = true> + using xstate_t = std_array; + + template = true> + using xchunk_t = std_array; + using uint = unsigned int; + using idigests_t = mutable_iterable; + using pad_t = std_array; + + /// Functions. 
+ /// ----------------------------------------------------------------------- INLINE static constexpr auto parity(auto x, auto y, auto z) NOEXCEPT; INLINE static constexpr auto choice(auto x, auto y, auto z) NOEXCEPT; @@ -144,9 +179,12 @@ class algorithm INLINE static constexpr auto Sigma0(auto x) NOEXCEPT; INLINE static constexpr auto Sigma1(auto x) NOEXCEPT; - /// Compression + /// Compression. /// ----------------------------------------------------------------------- + template + INLINE static constexpr auto extract(Word a) NOEXCEPT; + template static CONSTEVAL auto functor() NOEXCEPT; @@ -165,26 +203,33 @@ class algorithm template static constexpr void compress_(auto& state, const auto& buffer) NOEXCEPT; template - static constexpr void compress(auto& state, const auto& buffer) NOEXCEPT; + static constexpr void compress(state_t& state, const buffer_t& buffer) NOEXCEPT; - /// Message Scheduling + /// Message scheduling. /// ----------------------------------------------------------------------- template INLINE static constexpr void prepare(auto& buffer) NOEXCEPT; INLINE static constexpr void add_k(auto& buffer) NOEXCEPT; static constexpr void schedule_(auto& buffer) NOEXCEPT; - static constexpr void schedule(auto& buffer) NOEXCEPT; + static constexpr void schedule(buffer_t& buffer) NOEXCEPT; - /// Parsing (endian sensitive) + /// Parsing (endian sensitive). /// ----------------------------------------------------------------------- + INLINE static constexpr void input(buffer_t& buffer, const block_t& block) NOEXCEPT; INLINE static constexpr void input_left(buffer_t& buffer, const half_t& half) NOEXCEPT; INLINE static constexpr void input_right(buffer_t& buffer, const half_t& half) NOEXCEPT; INLINE static constexpr digest_t output(const state_t& state) NOEXCEPT; - /// Padding + /// Padding. 
/// ----------------------------------------------------------------------- + + template + static CONSTEVAL buffer_t scheduled_pad() NOEXCEPT; + static CONSTEVAL chunk_t chunk_pad() NOEXCEPT; + static CONSTEVAL pad_t stream_pad() NOEXCEPT; + template static constexpr void schedule_n(buffer_t& buffer) NOEXCEPT; static constexpr void schedule_n(buffer_t& buffer, size_t blocks) NOEXCEPT; @@ -192,47 +237,12 @@ class algorithm static constexpr void pad_half(buffer_t& buffer) NOEXCEPT; static constexpr void pad_n(buffer_t& buffer, count_t blocks) NOEXCEPT; -/// Block iteration. -/// --------------------------------------------------------------------------- -protected: - template - INLINE static constexpr void iterate_(state_t& state, - const ablocks_t& blocks) NOEXCEPT; - INLINE static void iterate_(state_t& state, iblocks_t& blocks) NOEXCEPT; - - template - INLINE static constexpr void iterate(state_t& state, - const ablocks_t& blocks) NOEXCEPT; - INLINE static void iterate(state_t& state, iblocks_t& blocks) NOEXCEPT; - -private: - using pad_t = std_array; - - template - static CONSTEVAL buffer_t scheduled_pad() NOEXCEPT; - static CONSTEVAL chunk_t chunk_pad() NOEXCEPT; - static CONSTEVAL pad_t stream_pad() NOEXCEPT; - -/// Vectorization. -/// --------------------------------------------------------------------------- -protected: - /// Extended integer capacity for uint32_t/uint64_t is 2/4/8/16 only. - template - static constexpr auto is_valid_lanes = - (Lanes == 16u || Lanes == 8u || Lanes == 4u || Lanes == 2u); + /// Double hashing. + /// ----------------------------------------------------------------------- - template > = true> - using xblock_t = std_array; - template = true> - using xbuffer_t = std_array; - template = true> - using xstate_t = std_array; - template = true> - using xchunk_t = std_array; - using idigests_t = mutable_iterable; + static constexpr void reinput(auto& buffer, const auto& state) NOEXCEPT; - /// Common. + /// Iteration. 
/// ----------------------------------------------------------------------- template @@ -242,7 +252,35 @@ class algorithm INLINE static void xinput(xbuffer_t& xbuffer, iblocks_t& blocks) NOEXCEPT; - /// Merkle Hash. + template = true> + INLINE static Word extract(xWord a) NOEXCEPT; + + template + INLINE static void sequential_compress(state_t& state, + const xbuffer_t& xbuffer) NOEXCEPT; + + template = true> + INLINE static void vector_schedule_sequential_compress(state_t& state, + iblocks_t& blocks) NOEXCEPT; + + template + INLINE static void iterate_vector(state_t& state, + const ablocks_t& blocks) NOEXCEPT; + INLINE static void iterate_vector(state_t& state, + iblocks_t& blocks) NOEXCEPT; + + template + INLINE static constexpr void iterate_(state_t& state, + const ablocks_t& blocks) NOEXCEPT; + INLINE static void iterate_(state_t& state, iblocks_t& blocks) NOEXCEPT; + + template + INLINE static constexpr void iterate(state_t& state, + const ablocks_t& blocks) NOEXCEPT; + INLINE static void iterate(state_t& state, iblocks_t& blocks) NOEXCEPT; + + /// Merkle hashing. /// ----------------------------------------------------------------------- template @@ -264,32 +302,14 @@ class algorithm INLINE static digest_t unpack(const xstate_t& xstate) NOEXCEPT; template - INLINE static void output(idigests_t& digests, + INLINE static void xoutput(idigests_t& digests, const xstate_t& xstate) NOEXCEPT; - /// Message Schedule (block vectorization). 
- /// ----------------------------------------------------------------------- - - template - INLINE static constexpr auto extract(Word a) NOEXCEPT; - - template = true> - INLINE static Word extract(xWord a) NOEXCEPT; - - template - INLINE static void sequential_compress(state_t& state, - const xbuffer_t& xbuffer) NOEXCEPT; - template = true> - INLINE static void vector_schedule_sequential_compress(state_t& state, - iblocks_t& blocks) NOEXCEPT; - - template - INLINE static void iterate_vector(state_t& state, - const ablocks_t& blocks) NOEXCEPT; - INLINE static void iterate_vector(state_t& state, - iblocks_t& blocks) NOEXCEPT; + INLINE static void merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT; + INLINE static void merkle_hash_vector(digests_t& digests) NOEXCEPT; + VCONSTEXPR static void merkle_hash_(digests_t& digests, + size_t offset=zero) NOEXCEPT; /// sigma0 vectorization. /// ----------------------------------------------------------------------- @@ -300,6 +320,7 @@ class algorithm template INLINE static void prepare1(buffer_t& buffer, const auto& xsigma0) NOEXCEPT; + template INLINE static void prepare8(buffer_t& buffer) NOEXCEPT; @@ -309,51 +330,35 @@ class algorithm /// Native. 
/// ----------------------------------------------------------------------- -protected: - using cword_t = xint128_t; - static constexpr auto cratio = sizeof(cword_t) / SHA::word_bytes; - static constexpr auto crounds = SHA::rounds / cratio; - using cbuffer_t = std_array; - using cstate_t = std_array; + ////using cword_t = xint128_t; + ////static constexpr auto cratio = sizeof(cword_t) / SHA::word_bytes; + ////static constexpr auto crounds = SHA::rounds / cratio; + ////using cbuffer_t = std_array; + ////using cstate_t = std_array; template INLINE static void schedule_native(xbuffer_t& xbuffer) NOEXCEPT; INLINE static void schedule_native(buffer_t& buffer) NOEXCEPT; - template + template INLINE static void compress_native(xstate_t& xstate, const xbuffer_t& xbuffer) NOEXCEPT; - template - INLINE static void compress_native(state_t& state, - const buffer_t& buffer) NOEXCEPT; - /// Merkle. - /// ----------------------------------------------------------------------- -protected: - VCONSTEXPR static void merkle_hash_(digests_t& digests, - size_t offset = zero) NOEXCEPT; - - template = true> - INLINE static void merkle_hash_vector(idigests_t& digests, - iblocks_t& blocks) NOEXCEPT; + template + INLINE static void compress_native(state_t& state, + const xbuffer_t& xbuffer) NOEXCEPT; - INLINE static void merkle_hash_vector(digests_t& digests) NOEXCEPT; + template + INLINE static void compress_native(state_t& state, + const buffer_t& buffer) NOEXCEPT; public: - static constexpr auto use_neon = Native && system::with_neon; - static constexpr auto use_shani = Native && system::with_shani; + /// Summary public values. 
+ /// ----------------------------------------------------------------------- + static constexpr auto caching = Cached; static constexpr auto native = use_shani || use_neon; - - static constexpr auto use_x128 = Vector && system::with_sse41; - static constexpr auto use_x256 = Vector && system::with_avx2; - static constexpr auto use_x512 = Vector && system::with_avx512; static constexpr auto vector = (use_x128 || use_x256 || use_x512) && !(build_x32 && is_same_size); - - static constexpr auto min_lanes = - (use_x128 ? bytes<128> : - (use_x256 ? bytes<256> : - (use_x512 ? bytes<512> : 0))) / SHA::word_bytes; }; } // namespace sha diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp index c6feb98e07..ada7bae8f4 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp @@ -31,6 +31,16 @@ namespace sha { // protected // ---------------------------------------------------------------------------- +TEMPLATE +template +INLINE constexpr auto CLASS:: +extract(Word a) NOEXCEPT +{ + // Bypass lane extraction for non-expanded (normal form) buffer. + static_assert(Lane == zero); + return a; +} + TEMPLATE template CONSTEVAL auto CLASS:: @@ -99,16 +109,6 @@ round(auto a, auto b, auto c, auto& d, auto e, auto f, auto g, auto& h, // efgh = vsha256h2q(efgh, abcd, value); } -TEMPLATE -template -INLINE constexpr auto CLASS:: -extract(Word a) NOEXCEPT -{ - // Bypass lane extraction for non-expanded (normal form) buffer. - static_assert(Lane == zero); - return a; -} - TEMPLATE template INLINE constexpr void CLASS:: @@ -149,8 +149,24 @@ round(auto& state, const auto& wk) NOEXCEPT } } -// msvc++ not inlined in x32. 
-BC_PUSH_WARNING(NOT_INLINED) +TEMPLATE +INLINE constexpr void CLASS:: +summarize(auto& out, const auto& in) NOEXCEPT +{ + constexpr auto s = SHA::word_bits; + out[0] = f::add(out[0], in[0]); + out[1] = f::add(out[1], in[1]); + out[2] = f::add(out[2], in[2]); + out[3] = f::add(out[3], in[3]); + out[4] = f::add(out[4], in[4]); + + if constexpr (SHA::strength != 160) + { + out[5] = f::add(out[5], in[5]); + out[6] = f::add(out[6], in[6]); + out[7] = f::add(out[7], in[7]); + } +} TEMPLATE template @@ -252,31 +268,10 @@ compress_(auto& state, const auto& buffer) NOEXCEPT summarize(state, start); } -BC_POP_WARNING() - -TEMPLATE -INLINE constexpr void CLASS:: -summarize(auto& out, const auto& in) NOEXCEPT -{ - constexpr auto s = SHA::word_bits; - out[0] = f::add(out[0], in[0]); - out[1] = f::add(out[1], in[1]); - out[2] = f::add(out[2], in[2]); - out[3] = f::add(out[3], in[3]); - out[4] = f::add(out[4], in[4]); - - if constexpr (SHA::strength != 160) - { - out[5] = f::add(out[5], in[5]); - out[6] = f::add(out[6], in[6]); - out[7] = f::add(out[7], in[7]); - } -} - TEMPLATE template constexpr void CLASS:: -compress(auto& state, const auto& buffer) NOEXCEPT +compress(state_t& state, const buffer_t& buffer) NOEXCEPT { if (std::is_constant_evaluated()) { diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp index 25ba8a4ebc..03154d3d9c 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp @@ -122,8 +122,8 @@ xinput(xbuffer_t& xbuffer, iblocks_t& blocks) NOEXCEPT TEMPLATE template > - INLINE Word CLASS:: - extract(xWord a) NOEXCEPT +INLINE Word CLASS:: +extract(xWord a) NOEXCEPT { // Extract word from lane of vectorized buffer. return get(a); @@ -139,33 +139,33 @@ sequential_compress(state_t& state, const xbuffer_t& xbuffer) NOEXCEPT // Sequential compression uses non-expanded state (normal form). 
constexpr auto lanes = capacity; - compress<0>(state, xbuffer); - compress<1>(state, xbuffer); + compress_<0>(state, xbuffer); + compress_<1>(state, xbuffer); if constexpr (lanes >= 4) { - compress<2>(state, xbuffer); - compress<3>(state, xbuffer); + compress_<2>(state, xbuffer); + compress_<3>(state, xbuffer); } if constexpr (lanes >= 8) { - compress<4>(state, xbuffer); - compress<5>(state, xbuffer); - compress<6>(state, xbuffer); - compress<7>(state, xbuffer); + compress_<4>(state, xbuffer); + compress_<5>(state, xbuffer); + compress_<6>(state, xbuffer); + compress_<7>(state, xbuffer); } if constexpr (lanes >= 16) { - compress<8>(state, xbuffer); - compress<9>(state, xbuffer); - compress<10>(state, xbuffer); - compress<11>(state, xbuffer); - compress<12>(state, xbuffer); - compress<13>(state, xbuffer); - compress<14>(state, xbuffer); - compress<15>(state, xbuffer); + compress_<8>(state, xbuffer); + compress_<9>(state, xbuffer); + compress_<10>(state, xbuffer); + compress_<11>(state, xbuffer); + compress_<12>(state, xbuffer); + compress_<13>(state, xbuffer); + compress_<14>(state, xbuffer); + compress_<15>(state, xbuffer); } } @@ -273,9 +273,15 @@ iterate(state_t& state, const ablocks_t& blocks) NOEXCEPT { iterate_(state, blocks); } + else if constexpr (native) + { + // Multiple block shani message scheduling and compression optimization. + iterate_(state, blocks); + } else if constexpr (vector) { - // Multi-block vectorized message scheduling optimization. + // TODO: evaluate 4/8/16 lane message scheduling vs. shani scheduling. + // Multiple block vectorized message scheduling optimization. iterate_vector(state, blocks); } else @@ -288,9 +294,15 @@ TEMPLATE INLINE void CLASS:: iterate(state_t& state, iblocks_t& blocks) NOEXCEPT { - if constexpr (vector) + if constexpr (native) + { + // TODO: evaluate 4/8/16 lane message scheduling vs. shani scheduling. + // Multiple block shani message scheduling and compression optimization.
+ iterate_(state, blocks); + } + else if constexpr (vector) { - // Multi-block vectorized message scheduling optimization. + // Multiple block vectorized message scheduling optimization. iterate_vector(state, blocks); } else diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp index f08f2367a2..9e04c545e9 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp @@ -271,6 +271,7 @@ template INLINE typename CLASS::digest_t CLASS:: unpack(const xstate_t& xstate) NOEXCEPT { + // TODO: byteswap state in full one time before unpacking (vs. 8 times). return array_cast(state_t { get(byteswap(xstate[0])), @@ -287,7 +288,7 @@ unpack(const xstate_t& xstate) NOEXCEPT TEMPLATE template INLINE void CLASS:: -output(idigests_t& digests, const xstate_t& xstate) NOEXCEPT +xoutput(idigests_t& digests, const xstate_t& xstate) NOEXCEPT { constexpr auto lanes = capacity; BC_ASSERT(digests.size() >= lanes); @@ -354,6 +355,7 @@ merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT { if (blocks.size() >= lanes) { + // TODO: expose const structs to avoid local static. static auto initial = pack(H::get); xbuffer_t xbuffer{}; @@ -362,22 +364,22 @@ merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT { auto xstate = initial; - // input() advances block iterator by lanes. + // xinput() advances block iterator by lanes. xinput(xbuffer, blocks); - schedule(xbuffer); - compress(xstate, xbuffer); + schedule_(xbuffer); + compress_(xstate, xbuffer); schedule_1(xbuffer); - compress(xstate, xbuffer); + compress_(xstate, xbuffer); // Second hash reinput(xbuffer, xstate); pad_half(xbuffer); - schedule(xbuffer); + schedule_(xbuffer); xstate = initial; - compress(xstate, xbuffer); + compress_(xstate, xbuffer); - // output() advances digest iterator by lanes. 
- output(digests, xstate); + // xoutput() advances digest iterator by lanes. + xoutput(digests, xstate); } while (blocks.size() >= lanes); } diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp index 83fdffa16d..c27995f8ab 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp @@ -47,7 +47,7 @@ TEMPLATE INLINE void CLASS:: schedule_native(buffer_t& buffer) NOEXCEPT { - // TODO: + // TODO: single block compression. schedule_(buffer); } @@ -57,8 +57,17 @@ INLINE void CLASS:: compress_native(xstate_t& xstate, const xbuffer_t& xbuffer) NOEXCEPT { - // Merkle extended buffer is not native dispatched. - compress_(xstate, xbuffer); + // Merkle extended state/buffer is not native dispatched. + compress_(xstate, xbuffer); +} + +TEMPLATE +template +INLINE void CLASS:: +compress_native(state_t& state, const xbuffer_t& xbuffer) NOEXCEPT +{ + // Iterate extended buffer is not native dispatched. + compress_(state, xbuffer); } TEMPLATE @@ -66,8 +75,8 @@ template INLINE void CLASS:: compress_native(state_t& state, const buffer_t& buffer) NOEXCEPT { - // TODO: - compress_(state, buffer); + // TODO: single block compression. + compress_(state, buffer); } } // namespace sha diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp index a9ee56097a..b40f3d6290 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp @@ -58,6 +58,7 @@ input(buffer_t& buffer, const block_t& block) NOEXCEPT } else if constexpr (bc::is_little_endian) { + // TODO: evaluate 4/8/16 lane optimization using byteswap. 
const auto& in = array_cast(block); buffer[0] = native_from_big_end(in[0]); buffer[1] = native_from_big_end(in[1]); @@ -91,6 +92,7 @@ input_left(buffer_t& buffer, const half_t& half) NOEXCEPT if (std::is_constant_evaluated()) { + // TODO: evaluate 4/8 lane optimization using byteswap. constexpr auto size = SHA::word_bytes; from_big<0 * size>(buffer.at(0), half); from_big<1 * size>(buffer.at(1), half); @@ -139,6 +141,7 @@ input_right(buffer_t& buffer, const half_t& half) NOEXCEPT } else if constexpr (bc::is_little_endian) { + // TODO: evaluate 4/8 lane optimization using byteswap. const auto& in = array_cast(half); buffer[8] = native_from_big_end(in[0]); buffer[9] = native_from_big_end(in[1]); @@ -195,6 +198,7 @@ output(const state_t& state) NOEXCEPT } else { + // TODO: evaluate 4/8 lane optimization using byteswap. return array_cast(state_t { native_to_big_end(state[0]), diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_schedule.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_schedule.ipp index 63e368487d..ad84f99d76 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_schedule.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_schedule.ipp @@ -124,9 +124,6 @@ add_k(auto& buffer) NOEXCEPT buffer[r + 15] = f::addc(buffer[r + 15]); } -// msvc++ not inlined in x32. -BC_PUSH_WARNING(NOT_INLINED) - TEMPLATE constexpr void CLASS:: schedule_(auto& buffer) NOEXCEPT @@ -206,11 +203,9 @@ schedule_(auto& buffer) NOEXCEPT add_k(buffer); } -BC_POP_WARNING() - TEMPLATE constexpr void CLASS:: -schedule(auto& buffer) NOEXCEPT +schedule(buffer_t& buffer) NOEXCEPT { if (std::is_constant_evaluated()) { @@ -218,12 +213,11 @@ schedule(auto& buffer) NOEXCEPT } else if constexpr (native) { - // Single block shani message scheduling optimization. + // Single block (with shani) message scheduling optimization. schedule_native(buffer); } else if constexpr (vector) { - // [Multi-block vectorized scheduling is implemented by iterate().] 
// Single block (without shani) message scheduling optimization. schedule_sigma(buffer); }