From cd1efac0622b56765aa9b89334206485ecc09f81 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Fri, 23 Feb 2024 21:27:30 +0100 Subject: [PATCH] chore(ricepp): force inlining, massively speeds up Windows version --- ricepp/include/ricepp/bitstream_reader.h | 21 +++++++++-------- ricepp/include/ricepp/bitstream_writer.h | 11 +++++---- ricepp/include/ricepp/byteswap.h | 12 ++++++---- ricepp/include/ricepp/detail/compiler.h | 30 ++++++++++++++++++++++++ ricepp/include/ricepp/detail/decode.h | 1 + ricepp/ricepp_cpuspecific.cpp | 13 ++++++---- 6 files changed, 65 insertions(+), 23 deletions(-) create mode 100644 ricepp/include/ricepp/detail/compiler.h diff --git a/ricepp/include/ricepp/bitstream_reader.h b/ricepp/include/ricepp/bitstream_reader.h index 449ebf083..709d1bf26 100644 --- a/ricepp/include/ricepp/bitstream_reader.h +++ b/ricepp/include/ricepp/bitstream_reader.h @@ -30,6 +30,7 @@ #include #include +#include namespace ricepp { @@ -46,10 +47,10 @@ class bitstream_reader final { : beg_{std::move(beg)} , end_{std::move(end)} {} - bool read_bit() { return read_bits_impl(1); } + RICEPP_FORCE_INLINE bool read_bit() { return read_bits_impl(1); } template - T read_bits(size_t num_bits) { + RICEPP_FORCE_INLINE T read_bits(size_t num_bits) { assert(num_bits <= std::numeric_limits::digits); T bits = 0; uint16_t pos = 0; @@ -68,7 +69,7 @@ class bitstream_reader final { return bits; } - size_t find_first_set() { + RICEPP_FORCE_INLINE size_t find_first_set() { size_t zeros = 0; if (bit_pos_ != 0) { if (peek_bit()) [[likely]] { @@ -103,24 +104,24 @@ class bitstream_reader final { } private: - bits_type read_bits_impl(size_t num_bits) { + RICEPP_FORCE_INLINE bits_type read_bits_impl(size_t num_bits) { auto bits = peek_bits(num_bits); skip_bits(num_bits); return bits; } - void skip_bits(size_t num_bits) { + RICEPP_FORCE_INLINE void skip_bits(size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); bit_pos_ += num_bits; bit_pos_ &= kBitsTypeBits - 1; } - bool peek_bit() { + RICEPP_FORCE_INLINE bool peek_bit() { assert(bit_pos_ > 0 && bit_pos_ < kBitsTypeBits); return (data_ >> bit_pos_) & 1; } - bits_type peek_bits(size_t num_bits) { + RICEPP_FORCE_INLINE bits_type peek_bits(size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); if (bit_pos_ == 0) [[unlikely]] { data_ = read_packet(); @@ -138,14 +139,14 @@ class bitstream_reader final { return bits; } - bits_type read_packet() { + RICEPP_FORCE_INLINE bits_type read_packet() { if (beg_ == end_) [[unlikely]] { throw std::out_of_range{"bitstream_reader::read_packet"}; } return read_packet_nocheck(); } - bits_type read_packet_nocheck() + RICEPP_FORCE_INLINE bits_type read_packet_nocheck() requires std::contiguous_iterator { bits_type bits{}; @@ -160,7 +161,7 @@ class bitstream_reader final { return byteswap(bits); } - bits_type read_packet_nocheck() + RICEPP_FORCE_INLINE bits_type read_packet_nocheck() requires(!std::contiguous_iterator) { bits_type bits{}; diff --git a/ricepp/include/ricepp/bitstream_writer.h b/ricepp/include/ricepp/bitstream_writer.h index c585c328a..d096c45d5 100644 --- a/ricepp/include/ricepp/bitstream_writer.h +++ b/ricepp/include/ricepp/bitstream_writer.h @@ -28,6 +28,7 @@ #include #include +#include namespace ricepp { @@ -57,12 +58,12 @@ class bitstream_writer final { bitstream_writer(OutputIt out) : out_{out} {} - void write_bit(bool bit) { + RICEPP_FORCE_INLINE void write_bit(bool bit) { assert(bit_pos_ < kBitsTypeBits); write_bits_impl(bit, 1); } - void write_bit(bool bit, size_t repeat) { + RICEPP_FORCE_INLINE void write_bit(bool bit, size_t repeat) { bits_type const bits = bit ? ~bits_type{} : bits_type{}; if (bit_pos_ != 0) [[likely]] { auto remaining_bits = kBitsTypeBits - bit_pos_; @@ -81,7 +82,7 @@ class bitstream_writer final { } template - void write_bits(T bits, size_t num_bits) { + RICEPP_FORCE_INLINE void write_bits(T bits, size_t num_bits) { static constexpr size_t kArgBits{std::numeric_limits::digits}; assert(bit_pos_ < kBitsTypeBits); assert(num_bits <= kArgBits); @@ -108,7 +109,7 @@ class bitstream_writer final { iterator_type iterator() const { return out_; } private: - void write_bits_impl(bits_type bits, size_t num_bits) { + RICEPP_FORCE_INLINE void write_bits_impl(bits_type bits, size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); if (num_bits < kBitsTypeBits) { bits &= (static_cast(1) << num_bits) - 1; @@ -122,7 +123,7 @@ class bitstream_writer final { } } - void write_packet(bits_type bits) { + RICEPP_FORCE_INLINE void write_packet(bits_type bits) { size_t const to_copy = bit_pos_ == 0 ? sizeof(bits_type) : (bit_pos_ + 7) / 8; bits = byteswap(bits); diff --git a/ricepp/include/ricepp/byteswap.h b/ricepp/include/ricepp/byteswap.h index 60eaaf1af..d38b23cd7 100644 --- a/ricepp/include/ricepp/byteswap.h +++ b/ricepp/include/ricepp/byteswap.h @@ -28,19 +28,22 @@ #include +#include + namespace ricepp { namespace detail { template -[[nodiscard]] constexpr T byteswap_fallback(T value) noexcept { +[[nodiscard]] RICEPP_FORCE_INLINE constexpr T +byteswap_fallback(T value) noexcept { auto value_repr = std::bit_cast>(value); ranges::reverse(value_repr); return std::bit_cast(value_repr); } template -[[nodiscard]] constexpr T byteswap(T value) noexcept { +[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept { #if __cpp_lib_byteswap >= 202110L return std::byteswap(value); #elif defined(__GNUC__) || defined(__clang__) @@ -73,7 +76,8 @@ template } // namespace detail template -[[nodiscard]] T byteswap(T value, std::endian byteorder) noexcept { +[[nodiscard]] RICEPP_FORCE_INLINE T byteswap(T value, + std::endian byteorder) noexcept { static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big); if constexpr (sizeof(T) > 1) { @@ -85,7 +89,7 @@ template } template -[[nodiscard]] constexpr T byteswap(T value) noexcept { +[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept { static_assert(std::endian::native == std::endian::little || std::endian::native == std::endian::big); if constexpr (sizeof(T) > 1 && byteorder != std::endian::native) { diff --git a/ricepp/include/ricepp/detail/compiler.h b/ricepp/include/ricepp/detail/compiler.h new file mode 100644 index 000000000..f64179b1e --- /dev/null +++ b/ricepp/include/ricepp/detail/compiler.h @@ -0,0 +1,30 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of ricepp. + * + * ricepp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ricepp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ricepp. If not, see . + */ + +#pragma once + +#if defined(__GNUC__) || defined(__clang__) +#define RICEPP_FORCE_INLINE inline __attribute__((__always_inline__)) +#elif defined(_MSC_VER) +#define RICEPP_FORCE_INLINE __forceinline +#else +#define RICEPP_FORCE_INLINE inline +#endif diff --git a/ricepp/include/ricepp/detail/decode.h b/ricepp/include/ricepp/detail/decode.h index a38399387..278596ef5 100644 --- a/ricepp/include/ricepp/detail/decode.h +++ b/ricepp/include/ricepp/detail/decode.h @@ -28,6 +28,7 @@ #include #include +#include namespace ricepp::detail { diff --git a/ricepp/ricepp_cpuspecific.cpp b/ricepp/ricepp_cpuspecific.cpp index d4a73a9dc..db0c7e870 100644 --- a/ricepp/ricepp_cpuspecific.cpp +++ b/ricepp/ricepp_cpuspecific.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "ricepp_cpuspecific.h" @@ -57,13 +58,15 @@ class dynamic_pixel_traits { assert(unused_lsb_count < kBitCount); } - [[nodiscard]] value_type read(value_type value) const noexcept { + [[nodiscard]] RICEPP_FORCE_INLINE value_type + read(value_type value) const noexcept { value_type tmp = byteswap(value, byteorder_); assert((tmp & lsb_mask_) == 0); return tmp >> unused_lsb_count_; } - [[nodiscard]] value_type write(value_type value) const noexcept { + [[nodiscard]] RICEPP_FORCE_INLINE value_type + write(value_type value) const noexcept { assert((value & msb_mask_) == 0); return byteswap(static_cast(value << unused_lsb_count_), byteorder_); @@ -95,13 +98,15 @@ class static_pixel_traits { static_cast(~(kAllOnes >> kUnusedLsbCount)); static_assert(kUnusedLsbCount < kBitCount); - [[nodiscard]] static value_type read(value_type value) noexcept { + [[nodiscard]] static RICEPP_FORCE_INLINE value_type + read(value_type value) noexcept { value_type tmp = byteswap(value); assert((tmp & kLsbMask) == 0); return tmp >> kUnusedLsbCount; } - [[nodiscard]] static value_type write(value_type value) noexcept { + [[nodiscard]] static RICEPP_FORCE_INLINE value_type + write(value_type value) noexcept { assert((value & kMsbMask) == 0); return byteswap( static_cast(value << kUnusedLsbCount));