Skip to content

Commit

Permalink
chore(ricepp): force inlining, massively speeds up Windows version
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Feb 23, 2024
1 parent 3b7d6ed commit cd1efac
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 23 deletions.
21 changes: 11 additions & 10 deletions ricepp/include/ricepp/bitstream_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <stdexcept>

#include <ricepp/byteswap.h>
#include <ricepp/detail/compiler.h>

namespace ricepp {

Expand All @@ -46,10 +47,10 @@ class bitstream_reader final {
: beg_{std::move(beg)}
, end_{std::move(end)} {}

bool read_bit() { return read_bits_impl(1); }
RICEPP_FORCE_INLINE bool read_bit() { return read_bits_impl(1); }

template <std::unsigned_integral T>
T read_bits(size_t num_bits) {
RICEPP_FORCE_INLINE T read_bits(size_t num_bits) {
assert(num_bits <= std::numeric_limits<T>::digits);
T bits = 0;
uint16_t pos = 0;
Expand All @@ -68,7 +69,7 @@ class bitstream_reader final {
return bits;
}

size_t find_first_set() {
RICEPP_FORCE_INLINE size_t find_first_set() {
size_t zeros = 0;
if (bit_pos_ != 0) {
if (peek_bit()) [[likely]] {
Expand Down Expand Up @@ -103,24 +104,24 @@ class bitstream_reader final {
}

private:
bits_type read_bits_impl(size_t num_bits) {
RICEPP_FORCE_INLINE bits_type read_bits_impl(size_t num_bits) {
auto bits = peek_bits(num_bits);
skip_bits(num_bits);
return bits;
}

void skip_bits(size_t num_bits) {
RICEPP_FORCE_INLINE void skip_bits(size_t num_bits) {
assert(bit_pos_ + num_bits <= kBitsTypeBits);
bit_pos_ += num_bits;
bit_pos_ &= kBitsTypeBits - 1;
}

bool peek_bit() {
RICEPP_FORCE_INLINE bool peek_bit() {
assert(bit_pos_ > 0 && bit_pos_ < kBitsTypeBits);
return (data_ >> bit_pos_) & 1;
}

bits_type peek_bits(size_t num_bits) {
RICEPP_FORCE_INLINE bits_type peek_bits(size_t num_bits) {
assert(bit_pos_ + num_bits <= kBitsTypeBits);
if (bit_pos_ == 0) [[unlikely]] {
data_ = read_packet();
Expand All @@ -138,14 +139,14 @@ class bitstream_reader final {
return bits;
}

bits_type read_packet() {
RICEPP_FORCE_INLINE bits_type read_packet() {
if (beg_ == end_) [[unlikely]] {
throw std::out_of_range{"bitstream_reader::read_packet"};
}
return read_packet_nocheck();
}

bits_type read_packet_nocheck()
RICEPP_FORCE_INLINE bits_type read_packet_nocheck()
requires std::contiguous_iterator<iterator_type>
{
bits_type bits{};
Expand All @@ -160,7 +161,7 @@ class bitstream_reader final {
return byteswap<std::endian::little>(bits);
}

bits_type read_packet_nocheck()
RICEPP_FORCE_INLINE bits_type read_packet_nocheck()
requires(!std::contiguous_iterator<iterator_type>)
{
bits_type bits{};
Expand Down
11 changes: 6 additions & 5 deletions ricepp/include/ricepp/bitstream_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <type_traits>

#include <ricepp/byteswap.h>
#include <ricepp/detail/compiler.h>

namespace ricepp {

Expand Down Expand Up @@ -57,12 +58,12 @@ class bitstream_writer final {
bitstream_writer(OutputIt out)
: out_{out} {}

void write_bit(bool bit) {
RICEPP_FORCE_INLINE void write_bit(bool bit) {
assert(bit_pos_ < kBitsTypeBits);
write_bits_impl(bit, 1);
}

void write_bit(bool bit, size_t repeat) {
RICEPP_FORCE_INLINE void write_bit(bool bit, size_t repeat) {
bits_type const bits = bit ? ~bits_type{} : bits_type{};
if (bit_pos_ != 0) [[likely]] {
auto remaining_bits = kBitsTypeBits - bit_pos_;
Expand All @@ -81,7 +82,7 @@ class bitstream_writer final {
}

template <std::unsigned_integral T>
void write_bits(T bits, size_t num_bits) {
RICEPP_FORCE_INLINE void write_bits(T bits, size_t num_bits) {
static constexpr size_t kArgBits{std::numeric_limits<T>::digits};
assert(bit_pos_ < kBitsTypeBits);
assert(num_bits <= kArgBits);
Expand All @@ -108,7 +109,7 @@ class bitstream_writer final {
iterator_type iterator() const { return out_; }

private:
void write_bits_impl(bits_type bits, size_t num_bits) {
RICEPP_FORCE_INLINE void write_bits_impl(bits_type bits, size_t num_bits) {
assert(bit_pos_ + num_bits <= kBitsTypeBits);
if (num_bits < kBitsTypeBits) {
bits &= (static_cast<bits_type>(1) << num_bits) - 1;
Expand All @@ -122,7 +123,7 @@ class bitstream_writer final {
}
}

void write_packet(bits_type bits) {
RICEPP_FORCE_INLINE void write_packet(bits_type bits) {
size_t const to_copy =
bit_pos_ == 0 ? sizeof(bits_type) : (bit_pos_ + 7) / 8;
bits = byteswap<std::endian::little>(bits);
Expand Down
12 changes: 8 additions & 4 deletions ricepp/include/ricepp/byteswap.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,22 @@

#include <range/v3/algorithm/reverse.hpp>

#include <ricepp/detail/compiler.h>

namespace ricepp {

namespace detail {

template <std::unsigned_integral T>
[[nodiscard]] constexpr T byteswap_fallback(T value) noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T
byteswap_fallback(T value) noexcept {
auto value_repr = std::bit_cast<std::array<std::byte, sizeof(T)>>(value);
ranges::reverse(value_repr);
return std::bit_cast<T>(value_repr);
}

template <std::unsigned_integral T>
[[nodiscard]] constexpr T byteswap(T value) noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept {
#if __cpp_lib_byteswap >= 202110L
return std::byteswap(value);
#elif defined(__GNUC__) || defined(__clang__)
Expand Down Expand Up @@ -73,7 +76,8 @@ template <std::unsigned_integral T>
} // namespace detail

template <std::unsigned_integral T>
[[nodiscard]] T byteswap(T value, std::endian byteorder) noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE T byteswap(T value,
std::endian byteorder) noexcept {
static_assert(std::endian::native == std::endian::little ||
std::endian::native == std::endian::big);
if constexpr (sizeof(T) > 1) {
Expand All @@ -85,7 +89,7 @@ template <std::unsigned_integral T>
}

template <std::endian byteorder, std::unsigned_integral T>
[[nodiscard]] constexpr T byteswap(T value) noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept {
static_assert(std::endian::native == std::endian::little ||
std::endian::native == std::endian::big);
if constexpr (sizeof(T) > 1 && byteorder != std::endian::native) {
Expand Down
30 changes: 30 additions & 0 deletions ricepp/include/ricepp/detail/compiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz ([email protected])
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of ricepp.
*
* ricepp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ricepp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ricepp. If not, see <https://www.gnu.org/licenses/>.
*/

#pragma once

// RICEPP_FORCE_INLINE is a function specifier that requests unconditional
// inlining of the annotated function, spelled in whatever way the detected
// toolchain provides. It always includes `inline` semantics, so it can be used
// on functions defined in headers.
#if defined(__GNUC__) || defined(__clang__)
// GCC-compatible compilers: `inline` combined with the always_inline attribute.
#define RICEPP_FORCE_INLINE inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
// MSVC: use the compiler-specific __forceinline keyword.
#define RICEPP_FORCE_INLINE __forceinline
#else
// Unrecognized compiler: fall back to plain `inline`, which is only a hint.
#define RICEPP_FORCE_INLINE inline
#endif
1 change: 1 addition & 0 deletions ricepp/include/ricepp/detail/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <range/v3/range/concepts.hpp>

#include <ricepp/bitstream_reader.h>
#include <ricepp/detail/compiler.h>

namespace ricepp::detail {

Expand Down
13 changes: 9 additions & 4 deletions ricepp/ricepp_cpuspecific.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <ricepp/bitstream_writer.h>
#include <ricepp/byteswap.h>
#include <ricepp/codec.h>
#include <ricepp/detail/compiler.h>
#include <ricepp/ricepp.h>

#include "ricepp_cpuspecific.h"
Expand Down Expand Up @@ -57,13 +58,15 @@ class dynamic_pixel_traits {
assert(unused_lsb_count < kBitCount);
}

[[nodiscard]] value_type read(value_type value) const noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE value_type
read(value_type value) const noexcept {
value_type tmp = byteswap(value, byteorder_);
assert((tmp & lsb_mask_) == 0);
return tmp >> unused_lsb_count_;
}

[[nodiscard]] value_type write(value_type value) const noexcept {
[[nodiscard]] RICEPP_FORCE_INLINE value_type
write(value_type value) const noexcept {
assert((value & msb_mask_) == 0);
return byteswap(static_cast<value_type>(value << unused_lsb_count_),
byteorder_);
Expand Down Expand Up @@ -95,13 +98,15 @@ class static_pixel_traits {
static_cast<value_type>(~(kAllOnes >> kUnusedLsbCount));
static_assert(kUnusedLsbCount < kBitCount);

[[nodiscard]] static value_type read(value_type value) noexcept {
[[nodiscard]] static RICEPP_FORCE_INLINE value_type
read(value_type value) noexcept {
value_type tmp = byteswap<kByteOrder>(value);
assert((tmp & kLsbMask) == 0);
return tmp >> kUnusedLsbCount;
}

[[nodiscard]] static value_type write(value_type value) noexcept {
[[nodiscard]] static RICEPP_FORCE_INLINE value_type
write(value_type value) noexcept {
assert((value & kMsbMask) == 0);
return byteswap<kByteOrder>(
static_cast<value_type>(value << kUnusedLsbCount));
Expand Down

0 comments on commit cd1efac

Please sign in to comment.