Skip to content

Commit

Permalink
add huffman table constructor from symbol-bitsize range
Browse files Browse the repository at this point in the history
Add a constructor to huffman::table that takes a range of symbol-bitsize
tuples.

This commit also moves public tag types into a utility header.

Change-Id: Ia125173ce91e2e189c23de0956d9eaa7aa51762c
  • Loading branch information
oliverlee committed Oct 5, 2023
1 parent 0396f5d commit 8bac466
Show file tree
Hide file tree
Showing 7 changed files with 216 additions and 95 deletions.
1 change: 1 addition & 0 deletions huffman/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ cc_library(
"src/detail/table_storage.hpp",
"src/encoding.hpp",
"src/table.hpp",
"src/utility.hpp",
],
hdrs = ["huffman.hpp"],
visibility = ["//:__subpackages__"],
Expand Down
24 changes: 4 additions & 20 deletions huffman/src/detail/table_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "huffman/src/code.hpp"
#include "huffman/src/detail/static_vector.hpp"
#include "huffman/src/encoding.hpp"
#include "huffman/src/utility.hpp"

#include <algorithm>
#include <cassert>
Expand All @@ -13,18 +14,7 @@
#include <type_traits>
#include <vector>

namespace starflate::huffman {

/// Disambiguation tag to specify a table is constructed with a code-symbol
/// mapping
///
struct table_contents_tag
{
explicit table_contents_tag() = default;
};
inline constexpr auto table_contents = table_contents_tag{};

namespace detail {
namespace starflate::huffman::detail {

struct frequency_tag
{
Expand Down Expand Up @@ -55,7 +45,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
template <class R>
constexpr table_storage(
frequency_tag, const R& frequencies, std::optional<symbol_type> eot)
: base_type{}
{
base_type::reserve(
std::ranges::size(frequencies) + std::size_t{eot.has_value()});
Expand All @@ -74,7 +63,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
template <class R>
constexpr table_storage(
data_tag, const R& data, std::optional<symbol_type> eot)
: base_type{}
{
if (eot) {
base_type::emplace_back(*eot, 1UZ);
Expand All @@ -97,7 +85,7 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
}

template <class R>
constexpr table_storage(table_contents_tag, const R& map) : base_type{}
constexpr table_storage(table_contents_tag, const R& map)
{
const auto as_code = [](auto& node) -> auto& {
return static_cast<code&>(node);
Expand All @@ -116,9 +104,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
as_symbol(*it) = s;
++it;
}

assert(std::ranges::unique(*this, {}, as_code).empty());
assert(std::ranges::unique(*this, {}, as_symbol).empty());
}

using base_type::begin;
Expand All @@ -131,5 +116,4 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
using base_type::size;
};

} // namespace detail
} // namespace starflate::huffman
} // namespace starflate::huffman::detail
135 changes: 96 additions & 39 deletions huffman/src/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "huffman/src/detail/element_base_iterator.hpp"
#include "huffman/src/detail/table_node.hpp"
#include "huffman/src/detail/table_storage.hpp"
#include "huffman/src/utility.hpp"

#include <algorithm>
#include <compare>
Expand All @@ -22,8 +23,47 @@

namespace starflate::huffman {

template <class T, std::size_t N>
using c_array = T[N];
namespace detail {

/// Converts an unsigned integer to the signed integer type `S`
/// @tparam S destination signed integer type
/// @tparam U source unsigned integer type
/// @param uint value to convert
/// @return `uint` converted to `S`
/// @pre `uint` is not greater than `std::numeric_limits<S>::max()`
///
template <std::signed_integral S, std::unsigned_integral U>
static constexpr auto to_signed(U uint)
{
  // Both operands are compared in a common unsigned type so that neither is
  // truncated or sign-converted before the check.
  using C [[maybe_unused]] = std::common_type_t<std::make_unsigned_t<S>, U>;

  // The bound is inclusive: `max()` itself is exactly representable in `S`.
  assert(static_cast<C>(uint) <= static_cast<C>(std::numeric_limits<S>::max()));
  return static_cast<S>(uint);
}

/// Finds the next internal node that satisfies a predicate
/// @param first iterator to the node where the search begins
/// @param last iterator marking the end of the search
/// @param pred unary predicate applied to each visited node
/// @return iterator to the first visited node for which `pred` returns true;
///     otherwise the iterator value at which the search stopped
///
/// Nodes are visited by advancing `node_size()` elements at a time, so only
/// the first element of each node is tested.
///
template <std::random_access_iterator I, std::indirect_unary_predicate<I> P>
constexpr static auto find_node_if(I first, I last, P pred)
{
  using difference_type = std::iter_difference_t<I>;

  while (first != last && !pred(*first)) {
    // skip over every element belonging to the current node
    first += to_signed<difference_type>(first->node_size());
  }

  return first;
}

/// Adapts a symbol-bitsize range into a code-symbol range
/// @tparam R range type whose elements decompose into a symbol and a bitsize
/// @param rng range of symbol-bitsize elements
/// @return lazily evaluated view that yields, for each element of `rng`, a
///     `std::tuple` holding `code{bitsize, {}}` followed by the symbol
///
template <class R>
constexpr auto to_code_symbol(const R& rng)
{
  const auto as_code_symbol = [](const auto& entry) {
    const auto& [sym, nbits] = entry;
    return std::tuple{code{nbits, {}}, sym};
  };

  return rng | std::views::transform(as_code_symbol);
}

} // namespace detail

/// Huffman code table
/// @tparam Symbol symbol type
Expand All @@ -44,31 +84,10 @@ class table

detail::table_storage<node_type, Extent> table_;

template <std::unsigned_integral U>
static constexpr auto to_index(U uint)
constexpr auto encode_symbols() -> void
{
using S = std::ranges::range_difference_t<decltype(table_)>;
using C [[maybe_unused]] = std::common_type_t<std::make_unsigned_t<S>, U>;

assert(
static_cast<C>(uint) < static_cast<C>(std::numeric_limits<S>::max()));
return static_cast<S>(uint);
}

template <std::forward_iterator I, std::indirect_unary_predicate<I> P>
constexpr static auto find_node_if(I first, I last, P pred)
{
for (; first != last; first += to_index(first->node_size())) {
if (pred(*first)) {
break;
}
}

return first;
}

constexpr auto encode_symbols() -> void
{
auto reversed = std::views::reverse(table_);

// precondition, audit
Expand All @@ -79,13 +98,13 @@ class table
const auto last = reversed.end();

while (first->node_size() != total_size) {
join_reversed(first[0], first[to_index(first->node_size())]);
join_reversed(first[0], first[detail::to_signed<S>(first->node_size())]);

const auto has_higher_freq = [&first](const auto& n) {
return n.frequency() > first->frequency();
};

auto lower = first + to_index(first->node_size());
auto lower = first + detail::to_signed<S>(first->node_size());
auto upper = find_node_if(lower, last, has_higher_freq);

// re-sort after creating a new internal node
Expand Down Expand Up @@ -149,13 +168,13 @@ class table
}

public:
/// Code point type
/// Symbol type
///
using encoding_type = encoding<Symbol>;
using symbol_type = Symbol;

/// Symbol type
/// Code point type
///
using symbol_type = typename encoding_type::symbol_type;
using encoding_type = encoding<symbol_type>;

/// Const iterator type
///
Expand All @@ -176,13 +195,6 @@ class table
///
/// @{

template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
std::tuple<symbol_type, std::size_t>>
constexpr explicit table(const R& frequencies) : table{frequencies, {}}
{}

template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
Expand All @@ -194,6 +206,13 @@ class table
set_skip_fields();
}

/// Constructs a `table` from a sized range of symbol-frequency tuples
/// @tparam R sized range whose value type converts to
///     `std::tuple<symbol_type, std::size_t>`
/// @param frequencies range of symbol-frequency elements
///
/// Forwards `frequencies` to the two-argument constructor, passing `{}` as
/// the second argument.
/// NOTE(review): `{}` presumably means "no end-of-transmission symbol" —
/// confirm against the two-argument overload.
///
template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
std::tuple<symbol_type, std::size_t>>
constexpr explicit table(const R& frequencies) : table{frequencies, {}}
{}

template <std::integral I, auto N>
constexpr explicit table(
const c_array<std::pair<symbol_type, I>, N>& frequencies)
Expand Down Expand Up @@ -251,21 +270,51 @@ class table
std::tuple_element_t<1, std::ranges::range_value_t<R>>,
symbol_type>)
constexpr table(table_contents_tag, const R& map)
: table_{table_contents_tag{}, map}
: table_{table_contents, map}
{
set_skip_fields();
}

template <std::size_t N>
constexpr table(
table_contents_tag, const c_array<std::pair<code, symbol_type>, N>& map)
: table_{table_contents_tag{}, map}
: table_{table_contents, map}
{
set_skip_fields();
}

/// @}

/// Constructs a `table` from a symbol-bitsize mapping
/// @tparam R sized-range of symbol-bitsize tuple-likes
/// @param map range of symbol-bitsize tuple-likes
/// @pre all symbols are unique
/// @pre the number of symbols with the same bitsize does not exceed the
/// available number of prefix free codes with that bitsize
///
/// @{

// Range overload: accepts any sized range whose reference type converts to a
// `std::tuple<symbol_type, std::uint8_t>`.
template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_reference_t<R>,
std::tuple<symbol_type, std::uint8_t>>
constexpr table(symbol_bitsize_tag, const R& map)
// Delegates to the code-symbol constructor; `detail::to_code_symbol` turns
// each (symbol, bitsize) element into a (`code{bitsize, {}}`, symbol) tuple.
: table{table_contents, detail::to_code_symbol(map)}
{
// NOTE(review): presumably assigns the final code values based on the
// stored bitsizes — confirm against the definition of `canonicalize`.
canonicalize();
}

// C-array overload: accepts an array of `N` (symbol, bitsize) pairs, e.g. a
// braced list of pairs.
template <std::size_t N>
constexpr table(
symbol_bitsize_tag,
const c_array<std::pair<symbol_type, std::uint8_t>, N>& map)
// Delegates to the code-symbol constructor; `detail::to_code_symbol` turns
// each (symbol, bitsize) pair into a (`code{bitsize, {}}`, symbol) tuple.
: table{table_contents, detail::to_code_symbol(map)}
{
// NOTE(review): presumably assigns the final code values based on the
// stored bitsizes — confirm against the definition of `canonicalize`.
canonicalize();
}

/// @}

/// Returns an iterator to the first `encoding`
///
/// @note elements are ordered by code bitsize. If multiple elements have the
Expand Down Expand Up @@ -461,4 +510,12 @@ template <class R>
table(table_contents_tag, const R&)
-> table<detail::tuple_arg_t<1, R>, detail::tuple_size_v<R>()>;

/// Deduction guide: a C array of `N` symbol-bitsize pairs tagged with
/// `symbol_bitsize_tag` deduces `table<S, N>`, where `S` is the first type of
/// each pair. The bitsize type `I` does not affect the deduced table type.
///
template <class S, class I, std::size_t N>
table(symbol_bitsize_tag, const c_array<std::pair<S, I>, N>&) -> table<S, N>;

/// Deduction guide: a range tagged with `symbol_bitsize_tag` whose value type
/// reports a tuple size of 2 deduces the symbol type from
/// `detail::tuple_arg_t<0, R>` and the extent from
/// `detail::tuple_size_v<R>()`.
/// NOTE(review): `detail::tuple_arg_t` and `detail::tuple_size_v` are defined
/// elsewhere; presumably they yield the first tuple element type of `R`'s
/// values and the static size of `R` — confirm against their definitions.
///
template <class R>
requires (detail::tuple_size_v<std::ranges::range_value_t<R>>() == 2)
table(symbol_bitsize_tag, const R&)
-> table<detail::tuple_arg_t<0, R>, detail::tuple_size_v<R>()>;

} // namespace starflate::huffman
30 changes: 30 additions & 0 deletions huffman/src/utility.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <cstddef>

namespace starflate::huffman {

/// Alias template for the built-in array type of `Count` elements of `Element`
/// @tparam Element array element type
/// @tparam Count number of elements in the array
///
template <typename Element, std::size_t Count>
using c_array = Element[Count];

/// Tag type used to select construction of a table from a code-symbol
/// mapping
///
/// The default constructor is `explicit`, so a tag is never created
/// implicitly from an empty braced list.
///
struct table_contents_tag
{
  explicit table_contents_tag() = default;
};

/// Tag value to pass where a `table_contents_tag` is expected
///
inline constexpr table_contents_tag table_contents{};

/// Tag type used to select construction of a table from a symbol-bitsize
/// mapping
///
/// The default constructor is `explicit`, so a tag is never created
/// implicitly from an empty braced list.
///
struct symbol_bitsize_tag
{
  explicit symbol_bitsize_tag() = default;
};

/// Tag value to pass where a `symbol_bitsize_tag` is expected
///
inline constexpr symbol_bitsize_tag symbol_bitsize{};

} // namespace starflate::huffman
22 changes: 16 additions & 6 deletions huffman/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,29 @@ cc_test(
)

cc_test(
name = "table_from_frequencies_test",
name = "table_canonicalize_test",
timeout = "short",
srcs = ["table_from_frequencies_test.cpp"],
srcs = ["table_canonicalize_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_test(
name = "table_canonicalize_test",
name = "table_find_code_test",
timeout = "short",
srcs = ["table_canonicalize_test.cpp"],
srcs = ["table_find_code_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_test(
name = "table_from_frequencies_test",
timeout = "short",
srcs = ["table_from_frequencies_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
Expand Down Expand Up @@ -61,9 +71,9 @@ cc_test(
)

cc_test(
name = "table_find_code_test",
name = "table_from_symbol_bitsize_test",
timeout = "short",
srcs = ["table_find_code_test.cpp"],
srcs = ["table_from_symbol_bitsize_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
Expand Down
Loading

0 comments on commit 8bac466

Please sign in to comment.