From 5bac1df2998f693736900f3ab4b37cd4ed43ca96 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Tue, 26 Sep 2023 18:37:26 -0700 Subject: [PATCH] store huffman table elements in forward order Remove use of std::reverse_iterator when iterating over table elements. Instead, invoke reverse on the underlying table storage after construction. Change-Id: I45916a591f51298fff8b4012b9507b781021100e --- huffman/src/detail/static_vector.hpp | 1 + huffman/src/detail/table_node.hpp | 56 +++++------ huffman/src/detail/table_storage.hpp | 3 +- huffman/src/table.hpp | 115 +++++++++++++--------- huffman/test/decode_test.cpp | 14 ++- huffman/test/table_canonicalize_test.cpp | 54 +++++----- huffman/test/table_find_code_test.cpp | 22 ++--- huffman/test/table_from_contents_test.cpp | 106 ++++++++++---------- huffman/test/table_from_data_test.cpp | 17 ++++ 9 files changed, 207 insertions(+), 181 deletions(-) diff --git a/huffman/src/detail/static_vector.hpp b/huffman/src/detail/static_vector.hpp index 368845a..504b94f 100644 --- a/huffman/src/detail/static_vector.hpp +++ b/huffman/src/detail/static_vector.hpp @@ -79,6 +79,7 @@ class static_vector : std::array } constexpr auto size() const noexcept -> size_type { return size_; } + constexpr auto empty() const noexcept -> size_type { return not size(); } constexpr auto end() noexcept -> iterator { return begin() + size(); } constexpr auto end() const noexcept -> const_iterator diff --git a/huffman/src/detail/table_node.hpp b/huffman/src/detail/table_node.hpp index 6e6b65b..5d25295 100644 --- a/huffman/src/detail/table_node.hpp +++ b/huffman/src/detail/table_node.hpp @@ -96,60 +96,48 @@ class table_node : public encoding return init_.frequency; } - constexpr auto node_size() const - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) - return init_.node_size; - } - - /// Obtains the next node, with respect to node size + /// Distance to next internal node /// - /// Given a contiguous container of nodes, `next()` returns a pointer to - /// the next node, using `node_size()` to determine if `*this` is an - /// internal node or a leaf node. If `*this` is an internal node, (i.e. - /// `*this` represents a node with children) `next()` skips the appropriate - /// number of elements in the associated container. + /// Given a contiguous container of nodes, `node_size()` returns the distance + /// to the next internal node. /// /// | freq: 3 | freq: 1 | freq: 1 | freq: 4 | freq: 2 | /// | ns: 3 | ns: 1 | ns: 1 | ns: 2 | ns: 1 | /// ^ ^ /// this | /// | - /// this->next() -----------------+ + /// next internal node -----------+ /// - /// @{ - - constexpr auto next() -> table_node* { return this + node_size(); } - constexpr auto next() const -> const table_node* + constexpr auto node_size() const { - return this + node_size(); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + return init_.node_size; } - - /// @} - /// "Joins" two `table_node`s + /// @param lhs left table_node + /// @param rhs right table_node + /// @pre `&lhs + lhs.node_size() == &rhs` /// - /// Logically "join" `*this` with the next adjacent node `*next()`, - /// "creating" an internal node. This adds the frequency of the next node to - /// this node, left pads all the codes of the internal nodes of `*this` with - /// 0s and left pads all the code of the internal nodes of `*next()` with - /// 1s. + /// Logically "join" `lhs` with the next adjacent node `rhs` "creating" an + /// internal node. This adds the frequency of `rhs` to `lhs`, left pads all + /// the codes of the internal nodes of `lhs` with 0s and left pads all the + /// code of the internal nodes of `rhs` with 1s. /// - /// @pre `*this` is not `back()` of the associated container - /// - constexpr auto join_with_next() & -> void + friend constexpr auto join(table_node& lhs, table_node& rhs) -> void { + assert( + &lhs + lhs.node_size() == &rhs and "`lhs` and `rhs` are not adjacent"); + const auto left_pad_with = [](auto b) { return [b](table_node& n) { b >> static_cast(n); }; }; - std::for_each(this, next(), left_pad_with(bit{0})); - std::for_each(next(), next()->next(), left_pad_with(bit{1})); + std::for_each(&lhs, &rhs, left_pad_with(bit{0})); + std::for_each(&rhs, &rhs + rhs.node_size(), left_pad_with(bit{1})); - const auto& n = *next(); // NOLINTBEGIN(cppcoreguidelines-pro-type-union-access) - init_.frequency += n.frequency(); - init_.node_size += n.node_size(); + lhs.init_.frequency += rhs.frequency(); + lhs.init_.node_size += rhs.node_size(); // NOLINTEND(cppcoreguidelines-pro-type-union-access) } diff --git a/huffman/src/detail/table_storage.hpp b/huffman/src/detail/table_storage.hpp index 74d1411..02a26ec 100644 --- a/huffman/src/detail/table_storage.hpp +++ b/huffman/src/detail/table_storage.hpp @@ -117,8 +117,6 @@ class table_storage : table_storage_base_t ++it; } - std::ranges::sort(*this, std::ranges::greater{}, as_code); - assert(std::ranges::unique(*this, {}, as_code).empty()); assert(std::ranges::unique(*this, {}, as_symbol).empty()); } @@ -127,6 +125,7 @@ class table_storage : table_storage_base_t using base_type::cbegin; using base_type::cend; using base_type::data; + using base_type::empty; using base_type::end; using base_type::front; using base_type::size; diff --git a/huffman/src/table.hpp b/huffman/src/table.hpp index 41e5344..dc1f8fa 100644 --- a/huffman/src/table.hpp +++ b/huffman/src/table.hpp @@ -44,9 +44,19 @@ class table detail::table_storage table_; - constexpr static auto find_node_if(auto first, auto last, auto pred) + template + static constexpr auto to_index(U uint) { - for (; first != last; first = first->next()) { + using S = std::ranges::range_difference_t; + + assert(std::cmp_less_equal(uint, std::numeric_limits::max())); + return static_cast(uint); + } + + template P> + constexpr static auto find_node_if(I first, I last, P pred) + { + for (; first != last; first += to_index(first->node_size())) { if (pred(*first)) { break; } @@ -55,10 +65,38 @@ class table return first; } + constexpr auto encode_symbols() -> void + { + // precondition, audit + assert(std::ranges::is_sorted(table_)); + + const auto total_size = table_.size(); + auto first = table_.begin(); + const auto last = table_.end(); + + while (first->node_size() != total_size) { + join(first[0], first[to_index(first->node_size())]); + + const auto has_higher_freq = [f = first->frequency()](const auto& n) { + return n.frequency() > f; + }; + + auto lower = first + to_index(first->node_size()); + auto upper = find_node_if(lower, last, has_higher_freq); + + // re-sort after creating a new internal node + std::rotate(first, lower, upper); + } + } + constexpr auto construct_table() -> void { using size_type = decltype(table_.size()); + if (table_.empty()) { + return; + } + if (table_.size() == size_type{1}) { using namespace huffman::literals; static_cast(table_.front()) = 0_c; @@ -67,27 +105,31 @@ class table std::ranges::sort(table_); + // precondition assert( std::ranges::unique( - table_, [](auto& x, auto& y) { return x.symbol == y.symbol; }) + table_, {}, [](const auto& elem) { return elem.symbol; }) .empty() and - "`frequencies` cannot contain duplicate symbols"); + "a `table` cannot contain duplicate symbols"); - while (table_.front().node_size() != table_.size()) { - table_.front().join_with_next(); + const auto frequencies = std::views::transform( + table_, [](const auto& elem) { return elem.frequency(); }); + [[maybe_unused]] const auto total_freq = + std::accumulate(std::cbegin(frequencies), std::cend(frequencies), 0UZ); - const auto last = table_.data() + table_.size(); - const auto has_higher_freq = - [f = table_.front().frequency()](const auto& n) { - return n.frequency() > f; - }; + encode_symbols(); - auto lower = table_.front().next(); - auto upper = find_node_if(lower, last, has_higher_freq); + // postcondition + assert(total_freq == table_.front().frequency()); - // re-sort after creating a new internal node - std::rotate(&table_.front(), lower, upper); - } + // Implicit construction of the Huffman tree results in the least frequent + // symbols at the beginning (largest bitsize) and most frequent at the end + // (smallest bitsize). See details in `table_node.hpp` for how this is + // represented. + // + // Reversing the elements allows code search to start with the symbols with + // the smallest bitsize. + std::ranges::reverse(table_); } constexpr auto set_skip_fields() -> void @@ -96,7 +138,7 @@ class table const node_type* prev{}; - for (auto& elem : table_) { + for (auto& elem : std::views::reverse(table_)) { const auto s = skip_type{1} + (prev and (elem.bitsize() == prev->bitsize()) @@ -121,10 +163,7 @@ class table /// Const iterator type /// using const_iterator = detail::element_base_iterator< - // TODO: construct_table builds the table in reverse order. - // would be nice to fix that so we can get rid of reverse_iterator here. - std::reverse_iterator< - typename detail::table_storage::const_iterator>, + typename detail::table_storage::const_iterator, encoding>; /// Constructs an empty table @@ -154,16 +193,7 @@ class table constexpr table(const R& frequencies, std::optional eot) : table_{detail::frequency_tag{}, frequencies, eot} { - [[maybe_unused]] const auto total_freq = std::accumulate( - std::cbegin(frequencies), - std::cend(frequencies), - std::size_t{eot.has_value()}, - [](auto acc, auto kv) { return acc + kv.second; }); - construct_table(); - - assert(total_freq == table_.front().frequency()); - set_skip_fields(); } @@ -247,7 +277,7 @@ class table [[nodiscard]] constexpr auto begin() const -> const_iterator { - return const_iterator{std::make_reverse_iterator(table_.end())}; + return const_iterator{table_.begin()}; } /// Returns an iterator past the last `encoding` @@ -256,7 +286,7 @@ class table [[nodiscard]] constexpr auto end() const -> const_iterator { - return const_iterator{std::make_reverse_iterator(table_.begin())}; + return const_iterator{table_.end()}; } /// Finds element with specific code within the code table @@ -337,12 +367,9 @@ class table { using value_type = decltype(std::declval().value()); - // elements are stored in reverse order, so we maintain that order - auto reversed = std::views::reverse(table_); - // set lexicographical order std::ranges::sort( // - reversed, // + table_, // [](const auto& x, const auto& y) { return std::pair{x.bitsize(), std::ref(x.symbol)} < std::pair{y.bitsize(), std::ref(y.symbol)}; @@ -356,19 +383,19 @@ class table auto next_code = code{}; // clang-format off - for (auto& n : reversed) { - assert(next_code.bitsize() <= n.bitsize()); + for (auto& elem : table_) { + assert(next_code.bitsize() <= elem.bitsize()); next_code = { - n.bitsize(), - next_code.bitsize() == n.bitsize() - ? next_code.value() + value_type{1} // 3) next_code[len]++; - : base_code <<= (n.bitsize() - next_code.bitsize()) // 2) next_code[bits] = code; code = (...) << 1; + elem.bitsize(), + next_code.bitsize() == elem.bitsize() + ? next_code.value() + value_type{1} // 3) next_code[len]++; + : base_code <<= (elem.bitsize() - next_code.bitsize()) // 2) next_code[bits] = code; code = (...) << 1; }; - static_cast(n) = next_code; // 3) tree[n].Code = next_code[len]; + static_cast(elem) = next_code; // 3) tree[n].Code = next_code[len]; - ++base_code; // 2) (code + bl_count[bits-1]) + ++base_code; // 2) (code + bl_count[bits-1]) } // clang-format on diff --git a/huffman/test/decode_test.cpp b/huffman/test/decode_test.cpp index b3f7ea0..df1a947 100644 --- a/huffman/test/decode_test.cpp +++ b/huffman/test/decode_test.cpp @@ -3,11 +3,9 @@ #include #include -#include #include #include #include -#include auto main() -> int { @@ -19,7 +17,7 @@ auto main() -> int test("basic") = [] { // encoded data from dahuffman readme.rst, but in hex. - constexpr std::array encoded_bytes = { + constexpr std::array encoded_bytes = { std::byte{0x86}, std::byte{0x7c}, std::byte{0x25}, @@ -31,12 +29,12 @@ auto main() -> int static constexpr auto code_table = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, eot}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, eot}} }; // clang-format on constexpr std::array expected = { diff --git a/huffman/test/table_canonicalize_test.cpp b/huffman/test/table_canonicalize_test.cpp index 4cbb417..025db64 100644 --- a/huffman/test/table_canonicalize_test.cpp +++ b/huffman/test/table_canonicalize_test.cpp @@ -17,19 +17,19 @@ auto main() -> int static constexpr auto actual = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{010_c, 'D'}, + {std::pair{00_c, 'A'}, + {1_c, 'B'}, {011_c, 'C'}, - {00_c, 'A'}, - {1_c, 'B'}}}.canonicalize(); + {010_c, 'D'}}}.canonicalize(); // clang-format on static constexpr auto expected = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{111_c, 'D'}, - {110_c, 'C'}, + {std::pair{0_c, 'B'}, {10_c, 'A'}, - {0_c, 'B'}}}; + {110_c, 'C'}, + {111_c, 'D'}}}; // clang-format on expect(std::ranges::equal(actual, expected)); @@ -41,27 +41,27 @@ auto main() -> int static constexpr auto actual = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{1111_c, 'H'}, - {0111_c, 'G'}, - {100_c, 'E'}, - {011_c, 'D'}, - {010_c, 'C'}, + {std::pair{000_c, 'A'}, {001_c, 'B'}, - {000_c, 'A'}, - {11_c, 'F'}}}.canonicalize(); + {010_c, 'C'}, + {011_c, 'D'}, + {100_c, 'E'}, + {11_c, 'F'}, + {0111_c, 'G'}, + {1111_c, 'H'}}}.canonicalize(); // clang-format on static constexpr auto expected = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{1111_c, 'H'}, - {1110_c, 'G'}, - {110_c, 'E'}, - {101_c, 'D'}, - {100_c, 'C'}, - {011_c, 'B'}, + {std::pair{00_c, 'F'}, {010_c, 'A'}, - {00_c, 'F'}}}; + {011_c, 'B'}, + {100_c, 'C'}, + {101_c, 'D'}, + {110_c, 'E'}, + {1110_c, 'G'}, + {1111_c, 'H'}}}; // clang-format on expect(std::ranges::equal(actual, expected)); @@ -71,14 +71,14 @@ auto main() -> int static constexpr auto t1 = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{1111_c, 'H'}, - {1110_c, 'G'}, - {110_c, 'E'}, - {101_c, 'D'}, - {100_c, 'C'}, - {011_c, 'B'}, + {std::pair{00_c, 'F'}, {010_c, 'A'}, - {00_c, 'F'}}}; + {011_c, 'B'}, + {100_c, 'C'}, + {101_c, 'D'}, + {110_c, 'E'}, + {1110_c, 'G'}, + {1111_c, 'H'}}}; // clang-format on auto t2 = t1; diff --git a/huffman/test/table_find_code_test.cpp b/huffman/test/table_find_code_test.cpp index a9f1d1d..31308cf 100644 --- a/huffman/test/table_find_code_test.cpp +++ b/huffman/test/table_find_code_test.cpp @@ -16,12 +16,12 @@ auto main() -> int static constexpr auto table1 = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}} }; // clang-format on @@ -78,12 +78,12 @@ auto main() -> int static constexpr auto table = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, - {01_c, 'i'}, - {11_c, 'e'}} + {std::pair{11_c, 'e'}, + {01_c, 'i'}, + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}} }; // clang-format on diff --git a/huffman/test/table_from_contents_test.cpp b/huffman/test/table_from_contents_test.cpp index 95382f6..bf5a769 100644 --- a/huffman/test/table_from_contents_test.cpp +++ b/huffman/test/table_from_contents_test.cpp @@ -18,13 +18,15 @@ auto main() -> int test("code table constructible from code-symbol mapping") = [] { const auto t = huffman::table{ + // clang-format off huffman::table_contents, - {{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}}}; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; + // clang-format on auto ss = std::stringstream{}; ss << t; @@ -35,8 +37,8 @@ auto main() -> int "2\t01\t1\t`i`\n" "3\t001\t1\t`n`\n" "4\t0001\t1\t`q`\n" - "5\t00000\t0\t`\4`\n" - "5\t00001\t1\t`x`\n"; + "5\t00001\t1\t`x`\n" + "5\t00000\t0\t`\4`\n"; expect(table == ss.str()) << ss.str(); }; @@ -47,26 +49,24 @@ auto main() -> int const auto t1 = // clang-format off huffman::table{ huffman::table_contents, - {{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on const auto t2 = // clang-format off huffman::table{ huffman::table_contents, std::vector{ - std::tuple{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + std::tuple{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on expect(std::ranges::equal(t1, t2)); @@ -77,26 +77,24 @@ auto main() -> int const auto t1 = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on const auto t2 = // clang-format off huffman::table{ huffman::table_contents, std::vector{ - std::tuple{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + std::tuple{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format off expect(std::ranges::equal(t1, t2)); @@ -106,25 +104,23 @@ auto main() -> int static constexpr auto t1 = // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on static constexpr auto t2 = // clang-format off huffman::table{ huffman::table_contents, - {{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {001_c, 'n'}, + {{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}} - }; + {001_c, 'n'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on expect(std::ranges::equal(t1, t2)); @@ -134,13 +130,13 @@ auto main() -> int expect(aborts([] { // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {0001_c, 'g'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}}}; + {001_c, 'n'}, + {0001_c, 'g'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on })); }; @@ -149,13 +145,13 @@ auto main() -> int expect(aborts([] { // clang-format off huffman::table{ huffman::table_contents, - {std::pair{00000_c, '\4'}, - {00001_c, 'x'}, - {0001_c, 'q'}, - {0000_c, 'q'}, - {001_c, 'n'}, + {std::pair{1_c, 'e'}, {01_c, 'i'}, - {1_c, 'e'}}}; + {001_c, 'n'}, + {0000_c, 'q'}, + {0001_c, 'q'}, + {00001_c, 'x'}, + {00000_c, '\4'}}}; // clang-format on })); }; diff --git a/huffman/test/table_from_data_test.cpp b/huffman/test/table_from_data_test.cpp index 59fa7de..0cd26a0 100644 --- a/huffman/test/table_from_data_test.cpp +++ b/huffman/test/table_from_data_test.cpp @@ -83,6 +83,23 @@ auto main() -> int expect(std::ranges::equal(t1, t2)); }; + + test("empty code table") = [] { + const auto zero = huffman::table{}; + + expect(zero.begin() == zero.end()); + }; + + test("one symbol code table") = [] { + using namespace std::literals; + + constexpr auto data = "a"sv; + + const auto one = huffman::table{data}; + + expect('a' == one.begin()->symbol); + expect(1 == one.begin()->bitsize()); + }; } // NOLINTEND(readability-magic-numbers,google-build-using-namespace)