From bdbb1dbe4403ad42fb3f69a8e955067216c96a35 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 14:09:09 -0700 Subject: [PATCH] Get rid of `compaction_hash` and sentinel values --- .../cudf/detail/hash_reduce_by_row.cuh | 9 +++++--- cpp/src/stream_compaction/distinct.cu | 6 ++--- cpp/src/stream_compaction/distinct_count.cu | 4 ++-- .../stream_compaction_common.cuh | 22 ------------------- .../stream_compaction_common.hpp | 5 ----- 5 files changed, 11 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 35654b90bc0..2d2b43f1d4a 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include @@ -27,8 +25,13 @@ #include #include +#include + namespace cudf::detail { +using hash_map_type = + cuco::static_map; + /** * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are * rows that compared equal. @@ -124,7 +127,7 @@ rmm::device_uvector hash_reduce_by_row( { auto const map_dview = map.get_device_view(); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const key_hasher = row_hasher.device_hasher(has_nulls); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto reduction_results = rmm::device_uvector(num_rows, stream, mr); diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e031727c21a..8d2b12ab141 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -51,8 +51,8 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } auto map = hash_map_type{compute_hash_table_size(input.num_rows()), - cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, - cuco::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL}, + cuco::empty_key{-1}, + cuco::empty_value{std::numeric_limits::min()}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; @@ -62,7 +62,7 @@ rmm::device_uvector get_distinct_indices(table_view const& input, auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const key_hasher = row_hasher.device_hasher(has_nulls); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 4bca0827efe..ac4811ad279 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -136,14 +136,14 @@ cudf::size_type distinct_count(table_view const& keys, auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const hash_key = row_hasher.device_hasher(has_nulls); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); auto key_set = cuco::experimental::static_set{ cuco::experimental::extent{compute_hash_table_size(num_rows)}, - cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, + cuco::empty_key{-1}, row_equal, cuco::experimental::linear_probing<1, hasher_type>{hash_key}, detail::hash_table_allocator_type{default_allocator{}, stream}, diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh index 4779cd990fd..839672d6a56 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.cuh +++ b/cpp/src/stream_compaction/stream_compaction_common.cuh @@ -29,28 +29,6 @@ namespace cudf { namespace detail { -namespace experimental { - -/** - * @brief Device callable to hash a given row. - */ -template -class compaction_hash { - public: - compaction_hash(RowHash row_hasher) : _hash{row_hasher} {} - - __device__ inline auto operator()(size_type i) const noexcept - { - auto hash = _hash(i); - return (hash == COMPACTION_EMPTY_KEY_SENTINEL) ? (hash - 1) : hash; - } - - private: - RowHash _hash; -}; - -} // namespace experimental - /**  * @brief Device functor to determine if a row is valid.  */ diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp index 0cd2d8f4b14..58d958d2ff4 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ b/cpp/src/stream_compaction/stream_compaction_common.hpp @@ -30,11 +30,6 @@ namespace cudf { namespace detail { -constexpr auto COMPACTION_EMPTY_KEY_SENTINEL = std::numeric_limits::max(); -constexpr auto COMPACTION_EMPTY_VALUE_SENTINEL = std::numeric_limits::min(); - -using hash_type = cuco::murmurhash3_32; - using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; using hash_map_type =