Skip to content

Commit

Permalink
Get rid of compaction_hash and sentinel values
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia committed Sep 13, 2023
1 parent a4de11b commit bdbb1db
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 35 deletions.
9 changes: 6 additions & 3 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
* limitations under the License.
*/

#include <stream_compaction/stream_compaction_common.cuh>

#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/types.hpp>

Expand All @@ -27,8 +25,13 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/uninitialized_fill.h>

#include <cuco/static_map.cuh>

namespace cudf::detail {

// Hash map type used by the row-based reduction in this header: a `cuco::static_map` whose keys
// and mapped values are both `size_type`, instantiated with device thread scope and the
// `hash_table_allocator_type` allocator.
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;

/**
* @brief The base struct for customized reduction functors that perform reduce-by-key, where the
* keys are rows that compare equal.
Expand Down Expand Up @@ -124,7 +127,7 @@ rmm::device_uvector<OutputType> hash_reduce_by_row(
{
auto const map_dview = map.get_device_view();
auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input);
auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls));
auto const key_hasher = row_hasher.device_hasher(has_nulls);
auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input);

auto reduction_results = rmm::device_uvector<OutputType>(num_rows, stream, mr);
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/stream_compaction/distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ rmm::device_uvector<size_type> get_distinct_indices(table_view const& input,
}

auto map = hash_map_type{compute_hash_table_size(input.num_rows()),
cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL},
cuco::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL},
cuco::empty_key{-1},
cuco::empty_value{std::numeric_limits<size_type>::min()},
detail::hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};

Expand All @@ -62,7 +62,7 @@ rmm::device_uvector<size_type> get_distinct_indices(table_view const& input,
auto const has_nested_columns = cudf::detail::has_nested_columns(input);

auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input);
auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls));
auto const key_hasher = row_hasher.device_hasher(has_nulls);

auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input);

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/stream_compaction/distinct_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,14 @@ cudf::size_type distinct_count(table_view const& keys,
auto const preprocessed_input =
cudf::experimental::row::hash::preprocessed_table::create(keys, stream);
auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input);
auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls));
auto const hash_key = row_hasher.device_hasher(has_nulls);
auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input);

auto const comparator_helper = [&](auto const row_equal) {
using hasher_type = decltype(hash_key);
auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_rows)},
cuco::empty_key<cudf::size_type>{COMPACTION_EMPTY_KEY_SENTINEL},
cuco::empty_key<cudf::size_type>{-1},
row_equal,
cuco::experimental::linear_probing<1, hasher_type>{hash_key},
detail::hash_table_allocator_type{default_allocator<char>{}, stream},
Expand Down
22 changes: 0 additions & 22 deletions cpp/src/stream_compaction/stream_compaction_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,6 @@
namespace cudf {
namespace detail {

namespace experimental {

/**
 * @brief Device-callable wrapper around a row hasher whose result is never equal to
 * `COMPACTION_EMPTY_KEY_SENTINEL`.
 *
 * The row index is forwarded to the wrapped hasher. If the computed hash compares equal to the
 * sentinel, the hash minus one is returned instead; otherwise the hash is returned unchanged.
 *
 * @tparam RowHash Type of the wrapped hasher; it must be invocable with a `size_type` row index
 */
template <typename RowHash>
class compaction_hash {
 public:
  compaction_hash(RowHash row_hasher) : _hash{row_hasher} {}

  __device__ inline auto operator()(size_type i) const noexcept
  {
    auto const row_hash = _hash(i);
    // Keep a single conditional expression so the deduced return type matches both branches.
    bool const equals_sentinel = (row_hash == COMPACTION_EMPTY_KEY_SENTINEL);
    return equals_sentinel ? (row_hash - 1) : row_hash;
  }

 private:
  RowHash _hash;  // wrapped row hasher, stored by value
};

} // namespace experimental

/**
 * @brief Device functor to determine if a row is valid.
*/
Expand Down
5 changes: 0 additions & 5 deletions cpp/src/stream_compaction/stream_compaction_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@
namespace cudf {
namespace detail {

// Sentinel passed as the hash table's "empty key": the largest value representable by `size_type`.
constexpr auto COMPACTION_EMPTY_KEY_SENTINEL = std::numeric_limits<size_type>::max();
// Sentinel passed as the hash table's "empty value": the smallest value representable by
// `size_type`.
constexpr auto COMPACTION_EMPTY_VALUE_SENTINEL = std::numeric_limits<size_type>::min();

// Alias for cuco's `murmurhash3_32` hasher specialized for `size_type` inputs.
using hash_type = cuco::murmurhash3_32<size_type>;

// Allocator type supplied when constructing the hash tables: `rmm::mr::stream_allocator_adaptor`
// wrapping `default_allocator<char>`.
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

using hash_map_type =
Expand Down

0 comments on commit bdbb1db

Please sign in to comment.