Skip to content

Commit

Permalink
Update MurmurHash3_x86_32 to use the cuco equivalent implementation (#…
Browse files Browse the repository at this point in the history
…17429)

This PR modifies `MurmurHash3_x86_32` to utilize the cuco equivalent implementation, eliminating duplication. Additionally, it resolves a minor issue in `xxhash_64` where the seed was not correctly passed to the underlying implementation.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Basit Ayantunde (https://github.com/lamarrr)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: #17429
  • Loading branch information
PointKernel authored Nov 27, 2024
1 parent adaee75 commit 9db132a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 103 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
function(find_and_configure_cucollections)
include(${rapids-cmake-dir}/cpm/cuco.cmake)

if(BUILD_SHARED_LIBS)
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
else()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endif()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endfunction()

find_and_configure_cucollections()
116 changes: 21 additions & 95 deletions cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION.
* Copyright (c) 2017-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,157 +24,83 @@
#include <cudf/structs/struct_view.hpp>
#include <cudf/types.hpp>

#include <cstddef>
#include <cuco/hash_functions.cuh>
#include <cuda/std/cstddef>

namespace cudf::hashing::detail {

// MurmurHash3_x86_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_x86_32 {
using result_type = hash_value_type;

constexpr MurmurHash3_x86_32() = default;
constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
__host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED)
: _impl{seed}
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}

[[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
cudf::size_type offset) const
{
// Read a 4-byte value from the data pointer as individual bytes for safe
// unaligned access (very likely for string types).
auto const block = reinterpret_cast<uint8_t const*>(data + offset);
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
}
__device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }

[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
__device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
std::uint64_t size) const
{
return compute(normalize_nans_and_zeros(key));
return this->_impl.compute_hash(bytes, size);
}

private:
template <typename T>
result_type __device__ inline compute(T const& key) const
__device__ constexpr result_type compute(T const& key) const
{
return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
}

result_type __device__ inline compute_remaining_bytes(std::byte const* data,
cudf::size_type len,
cudf::size_type tail_offset,
result_type h) const
{
// Process remaining bytes that do not fill a four-byte chunk.
uint32_t k1 = 0;
switch (len % 4) {
case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
case 1:
k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
};
return h;
}

result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
{
constexpr cudf::size_type BLOCK_SIZE = 4;
cudf::size_type const nblocks = len / BLOCK_SIZE;
cudf::size_type const tail_offset = nblocks * BLOCK_SIZE;
result_type h = m_seed;

// Process all four-byte chunks.
for (cudf::size_type i = 0; i < nblocks; i++) {
uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}

h = compute_remaining_bytes(data, len, tail_offset, h);

// Finalize hash.
h ^= len;
h = fmix32(h);
return h;
}

private:
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
static constexpr uint32_t c1 = 0xcc9e2d51;
static constexpr uint32_t c2 = 0x1b873593;
static constexpr uint32_t c3 = 0xe6546b64;
static constexpr uint32_t rot_c1 = 15;
static constexpr uint32_t rot_c2 = 13;
cuco::murmurhash3_32<Key> _impl;
};

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
{
return compute(static_cast<uint8_t>(key));
return this->compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return compute_bytes(data, len);
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
Expand Down
9 changes: 7 additions & 2 deletions cpp/include/cudf/hashing/detail/xxhash_64.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

#pragma once

#include "hash_functions.cuh"

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

Expand All @@ -31,6 +31,11 @@ template <typename Key>
struct XXHash_64 : public cuco::xxhash_64<Key> {
using result_type = typename cuco::xxhash_64<Key>::result_type;

__host__ __device__ constexpr XXHash_64(uint64_t seed = cudf::DEFAULT_HASH_SEED)
: cuco::xxhash_64<Key>{seed}
{
}

__device__ result_type operator()(Key const& key) const
{
return cuco::xxhash_64<Key>::operator()(key);
Expand Down

0 comments on commit 9db132a

Please sign in to comment.