Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/m…
Browse files Browse the repository at this point in the history
…erge
  • Loading branch information
mroeschke committed Nov 28, 2024
2 parents 2333c3a + 9b88794 commit 322f721
Show file tree
Hide file tree
Showing 19 changed files with 349 additions and 416 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
function(find_and_configure_cucollections)
include(${rapids-cmake-dir}/cpm/cuco.cmake)

if(BUILD_SHARED_LIBS)
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
else()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endif()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endfunction()

find_and_configure_cucollections()
116 changes: 21 additions & 95 deletions cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION.
* Copyright (c) 2017-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,157 +24,83 @@
#include <cudf/structs/struct_view.hpp>
#include <cudf/types.hpp>

#include <cstddef>
#include <cuco/hash_functions.cuh>
#include <cuda/std/cstddef>

namespace cudf::hashing::detail {

// MurmurHash3_x86_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_x86_32 {
using result_type = hash_value_type;

constexpr MurmurHash3_x86_32() = default;
constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
__host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED)
: _impl{seed}
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}

[[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
cudf::size_type offset) const
{
// Read a 4-byte value from the data pointer as individual bytes for safe
// unaligned access (very likely for string types).
auto const block = reinterpret_cast<uint8_t const*>(data + offset);
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
}
__device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }

[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
__device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
std::uint64_t size) const
{
return compute(normalize_nans_and_zeros(key));
return this->_impl.compute_hash(bytes, size);
}

private:
template <typename T>
result_type __device__ inline compute(T const& key) const
__device__ constexpr result_type compute(T const& key) const
{
return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
}

result_type __device__ inline compute_remaining_bytes(std::byte const* data,
cudf::size_type len,
cudf::size_type tail_offset,
result_type h) const
{
// Process remaining bytes that do not fill a four-byte chunk.
uint32_t k1 = 0;
switch (len % 4) {
case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
case 1:
k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
};
return h;
}

result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
{
constexpr cudf::size_type BLOCK_SIZE = 4;
cudf::size_type const nblocks = len / BLOCK_SIZE;
cudf::size_type const tail_offset = nblocks * BLOCK_SIZE;
result_type h = m_seed;

// Process all four-byte chunks.
for (cudf::size_type i = 0; i < nblocks; i++) {
uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}

h = compute_remaining_bytes(data, len, tail_offset, h);

// Finalize hash.
h ^= len;
h = fmix32(h);
return h;
}

private:
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
static constexpr uint32_t c1 = 0xcc9e2d51;
static constexpr uint32_t c2 = 0x1b873593;
static constexpr uint32_t c3 = 0xe6546b64;
static constexpr uint32_t rot_c1 = 15;
static constexpr uint32_t rot_c2 = 13;
cuco::murmurhash3_32<Key> _impl;
};

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
{
return compute(static_cast<uint8_t>(key));
return this->compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return compute_bytes(data, len);
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
Expand Down
47 changes: 25 additions & 22 deletions cpp/include/cudf/hashing/detail/xxhash_64.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

#pragma once

#include "hash_functions.cuh"

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

Expand All @@ -28,72 +28,75 @@
namespace cudf::hashing::detail {

template <typename Key>
struct XXHash_64 : public cuco::xxhash_64<Key> {
using result_type = typename cuco::xxhash_64<Key>::result_type;
struct XXHash_64 {
using result_type = std::uint64_t;

__host__ __device__ constexpr XXHash_64(uint64_t seed = cudf::DEFAULT_HASH_SEED) : _impl{seed} {}

__device__ result_type operator()(Key const& key) const
__device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }

__device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
std::uint64_t size) const
{
return cuco::xxhash_64<Key>::operator()(key);
return this->_impl.compute_hash(bytes, size);
}

template <typename Extent>
__device__ result_type compute_hash(cuda::std::byte const* bytes, Extent size) const
private:
template <typename T>
__device__ constexpr result_type compute(T const& key) const
{
return cuco::xxhash_64<Key>::compute_hash(bytes, size);
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
}

cuco::xxhash_64<Key> _impl;
};

template <>
XXHash_64<bool>::result_type __device__ inline XXHash_64<bool>::operator()(bool const& key) const
{
return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(key));
return this->compute(static_cast<uint8_t>(key));
}

template <>
XXHash_64<float>::result_type __device__ inline XXHash_64<float>::operator()(float const& key) const
{
return cuco::xxhash_64<float>::operator()(normalize_nans(key));
return this->compute(normalize_nans(key));
}

template <>
XXHash_64<double>::result_type __device__ inline XXHash_64<double>::operator()(
double const& key) const
{
return cuco::xxhash_64<double>::operator()(normalize_nans(key));
return this->compute(normalize_nans(key));
}

template <>
XXHash_64<cudf::string_view>::result_type
__device__ inline XXHash_64<cudf::string_view>::operator()(cudf::string_view const& key) const
{
return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(key.data()), key.size_bytes());
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
XXHash_64<numeric::decimal32>::result_type
__device__ inline XXHash_64<numeric::decimal32>::operator()(numeric::decimal32 const& key) const
{
auto const val = key.value();
auto const len = sizeof(val);
return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
return this->compute(key.value());
}

template <>
XXHash_64<numeric::decimal64>::result_type
__device__ inline XXHash_64<numeric::decimal64>::operator()(numeric::decimal64 const& key) const
{
auto const val = key.value();
auto const len = sizeof(val);
return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
return this->compute(key.value());
}

template <>
XXHash_64<numeric::decimal128>::result_type
__device__ inline XXHash_64<numeric::decimal128>::operator()(numeric::decimal128 const& key) const
{
auto const val = key.value();
auto const len = sizeof(val);
return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
return this->compute(key.value());
}

} // namespace cudf::hashing::detail
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Attributes
Series.values
Series.data
Series.dtype
Series.dtypes
Series.shape
Series.ndim
Series.nullable
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ set(cython_sources
filling.pyx
groupby.pyx
interop.pyx
json.pyx
orc.pyx
parquet.pyx
reduce.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
filling,
groupby,
interop,
json,
nvtext,
orc,
parquet,
Expand Down
Loading

0 comments on commit 322f721

Please sign in to comment.