diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bb844b2b704..f25b46a52cd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1014,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL) ) target_link_libraries( - cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream + cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream $ ) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index fb82b0f5ff3..b8fb02eea97 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -16,11 +16,7 @@ function(find_and_configure_cucollections) include(${rapids-cmake-dir}/cpm/cuco.cmake) - if(BUILD_SHARED_LIBS) - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) - else() - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) - endif() + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) endfunction() find_and_configure_cucollections() diff --git a/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh index 6cf0b0fe817..38a7d927b9c 100644 --- a/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh +++ b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023, NVIDIA CORPORATION. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,157 +24,83 @@ #include #include -#include +#include +#include namespace cudf::hashing::detail { -// MurmurHash3_x86_32 implementation from -// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. -// Note - The x86 and x64 versions do _not_ produce the same results, as the -// algorithms are optimized for their respective platforms. You can still -// compile and run any of them on any platform, but your performance with the -// non-native version will be less than optimal. template struct MurmurHash3_x86_32 { using result_type = hash_value_type; - constexpr MurmurHash3_x86_32() = default; - constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {} - - [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const + __host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED) + : _impl{seed} { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; } - [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data, - cudf::size_type offset) const - { - // Read a 4-byte value from the data pointer as individual bytes for safe - // unaligned access (very likely for string types). - auto const block = reinterpret_cast(data + offset); - return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); - } + __device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); } - [[nodiscard]] result_type __device__ inline operator()(Key const& key) const + __device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes, + std::uint64_t size) const { - return compute(normalize_nans_and_zeros(key)); + return this->_impl.compute_hash(bytes, size); } + private: template - result_type __device__ inline compute(T const& key) const + __device__ constexpr result_type compute(T const& key) const { - return compute_bytes(reinterpret_cast(&key), sizeof(T)); + return this->compute_bytes(reinterpret_cast(&key), sizeof(T)); } - result_type __device__ inline compute_remaining_bytes(std::byte const* data, - cudf::size_type len, - cudf::size_type tail_offset, - result_type h) const - { - // Process remaining bytes that do not fill a four-byte chunk. - uint32_t k1 = 0; - switch (len % 4) { - case 3: k1 ^= std::to_integer(data[tail_offset + 2]) << 16; [[fallthrough]]; - case 2: k1 ^= std::to_integer(data[tail_offset + 1]) << 8; [[fallthrough]]; - case 1: - k1 ^= std::to_integer(data[tail_offset]); - k1 *= c1; - k1 = rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - }; - return h; - } - - result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const - { - constexpr cudf::size_type BLOCK_SIZE = 4; - cudf::size_type const nblocks = len / BLOCK_SIZE; - cudf::size_type const tail_offset = nblocks * BLOCK_SIZE; - result_type h = m_seed; - - // Process all four-byte chunks. - for (cudf::size_type i = 0; i < nblocks; i++) { - uint32_t k1 = getblock32(data, i * BLOCK_SIZE); - k1 *= c1; - k1 = rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - h = rotate_bits_left(h, rot_c2); - h = h * 5 + c3; - } - - h = compute_remaining_bytes(data, len, tail_offset, h); - - // Finalize hash. - h ^= len; - h = fmix32(h); - return h; - } - - private: - uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; - static constexpr uint32_t c1 = 0xcc9e2d51; - static constexpr uint32_t c2 = 0x1b873593; - static constexpr uint32_t c3 = 0xe6546b64; - static constexpr uint32_t rot_c1 = 15; - static constexpr uint32_t rot_c2 = 13; + cuco::murmurhash3_32 _impl; }; template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()(bool const& key) const { - return compute(static_cast(key)); + return this->compute(static_cast(key)); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()(float const& key) const { - return compute(normalize_nans_and_zeros(key)); + return this->compute(normalize_nans_and_zeros(key)); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()(double const& key) const { - return compute(normalize_nans_and_zeros(key)); + return this->compute(normalize_nans_and_zeros(key)); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()( cudf::string_view const& key) const { - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return compute_bytes(data, len); + return this->compute_bytes(reinterpret_cast(key.data()), + key.size_bytes()); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()( numeric::decimal32 const& key) const { - return compute(key.value()); + return this->compute(key.value()); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()( numeric::decimal64 const& key) const { - return compute(key.value()); + return this->compute(key.value()); } template <> hash_value_type __device__ inline MurmurHash3_x86_32::operator()( numeric::decimal128 const& key) const { - return compute(key.value()); + return this->compute(key.value()); } template <> diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh index b00e8297ac9..49432ffd0a4 100644 --- a/cpp/include/cudf/hashing/detail/xxhash_64.cuh +++ b/cpp/include/cudf/hashing/detail/xxhash_64.cuh @@ -16,9 +16,9 @@ #pragma once -#include "hash_functions.cuh" - #include +#include +#include #include #include @@ -31,6 +31,11 @@ template struct XXHash_64 : public cuco::xxhash_64 { using result_type = typename cuco::xxhash_64::result_type; + __host__ __device__ constexpr XXHash_64(uint64_t seed = cudf::DEFAULT_HASH_SEED) + : cuco::xxhash_64{seed} + { + } + __device__ result_type operator()(Key const& key) const { return cuco::xxhash_64::operator()(key);