Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/m…

…erge
rapidsai · Nov 28, 2024 · 322f721 · 322f721
2 parents 2333c3a + 9b88794
commit 322f721
Show file tree

Hide file tree

Showing 19 changed files with 349 additions and 416 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -1014,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL)
   )
 
   target_link_libraries(
-    cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
+    cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
                            $<TARGET_NAME_IF_EXISTS:conda_env>
   )
 

diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -16,11 +16,7 @@
 function(find_and_configure_cucollections)
   include(${rapids-cmake-dir}/cpm/cuco.cmake)
 
-  if(BUILD_SHARED_LIBS)
-    rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
-  else()
-    rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
-  endif()
+  rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
 endfunction()
 
 find_and_configure_cucollections()
diff --git a/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,157 +24,83 @@
 #include <cudf/structs/struct_view.hpp>
 #include <cudf/types.hpp>
 
-#include <cstddef>
+#include <cuco/hash_functions.cuh>
+#include <cuda/std/cstddef>
 
 namespace cudf::hashing::detail {
 
-// MurmurHash3_x86_32 implementation from
-// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
 template <typename Key>
 struct MurmurHash3_x86_32 {
   using result_type = hash_value_type;
 
-  constexpr MurmurHash3_x86_32() = default;
-  constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}
-
-  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
+  __host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED)
+    : _impl{seed}
   {
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-    return h;
   }
 
-  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
-                                                      cudf::size_type offset) const
-  {
-    // Read a 4-byte value from the data pointer as individual bytes for safe
-    // unaligned access (very likely for string types).
-    auto const block = reinterpret_cast<uint8_t const*>(data + offset);
-    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
-  }
+  __device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }
 
-  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
+  __device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
+                                                 std::uint64_t size) const
   {
-    return compute(normalize_nans_and_zeros(key));
+    return this->_impl.compute_hash(bytes, size);
   }
 
+ private:
   template <typename T>
-  result_type __device__ inline compute(T const& key) const
+  __device__ constexpr result_type compute(T const& key) const
   {
-    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
+    return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
   }
 
-  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
-                                                        cudf::size_type len,
-                                                        cudf::size_type tail_offset,
-                                                        result_type h) const
-  {
-    // Process remaining bytes that do not fill a four-byte chunk.
-    uint32_t k1 = 0;
-    switch (len % 4) {
-      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
-      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
-      case 1:
-        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
-        k1 *= c1;
-        k1 = rotate_bits_left(k1, rot_c1);
-        k1 *= c2;
-        h ^= k1;
-    };
-    return h;
-  }
-
-  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
-  {
-    constexpr cudf::size_type BLOCK_SIZE = 4;
-    cudf::size_type const nblocks        = len / BLOCK_SIZE;
-    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
-    result_type h                        = m_seed;
-
-    // Process all four-byte chunks.
-    for (cudf::size_type i = 0; i < nblocks; i++) {
-      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
-      k1 *= c1;
-      k1 = rotate_bits_left(k1, rot_c1);
-      k1 *= c2;
-      h ^= k1;
-      h = rotate_bits_left(h, rot_c2);
-      h = h * 5 + c3;
-    }
-
-    h = compute_remaining_bytes(data, len, tail_offset, h);
-
-    // Finalize hash.
-    h ^= len;
-    h = fmix32(h);
-    return h;
-  }
-
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
-  static constexpr uint32_t c1     = 0xcc9e2d51;
-  static constexpr uint32_t c2     = 0x1b873593;
-  static constexpr uint32_t c3     = 0xe6546b64;
-  static constexpr uint32_t rot_c1 = 15;
-  static constexpr uint32_t rot_c2 = 13;
+  cuco::murmurhash3_32<Key> _impl;
 };
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
 {
-  return compute(static_cast<uint8_t>(key));
+  return this->compute(static_cast<uint8_t>(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
 {
-  return compute(normalize_nans_and_zeros(key));
+  return this->compute(normalize_nans_and_zeros(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
 {
-  return compute(normalize_nans_and_zeros(key));
+  return this->compute(normalize_nans_and_zeros(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
-  auto const data = reinterpret_cast<std::byte const*>(key.data());
-  auto const len  = key.size_bytes();
-  return compute_bytes(data, len);
+  return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
+                             key.size_bytes());
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
-  return compute(key.value());
+  return this->compute(key.value());
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
-  return compute(key.value());
+  return this->compute(key.value());
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
-  return compute(key.value());
+  return this->compute(key.value());
 }
 
 template <>

diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh
@@ -16,9 +16,9 @@
 
 #pragma once
 
-#include "hash_functions.cuh"
-
 #include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/hashing.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/types.hpp>
 
@@ -28,72 +28,75 @@
 namespace cudf::hashing::detail {
 
 template <typename Key>
-struct XXHash_64 : public cuco::xxhash_64<Key> {
-  using result_type = typename cuco::xxhash_64<Key>::result_type;
+struct XXHash_64 {
+  using result_type = std::uint64_t;
+
+  __host__ __device__ constexpr XXHash_64(uint64_t seed = cudf::DEFAULT_HASH_SEED) : _impl{seed} {}
 
-  __device__ result_type operator()(Key const& key) const
+  __device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }
+
+  __device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
+                                                 std::uint64_t size) const
   {
-    return cuco::xxhash_64<Key>::operator()(key);
+    return this->_impl.compute_hash(bytes, size);
   }
 
-  template <typename Extent>
-  __device__ result_type compute_hash(cuda::std::byte const* bytes, Extent size) const
+ private:
+  template <typename T>
+  __device__ constexpr result_type compute(T const& key) const
   {
-    return cuco::xxhash_64<Key>::compute_hash(bytes, size);
+    return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
   }
+
+  cuco::xxhash_64<Key> _impl;
 };
 
 template <>
 XXHash_64<bool>::result_type __device__ inline XXHash_64<bool>::operator()(bool const& key) const
 {
-  return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(key));
+  return this->compute(static_cast<uint8_t>(key));
 }
 
 template <>
 XXHash_64<float>::result_type __device__ inline XXHash_64<float>::operator()(float const& key) const
 {
-  return cuco::xxhash_64<float>::operator()(normalize_nans(key));
+  return this->compute(normalize_nans(key));
 }
 
 template <>
 XXHash_64<double>::result_type __device__ inline XXHash_64<double>::operator()(
   double const& key) const
 {
-  return cuco::xxhash_64<double>::operator()(normalize_nans(key));
+  return this->compute(normalize_nans(key));
 }
 
 template <>
 XXHash_64<cudf::string_view>::result_type
   __device__ inline XXHash_64<cudf::string_view>::operator()(cudf::string_view const& key) const
 {
-  return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(key.data()), key.size_bytes());
+  return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
+                             key.size_bytes());
 }
 
 template <>
 XXHash_64<numeric::decimal32>::result_type
   __device__ inline XXHash_64<numeric::decimal32>::operator()(numeric::decimal32 const& key) const
 {
-  auto const val = key.value();
-  auto const len = sizeof(val);
-  return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
+  return this->compute(key.value());
 }
 
 template <>
 XXHash_64<numeric::decimal64>::result_type
   __device__ inline XXHash_64<numeric::decimal64>::operator()(numeric::decimal64 const& key) const
 {
-  auto const val = key.value();
-  auto const len = sizeof(val);
-  return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
+  return this->compute(key.value());
 }
 
 template <>
 XXHash_64<numeric::decimal128>::result_type
   __device__ inline XXHash_64<numeric::decimal128>::operator()(numeric::decimal128 const& key) const
 {
-  auto const val = key.value();
-  auto const len = sizeof(val);
-  return this->compute_hash(reinterpret_cast<cuda::std::byte const*>(&val), len);
+  return this->compute(key.value());
 }
 
 }  // namespace cudf::hashing::detail
diff --git a/docs/cudf/source/user_guide/api_docs/series.rst b/docs/cudf/source/user_guide/api_docs/series.rst
@@ -22,6 +22,7 @@ Attributes
    Series.values
    Series.data
    Series.dtype
+   Series.dtypes
    Series.shape
    Series.ndim
    Series.nullable

diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -22,7 +22,6 @@ set(cython_sources
     filling.pyx
     groupby.pyx
     interop.pyx
-    json.pyx
     orc.pyx
     parquet.pyx
     reduce.pyx

diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py
@@ -9,7 +9,6 @@
     filling,
     groupby,
     interop,
-    json,
     nvtext,
     orc,
     parquet,