Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into prevent-pylibcudf-serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
pentschev authored Nov 29, 2024
2 parents 3e14ec9 + b084d74 commit 2ba7ed1
Show file tree
Hide file tree
Showing 134 changed files with 3,702 additions and 3,671 deletions.
1 change: 1 addition & 0 deletions .github/copy-pr-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

enabled: true
auto_sync_draft: false
26 changes: 26 additions & 0 deletions .github/workflows/trigger-breaking-change-alert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: Trigger Breaking Change Notifications

on:
pull_request_target:
types:
- closed
- reopened
- labeled
- unlabeled

jobs:
trigger-notifier:
if: contains(github.event.pull_request.labels.*.name, 'breaking')
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
sender_login: ${{ github.event.sender.login }}
sender_avatar: ${{ github.event.sender.avatar_url }}
repo: ${{ github.repository }}
pr_number: ${{ github.event.pull_request.number }}
pr_title: "${{ github.event.pull_request.title }}"
pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}"
pr_base_ref: ${{ github.event.pull_request.base.ref }}
pr_author: ${{ github.event.pull_request.user.login }}
event_action: ${{ github.event.action }}
pr_merged: ${{ github.event.pull_request.merged }}
15 changes: 11 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -926,9 +926,16 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool> spdlog::spdlog_header_only
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}> nanoarrow
PUBLIC CCCL::CCCL rmm::rmm rmm::rmm_logger $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
spdlog::spdlog_header_only
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
cuco::cuco
ZLIB::ZLIB
nvcomp::nvcomp
kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}>
nanoarrow
rmm::rmm_logger_impl
)

# Add Conda library, and include paths if specified
Expand Down Expand Up @@ -1007,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
function(find_and_configure_cucollections)
include(${rapids-cmake-dir}/cpm/cuco.cmake)

if(BUILD_SHARED_LIBS)
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
else()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endif()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endfunction()

find_and_configure_cucollections()
3 changes: 3 additions & 0 deletions cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

# Configure your project here
add_executable(basic_example src/process_csv.cpp)
target_link_libraries(basic_example PRIVATE cudf::cudf)
Expand Down
3 changes: 3 additions & 0 deletions cpp/examples/billion_rows/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)

add_library(groupby_results OBJECT groupby_results.cpp)
Expand Down
3 changes: 3 additions & 0 deletions cpp/examples/interop/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

# The Arrow CMake is currently broken if the build type is not set
set(CMAKE_BUILD_TYPE Release)
# No need to install Arrow libs when only the final example executable is shipped.
Expand Down
3 changes: 3 additions & 0 deletions cpp/examples/nested_types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

# Configure your project here
add_executable(deduplication deduplication.cpp)
target_link_libraries(deduplication PRIVATE cudf::cudf)
Expand Down
3 changes: 3 additions & 0 deletions cpp/examples/parquet_io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

add_library(parquet_io_utils OBJECT common_utils.cpp io_source.cpp)
target_compile_features(parquet_io_utils PRIVATE cxx_std_17)
target_link_libraries(parquet_io_utils PRIVATE cudf::cudf)
Expand Down
3 changes: 3 additions & 0 deletions cpp/examples/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ project(

include(../fetch_dependencies.cmake)

include(rapids-cmake)
rapids_cmake_build_type("Release")

list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)

add_executable(libcudf_apis libcudf_apis.cpp)
Expand Down
6 changes: 2 additions & 4 deletions cpp/include/cudf/detail/utilities/cuda.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#pragma once

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -25,8 +24,7 @@
#include <rmm/cuda_stream_view.hpp>

#include <cub/cub.cuh>

#include <type_traits>
#include <cuda/std/type_traits>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -164,7 +162,7 @@ template <int32_t block_size, int32_t leader_lane = 0, typename T>
__device__ T single_lane_block_sum_reduce(T lane_value)
{
static_assert(block_size <= 1024, "Invalid block size.");
static_assert(std::is_arithmetic_v<T>, "Invalid non-arithmetic type.");
static_assert(cuda::std::is_arithmetic_v<T>, "Invalid non-arithmetic type.");
constexpr auto warps_per_block{block_size / warp_size};
auto const lane_id{threadIdx.x % warp_size};
auto const warp_id{threadIdx.x / warp_size};
Expand Down
116 changes: 21 additions & 95 deletions cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION.
* Copyright (c) 2017-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,157 +24,83 @@
#include <cudf/structs/struct_view.hpp>
#include <cudf/types.hpp>

#include <cstddef>
#include <cuco/hash_functions.cuh>
#include <cuda/std/cstddef>

namespace cudf::hashing::detail {

// MurmurHash3_x86_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_x86_32 {
using result_type = hash_value_type;

constexpr MurmurHash3_x86_32() = default;
constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
__host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED)
: _impl{seed}
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}

[[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
cudf::size_type offset) const
{
// Read a 4-byte value from the data pointer as individual bytes for safe
// unaligned access (very likely for string types).
auto const block = reinterpret_cast<uint8_t const*>(data + offset);
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
}
__device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }

[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
__device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
std::uint64_t size) const
{
return compute(normalize_nans_and_zeros(key));
return this->_impl.compute_hash(bytes, size);
}

private:
template <typename T>
result_type __device__ inline compute(T const& key) const
__device__ constexpr result_type compute(T const& key) const
{
return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
}

result_type __device__ inline compute_remaining_bytes(std::byte const* data,
cudf::size_type len,
cudf::size_type tail_offset,
result_type h) const
{
// Process remaining bytes that do not fill a four-byte chunk.
uint32_t k1 = 0;
switch (len % 4) {
case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
case 1:
k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
};
return h;
}

result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
{
constexpr cudf::size_type BLOCK_SIZE = 4;
cudf::size_type const nblocks = len / BLOCK_SIZE;
cudf::size_type const tail_offset = nblocks * BLOCK_SIZE;
result_type h = m_seed;

// Process all four-byte chunks.
for (cudf::size_type i = 0; i < nblocks; i++) {
uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}

h = compute_remaining_bytes(data, len, tail_offset, h);

// Finalize hash.
h ^= len;
h = fmix32(h);
return h;
}

private:
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
static constexpr uint32_t c1 = 0xcc9e2d51;
static constexpr uint32_t c2 = 0x1b873593;
static constexpr uint32_t c3 = 0xe6546b64;
static constexpr uint32_t rot_c1 = 15;
static constexpr uint32_t rot_c2 = 13;
cuco::murmurhash3_32<Key> _impl;
};

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
{
return compute(static_cast<uint8_t>(key));
return this->compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return compute_bytes(data, len);
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
Expand Down
Loading

0 comments on commit 2ba7ed1

Please sign in to comment.