Skip to content

Commit

Permalink
Merge branch 'branch-23.10' into jni_histogram
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia committed Sep 21, 2023
2 parents ee229a0 + 05ee260 commit e701908
Show file tree
Hide file tree
Showing 152 changed files with 4,084 additions and 2,224 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ repos:
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.1
rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -413,11 +413,13 @@ add_library(
src/io/utilities/arrow_io_source.cpp
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
src/io/utilities/data_casting.cu
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
src/io/utilities/parsing_utils.cu
src/io/utilities/row_selection.cpp
src/io/utilities/type_inference.cu
src/io/utilities/trie.cu
src/jit/cache.cpp
src/jit/parser.cpp
Expand Down Expand Up @@ -530,7 +532,7 @@ add_library(
src/stream_compaction/apply_boolean_mask.cu
src/stream_compaction/distinct.cu
src/stream_compaction/distinct_count.cu
src/stream_compaction/distinct_reduce.cu
src/stream_compaction/distinct_helpers.cu
src/stream_compaction/drop_nans.cu
src/stream_compaction/drop_nulls.cu
src/stream_compaction/stable_distinct.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/iterator/iterator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ void BM_iterator(benchmark::State& state)
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
if (cub_or_thrust) {
if (raw_or_iterator) {
raw_stream_bench_cub<T>(hasnull_F, dev_result); // driven by raw pointer
raw_stream_bench_cub<T>(hasnull_F, dev_result); // driven by raw pointer
} else {
iterator_bench_cub<T, false>(hasnull_F, dev_result); // driven by riterator without nulls
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns)
int64_t const column_bytes_in = column_bytes_out; // we only read unmasked inputs

int64_t const bytes_read =
(column_bytes_in + validity_bytes_in) * num_columns + // reading columns
mask_size; // reading boolean mask
(column_bytes_in + validity_bytes_in) * num_columns + // reading columns
mask_size; // reading boolean mask
int64_t const bytes_written =
(column_bytes_out + validity_bytes_out) * num_columns; // writing columns

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/char_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static void bench_char_types(nvbench::state& state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
// gather some throughput statistics as well
auto chars_size = input.chars_size();
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
if (api_type == "all") {
state.add_global_memory_writes<nvbench::int8_t>(num_rows); // output is a bool8 per row
} else {
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/extract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static void bench_extract(nvbench::state& state)
std::uniform_int_distribution<int> words_dist(0, 999);
std::vector<std::string> samples(100); // 100 unique rows of data to reuse
std::generate(samples.begin(), samples.end(), [&]() {
std::string row; // build a row of random tokens
std::string row; // build a row of random tokens
while (static_cast<cudf::size_type>(row.size()) < row_width) {
row += std::to_string(words_dist(generator)) + " ";
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ struct pair_accessor {
*/
template <typename T, bool has_nulls = false>
struct pair_rep_accessor {
column_device_view const col; ///< column view of column in device
column_device_view const col; ///< column view of column in device

using rep_type = device_storage_type_t<T>; ///< representation type

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/copy_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ __launch_bounds__(block_size) __global__
if (has_validity) {
temp_valids[threadIdx.x] = false; // init shared memory
if (threadIdx.x < cudf::detail::warp_size) temp_valids[block_size + threadIdx.x] = false;
__syncthreads(); // wait for init
__syncthreads(); // wait for init
}

if (mask_true) {
Expand Down
167 changes: 167 additions & 0 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/uninitialized_fill.h>

#include <cuco/static_map.cuh>

namespace cudf::detail {

// Hash map type accepted by `hash_reduce_by_row` as its auxiliary `map` argument:
// a `cuco::static_map` whose keys and mapped values are both `size_type`, instantiated with
// device thread scope and `hash_table_allocator_type` as its allocator.
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;

/**
 * @brief Base struct for custom reduction functors that perform a reduce-by-key operation in
 * which the keys are rows that compare equal.
 *
 * The struct stores a map view, a row hasher, a row equality comparator and a pointer to the
 * output array, and exposes `get_output_ptr()` so that derived functors can locate the output
 * slot shared by all rows equal to a given row.
 *
 * TODO: We need to switch to use `static_reduction_map` when it is ready
 * (https://github.com/NVIDIA/cuCollections/pull/98).
 *
 * @tparam MapView Type of the map view; it must provide `find(idx, hasher, equal)` and `end()`
 * @tparam KeyHasher Type of the row hasher passed to `MapView::find`
 * @tparam KeyEqual Type of the row equality comparator passed to `MapView::find`
 * @tparam OutputType Element type of the output array
 */
template <typename MapView, typename KeyHasher, typename KeyEqual, typename OutputType>
struct reduce_by_row_fn_base {
 protected:
  MapView const d_map;         ///< Map view used to look up row indices
  KeyHasher const d_hasher;    ///< Row hasher used for the lookup
  KeyEqual const d_equal;      ///< Row equality comparator used for the lookup
  OutputType* const d_output;  ///< Beginning of the output array

  /**
   * @brief Store the lookup helpers and the output pointer.
   *
   * @param d_map The map view to search for row indices
   * @param d_hasher The row hasher used when searching the map
   * @param d_equal The row equality comparator used when searching the map
   * @param d_output Pointer to the beginning of the output array
   */
  reduce_by_row_fn_base(MapView const& d_map,
                        KeyHasher const& d_hasher,
                        KeyEqual const& d_equal,
                        OutputType* const d_output)
    : d_map{d_map},
      d_hasher{d_hasher},
      d_equal{d_equal},
      d_output{d_output}
  {
  }

  /**
   * @brief Return a pointer to the output slot associated with the given row index.
   *
   * @param idx The row index to look up
   * @return Pointer to the output element at the index stored in the map for `idx`, or at
   * `idx` itself if the lookup finds nothing
   */
  __device__ OutputType* get_output_ptr(size_type const idx) const
  {
    auto const found = d_map.find(idx, d_hasher, d_equal);

    if (found != d_map.end()) {
      // Of all duplicate rows, only one (undetermined) index could be inserted into the map, so
      // looking up any of those rows yields the same stored index. Consequently, all duplicate
      // rows get concurrent access to the same output slot.
      return d_output + found->second.load(cuda::std::memory_order_relaxed);
    }

    // Every input `idx` was inserted into the map beforehand, so the lookup can only miss when
    // `d_equal(idx, idx) == false`. That happens when comparing nulls or NaNs that are treated
    // as always unequal; such rows are all distinct and simply use their own output slot.
    return d_output + idx;
  }
};

/**
 * @brief Perform a reduction on groups of rows that compare equal.
 *
 * This is essentially a reduce-by-key operation in which the keys are non-contiguous rows that
 * compare equal. A hash table is used to find the groups of equal rows.
 *
 * At the beginning of the operation, the entire output array is filled with the value given by
 * the `init` parameter. Then, the reduction result for each row group is written into the output
 * array at the index of an unspecified row in the group.
 *
 * @tparam ReduceFuncBuilder The builder class that must have a `build()` method returning a
 *         reduction functor derived from `reduce_by_row_fn_base`
 * @tparam OutputType Type of the reduction results
 * @param map The auxiliary map used to perform the reduction
 * @param preprocessed_input The preprocessed input rows, used for computing row hashes and row
 *        comparisons
 * @param num_rows The number of all input rows
 * @param has_nulls Indicates whether the input rows have any nulls at any nested level
 * @param has_nested_columns Indicates whether the input table has any nested columns
 * @param nulls_equal Flag to specify whether null elements should be considered as equal
 * @param nans_equal Flag to specify whether NaN values in floating point columns should be
 *        considered equal
 * @param func_builder The builder whose `build()` method is called with the map device view, the
 *        row hasher, the row equality comparator and the beginning of the output array to create
 *        the functor that is applied to every row index
 * @param init The initial value for reduction of each row group
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned vector
 * @return A device_uvector containing the reduction results
 */
template <typename ReduceFuncBuilder, typename OutputType>
rmm::device_uvector<OutputType> hash_reduce_by_row(
  hash_map_type const& map,
  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> const preprocessed_input,
  size_type num_rows,
  cudf::nullate::DYNAMIC has_nulls,
  bool has_nested_columns,
  null_equality nulls_equal,
  nan_equality nans_equal,
  ReduceFuncBuilder func_builder,
  OutputType init,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr)
{
  auto const d_map_view = map.get_device_view();
  auto const hasher     = cudf::experimental::row::hash::row_hasher(preprocessed_input);
  auto const d_hasher   = hasher.device_hasher(has_nulls);
  auto const comparator = cudf::experimental::row::equality::self_comparator(preprocessed_input);

  // Every output slot starts out holding `init`.
  rmm::device_uvector<OutputType> output(num_rows, stream, mr);
  thrust::uninitialized_fill(rmm::exec_policy(stream), output.begin(), output.end(), init);

  // Applies the functor built for the given row equality comparator to every row index.
  auto const reduce_all_rows = [&](auto const& d_equal) {
    thrust::for_each(rmm::exec_policy(stream),
                     thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(num_rows),
                     func_builder.build(d_map_view, d_hasher, d_equal, output.begin()));
  };

  // Selects the nested or non-nested row comparator for the given element comparator.
  auto const dispatch_on_nesting = [&](auto const value_comp) {
    if (has_nested_columns) {
      reduce_all_rows(comparator.equal_to<true>(has_nulls, nulls_equal, value_comp));
    } else {
      reduce_all_rows(comparator.equal_to<false>(has_nulls, nulls_equal, value_comp));
    }
  };

  // The element comparator decides whether NaNs compare equal to each other.
  if (nans_equal == nan_equality::ALL_EQUAL) {
    dispatch_on_nesting(
      cudf::experimental::row::equality::nan_equal_physical_equality_comparator{});
  } else {
    dispatch_on_nesting(cudf::experimental::row::equality::physical_equality_comparator{});
  }

  return output;
}

} // namespace cudf::detail
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ struct input_indexalator : base_indexalator<input_indexalator> {
friend struct indexalator_factory;
friend struct base_indexalator<input_indexalator>; // for CRTP

using reference = size_type const; // this keeps STL and thrust happy
using reference = size_type const; // this keeps STL and thrust happy

input_indexalator() = default;
input_indexalator(input_indexalator const&) = default;
Expand Down Expand Up @@ -332,7 +332,7 @@ struct output_indexalator : base_indexalator<output_indexalator> {
friend struct indexalator_factory;
friend struct base_indexalator<output_indexalator>; // for CRTP

using reference = output_indexalator const&; // required for output iterators
using reference = output_indexalator const&; // required for output iterators

output_indexalator() = default;
output_indexalator(output_indexalator const&) = default;
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ struct hash_join {
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
_preprocessed_build; ///< input table preprocssed for row operators
map_type _hash_table; ///< hash table built on `_build`
_preprocessed_build; ///< input table preprocssed for row operators
map_type _hash_table; ///< hash table built on `_build`

public:
/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/fixed_point/fixed_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -829,5 +829,5 @@ using decimal32 = fixed_point<int32_t, Radix::BASE_10>; ///< 32-bit decima
using decimal64 = fixed_point<int64_t, Radix::BASE_10>; ///< 64-bit decimal fixed point
using decimal128 = fixed_point<__int128_t, Radix::BASE_10>; ///< 128-bit decimal fixed point

/** @} */ // end of group
/** @} */ // end of group
} // namespace numeric
4 changes: 2 additions & 2 deletions cpp/include/cudf/groupby.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,8 @@ class groupby {
///< indicates null order
///< of each column
std::unique_ptr<detail::sort::sort_groupby_helper>
_helper; ///< Helper object
///< used by sort based implementation
_helper; ///< Helper object
///< used by sort based implementation

/**
* @brief Get the sort helper object
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ class csv_reader_options {

auto const max_row_bytes = 16 * 1024; // 16KB
auto const column_bytes = 64;
auto const base_padding = 1024; // 1KB
auto const base_padding = 1024; // 1KB

if (num_columns == 0) {
// Use flat size if the number of columns is not known
Expand Down
Loading

0 comments on commit e701908

Please sign in to comment.