Skip to content

Commit

Permalink
Merge branch 'branch-23.10' into progressive_chunked_reader
Browse files Browse the repository at this point in the history
  • Loading branch information
nvdbaranec committed Sep 26, 2023
2 parents 2ecec0f + 3196f6c commit 3772e7a
Show file tree
Hide file tree
Showing 133 changed files with 5,049 additions and 2,593 deletions.
1 change: 0 additions & 1 deletion .github/ops-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
recently_updated: true
20 changes: 0 additions & 20 deletions .github/workflows/add_to_project.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
Expand Down
35 changes: 0 additions & 35 deletions .github/workflows/new-issues-to-triage-projects.yml

This file was deleted.

6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
needs: conda-python-build
Expand All @@ -83,7 +83,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
Expand All @@ -93,7 +93,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
needs: checks
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_cpp_memcheck.sh"
conda-python-cudf-tests:
secrets: inherit
Expand Down Expand Up @@ -63,7 +63,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
Expand All @@ -75,7 +75,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
Expand Down
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-logger "Build CPP docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/${RAPIDS_VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag"
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_NUMBER}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html"
mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html"
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
Expand Down Expand Up @@ -55,8 +56,8 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numba>=0.57,<0.58
- numpy>=1.21,<1.25
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==2.6.1
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
Expand Down Expand Up @@ -54,8 +55,8 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numba>=0.57,<0.58
- numpy>=1.21,<1.25
- numpydoc
- nvcomp==2.6.1
- nvtx>=0.2.1
Expand Down
6 changes: 4 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ requirements:
- typing_extensions >=4.0.0
- pandas >=1.3,<1.6.0dev0
- cupy >=12.0.0
- numba >=0.57
- numpy >=1.21
# TODO: Pin to numba<0.58 until #14160 is resolved
- numba >=0.57,<0.58
# TODO: Pin to numpy<1.25 until cudf requires pandas 2
- numpy >=1.21,<1.25
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
3 changes: 3 additions & 0 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ gbench_version:
gtest_version:
- ">=1.13.0"

aws_sdk_cpp_version:
- "<1.11"

libarrow_version:
- "=12"

Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ requirements:
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- zlib {{ zlib_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}

outputs:
- name: libcudf
Expand Down Expand Up @@ -107,6 +108,7 @@ outputs:
- dlpack {{ dlpack_version }}
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}
test:
commands:
- test -f $PREFIX/lib/libcudf.so
Expand Down
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -413,11 +413,13 @@ add_library(
src/io/utilities/arrow_io_source.cpp
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
src/io/utilities/data_casting.cu
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
src/io/utilities/parsing_utils.cu
src/io/utilities/row_selection.cpp
src/io/utilities/type_inference.cu
src/io/utilities/trie.cu
src/jit/cache.cpp
src/jit/parser.cpp
Expand Down
18 changes: 10 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <benchmarks/io/cuio_common.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <cstdio>
Expand Down Expand Up @@ -141,17 +142,18 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col
return col_names_to_read;
}

std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk)
std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk_idx)
{
CUDF_EXPECTS(num_segments >= num_chunks,
"Number of chunks cannot be greater than the number of segments in the file");
auto start_segment = [num_segments, num_chunks](int chunk) {
return num_segments * chunk / num_chunks;
};
std::vector<cudf::size_type> selected_segments;
for (auto segment = start_segment(chunk); segment < start_segment(chunk + 1); ++segment) {
selected_segments.push_back(segment);
}
CUDF_EXPECTS(chunk_idx < num_chunks,
"Chunk index must be smaller than the number of chunks in the file");

auto const segments_in_chunk = cudf::util::div_rounding_up_unsafe(num_segments, num_chunks);
auto const begin_segment = std::min(chunk_idx * segments_in_chunk, num_segments);
auto const end_segment = std::min(begin_segment + segments_in_chunk, num_segments);
std::vector<cudf::size_type> selected_segments(end_segment - begin_segment);
std::iota(selected_segments.begin(), selected_segments.end(), begin_segment);

return selected_segments;
}
Expand Down
12 changes: 5 additions & 7 deletions cpp/benchmarks/io/orc/orc_reader_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/io/orc_metadata.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -30,7 +31,7 @@
constexpr int64_t data_size = 512 << 20;
// The number of separate read calls to use when reading files in multiple chunks
// Each call reads roughly equal amounts of data
constexpr int32_t chunked_read_num_chunks = 8;
constexpr int32_t chunked_read_num_chunks = 4;

std::vector<std::string> get_top_level_col_names(cudf::io::source_info const& source)
{
Expand Down Expand Up @@ -88,7 +89,7 @@ void BM_orc_read_varying_options(nvbench::state& state,

auto const num_stripes =
cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes();
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
Expand All @@ -99,7 +100,6 @@ void BM_orc_read_varying_options(nvbench::state& state,
timer.start();
cudf::size_type rows_read = 0;
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
auto const is_last_chunk = chunk == (num_chunks - 1);
switch (RowSelection) {
case row_selection::ALL: break;
case row_selection::STRIPES:
Expand All @@ -108,7 +108,6 @@ void BM_orc_read_varying_options(nvbench::state& state,
case row_selection::NROWS:
read_options.set_skip_rows(chunk * chunk_row_cnt);
read_options.set_num_rows(chunk_row_cnt);
if (is_last_chunk) read_options.set_num_rows(-1);
break;
default: CUDF_FAIL("Unsupported row selection method");
}
Expand All @@ -132,9 +131,6 @@ using col_selections = nvbench::enum_type_list<column_selection::ALL,
column_selection::ALTERNATE,
column_selection::FIRST_HALF,
column_selection::SECOND_HALF>;
using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::STRIPES, row_selection::NROWS>;

NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(col_selections,
nvbench::enum_type_list<row_selection::ALL>,
Expand All @@ -146,6 +142,8 @@ NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
{"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"})
.set_min_samples(4);

using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::NROWS, row_selection::STRIPES>;
NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
row_selections,
Expand Down
Loading

0 comments on commit 3772e7a

Please sign in to comment.