Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into offsets-iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Nov 1, 2023
2 parents 704c853 + 56fe5db commit f45ee6d
Show file tree
Hide file tree
Showing 76 changed files with 1,072 additions and 374 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: ${{ inputs.build_type || 'branch' }}
build-2_28-wheels: "true"
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
build-2_28-wheels: "true"
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: wheel-build-cudf
Expand Down
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
23.12.00
4 changes: 3 additions & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ export CMAKE_GENERATOR=Ninja

rapids-print-env

version=$(rapids-generate-version)

rapids-logger "Begin cpp build"

# With boa installed conda build forward to boa
rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
conda/recipes/libcudf

rapids-upload-conda-to-s3 cpp
17 changes: 13 additions & 4 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,40 @@ export CMAKE_GENERATOR=Ninja

rapids-print-env

package_dir="python"
version=$(rapids-generate-version)
commit=$(git rev-parse HEAD)

echo "${version}" > VERSION
for package_name in cudf dask_cudf cudf_kafka custreamz; do
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" ${package_dir}/${package_name}/${package_name}/_version.py
done

rapids-logger "Begin py build"

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

# TODO: Remove `--no-test` flag once importing on a CPU
# node works correctly
# With boa installed conda build forwards to the boa builder
rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
conda/recipes/cudf

rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/dask-cudf

rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf_kafka

rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
Expand Down
8 changes: 4 additions & 4 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ package_dir=$2
source rapids-configure-sccache
source rapids-date-string

# Use gha-tools rapids-pip-wheel-version to generate wheel version then
# update the necessary files
version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
version=$(rapids-generate-version)
commit=$(git rev-parse HEAD)

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

Expand All @@ -22,8 +21,9 @@ PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
# Patch project metadata files to include the CUDA version suffix and version override.
pyproject_file="${package_dir}/pyproject.toml"

sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
echo "${version}" > VERSION
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name}/_version.py"

# For nightlies we want to ensure that we're pulling in alphas as well. The
# easiest way to do so is to augment the spec with a constraint containing a
Expand Down
3 changes: 1 addition & 2 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"

./ci/build_wheel.sh cudf ${package_dir}

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="cudf_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
13 changes: 2 additions & 11 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,8 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'
# cpp cudf_jni update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt

# Python __init__.py updates
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf/cudf/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/dask_cudf/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py

# Python pyproject.toml updates
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml
# Centralized version file update
echo "${NEXT_FULL_TAG}" > VERSION

# Wheel testing script
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh
Expand Down
15 changes: 14 additions & 1 deletion ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,21 @@

set -eou pipefail

# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
if [[ ${glibc_minor_version} -ge 28 ]]; then
manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]
Expand Down
15 changes: 14 additions & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,20 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
if [[ ${glibc_minor_version} -ge 28 ]]; then
manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl

# Always install latest dask for testing
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- c-compiler
- cachetools
- cmake>=3.26.4
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- c-compiler
- cachetools
- cmake>=3.26.4
- cramjam
- cuda-cudart-dev
- cuda-gdb
- cuda-nvcc
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
Expand All @@ -12,7 +12,7 @@ package:
version: {{ version }}

source:
git_url: ../../..
path: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
Expand All @@ -12,7 +12,7 @@ package:
version: {{ version }}

source:
git_url: ../../..
path: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
Expand All @@ -12,7 +12,7 @@ package:
version: {{ version }}

source:
git_url: ../../..
path: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
Expand All @@ -12,7 +12,7 @@ package:
version: {{ version }}

source:
git_url: ../../..
path: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
Expand All @@ -11,7 +11,7 @@ package:
name: libcudf-split

source:
git_url: ../../..
path: ../../..

build:
script_env:
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ add_library(
src/join/mixed_join_size_kernel_nulls.cu
src/join/mixed_join_size_kernels_semi.cu
src/join/semi_join.cu
src/json/json_path.cu
src/lists/contains.cu
src/lists/combine/concatenate_list_elements.cu
src/lists/combine/concatenate_rows.cu
Expand Down Expand Up @@ -571,7 +572,6 @@ add_library(
src/strings/filter_chars.cu
src/strings/like.cu
src/strings/padding.cu
src/strings/json/json_path.cu
src/strings/regex/regcomp.cpp
src/strings/regex/regexec.cpp
src/strings/regex/regex_program.cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ ConfigureNVBench(

# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cu)
ConfigureBench(JSON_BENCH json/json.cu)
ConfigureNVBench(FST_NVBENCH io/fst.cu)
ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp)
ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/json.cu → cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/json/json.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/json.hpp>
#include <cudf/strings/string_view.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
Expand Down Expand Up @@ -196,7 +196,7 @@ void BM_case(benchmark::State& state, std::string query_arg)

for (auto _ : state) {
cuda_event_timer raii(state, true);
auto result = cudf::strings::get_json_object(scv, json_path);
auto result = cudf::get_json_object(scv, json_path);
CUDF_CUDA_TRY(cudaStreamSynchronize(0));
}

Expand Down
62 changes: 34 additions & 28 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
add_library(arrow_shared ALIAS Arrow::Arrow)

# When using the libarrow inside a wheel we must build libcudf with the old ABI because pyarrow's
# `libarrow.so` is compiled for manylinux2014 (centos7 toolchain) which uses the old ABI. Note
# that these flags will often be redundant because we build wheels in manylinux containers that
# actually have the old libc++ anyway, but setting them explicitly ensures correct and consistent
# behavior in all other cases such as aarch builds on newer manylinux or testing builds in newer
# containers. Note that tests will not build successfully without also propagating these options
# to builds of GTest. Similarly, benchmarks will not work without updating GBench (and possibly
# NVBench) builds. We are currently ignoring these limitations since we don't anticipate using
# this feature except for building wheels.
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
"$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>"
# When using the libarrow inside a wheel, whether or not libcudf may be built using the new C++11
# ABI is dependent on whether the libarrow inside the wheel was compiled using that ABI because we
# need the arrow library that we bundle in cudf to be ABI-compatible with the one inside pyarrow.
# We determine what options to use by checking the glibc version on the current system, which is
# also how pip determines which manylinux-versioned pyarrow wheel to install. Note that tests will
# not build successfully without also propagating these options to builds of GTest. Similarly,
# benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
# ignoring these limitations since we don't anticipate using this feature except for building
# wheels.
EXECUTE_PROCESS(
COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
OUTPUT_VARIABLE GLIBC_EXECUTABLE
OUTPUT_STRIP_TRAILING_WHITESPACE
)
EXECUTE_PROCESS(
COMMAND ${GLIBC_EXECUTABLE}
OUTPUT_VARIABLE GLIBC_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
if(GLIBC_VERSION_MINOR LESS 28)
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
"$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>"
)
endif()

rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)
Expand Down Expand Up @@ -408,22 +424,12 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
endfunction()

if(NOT DEFINED CUDF_VERSION_Arrow)
# Temporarily use Arrow 12.0.1 in wheels and Arrow 13.0.0 otherwise
if(USE_LIBARROW_FROM_PYARROW)
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
12.0.1
CACHE STRING "The version of Arrow to find (or build)"
)
else()
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
13.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
13.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()

find_and_configure_arrow(
Expand Down
Loading

0 comments on commit f45ee6d

Please sign in to comment.