Skip to content

Commit

Permalink
merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Oct 29, 2024
2 parents 5d29713 + 3775f7b commit e958b24
Show file tree
Hide file tree
Showing 719 changed files with 13,904 additions and 10,208 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1"

ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
ENV HISTFILE="/home/coder/.cache/._bash_history"
ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
17 changes: 17 additions & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: "Auto Assign PR"

on:
pull_request_target:
types:
- opened
- reopened
- synchronize

jobs:
add_assignees:
runs-on: ubuntu-latest
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
repo_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
1 change: 1 addition & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: "Pull Request Labeler"

on:
- pull_request_target

Expand Down
24 changes: 12 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ repos:
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: |
(?x)^(^python/cudf_polars/.*)
types_or: [python, cython, pyi]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
hooks:
Expand Down Expand Up @@ -95,6 +84,16 @@ repos:
entry: 'pytest\.xfail'
language: pygrep
types: [python]
- id: no-unseeded-default-rng
name: no-unseeded-default-rng
description: 'Enforce that no non-seeded default_rng is used and default_rng is used instead of np.random.seed'
entry: |
# Check for usage of default_rng without seeding
default_rng\(\)|
# Check for usage of np.random.seed
np.random.seed\(
language: pygrep
types: [python]
- id: cmake-format
name: cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
Expand Down Expand Up @@ -140,6 +139,7 @@ repos:
rev: v0.4.8
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
Expand All @@ -155,7 +155,7 @@ repos:
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
rev: v1.16.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
Expand Down
5 changes: 3 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ conduct. More information can be found at:
8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
Fix if needed.
9. Wait for other developers to review your code and update code as needed.
Changes to any C++ files require at least 2 approvals from the cudf-cpp-codeowners before merging.
10. Once reviewed and approved, a RAPIDS developer will merge your pull request.

If you are unsure about anything, don't hesitate to comment on issues and ask for clarification!
Expand Down Expand Up @@ -293,8 +294,8 @@ In order to run doxygen as a linter on C++/CUDA code, run
./ci/checks/doxygen.sh
```

Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/),
[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/).
Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/)
with its various rules for Black-like formatting or Isort.

cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes,
Expand Down
4 changes: 4 additions & 0 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ rapids-print-env

rapids-logger "Begin cpp build"

sccache --zero-stats

# With boa installed conda build forward to boa
RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \
conda/recipes/libcudf

sccache --show-adv-stats

rapids-upload-conda-to-s3 cpp
15 changes: 15 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ rapids-logger "Begin py build"

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

sccache --zero-stats

# TODO: Remove `--no-test` flag once importing on a CPU
# node works correctly
# With boa installed conda build forwards to the boa builder
Expand All @@ -28,12 +30,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibcudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
Expand All @@ -46,11 +54,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf_kafka

sccache --show-adv-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/custreamz

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf-polars

rapids-upload-conda-to-s3 python
15 changes: 13 additions & 2 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

set -euo pipefail

package_dir=$1
package_name=$1
package_dir=$2

source rapids-configure-sccache
source rapids-date-string
Expand All @@ -12,4 +13,14 @@ rapids-generate-version > ./VERSION

cd "${package_dir}"

python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
sccache --zero-stats

rapids-logger "Building '${package_name}' wheel"
python -m pip wheel \
-w dist \
-v \
--no-deps \
--disable-pip-version-check \
.

sccache --show-adv-stats
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf
echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf_polars"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf-polars ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/dask_cudf"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
24 changes: 22 additions & 2 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,30 @@

set -euo pipefail

package_name="libcudf"
package_dir="python/libcudf"

rapids-logger "Generating build requirements"

rapids-dependency-file-generator \
--output requirements \
--file-key "py_build_${package_name}" \
--file-key "py_rapids_build_${package_name}" \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
| tee /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
-v \
--prefer-binary \
-r /tmp/requirements-build.txt

# build with '--no-build-isolation', for better sccache hit rate
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh "${package_name}" "${package_dir}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

Expand All @@ -16,4 +36,4 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
4 changes: 2 additions & 2 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh pylibcudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
19 changes: 17 additions & 2 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ rapids-mamba-retry install \
--channel "${PYTHON_CHANNEL}" \
"dask-cudf=${RAPIDS_VERSION}" \
"cudf_kafka=${RAPIDS_VERSION}" \
"custreamz=${RAPIDS_VERSION}"
"custreamz=${RAPIDS_VERSION}" \
"cudf-polars=${RAPIDS_VERSION}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand All @@ -37,7 +38,7 @@ rapids-logger "pytest dask_cudf (legacy)"
DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
--numprocesses=8 \
--dist=loadscope \
--dist=worksteal \
.

rapids-logger "pytest cudf_kafka"
Expand All @@ -54,5 +55,19 @@ rapids-logger "pytest custreamz"
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
--cov-report=term

# Note that cudf-polars uses rmm.mr.CudaAsyncMemoryResource() which allocates
# half the available memory. This doesn't play well with multiple workers, so
# we keep --numprocesses=1 for now. This should be resolved by
# https://github.com/rapidsai/cudf/issues/16723.
rapids-logger "pytest cudf-polars"
./ci/run_cudf_polars_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" \
--numprocesses=1 \
--dist=worksteal \
--cov-config=./pyproject.toml \
--cov=cudf_polars \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-polars-coverage.xml" \
--cov-report=term

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- polars>=1.11,<1.12
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<18.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- polars>=1.11,<1.12
- pre-commit
- pyarrow>=14.0.0,<18.0.0a0
- pydata-sphinx-theme!=0.14.2
Expand Down
4 changes: 4 additions & 0 deletions conda/recipes/cudf-polars/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
./build.sh cudf_polars
61 changes: 61 additions & 0 deletions conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
{% set date_string = environ['RAPIDS_DATE_STRING'] %}

package:
name: cudf-polars
version: {{ version }}

source:
path: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- AWS_SESSION_TOKEN
- CMAKE_C_COMPILER_LAUNCHER
- CMAKE_CUDA_COMPILER_LAUNCHER
- CMAKE_CXX_COMPILER_LAUNCHER
- CMAKE_GENERATOR
- PARALLEL_LEVEL
- SCCACHE_BUCKET
- SCCACHE_IDLE_TIMEOUT
- SCCACHE_REGION
- SCCACHE_S3_KEY_PREFIX=cudf-polars-aarch64 # [aarch64]
- SCCACHE_S3_KEY_PREFIX=cudf-polars-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS

requirements:
host:
- python
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- setuptools
- cuda-version ={{ cuda_version }}
run:
- python
- pylibcudf ={{ version }}
- polars >=1.11,<1.12
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
requires:
- cuda-version ={{ cuda_version }}
imports:
- cudf_polars


about:
home: https://rapids.ai/
license: Apache-2.0
license_family: APACHE
license_file: LICENSE
summary: cudf-polars library
Loading

0 comments on commit e958b24

Please sign in to comment.