diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000000..9d35e3f97f
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,30 @@
+# syntax=docker/dockerfile:1.5
+
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 0000000000..3c76b8963d
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,64 @@
+# RAFT Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the RAFT C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
+* `${repo}:/home/coder/raft`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.local:/home/coder/.local`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.conda:/home/coder/.conda`
+* `${repo}/../.config:/home/coder/.config`
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the RAFT repo and select the "Reopen in Container" button in the bottom right.
+ +Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command. + +## Using the devcontainer + +On startup, the devcontainer creates or updates the conda/pip environment using `raft/dependencies.yaml`. + +The container includes convenience functions to clean, configure, and build the various RAFT components: + +```shell +$ clean-raft-cpp # only cleans the C++ build dir +$ clean-pylibraft-python # only cleans the Python build dir +$ clean-raft # cleans both C++ and Python build dirs + +$ configure-raft-cpp # only configures raft C++ lib + +$ build-raft-cpp # only builds raft C++ lib +$ build-pylibraft-python # only builds raft Python lib +$ build-raft # builds both C++ and Python libs +``` + +* The C++ build script is a small wrapper around `cmake -S ~/raft/cpp -B ~/raft/cpp/build` and `cmake --build ~/raft/cpp/build` +* The Python build script is a small wrapper around `pip install --editable ~/raft/cpp` + +Unlike `build.sh`, these convenience scripts *don't* install the libraries after building them. Instead, they automatically inject the correct arguments to build the C++ libraries from source and use their build dirs as package roots: + +```shell +$ cmake -S ~/raft/cpp -B ~/raft/cpp/build +$ CMAKE_ARGS="-Draft_ROOT=~/raft/cpp/build" \ # <-- this argument is automatic + pip install -e ~/raft/cpp +``` diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json new file mode 100644 index 0000000000..203f52f1a2 --- /dev/null +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -0,0 +1,37 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "11.8", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json new file mode 100644 index 0000000000..080ece996e --- /dev/null +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -0,0 +1,38 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "11.8", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda11.8-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/ucx:23.12": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/ucx", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json new file mode 100644 index 0000000000..da8bfb4db9 --- /dev/null +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -0,0 +1,37 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "12.0", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json new file mode 100644 index 0000000000..e2bee94f8a --- /dev/null +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -0,0 +1,38 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "12.0", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda12.0-ubuntu22.04" + } + }, + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/ucx:23.12": {"version": "1.14.1"}, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/ucx", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c2b318df47..1b7fb8e1a5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - wheel-tests-pylibraft - wheel-build-raft-dask - wheel-tests-raft-dask + - devcontainer secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-120-arm checks: @@ -92,3 +93,11 @@ jobs: with: build_type: pull-request script: ci/test_wheel_raft_dask.sh + devcontainer: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12 + with: + build_command: | + sccache -z; + build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose; + sccache -s; diff --git a/.gitignore b/.gitignore index 7939fc1622..11b7bc3eba 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,7 @@ _xml # sphinx _html _text + +# clang tooling +compile_commands.json +.clangd/ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7a69b95da1..a867a71f68 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -91,3 +91,10 @@ sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxy sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md sed_runner "/GIT_TAG.*branch-/ s|branch-.*|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md + +# .devcontainer files +find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do + sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" +done diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index a20e950313..a9ae5dcabb 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12 +python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 
git+https://github.com/rapidsai/dask-cuda.git@branch-23.12 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/raft_dask*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index dc27a0aa32..fcbf1451a8 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - breathe - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.12.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -43,6 +43,8 @@ dependencies: - numba>=0.57 - numpy>=1.21 - numpydoc +- nvcc_linux-64=11.8 +- pre-commit - pydata-sphinx-theme - pytest - pytest-cov diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 019679592f..a1e22f50a0 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -10,19 +10,20 @@ dependencies: - breathe - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-cudart-dev +- cuda-nvcc - cuda-profiler-api - cuda-python>=12.0,<13.0a0 - cuda-version=12.0 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.12.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -39,6 +40,7 @@ dependencies: - numba>=0.57 - numpy>=1.21 - numpydoc +- pre-commit - pydata-sphinx-theme - pytest - pytest-cov diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 5a9ef5bd32..4f1df12dfa 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - benchmark>=1.8.2 - c-compiler - clang-tools=16.0.6 -- clang=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-version=11.8 @@ -34,6 +34,7 @@ dependencies: - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 +- nvcc_linux-64=11.8 - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index c9caa4dd9b..04dfef5063 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 - dask-cuda ={{ minor_version }} - - distributed >=2023.7.1 + - distributed ==2023.9.2 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} diff --git a/cpp/.clangd b/cpp/.clangd new file mode 100644 index 0000000000..7c4fe036dd --- /dev/null +++ b/cpp/.clangd @@ -0,0 +1,65 @@ +# https://clangd.llvm.org/config + +# Apply a config conditionally to all C files +If: + PathMatch: .*\.(c|h)$ + +--- + +# Apply a config conditionally to all C++ files +If: + PathMatch: .*\.(c|h)pp + +--- + +# Apply a config conditionally to all CUDA files +If: + PathMatch: 
.*\.cuh? +CompileFlags: + Add: + - "-x" + - "cuda" + # No error on unknown CUDA versions + - "-Wno-unknown-cuda-version" + # Allow variadic CUDA functions + - "-Xclang=-fcuda-allow-variadic-functions" +Diagnostics: + Suppress: + - "variadic_device_fn" + - "attributes_not_allowed" + +--- + +# Tweak the clangd parse settings for all files +CompileFlags: + Add: + # report all errors + - "-ferror-limit=0" + - "-fmacro-backtrace-limit=0" + - "-ftemplate-backtrace-limit=0" + # Skip the CUDA version check + - "--no-cuda-version-check" + Remove: + # remove gcc's -fcoroutines + - -fcoroutines + # remove nvc++ flags unknown to clang + - "-gpu=*" + - "-stdpar*" + # remove nvcc flags unknown to clang + - "-arch*" + - "-gencode*" + - "--generate-code*" + - "-ccbin*" + - "-t=*" + - "--threads*" + - "-Xptxas*" + - "-Xcudafe*" + - "-Xfatbin*" + - "-Xcompiler*" + - "--diag-suppress*" + - "--diag_suppress*" + - "--compiler-options*" + - "--expt-extended-lambda" + - "--expt-relaxed-constexpr" + - "-forward-unknown-to-host-compiler" + - "-Werror=cross-execution-space-call" diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh index bb405088bb..63f6c14686 100644 --- a/cpp/bench/prims/neighbors/cagra_bench.cuh +++ b/cpp/bench/prims/neighbors/cagra_bench.cuh @@ -18,8 +18,10 @@ #include #include +#include #include #include +#include #include @@ -40,6 +42,8 @@ struct params { int block_size; int search_width; int max_iterations; + /** Ratio of removed indices. */ + double removed_ratio; }; template @@ -49,7 +53,8 @@ struct CagraBench : public fixture { params_(ps), queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)), dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)), - knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)) + knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)), + removed_indices_bitset_(handle, ps.n_samples) { // Generate random dataset and queriees raft::random::RngState state{42}; @@ -74,6 +79,13 @@ struct CagraBench : public fixture { auto metric = raft::distance::DistanceType::L2Expanded; + auto removed_indices = + raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples); + thrust::sequence( + resource::get_thrust_policy(handle), + thrust::device_pointer_cast(removed_indices.data_handle()), + thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); + removed_indices_bitset_.set(handle, removed_indices.view()); index_.emplace(raft::neighbors::cagra::index( handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view()))); } @@ -95,10 +107,18 @@ struct CagraBench : public fixture { distances.data_handle(), params_.n_queries, params_.k); auto queries_v = make_const_mdspan(queries_.view()); - loop_on_state(state, [&]() { - raft::neighbors::cagra::search( - this->handle, search_params, *this->index_, queries_v, ind_v, dist_v); - }); + if (params_.removed_ratio > 0) { + auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view()); + loop_on_state(state, [&]() { + raft::neighbors::cagra::search_with_filtering( + this->handle, search_params, *this->index_, queries_v, ind_v, dist_v, filter); + }); + } else { + loop_on_state(state, [&]() { + raft::neighbors::cagra::search( + this->handle, search_params, *this->index_, queries_v, ind_v, dist_v); + }); + } double data_size = params_.n_samples * params_.n_dims * sizeof(T); double graph_size = params_.n_samples * params_.degree * sizeof(IdxT); @@ -120,6 +140,7 @@ struct CagraBench 
: public fixture { state.counters["block_size"] = params_.block_size; state.counters["search_width"] = params_.search_width; state.counters["iterations"] = iterations; + state.counters["removed_ratio"] = params_.removed_ratio; } private: @@ -128,6 +149,7 @@ struct CagraBench : public fixture { raft::device_matrix queries_; raft::device_matrix dataset_; raft::device_matrix knn_graph_; + raft::core::bitset removed_indices_bitset_; }; inline const std::vector generate_inputs() @@ -141,7 +163,8 @@ inline const std::vector generate_inputs() {64}, // itopk_size {0}, // block_size {1}, // search_width - {0} // max_iterations + {0}, // max_iterations + {0.0} // removed_ratio ); auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples {128}, // dataset dim @@ -151,7 +174,22 @@ inline const std::vector generate_inputs() {64}, // itopk_size {64, 128, 256, 512, 1024}, // block_size {1}, // search_width - {0} // max_iterations + {0}, // max_iterations + {0.0} // removed_ratio + ); + inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + + inputs2 = raft::util::itertools::product( + {2000000ull, 10000000ull}, // n_samples + {128}, // dataset dim + {1, 10, 10000}, // n_queries + {255}, // k + {64}, // knn graph degree + {300}, // itopk_size + {256}, // block_size + {2}, // search_width + {0}, // max_iterations + {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio ); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); return inputs; diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp index cc934b7a98..19dd6b8350 100644 --- a/cpp/include/raft/neighbors/brute_force_types.hpp +++ b/cpp/include/raft/neighbors/brute_force_types.hpp @@ -66,11 +66,11 @@ struct index : ann::index { /** Dataset norms */ [[nodiscard]] inline auto norms() const -> device_vector_view { - return make_const_mdspan(norms_.value().view()); + return norms_view_.value(); } /** Whether ot not this index has dataset norms */ - [[nodiscard]] inline bool has_norms() const noexcept { return norms_.has_value(); } + [[nodiscard]] inline bool has_norms() const noexcept { return norms_view_.has_value(); } [[nodiscard]] inline T metric_arg() const noexcept { return metric_arg_; } @@ -102,10 +102,30 @@ struct index : ann::index { norms_(std::move(norms)), metric_arg_(metric_arg) { + if (norms_) { norms_view_ = make_const_mdspan(norms_.value().view()); } update_dataset(res, dataset); resource::sync_stream(res); } + /** Construct a brute force index from dataset + * + * This class stores a non-owning reference to the dataset and norms here. + * Having precomputed norms gives us a performance advantage at query time. + */ + index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + raft::distance::DistanceType metric, + T metric_arg = 0.0) + : ann::index(), + metric_(metric), + dataset_(make_device_matrix(res, 0, 0)), + dataset_view_(dataset_view), + norms_view_(norms_view), + metric_arg_(metric_arg) + { + } + private: /** * Replace the dataset with a new dataset. 
@@ -135,6 +155,7 @@ struct index : ann::index { raft::distance::DistanceType metric_; raft::device_matrix dataset_; std::optional> norms_; + std::optional> norms_view_; raft::device_matrix_view dataset_view_; T metric_arg_; }; diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index f96dd34e05..f9682a973f 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -391,7 +391,25 @@ void search(raft::resources const& res, /** * @brief Search ANN using the constructed index with the given sample filter. * - * See the [cagra::build](#cagra::build) documentation for a usage example. + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // create a bitset to filter the search + * auto removed_indices = raft::make_device_vector(res, n_removed_indices); + * raft::core::bitset removed_indices_bitset( + * res, removed_indices.view(), dataset.extent(0)); + * // search K nearest neighbours according to a bitset + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search_with_filtering(res, search_params, index, queries, neighbors, distances, + * filtering::bitset_filter(removed_indices_bitset.view())); + * @endcode * * @tparam T data element type * @tparam IdxT type of the indices diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 5dcfcb3929..9392bde440 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -478,13 +478,15 @@ __global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr, const INDEX_T query_id_offset, SAMPLE_FILTER_T sample_filter) { - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= result_buffer_size * num_queries) { return; } const auto i = tid % result_buffer_size; const auto j = tid / result_buffer_size; const auto index = i + j * lds; - if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) { + if (result_indices_ptr[index] != ~index_msb_1_mask && + !sample_filter(query_id_offset + j, result_indices_ptr[index])) { result_indices_ptr[index] = utils::get_max_value(); result_distances_ptr[index] = utils::get_max_value(); } @@ -788,12 +790,15 @@ struct search : search_plan_impl { auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size; auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size; - // Remove parent bit in search results - remove_parent_bit( - num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); + if constexpr (!std::is_same::value) { + // Remove parent bit in search results + remove_parent_bit(num_queries, + result_buffer_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + stream); - if (!std::is_same::value) { apply_filter( result_indices.data() + (iter & 0x1) * itopk_size, result_distances.data() + (iter & 0x1) * itopk_size, @@ -821,6 
+826,10 @@ struct search : search_plan_impl {
         true,
         topk_hint.data(),
         stream);
+    } else {
+      // Remove parent bit in search results
+      remove_parent_bit(
+        num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream);
     }
 
     // Copy results from working buffer to final buffer
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
index a0f346ab51..147b8b753d 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
@@ -291,6 +291,14 @@ struct search_plan_impl : public search_plan_impl_base {
         "`hashmap_max_fill_rate` must be equal to or greater than 0.1 and smaller than 0.9. " +
         std::to_string(hashmap_max_fill_rate) + " has been given.";
     }
+    if constexpr (!std::is_same::value) {
+      if (hashmap_mode == hash_mode::SMALL) {
+        error_message += "`SMALL` hash is not available when filtering";
+      } else {
+        hashmap_mode = hash_mode::HASH;
+      }
+    }
     if (algo == search_algo::MULTI_CTA) {
       if (hashmap_mode == hash_mode::SMALL) {
         error_message += "`small_hash` is not available when 'search_mode' is \"multi-cta\"";
diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh
index 3e4d0409bd..009ffd4684 100644
--- a/cpp/include/raft/neighbors/detail/nn_descent.cuh
+++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh
@@ -1278,8 +1278,7 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out
     std::thread update_and_sample_thread(update_and_sample, it);
 
-    std::cout << "# GNND iteraton: " << it + 1 << "/" << build_config_.max_iterations << "\r";
-    std::fflush(stdout);
+    RAFT_LOG_DEBUG("# GNND iteration: %lu / %lu", it + 1, build_config_.max_iterations);
 
     // Reuse dists_buffer_ to save GPU memory. graph_buffer_ cannot be reused, because it
     // contains some information for local_join.
diff --git a/cpp/include/raft/neighbors/sample_filter.cuh b/cpp/include/raft/neighbors/sample_filter.cuh
new file mode 100644
index 0000000000..9182d72da9
--- /dev/null
+++ b/cpp/include/raft/neighbors/sample_filter.cuh
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#pragma once + +#include +#include + +#include + +namespace raft::neighbors::filtering { +/** + * @brief Filter an index with a bitset + * + * @tparam index_t Indexing type + */ +template +struct bitset_filter { + // View of the bitset to use as a filter + const raft::core::bitset_view bitset_view_; + + bitset_filter(const raft::core::bitset_view bitset_for_filtering) + : bitset_view_{bitset_for_filtering} + { + } + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const + { + return bitset_view_.test(sample_ix); + } +}; +} // namespace raft::neighbors::filtering diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu index 15c7b2b33a..8e3a9df01b 100644 --- a/cpp/test/core/math_device.cu +++ b/cpp/test/core/math_device.cu @@ -21,7 +21,9 @@ #include #include -#if _RAFT_HAS_CUDA +#include + +#ifdef _RAFT_HAS_CUDA #include #include #endif @@ -35,7 +37,7 @@ __global__ void math_eval_kernel(OutT* out, OpT op, Args... args) template auto math_eval(OpT op, Args&&... args) { - typedef decltype(op(args...)) OutT; + using OutT = cuda::std::invoke_result_t; auto stream = rmm::cuda_stream_default; rmm::device_scalar result(stream); math_eval_kernel<<<1, 1, 0, stream>>>(result.data(), op, std::forward(args)...); diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index b750372244..e6c3873063 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -525,6 +526,119 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { } } + void testCagraRemoved() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.n_queries, + ps.n_rows - test_cagra_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_cagra_sample_filter::offset), + queries_size, + stream_); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + { + rmm::device_uvector distances_dev(queries_size, stream_); + rmm::device_uvector indices_dev(queries_size, stream_); + + { + cagra::index_params index_params; + index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is + // not used for knn_graph building. 
+ cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.hashmap_mode = cagra::hash_mode::HASH; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + } + + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + auto removed_indices = + raft::make_device_vector(handle_, test_cagra_sample_filter::offset); + thrust::sequence( + resource::get_thrust_policy(handle_), + thrust::device_pointer_cast(removed_indices.data_handle()), + thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); + resource::sync_stream(handle_); + raft::core::bitset removed_indices_bitset( + handle_, removed_indices.view(), ps.n_rows); + cagra::search_with_filtering( + handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); + update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); + update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.001, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + void SetUp() override { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu index 01d7e1e1ea..944c2cbc89 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu @@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestF_U32; TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; -TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs)); diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu index ee06d369fa..3d9dc76953 100644 --- 
a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu @@ -25,7 +25,11 @@ TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestI8_U32; TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; -TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs)); diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu index 3243e73ccd..c5b1b1704b 100644 --- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestU8_ TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); } typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; -TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) { this->testCagraFilter(); } +TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) +{ + this->testCagraFilter(); + this->testCagraRemoved(); +} INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs)); diff --git a/dependencies.yaml b/dependencies.yaml index f1b74cfe49..fe4a4620e0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -10,12 +10,15 @@ files: - build_pylibraft - cudatoolkit - develop + - checks + - build_wheels - test_libraft - docs - run_raft_dask - run_pylibraft - test_python_common - test_pylibraft + - cupy bench_ann: output: conda matrix: @@ -38,6 +41,7 @@ files: - py_version - test_python_common - test_pylibraft + - cupy checks: output: none includes: @@ -47,6 +51,7 @@ files: output: none includes: - test_pylibraft + - cupy - cudatoolkit - docs - py_version @@ -75,6 +80,7 @@ files: includes: - test_python_common - test_pylibraft + - cupy py_build_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -145,11 +151,37 @@ dependencies: packages: - gcc_linux-aarch64=11.* - sysroot_linux-aarch64==2.17 + - output_types: conda + matrices: + - matrix: {cuda: "12.0"} + packages: [cuda-version=12.0, cuda-nvcc] + - matrix: {cuda: "11.8", arch: x86_64} + packages: [nvcc_linux-64=11.8] + - matrix: {cuda: "11.8", arch: aarch64} + packages: [nvcc_linux-aarch64=11.8] + - matrix: {cuda: "11.5", arch: x86_64} + packages: [nvcc_linux-64=11.5] + - matrix: {cuda: "11.5", arch: aarch64} + packages: [nvcc_linux-aarch64=11.5] + - matrix: {cuda: "11.4", arch: x86_64} + packages: [nvcc_linux-64=11.4] + - matrix: {cuda: "11.4", arch: aarch64} + packages: [nvcc_linux-aarch64=11.4] + - matrix: {cuda: "11.2", arch: x86_64} + packages: [nvcc_linux-64=11.2] + - matrix: {cuda: "11.2", arch: aarch64} + packages: [nvcc_linux-aarch64=11.2] + build_pylibraft: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda] packages: - - &rmm rmm==23.12.* + - &rmm_conda rmm==23.12.* + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com specific: - output_types: [conda, requirements, pyproject] matrices: @@ -160,6 +192,20 @@ dependencies: - matrix: # All CUDA 11 versions packages: - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &build_pylibraft_packages_cu12 + - &rmm_cu12 rmm-cu12==23.12.* + - {matrix: {cuda: "12.1"}, packages: *build_pylibraft_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *build_pylibraft_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &build_pylibraft_packages_cu11 + - &rmm_cu11 rmm-cu11==23.12.* + - {matrix: {cuda: "11.5"}, packages: *build_pylibraft_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *build_pylibraft_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *build_pylibraft_packages_cu11} + - {matrix: null, packages: [*rmm_conda] } checks: common: - output_types: [conda, requirements] @@ -167,11 +213,9 @@ dependencies: - pre-commit develop: common: - - output_types: [conda, requirements] - packages: - - clang=16.0.6 - - output_types: [conda] + - output_types: conda packages: + - clang==16.0.6 - clang-tools=16.0.6 nn_bench: common: @@ -265,6 +309,45 @@ dependencies: - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 + + cupy: + common: + - output_types: conda + packages: + - cupy>=12.0.0 + specific: + - output_types: [requirements, pyproject] + matrices: + # All CUDA 12 + x86_64 versions + - matrix: {cuda: "12.2", arch: x86_64} + packages: &cupy_packages_cu12_x86_64 + - &cupy_cu12_x86_64 cupy-cuda12x>=12.0.0 + - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} + - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} + # All CUDA 12 + aarch64 versions + - matrix: {cuda: "12.2", arch: aarch64} + packages: &cupy_packages_cu12_aarch64 + - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. + - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} + - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} + + # All CUDA 11 + x86_64 versions + - matrix: {cuda: "11.8", arch: x86_64} + packages: &cupy_packages_cu11_x86_64 + - cupy-cuda11x>=12.0.0 + - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} + + # All CUDA 11 + aarch64 versions + - matrix: {cuda: "11.8", arch: aarch64} + packages: &cupy_packages_cu11_aarch64 + - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. 
+ - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} + - {matrix: null, packages: [cupy-cuda11x>=12.0.0]} + test_libraft: common: - output_types: [conda] @@ -287,7 +370,7 @@ dependencies: - sphinx-markdown-tables build_wheels: common: - - output_types: pyproject + - output_types: [requirements, pyproject] packages: - wheel - setuptools @@ -311,7 +394,14 @@ dependencies: - output_types: [conda, pyproject] packages: - &numpy numpy>=1.21 - - *rmm + - output_types: [conda] + packages: + - *rmm_conda + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cudf and rmm. + - --extra-index-url=https://pypi.nvidia.com specific: - output_types: [conda, requirements, pyproject] matrices: @@ -322,25 +412,62 @@ dependencies: - matrix: # All CUDA 11 versions packages: - *cuda_python11 + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &run_pylibraft_packages_cu12 + - *rmm_cu12 + - {matrix: {cuda: "12.1"}, packages: *run_pylibraft_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *run_pylibraft_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &run_pylibraft_packages_cu11 + - *rmm_cu11 + - {matrix: {cuda: "11.5"}, packages: *run_pylibraft_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *run_pylibraft_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *run_pylibraft_packages_cu11} + - {matrix: null, packages: [*rmm_conda]} run_raft_dask: common: - output_types: [conda, pyproject] packages: - - dask>=2023.7.1 + - dask==2023.9.2 - dask-cuda==23.12.* - - distributed>=2023.7.1 + - distributed==2023.9.2 - joblib>=0.11 - numba>=0.57 - *numpy - ucx-py==0.35.* - output_types: conda packages: - - dask-core>=2023.7.1 + - dask-core==2023.9.2 - ucx>=1.13.0 - ucx-proc=*=gpu + - &ucx_py_conda ucx-py==0.35.* - output_types: pyproject packages: - - pylibraft==23.12.* + - &pylibraft_conda pylibraft==23.12.* + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cudf and rmm. 
+ - --extra-index-url=https://pypi.nvidia.com + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.2"} + packages: &run_raft_dask_packages_cu12 + - &pylibraft_cu12 pylibraft-cu12==23.12.* + - &ucx_py_cu12 ucx-py-cu12==0.35.* + - {matrix: {cuda: "12.1"}, packages: *run_raft_dask_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *run_raft_dask_packages_cu12} + - matrix: {cuda: "11.8"} + packages: &run_raft_dask_packages_cu11 + - &pylibraft_cu11 pylibraft-cu11==23.12.* + - &ucx_py_cu11 ucx-py-cu11==0.35.* + - {matrix: {cuda: "11.5"}, packages: *run_raft_dask_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *run_raft_dask_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *run_raft_dask_packages_cu11} + - {matrix: null, packages: [*pylibraft_conda, *ucx_py_conda]} test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -353,9 +480,3 @@ dependencies: packages: - scikit-learn - scipy - - output_types: conda - packages: - - cupy>=12.0.0 - - output_types: pyproject - packages: - - cupy-cuda11x>=12.0.0 diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 53bb12c81c..d29997b4a3 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -17,6 +17,8 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) set(raft_dask_version 23.12.00) include(../../fetch_rapids.cmake) +include(rapids-cuda) +rapids_cuda_init_architectures(raft-dask-python) project( raft-dask-python @@ -25,7 +27,7 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C CXX + C CXX CUDA ) option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" @@ -42,14 +44,6 @@ else() endif() if(NOT raft_FOUND) - # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required - # languages for the C++ project even if this project does not require those languages. - include(rapids-cuda) - rapids_cuda_init_architectures(raft-dask) - enable_language(CUDA) - # Since raft-dask only enables CUDA optionally we need to manually include the file that - # rapids_cuda_init_architectures relies on `project` including. - include("${CMAKE_PROJECT_raft-dask_INCLUDE}") find_package(ucx REQUIRED) # raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 5c616806a2..1619edbbbf 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.12.*", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.9.2", + "distributed==2023.9.2", "joblib>=0.11", "numba>=0.57", "numpy>=1.21",