diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000000..9d35e3f97f
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,30 @@
+# syntax=docker/dockerfile:1.5
+
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 0000000000..3c76b8963d
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,64 @@
+# RAFT Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the RAFT C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
+* `${repo}:/home/coder/raft`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.local:/home/coder/.local`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.conda:/home/coder/.conda`
+* `${repo}/../.config:/home/coder/.config`
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the RAFT repo and select the "Reopen in Container" button in the bottom right.
+
+Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
+
+## Using the devcontainer
+
+On startup, the devcontainer creates or updates the conda/pip environment using `raft/dependencies.yaml`.
+
+The container includes convenience functions to clean, configure, and build the various RAFT components:
+
+```shell
+$ clean-raft-cpp # only cleans the C++ build dir
+$ clean-pylibraft-python # only cleans the Python build dir
+$ clean-raft # cleans both C++ and Python build dirs
+
+$ configure-raft-cpp # only configures raft C++ lib
+
+$ build-raft-cpp # only builds raft C++ lib
+$ build-pylibraft-python # only builds raft Python lib
+$ build-raft # builds both C++ and Python libs
+```
+
+* The C++ build script is a small wrapper around `cmake -S ~/raft/cpp -B ~/raft/cpp/build` and `cmake --build ~/raft/cpp/build`
+* The Python build script is a small wrapper around `pip install --editable ~/raft/python/pylibraft`
+
+Unlike `build.sh`, these convenience scripts *don't* install the libraries after building them. Instead, they automatically inject the correct arguments to build the C++ libraries from source and use their build dirs as package roots:
+
+```shell
+$ cmake -S ~/raft/cpp -B ~/raft/cpp/build
+$ CMAKE_ARGS="-Draft_ROOT=~/raft/cpp/build" \ # <-- this argument is automatic
+  pip install -e ~/raft/python/pylibraft
+```
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
new file mode 100644
index 0000000000..203f52f1a2
--- /dev/null
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
new file mode 100644
index 0000000000..080ece996e
--- /dev/null
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -0,0 +1,38 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda11.8-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.12": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/ucx",
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json
new file mode 100644
index 0000000000..da8bfb4db9
--- /dev/null
+++ b/.devcontainer/cuda12.0-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json
new file mode 100644
index 0000000000..e2bee94f8a
--- /dev/null
+++ b/.devcontainer/cuda12.0-pip/devcontainer.json
@@ -0,0 +1,38 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda12.0-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.12": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/ucx",
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c2b318df47..1b7fb8e1a5 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -22,6 +22,7 @@ jobs:
- wheel-tests-pylibraft
- wheel-build-raft-dask
- wheel-tests-raft-dask
+ - devcontainer
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-120-arm
checks:
@@ -92,3 +93,11 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_raft_dask.sh
+ devcontainer:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12
+ with:
+ build_command: |
+ sccache -z;
+ build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose;
+ sccache -s;
diff --git a/.gitignore b/.gitignore
index 7939fc1622..11b7bc3eba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,3 +62,7 @@ _xml
# sphinx
_html
_text
+
+# clang tooling
+compile_commands.json
+.clangd/
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 7a69b95da1..a867a71f68 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -91,3 +91,10 @@ sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxy
sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md
sed_runner "/GIT_TAG.*branch-/ s|branch-.*|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md
sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md
+
+# .devcontainer files
+find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
+ sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+done
diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh
index a20e950313..a9ae5dcabb 100755
--- a/ci/test_wheel_raft_dask.sh
+++ b/ci/test_wheel_raft_dask.sh
@@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels
python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl
# Always install latest dask for testing
-python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
+python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/raft_dask*.whl)[test]
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index dc27a0aa32..fcbf1451a8 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -10,7 +10,7 @@ dependencies:
- breathe
- c-compiler
- clang-tools=16.0.6
-- clang=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-profiler-api=11.8.86
- cuda-python>=11.7.1,<12.0a0
@@ -19,10 +19,10 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.12.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -43,6 +43,8 @@ dependencies:
- numba>=0.57
- numpy>=1.21
- numpydoc
+- nvcc_linux-64=11.8
+- pre-commit
- pydata-sphinx-theme
- pytest
- pytest-cov
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 019679592f..a1e22f50a0 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -10,19 +10,20 @@ dependencies:
- breathe
- c-compiler
- clang-tools=16.0.6
-- clang=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-cudart-dev
+- cuda-nvcc
- cuda-profiler-api
- cuda-python>=12.0,<13.0a0
- cuda-version=12.0
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.12.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -39,6 +40,7 @@ dependencies:
- numba>=0.57
- numpy>=1.21
- numpydoc
+- pre-commit
- pydata-sphinx-theme
- pytest
- pytest-cov
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 5a9ef5bd32..4f1df12dfa 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -10,7 +10,7 @@ dependencies:
- benchmark>=1.8.2
- c-compiler
- clang-tools=16.0.6
-- clang=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-profiler-api=11.8.86
- cuda-version=11.8
@@ -34,6 +34,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- nlohmann_json>=3.11.2
+- nvcc_linux-64=11.8
- scikit-build>=0.13.1
- sysroot_linux-64==2.17
name: bench_ann_cuda-118_arch-x86_64
diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml
index c9caa4dd9b..04dfef5063 100644
--- a/conda/recipes/raft-dask/meta.yaml
+++ b/conda/recipes/raft-dask/meta.yaml
@@ -60,10 +60,10 @@ requirements:
- cudatoolkit
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- - dask >=2023.7.1
- - dask-core >=2023.7.1
+ - dask ==2023.9.2
+ - dask-core ==2023.9.2
- dask-cuda ={{ minor_version }}
- - distributed >=2023.7.1
+ - distributed ==2023.9.2
- joblib >=0.11
- nccl >=2.9.9
- pylibraft {{ version }}
diff --git a/cpp/.clangd b/cpp/.clangd
new file mode 100644
index 0000000000..7c4fe036dd
--- /dev/null
+++ b/cpp/.clangd
@@ -0,0 +1,65 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+ PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+ PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+ PathMatch: .*\.cuh?
+CompileFlags:
+ Add:
+ - "-x"
+ - "cuda"
+ # No error on unknown CUDA versions
+ - "-Wno-unknown-cuda-version"
+ # Allow variadic CUDA functions
+ - "-Xclang=-fcuda-allow-variadic-functions"
+Diagnostics:
+ Suppress:
+ - "variadic_device_fn"
+ - "attributes_not_allowed"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+ Add:
+ # report all errors
+ - "-ferror-limit=0"
+ - "-fmacro-backtrace-limit=0"
+ - "-ftemplate-backtrace-limit=0"
+ # Skip the CUDA version check
+ - "--no-cuda-version-check"
+ Remove:
+ # remove gcc's -fcoroutines
+ - -fcoroutines
+ # remove nvc++ flags unknown to clang
+ - "-gpu=*"
+ - "-stdpar*"
+ # remove nvcc flags unknown to clang
+ - "-arch*"
+ - "-gencode*"
+ - "--generate-code*"
+ - "-ccbin*"
+ - "-t=*"
+ - "--threads*"
+ - "-Xptxas*"
+ - "-Xcudafe*"
+ - "-Xfatbin*"
+ - "-Xcompiler*"
+ - "--diag-suppress*"
+ - "--diag_suppress*"
+ - "--compiler-options*"
+ - "--expt-extended-lambda"
+ - "--expt-relaxed-constexpr"
+ - "-forward-unknown-to-host-compiler"
+ - "-Werror=cross-execution-space-call"
diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh
index bb405088bb..63f6c14686 100644
--- a/cpp/bench/prims/neighbors/cagra_bench.cuh
+++ b/cpp/bench/prims/neighbors/cagra_bench.cuh
@@ -18,8 +18,10 @@
#include
#include
+#include
#include
#include
+#include
#include
@@ -40,6 +42,8 @@ struct params {
int block_size;
int search_width;
int max_iterations;
+ /** Ratio of removed indices. */
+ double removed_ratio;
};
template
@@ -49,7 +53,8 @@ struct CagraBench : public fixture {
params_(ps),
queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)),
dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)),
- knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree))
+ knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)),
+ removed_indices_bitset_(handle, ps.n_samples)
{
    // Generate random dataset and queries
raft::random::RngState state{42};
@@ -74,6 +79,13 @@ struct CagraBench : public fixture {
auto metric = raft::distance::DistanceType::L2Expanded;
+ auto removed_indices =
+ raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples);
+ thrust::sequence(
+ resource::get_thrust_policy(handle),
+ thrust::device_pointer_cast(removed_indices.data_handle()),
+ thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0)));
+ removed_indices_bitset_.set(handle, removed_indices.view());
index_.emplace(raft::neighbors::cagra::index(
handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view())));
}
@@ -95,10 +107,18 @@ struct CagraBench : public fixture {
distances.data_handle(), params_.n_queries, params_.k);
auto queries_v = make_const_mdspan(queries_.view());
- loop_on_state(state, [&]() {
- raft::neighbors::cagra::search(
- this->handle, search_params, *this->index_, queries_v, ind_v, dist_v);
- });
+ if (params_.removed_ratio > 0) {
+ auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view());
+ loop_on_state(state, [&]() {
+ raft::neighbors::cagra::search_with_filtering(
+ this->handle, search_params, *this->index_, queries_v, ind_v, dist_v, filter);
+ });
+ } else {
+ loop_on_state(state, [&]() {
+ raft::neighbors::cagra::search(
+ this->handle, search_params, *this->index_, queries_v, ind_v, dist_v);
+ });
+ }
double data_size = params_.n_samples * params_.n_dims * sizeof(T);
double graph_size = params_.n_samples * params_.degree * sizeof(IdxT);
@@ -120,6 +140,7 @@ struct CagraBench : public fixture {
state.counters["block_size"] = params_.block_size;
state.counters["search_width"] = params_.search_width;
state.counters["iterations"] = iterations;
+ state.counters["removed_ratio"] = params_.removed_ratio;
}
private:
@@ -128,6 +149,7 @@ struct CagraBench : public fixture {
raft::device_matrix queries_;
raft::device_matrix dataset_;
raft::device_matrix knn_graph_;
+ raft::core::bitset removed_indices_bitset_;
};
inline const std::vector generate_inputs()
@@ -141,7 +163,8 @@ inline const std::vector generate_inputs()
{64}, // itopk_size
{0}, // block_size
{1}, // search_width
- {0} // max_iterations
+ {0}, // max_iterations
+ {0.0} // removed_ratio
);
auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples
{128}, // dataset dim
@@ -151,7 +174,22 @@ inline const std::vector generate_inputs()
{64}, // itopk_size
{64, 128, 256, 512, 1024}, // block_size
{1}, // search_width
- {0} // max_iterations
+ {0}, // max_iterations
+ {0.0} // removed_ratio
+ );
+ inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+
+ inputs2 = raft::util::itertools::product(
+ {2000000ull, 10000000ull}, // n_samples
+ {128}, // dataset dim
+ {1, 10, 10000}, // n_queries
+ {255}, // k
+ {64}, // knn graph degree
+ {300}, // itopk_size
+ {256}, // block_size
+ {2}, // search_width
+ {0}, // max_iterations
+ {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio
);
inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
return inputs;
diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp
index cc934b7a98..19dd6b8350 100644
--- a/cpp/include/raft/neighbors/brute_force_types.hpp
+++ b/cpp/include/raft/neighbors/brute_force_types.hpp
@@ -66,11 +66,11 @@ struct index : ann::index {
/** Dataset norms */
[[nodiscard]] inline auto norms() const -> device_vector_view
{
- return make_const_mdspan(norms_.value().view());
+ return norms_view_.value();
}
  /** Whether or not this index has dataset norms */
- [[nodiscard]] inline bool has_norms() const noexcept { return norms_.has_value(); }
+ [[nodiscard]] inline bool has_norms() const noexcept { return norms_view_.has_value(); }
[[nodiscard]] inline T metric_arg() const noexcept { return metric_arg_; }
@@ -102,10 +102,30 @@ struct index : ann::index {
norms_(std::move(norms)),
metric_arg_(metric_arg)
{
+ if (norms_) { norms_view_ = make_const_mdspan(norms_.value().view()); }
update_dataset(res, dataset);
resource::sync_stream(res);
}
+ /** Construct a brute force index from dataset
+ *
+ * This class stores a non-owning reference to the dataset and norms here.
+ * Having precomputed norms gives us a performance advantage at query time.
+ */
+ index(raft::resources const& res,
+ raft::device_matrix_view dataset_view,
+ std::optional> norms_view,
+ raft::distance::DistanceType metric,
+ T metric_arg = 0.0)
+ : ann::index(),
+ metric_(metric),
+ dataset_(make_device_matrix(res, 0, 0)),
+ dataset_view_(dataset_view),
+ norms_view_(norms_view),
+ metric_arg_(metric_arg)
+ {
+ }
+
private:
/**
* Replace the dataset with a new dataset.
@@ -135,6 +155,7 @@ struct index : ann::index {
raft::distance::DistanceType metric_;
raft::device_matrix dataset_;
std::optional> norms_;
+ std::optional> norms_view_;
raft::device_matrix_view dataset_view_;
T metric_arg_;
};
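
As a point of reference, here is a minimal sketch (names and shapes hypothetical, not taken from this change) of how the new view-based constructor could be used; the index keeps non-owning views of a caller-owned dataset instead of copying it, and a populated norms view can be passed instead of `std::nullopt` to enable `has_norms()`:

```cpp
#include <optional>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/neighbors/brute_force_types.hpp>

// Hypothetical helper: wrap a caller-owned dataset in a non-owning brute force index.
raft::neighbors::brute_force::index<float> make_view_index(
  raft::resources const& res,
  raft::device_matrix_view<const float, int64_t> dataset)  // caller-owned [n_rows, dim]
{
  // No precomputed norms in this sketch; pass a populated optional instead to
  // take the faster query-time path that precomputed norms enable.
  return raft::neighbors::brute_force::index<float>(
    res, dataset, std::nullopt, raft::distance::DistanceType::L2Unexpanded);
}
```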
diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh
index f96dd34e05..f9682a973f 100644
--- a/cpp/include/raft/neighbors/cagra.cuh
+++ b/cpp/include/raft/neighbors/cagra.cuh
@@ -391,7 +391,25 @@ void search(raft::resources const& res,
/**
* @brief Search ANN using the constructed index with the given sample filter.
*
- * See the [cagra::build](#cagra::build) documentation for a usage example.
+ * Usage example:
+ * @code{.cpp}
+ * using namespace raft::neighbors;
+ * // use default index parameters
+ * cagra::index_params index_params;
+ * // create and fill the index from a [N, D] dataset
+ * auto index = cagra::build(res, index_params, dataset);
+ * // use default search parameters
+ * cagra::search_params search_params;
+ * // create a bitset to filter the search
+ * auto removed_indices = raft::make_device_vector(res, n_removed_indices);
+ * raft::core::bitset removed_indices_bitset(
+ * res, removed_indices.view(), dataset.extent(0));
+ * // search K nearest neighbours according to a bitset
+ * auto neighbors = raft::make_device_matrix(res, n_queries, k);
+ * auto distances = raft::make_device_matrix(res, n_queries, k);
+ * cagra::search_with_filtering(res, search_params, index, queries, neighbors, distances,
+ * filtering::bitset_filter(removed_indices_bitset.view()));
+ * @endcode
*
* @tparam T data element type
* @tparam IdxT type of the indices
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
index 5dcfcb3929..9392bde440 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -478,13 +478,15 @@ __global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr,
const INDEX_T query_id_offset,
SAMPLE_FILTER_T sample_filter)
{
- const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+ constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value;
+ const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid >= result_buffer_size * num_queries) { return; }
const auto i = tid % result_buffer_size;
const auto j = tid / result_buffer_size;
const auto index = i + j * lds;
- if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) {
+ if (result_indices_ptr[index] != ~index_msb_1_mask &&
+ !sample_filter(query_id_offset + j, result_indices_ptr[index])) {
result_indices_ptr[index] = utils::get_max_value();
result_distances_ptr[index] = utils::get_max_value();
}
@@ -788,12 +790,15 @@ struct search : search_plan_impl {
auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size;
auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size;
- // Remove parent bit in search results
- remove_parent_bit(
- num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream);
+ if constexpr (!std::is_same::value) {
+ // Remove parent bit in search results
+ remove_parent_bit(num_queries,
+ result_buffer_size,
+ result_indices.data() + (iter & 0x1) * itopk_size,
+ result_buffer_allocation_size,
+ stream);
- if (!std::is_same::value) {
apply_filter(
result_indices.data() + (iter & 0x1) * itopk_size,
result_distances.data() + (iter & 0x1) * itopk_size,
@@ -821,6 +826,10 @@ struct search : search_plan_impl {
true,
topk_hint.data(),
stream);
+ } else {
+ // Remove parent bit in search results
+ remove_parent_bit(
+ num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream);
}
// Copy results from working buffer to final buffer
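
For clarity, a small standalone sketch of the sentinel comparison introduced above, assuming `INDEX_T = uint32_t` and that `utils::gen_index_msb_1_mask` yields a value with only the most significant bit set (as its name suggests); entries equal to `~index_msb_1_mask` are left untouched, so the user-supplied filter is never invoked on them:

```cpp
#include <cstdint>

// Hypothetical recreation of the mask used by the kernel above.
constexpr std::uint32_t index_msb_1_mask = std::uint32_t{1} << 31;  // 0x80000000
constexpr std::uint32_t skipped_entry    = ~index_msb_1_mask;       // 0x7FFFFFFF
static_assert(skipped_entry == 0x7FFFFFFFu, "such entries bypass the sample filter");
```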
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
index a0f346ab51..147b8b753d 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
@@ -291,6 +291,14 @@ struct search_plan_impl : public search_plan_impl_base {
"`hashmap_max_fill_rate` must be equal to or greater than 0.1 and smaller than 0.9. " +
std::to_string(hashmap_max_fill_rate) + " has been given.";
}
+ if constexpr (!std::is_same::value) {
+ if (hashmap_mode == hash_mode::SMALL) {
+ error_message += "`SMALL` hash is not available when filtering";
+ } else {
+ hashmap_mode = hash_mode::HASH;
+ }
+ }
if (algo == search_algo::MULTI_CTA) {
if (hashmap_mode == hash_mode::SMALL) {
error_message += "`small_hash` is not available when 'search_mode' is \"multi-cta\"";
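
Based on the check above, a filtering caller cannot use `hash_mode::SMALL`, and `AUTO` is promoted to `HASH`; a minimal sketch (hypothetical helper, assuming the usual `cagra_types.hpp` definitions) of search parameters prepared for filtered search:

```cpp
#include <raft/neighbors/cagra_types.hpp>

// Hypothetical helper: search parameters suitable for search_with_filtering.
raft::neighbors::cagra::search_params make_filtered_search_params()
{
  raft::neighbors::cagra::search_params p;
  p.algo         = raft::neighbors::cagra::search_algo::AUTO;
  p.hashmap_mode = raft::neighbors::cagra::hash_mode::HASH;  // SMALL is rejected when filtering
  return p;
}
```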
diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh
index 3e4d0409bd..009ffd4684 100644
--- a/cpp/include/raft/neighbors/detail/nn_descent.cuh
+++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh
@@ -1278,8 +1278,7 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out
std::thread update_and_sample_thread(update_and_sample, it);
- std::cout << "# GNND iteraton: " << it + 1 << "/" << build_config_.max_iterations << "\r";
- std::fflush(stdout);
+    RAFT_LOG_DEBUG("# GNND iteration: %lu / %lu", it + 1, build_config_.max_iterations);
// Reuse dists_buffer_ to save GPU memory. graph_buffer_ cannot be reused, because it
// contains some information for local_join.
diff --git a/cpp/include/raft/neighbors/sample_filter.cuh b/cpp/include/raft/neighbors/sample_filter.cuh
new file mode 100644
index 0000000000..9182d72da9
--- /dev/null
+++ b/cpp/include/raft/neighbors/sample_filter.cuh
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+
+namespace raft::neighbors::filtering {
+/**
+ * @brief Filter an index with a bitset
+ *
+ * @tparam index_t Indexing type
+ */
+template
+struct bitset_filter {
+ // View of the bitset to use as a filter
+ const raft::core::bitset_view bitset_view_;
+
+ bitset_filter(const raft::core::bitset_view bitset_for_filtering)
+ : bitset_view_{bitset_for_filtering}
+ {
+ }
+ inline _RAFT_HOST_DEVICE bool operator()(
+ // query index
+ const uint32_t query_ix,
+ // the index of the current sample
+ const uint32_t sample_ix) const
+ {
+ return bitset_view_.test(sample_ix);
+ }
+};
+} // namespace raft::neighbors::filtering
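
`bitset_filter` suggests the filter contract is simply a host/device callable that returns `true` for samples that should remain search candidates; as an illustration only (not part of this change), a hypothetical custom filter with the same call signature:

```cpp
// Hypothetical filter: keep only even-numbered samples.
struct even_sample_filter {
  inline __host__ __device__ bool operator()(
    // query index
    const uint32_t query_ix,
    // the index of the current sample
    const uint32_t sample_ix) const
  {
    return (sample_ix % 2) == 0;  // true means the sample remains a search candidate
  }
};
```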
diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu
index 15c7b2b33a..8e3a9df01b 100644
--- a/cpp/test/core/math_device.cu
+++ b/cpp/test/core/math_device.cu
@@ -21,7 +21,9 @@
#include
#include
-#if _RAFT_HAS_CUDA
+#include
+
+#ifdef _RAFT_HAS_CUDA
#include
#include
#endif
@@ -35,7 +37,7 @@ __global__ void math_eval_kernel(OutT* out, OpT op, Args... args)
template
auto math_eval(OpT op, Args&&... args)
{
- typedef decltype(op(args...)) OutT;
+ using OutT = cuda::std::invoke_result_t;
auto stream = rmm::cuda_stream_default;
rmm::device_scalar result(stream);
math_eval_kernel<<<1, 1, 0, stream>>>(result.data(), op, std::forward(args)...);
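
A standalone sketch (with a hypothetical `add_op`) of the trait the test now relies on; `cuda::std::invoke_result_t` names the callable's result type from its argument types, taking over the role of the previous `decltype(op(args...))` expression:

```cpp
#include <cuda/std/type_traits>

struct add_op {
  __host__ __device__ double operator()(double a, float b) const { return a + b; }
};

// The result type is deduced without writing a call expression.
using out_t = cuda::std::invoke_result_t<add_op, double, float>;
static_assert(cuda::std::is_same_v<out_t, double>, "");
```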
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index b750372244..e6c3873063 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -30,6 +30,7 @@
#include
#include
#include
+#include
#include
#include
@@ -525,6 +526,119 @@ class AnnCagraFilterTest : public ::testing::TestWithParam {
}
}
+ void testCagraRemoved()
+ {
+ size_t queries_size = ps.n_queries * ps.k;
+ std::vector indices_Cagra(queries_size);
+ std::vector indices_naive(queries_size);
+ std::vector distances_Cagra(queries_size);
+ std::vector distances_naive(queries_size);
+
+ {
+ rmm::device_uvector distances_naive_dev(queries_size, stream_);
+ rmm::device_uvector indices_naive_dev(queries_size, stream_);
+ auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim;
+ naive_knn(handle_,
+ distances_naive_dev.data(),
+ indices_naive_dev.data(),
+ search_queries.data(),
+ database_filtered_ptr,
+ ps.n_queries,
+ ps.n_rows - test_cagra_sample_filter::offset,
+ ps.dim,
+ ps.k,
+ ps.metric);
+ raft::linalg::addScalar(indices_naive_dev.data(),
+ indices_naive_dev.data(),
+ IdxT(test_cagra_sample_filter::offset),
+ queries_size,
+ stream_);
+ update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
+ update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
+ resource::sync_stream(handle_);
+ }
+
+ {
+ rmm::device_uvector distances_dev(queries_size, stream_);
+ rmm::device_uvector indices_dev(queries_size, stream_);
+
+ {
+ cagra::index_params index_params;
+        index_params.metric = ps.metric;  // Note: currently only the cagra::index_params metric is
+ // not used for knn_graph building.
+ cagra::search_params search_params;
+ search_params.algo = ps.algo;
+ search_params.max_queries = ps.max_queries;
+ search_params.team_size = ps.team_size;
+ search_params.hashmap_mode = cagra::hash_mode::HASH;
+
+ auto database_view = raft::make_device_matrix_view(
+ (const DataT*)database.data(), ps.n_rows, ps.dim);
+
+ cagra::index index(handle_);
+ if (ps.host_dataset) {
+ auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim);
+ raft::copy(database_host.data_handle(), database.data(), database.size(), stream_);
+ auto database_host_view = raft::make_host_matrix_view(
+ (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim);
+ index = cagra::build(handle_, index_params, database_host_view);
+ } else {
+ index = cagra::build(handle_, index_params, database_view);
+ }
+
+ if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); }
+
+ auto search_queries_view = raft::make_device_matrix_view(
+ search_queries.data(), ps.n_queries, ps.dim);
+ auto indices_out_view =
+ raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k);
+ auto dists_out_view = raft::make_device_matrix_view(
+ distances_dev.data(), ps.n_queries, ps.k);
+ auto removed_indices =
+ raft::make_device_vector(handle_, test_cagra_sample_filter::offset);
+ thrust::sequence(
+ resource::get_thrust_policy(handle_),
+ thrust::device_pointer_cast(removed_indices.data_handle()),
+ thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0)));
+ resource::sync_stream(handle_);
+ raft::core::bitset removed_indices_bitset(
+ handle_, removed_indices.view(), ps.n_rows);
+ cagra::search_with_filtering(
+ handle_,
+ search_params,
+ index,
+ search_queries_view,
+ indices_out_view,
+ dists_out_view,
+ raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view()));
+ update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_);
+ update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_);
+ resource::sync_stream(handle_);
+ }
+
+ double min_recall = ps.min_recall;
+ EXPECT_TRUE(eval_neighbours(indices_naive,
+ indices_Cagra,
+ distances_naive,
+ distances_Cagra,
+ ps.n_queries,
+ ps.k,
+ 0.001,
+ min_recall));
+ EXPECT_TRUE(eval_distances(handle_,
+ database.data(),
+ search_queries.data(),
+ indices_dev.data(),
+ distances_dev.data(),
+ ps.n_rows,
+ ps.dim,
+ ps.n_queries,
+ ps.k,
+ ps.metric,
+ 1.0e-4));
+ }
+ }
+
void SetUp() override
{
database.resize(((size_t)ps.n_rows) * ps.dim, stream_);
diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
index 01d7e1e1ea..944c2cbc89 100644
--- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
@@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestF_U32;
TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); }
typedef AnnCagraFilterTest AnnCagraFilterTestF_U32;
-TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) { this->testCagraFilter(); }
+TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter)
+{
+ this->testCagraFilter();
+ this->testCagraRemoved();
+}
INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs));
INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs));
diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
index ee06d369fa..3d9dc76953 100644
--- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
@@ -25,7 +25,11 @@ TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); }
typedef AnnCagraSortTest AnnCagraSortTestI8_U32;
TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); }
typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32;
-TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) { this->testCagraFilter(); }
+TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter)
+{
+ this->testCagraFilter();
+ this->testCagraRemoved();
+}
INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs));
INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs));
diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
index 3243e73ccd..c5b1b1704b 100644
--- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
@@ -27,7 +27,11 @@ typedef AnnCagraSortTest AnnCagraSortTestU8_
TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); }
typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32;
-TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) { this->testCagraFilter(); }
+TEST_P(AnnCagraFilterTestU8_U32, AnnCagraFilter)
+{
+ this->testCagraFilter();
+ this->testCagraRemoved();
+}
INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs));
INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs));
diff --git a/dependencies.yaml b/dependencies.yaml
index f1b74cfe49..fe4a4620e0 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,12 +10,15 @@ files:
- build_pylibraft
- cudatoolkit
- develop
+ - checks
+ - build_wheels
- test_libraft
- docs
- run_raft_dask
- run_pylibraft
- test_python_common
- test_pylibraft
+ - cupy
bench_ann:
output: conda
matrix:
@@ -38,6 +41,7 @@ files:
- py_version
- test_python_common
- test_pylibraft
+ - cupy
checks:
output: none
includes:
@@ -47,6 +51,7 @@ files:
output: none
includes:
- test_pylibraft
+ - cupy
- cudatoolkit
- docs
- py_version
@@ -75,6 +80,7 @@ files:
includes:
- test_python_common
- test_pylibraft
+ - cupy
py_build_raft_dask:
output: pyproject
pyproject_dir: python/raft-dask
@@ -145,11 +151,37 @@ dependencies:
packages:
- gcc_linux-aarch64=11.*
- sysroot_linux-aarch64==2.17
+ - output_types: conda
+ matrices:
+ - matrix: {cuda: "12.0"}
+ packages: [cuda-version=12.0, cuda-nvcc]
+ - matrix: {cuda: "11.8", arch: x86_64}
+ packages: [nvcc_linux-64=11.8]
+ - matrix: {cuda: "11.8", arch: aarch64}
+ packages: [nvcc_linux-aarch64=11.8]
+ - matrix: {cuda: "11.5", arch: x86_64}
+ packages: [nvcc_linux-64=11.5]
+ - matrix: {cuda: "11.5", arch: aarch64}
+ packages: [nvcc_linux-aarch64=11.5]
+ - matrix: {cuda: "11.4", arch: x86_64}
+ packages: [nvcc_linux-64=11.4]
+ - matrix: {cuda: "11.4", arch: aarch64}
+ packages: [nvcc_linux-aarch64=11.4]
+ - matrix: {cuda: "11.2", arch: x86_64}
+ packages: [nvcc_linux-64=11.2]
+ - matrix: {cuda: "11.2", arch: aarch64}
+ packages: [nvcc_linux-aarch64=11.2]
+
build_pylibraft:
common:
- - output_types: [conda, requirements, pyproject]
+ - output_types: [conda]
packages:
- - &rmm rmm==23.12.*
+ - &rmm_conda rmm==23.12.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ # This index is needed for rmm-cu{11,12}.
+ - --extra-index-url=https://pypi.nvidia.com
specific:
- output_types: [conda, requirements, pyproject]
matrices:
@@ -160,6 +192,20 @@ dependencies:
- matrix: # All CUDA 11 versions
packages:
- &cuda_python11 cuda-python>=11.7.1,<12.0a0
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &build_pylibraft_packages_cu12
+ - &rmm_cu12 rmm-cu12==23.12.*
+ - {matrix: {cuda: "12.1"}, packages: *build_pylibraft_packages_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *build_pylibraft_packages_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &build_pylibraft_packages_cu11
+ - &rmm_cu11 rmm-cu11==23.12.*
+ - {matrix: {cuda: "11.5"}, packages: *build_pylibraft_packages_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *build_pylibraft_packages_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *build_pylibraft_packages_cu11}
+ - {matrix: null, packages: [*rmm_conda] }
checks:
common:
- output_types: [conda, requirements]
@@ -167,11 +213,9 @@ dependencies:
- pre-commit
develop:
common:
- - output_types: [conda, requirements]
- packages:
- - clang=16.0.6
- - output_types: [conda]
+ - output_types: conda
packages:
+ - clang==16.0.6
- clang-tools=16.0.6
nn_bench:
common:
@@ -265,6 +309,45 @@ dependencies:
- *libcusolver114
- *libcusparse_dev114
- *libcusparse114
+
+ cupy:
+ common:
+ - output_types: conda
+ packages:
+ - cupy>=12.0.0
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ # All CUDA 12 + x86_64 versions
+ - matrix: {cuda: "12.2", arch: x86_64}
+ packages: &cupy_packages_cu12_x86_64
+ - &cupy_cu12_x86_64 cupy-cuda12x>=12.0.0
+ - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64}
+ - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64}
+ # All CUDA 12 + aarch64 versions
+ - matrix: {cuda: "12.2", arch: aarch64}
+ packages: &cupy_packages_cu12_aarch64
+ - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works.
+ - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64}
+ - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64}
+
+ # All CUDA 11 + x86_64 versions
+ - matrix: {cuda: "11.8", arch: x86_64}
+ packages: &cupy_packages_cu11_x86_64
+ - cupy-cuda11x>=12.0.0
+ - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+ - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+ - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+
+ # All CUDA 11 + aarch64 versions
+ - matrix: {cuda: "11.8", arch: aarch64}
+ packages: &cupy_packages_cu11_aarch64
+ - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works.
+ - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: null, packages: [cupy-cuda11x>=12.0.0]}
+
test_libraft:
common:
- output_types: [conda]
@@ -287,7 +370,7 @@ dependencies:
- sphinx-markdown-tables
build_wheels:
common:
- - output_types: pyproject
+ - output_types: [requirements, pyproject]
packages:
- wheel
- setuptools
@@ -311,7 +394,14 @@ dependencies:
- output_types: [conda, pyproject]
packages:
- &numpy numpy>=1.21
- - *rmm
+ - output_types: [conda]
+ packages:
+ - *rmm_conda
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ # This index is needed for cudf and rmm.
+ - --extra-index-url=https://pypi.nvidia.com
specific:
- output_types: [conda, requirements, pyproject]
matrices:
@@ -322,25 +412,62 @@ dependencies:
- matrix: # All CUDA 11 versions
packages:
- *cuda_python11
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &run_pylibraft_packages_cu12
+ - *rmm_cu12
+ - {matrix: {cuda: "12.1"}, packages: *run_pylibraft_packages_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *run_pylibraft_packages_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &run_pylibraft_packages_cu11
+ - *rmm_cu11
+ - {matrix: {cuda: "11.5"}, packages: *run_pylibraft_packages_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *run_pylibraft_packages_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *run_pylibraft_packages_cu11}
+ - {matrix: null, packages: [*rmm_conda]}
run_raft_dask:
common:
- output_types: [conda, pyproject]
packages:
- - dask>=2023.7.1
+ - dask==2023.9.2
- dask-cuda==23.12.*
- - distributed>=2023.7.1
+ - distributed==2023.9.2
- joblib>=0.11
- numba>=0.57
- *numpy
- ucx-py==0.35.*
- output_types: conda
packages:
- - dask-core>=2023.7.1
+ - dask-core==2023.9.2
- ucx>=1.13.0
- ucx-proc=*=gpu
+ - &ucx_py_conda ucx-py==0.35.*
- output_types: pyproject
packages:
- - pylibraft==23.12.*
+ - &pylibraft_conda pylibraft==23.12.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ # This index is needed for cudf and rmm.
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &run_raft_dask_packages_cu12
+ - &pylibraft_cu12 pylibraft-cu12==23.12.*
+ - &ucx_py_cu12 ucx-py-cu12==0.35.*
+ - {matrix: {cuda: "12.1"}, packages: *run_raft_dask_packages_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *run_raft_dask_packages_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &run_raft_dask_packages_cu11
+ - &pylibraft_cu11 pylibraft-cu11==23.12.*
+ - &ucx_py_cu11 ucx-py-cu11==0.35.*
+ - {matrix: {cuda: "11.5"}, packages: *run_raft_dask_packages_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *run_raft_dask_packages_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *run_raft_dask_packages_cu11}
+ - {matrix: null, packages: [*pylibraft_conda, *ucx_py_conda]}
test_python_common:
common:
- output_types: [conda, requirements, pyproject]
@@ -353,9 +480,3 @@ dependencies:
packages:
- scikit-learn
- scipy
- - output_types: conda
- packages:
- - cupy>=12.0.0
- - output_types: pyproject
- packages:
- - cupy-cuda11x>=12.0.0
diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt
index 53bb12c81c..d29997b4a3 100644
--- a/python/raft-dask/CMakeLists.txt
+++ b/python/raft-dask/CMakeLists.txt
@@ -17,6 +17,8 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
set(raft_dask_version 23.12.00)
include(../../fetch_rapids.cmake)
+include(rapids-cuda)
+rapids_cuda_init_architectures(raft-dask-python)
project(
raft-dask-python
@@ -25,7 +27,7 @@ project(
# language to be enabled here. The test project that is built in scikit-build to verify
# various linking options for the python library is hardcoded to build with C, so until
# that is fixed we need to keep C.
- C CXX
+ C CXX CUDA
)
option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files"
@@ -42,14 +44,6 @@ else()
endif()
if(NOT raft_FOUND)
- # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required
- # languages for the C++ project even if this project does not require those languages.
- include(rapids-cuda)
- rapids_cuda_init_architectures(raft-dask)
- enable_language(CUDA)
- # Since raft-dask only enables CUDA optionally we need to manually include the file that
- # rapids_cuda_init_architectures relies on `project` including.
- include("${CMAKE_PROJECT_raft-dask_INCLUDE}")
find_package(ucx REQUIRED)
# raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all
diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index 5c616806a2..1619edbbbf 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -35,8 +35,8 @@ license = { text = "Apache 2.0" }
requires-python = ">=3.9"
dependencies = [
"dask-cuda==23.12.*",
- "dask>=2023.7.1",
- "distributed>=2023.7.1",
+ "dask==2023.9.2",
+ "distributed==2023.9.2",
"joblib>=0.11",
"numba>=0.57",
"numpy>=1.21",