diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000000..9d35e3f97f
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,30 @@
+# syntax=docker/dockerfile:1.5
+
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 0000000000..3c76b8963d
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,64 @@
+# RAFT Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the RAFT C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
+* `${repo}:/home/coder/raft`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.local:/home/coder/.local`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.conda:/home/coder/.conda`
+* `${repo}/../.config:/home/coder/.config`
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the RAFT repo and select the "Reopen in Container" button in the bottom right.
+
+Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
+
+## Using the devcontainer
+
+On startup, the devcontainer creates or updates the conda/pip environment using `raft/dependencies.yaml`.
+
+The container includes convenience functions to clean, configure, and build the various RAFT components:
+
+```shell
+$ clean-raft-cpp # only cleans the C++ build dir
+$ clean-pylibraft-python # only cleans the Python build dir
+$ clean-raft # cleans both C++ and Python build dirs
+
+$ configure-raft-cpp # only configures raft C++ lib
+
+$ build-raft-cpp # only builds raft C++ lib
+$ build-pylibraft-python # only builds raft Python lib
+$ build-raft # builds both C++ and Python libs
+```
+
+* The C++ build script is a small wrapper around `cmake -S ~/raft/cpp -B ~/raft/cpp/build` and `cmake --build ~/raft/cpp/build`
+* The Python build script is a small wrapper around `pip install --editable ~/raft/python/pylibraft`
+
+Unlike `build.sh`, these convenience scripts *don't* install the libraries after building them. Instead, they automatically inject the correct arguments to build the C++ libraries from source and use their build dirs as package roots:
+
+```shell
+$ cmake -S ~/raft/cpp -B ~/raft/cpp/build
+$ CMAKE_ARGS="-Draft_ROOT=~/raft/cpp/build" \ # <-- this argument is automatic
+  pip install -e ~/raft/python/pylibraft
+```
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
new file mode 100644
index 0000000000..8da9b5428a
--- /dev/null
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
new file mode 100644
index 0000000000..0b3ec79e37
--- /dev/null
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -0,0 +1,38 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/ucx",
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json
new file mode 100644
index 0000000000..f5af166b46
--- /dev/null
+++ b/.devcontainer/cuda12.0-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json
new file mode 100644
index 0000000000..9f28002d38
--- /dev/null
+++ b/.devcontainer/cuda12.0-pip/devcontainer.json
@@ -0,0 +1,38 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda12.0-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/ucx",
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 00004c4e4d..107823d5ee 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 4437e0dc85..e539877851 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -22,6 +22,7 @@ jobs:
- wheel-tests-pylibraft
- wheel-build-raft-dask
- wheel-tests-raft-dask
+ - devcontainer
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10
checks:
@@ -62,7 +63,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-pylibraft:
needs: checks
@@ -92,3 +93,11 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_raft_dask.sh
+ devcontainer:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.10
+ with:
+ build_command: |
+ sccache -z;
+ build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose;
+ sccache -s;
diff --git a/.gitignore b/.gitignore
index 7939fc1622..11b7bc3eba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,3 +62,7 @@ _xml
# sphinx
_html
_text
+
+# clang tooling
+compile_commands.json
+.clangd/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2a70632497..66862ada5e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,7 +43,7 @@ repos:
additional_dependencies: [toml]
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
- rev: v16.0.1
+ rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
diff --git a/build.sh b/build.sh
index 071820ba93..6200e6a2fa 100755
--- a/build.sh
+++ b/build.sh
@@ -78,8 +78,8 @@ INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
BUILD_REPORT_INCL_CACHE_STATS=OFF
-TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_CAGRA_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
-BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
+TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_CAGRA_TEST;NEIGHBORS_ANN_NN_DESCENT_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
+BENCH_TARGETS="CLUSTER_BENCH;CORE_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
CACHE_ARGS=""
NVTX=ON
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 853ae095d3..a41f81152d 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
set -euo pipefail
@@ -11,6 +11,6 @@ rapids-print-env
rapids-logger "Begin cpp build"
-rapids-mamba-retry mambabuild conda/recipes/libraft
+rapids-conda-retry mambabuild conda/recipes/libraft
rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 2a31deb46a..c49677e78c 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -15,19 +15,19 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
# TODO: Remove `--no-test` flags once importing on a CPU
# node works correctly
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibraft
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/raft-dask
# Build ann-bench for each cuda and python version
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
@@ -37,7 +37,7 @@ conda/recipes/raft-ann-bench
# version
RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
- rapids-mamba-retry mambabuild \
+ rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index a9f7f64294..662a11ad0e 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -5,6 +5,7 @@ set -euo pipefail
package_name=$1
package_dir=$2
+underscore_package_name=$(echo "${package_name}" | tr "-" "_")
source rapids-configure-sccache
source rapids-date-string
@@ -15,9 +16,36 @@ version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
-echo "The package name and/or version was modified in the package source. The git diff is:"
-git diff
+# This is the version of the suffix with a preceding hyphen. It's used
+# everywhere except in the final wheel name.
+PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
+
+# Patch project metadata files to include the CUDA version suffix and version override.
+pyproject_file="${package_dir}/pyproject.toml"
+
+sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
+sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+
+# For nightlies we want to ensure that we're pulling in alphas as well. The
+# easiest way to do so is to augment the spec with a constraint containing a
+# min alpha version that doesn't affect the version bounds but does allow usage
+# of alpha versions for that dependency without --pre
+alpha_spec=''
+if ! rapids-is-release-build; then
+ alpha_spec=',>=0.0.0a0'
+fi
+
+if [[ ${package_name} == "raft-dask" ]]; then
+ sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
+ sed -i "s/ucx-py/ucx-py${PACKAGE_CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
+else
+ sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file}
+fi
+
+if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
+ sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
+ sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
+fi
cd "${package_dir}"
@@ -27,4 +55,4 @@ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh
index f0204d45c0..ff89f4da23 100755
--- a/ci/build_wheel_raft_dask.sh
+++ b/ci/build_wheel_raft_dask.sh
@@ -6,9 +6,4 @@ set -euo pipefail
# Set up skbuild options. Enable sccache in skbuild config options
export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
-RAPIDS_PY_WHEEL_NAME=pylibraft_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibraft
-python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl
-
-ci/build_wheel.sh raft_dask python/raft-dask
+ci/build_wheel.sh raft-dask python/raft-dask
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
deleted file mode 100755
index fd6c2f929e..0000000000
--- a/ci/release/apply_wheel_modifications.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-#
-# Usage: bash apply_wheel_modifications.sh
-
-VERSION=${1}
-CUDA_SUFFIX=${2}
-
-# pyproject.toml versions
-sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pylibraft/pyproject.toml
-sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/raft-dask/pyproject.toml
-
-# pylibraft pyproject.toml cuda suffixes
-sed -i "s/^name = \"pylibraft\"/name = \"pylibraft${CUDA_SUFFIX}\"/g" python/pylibraft/pyproject.toml
-sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/pylibraft/pyproject.toml
-
-# raft-dask pyproject.toml cuda suffixes
-sed -i "s/^name = \"raft-dask\"/name = \"raft-dask${CUDA_SUFFIX}\"/g" python/raft-dask/pyproject.toml
-sed -i "s/pylibraft/pylibraft${CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
-sed -i "s/ucx-py/ucx-py${CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
-
-if [[ $CUDA_SUFFIX == "-cu12" ]]; then
- sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pylibraft/pyproject.toml
- sed -i "s/cupy-cuda11x/cupy-cuda12x/g" python/pylibraft/pyproject.toml
-fi
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 6a7e319f5d..a867a71f68 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -47,10 +47,6 @@ sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cma
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pylibraft/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/raft_dask/__init__.py
-# Python pyproject.toml updates
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/pyproject.toml
-
# Wheel testing script
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_raft_dask.sh
@@ -74,6 +70,7 @@ for FILE in python/*/pyproject.toml; do
for DEP in "${DEPENDENCIES[@]}"; do
sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE}
done
+ sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" "${FILE}"
sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE}
done
@@ -94,3 +91,10 @@ sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxy
sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md
sed_runner "/GIT_TAG.*branch-/ s|branch-.*|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md
sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md
+
+# .devcontainer files
+find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
+ sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+done
diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh
index 676d642de9..fd9668e968 100755
--- a/ci/test_wheel_raft_dask.sh
+++ b/ci/test_wheel_raft_dask.sh
@@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels
python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl
# Always install latest dask for testing
-python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10
+python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/raft_dask*.whl)[test]
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 7e921decd5..739e1e9785 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -9,8 +9,8 @@ channels:
dependencies:
- breathe
- c-compiler
-- clang-tools=16.0.1
-- clang=16.0.1
+- clang-tools=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-profiler-api=11.8.86
- cuda-python>=11.7.1,<12.0a0
@@ -19,10 +19,10 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.10.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -43,6 +43,8 @@ dependencies:
- numba>=0.57
- numpy>=1.21
- numpydoc
+- nvcc_linux-64=11.8
+- pre-commit
- pydata-sphinx-theme
- pytest
- pytest-cov
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 2ea685b529..321c17bf4f 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -9,20 +9,21 @@ channels:
dependencies:
- breathe
- c-compiler
-- clang-tools=16.0.1
-- clang=16.0.1
+- clang-tools=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-cudart-dev
+- cuda-nvcc
- cuda-profiler-api
- cuda-python>=12.0,<13.0a0
- cuda-version=12.0
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.10.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -39,6 +40,7 @@ dependencies:
- numba>=0.57
- numpy>=1.21
- numpydoc
+- pre-commit
- pydata-sphinx-theme
- pytest
- pytest-cov
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 742040ad50..4f1df12dfa 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -9,8 +9,8 @@ channels:
dependencies:
- benchmark>=1.8.2
- c-compiler
-- clang-tools=16.0.1
-- clang=16.0.1
+- clang-tools=16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-profiler-api=11.8.86
- cuda-version=11.8
@@ -34,6 +34,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- nlohmann_json>=3.11.2
+- nvcc_linux-64=11.8
- scikit-build>=0.13.1
- sysroot_linux-64==2.17
name: bench_ann_cuda-118_arch-x86_64
diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml
index 355ea640ff..06737b0497 100644
--- a/conda/recipes/raft-ann-bench-cpu/meta.yaml
+++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml
@@ -50,6 +50,7 @@ requirements:
- nlohmann_json {{ nlohmann_json_version }}
- python
- pyyaml
+ - pandas
run:
- glog {{ glog_version }}
@@ -57,6 +58,8 @@ requirements:
- matplotlib
- python
- pyyaml
+ - pandas
+ - benchmark
about:
home: https://rapids.ai/
diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml
index 882ff6cc49..a2ab0af643 100644
--- a/conda/recipes/raft-ann-bench/meta.yaml
+++ b/conda/recipes/raft-ann-bench/meta.yaml
@@ -75,6 +75,14 @@ requirements:
- faiss-proc=*=cuda
- libfaiss {{ faiss_version }}
{% endif %}
+ - h5py {{ h5py_version }}
+ - benchmark
+ - matplotlib
+ - python
+ - pandas
+ - pyyaml
+ # rmm is needed to determine if package is gpu-enabled
+ - rmm ={{ minor_version }}
run:
- python
@@ -90,7 +98,14 @@ requirements:
- libfaiss {{ faiss_version }}
{% endif %}
- h5py {{ h5py_version }}
-
+ - benchmark
+ - glog {{ glog_version }}
+ - matplotlib
+ - python
+ - pandas
+ - pyyaml
+ # rmm is needed to determine if package is gpu-enabled
+ - rmm ={{ minor_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml
index c9caa4dd9b..04dfef5063 100644
--- a/conda/recipes/raft-dask/meta.yaml
+++ b/conda/recipes/raft-dask/meta.yaml
@@ -60,10 +60,10 @@ requirements:
- cudatoolkit
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- - dask >=2023.7.1
- - dask-core >=2023.7.1
+ - dask ==2023.9.2
+ - dask-core ==2023.9.2
- dask-cuda ={{ minor_version }}
- - distributed >=2023.7.1
+ - distributed ==2023.9.2
- joblib >=0.11
- nccl >=2.9.9
- pylibraft {{ version }}
diff --git a/cpp/.clangd b/cpp/.clangd
new file mode 100644
index 0000000000..7c4fe036dd
--- /dev/null
+++ b/cpp/.clangd
@@ -0,0 +1,65 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+ PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+ PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+ PathMatch: .*\.cuh?
+CompileFlags:
+ Add:
+ - "-x"
+ - "cuda"
+ # No error on unknown CUDA versions
+ - "-Wno-unknown-cuda-version"
+ # Allow variadic CUDA functions
+ - "-Xclang=-fcuda-allow-variadic-functions"
+Diagnostics:
+ Suppress:
+ - "variadic_device_fn"
+ - "attributes_not_allowed"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+ Add:
+ # report all errors
+ - "-ferror-limit=0"
+ - "-fmacro-backtrace-limit=0"
+ - "-ftemplate-backtrace-limit=0"
+ # Skip the CUDA version check
+ - "--no-cuda-version-check"
+ Remove:
+ # remove gcc's -fcoroutines
+ - -fcoroutines
+ # remove nvc++ flags unknown to clang
+ - "-gpu=*"
+ - "-stdpar*"
+ # remove nvcc flags unknown to clang
+ - "-arch*"
+ - "-gencode*"
+ - "--generate-code*"
+ - "-ccbin*"
+ - "-t=*"
+ - "--threads*"
+ - "-Xptxas*"
+ - "-Xcudafe*"
+ - "-Xfatbin*"
+ - "-Xcompiler*"
+ - "--diag-suppress*"
+ - "--diag_suppress*"
+ - "--compiler-options*"
+ - "--expt-extended-lambda"
+ - "--expt-relaxed-constexpr"
+ - "-forward-unknown-to-host-compiler"
+ - "-Werror=cross-execution-space-call"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d93b19f784..7d63751906 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -22,7 +22,8 @@ include(rapids-find)
option(BUILD_CPU_ONLY "Build CPU only components. Applies to RAFT ANN benchmarks currently" OFF)
-# workaround for rapids_cuda_init_architectures not working for arch detection with enable_language(CUDA)
+# workaround for rapids_cuda_init_architectures not working for arch detection with
+# enable_language(CUDA)
set(lang_list "CXX")
if(NOT BUILD_CPU_ONLY)
@@ -286,7 +287,8 @@ endif()
set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled)
if(RAFT_COMPILE_LIBRARY)
- add_library(raft_objs OBJECT
+ add_library(
+ raft_objs OBJECT
src/core/logger.cpp
src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
@@ -331,6 +333,7 @@ if(RAFT_COMPILE_LIBRARY)
src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
src/neighbors/brute_force_knn_int_float_int.cu
src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
+ src/neighbors/brute_force_knn_index_float.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu
@@ -452,18 +455,21 @@ if(RAFT_COMPILE_LIBRARY)
src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
src/util/memory_pool.cpp
- )
+ )
set_target_properties(
raft_objs
PROPERTIES CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON)
+ POSITION_INDEPENDENT_CODE ON
+ )
target_compile_definitions(raft_objs PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY")
-  target_compile_options(raft_objs PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAFT_CXX_FLAGS}>"
-                         "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>")
+  target_compile_options(
+    raft_objs PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAFT_CXX_FLAGS}>"
+                      "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
+  )
  add_library(raft_lib SHARED $<TARGET_OBJECTS:raft_objs>)
  add_library(raft_lib_static STATIC $<TARGET_OBJECTS:raft_objs>)
@@ -477,13 +483,15 @@ if(RAFT_COMPILE_LIBRARY)
)
foreach(target raft_lib raft_lib_static raft_objs)
- target_link_libraries(${target} PUBLIC
- raft::raft
- ${RAFT_CTK_MATH_DEPENDENCIES} # TODO: Once `raft::resources` is used everywhere, this
- # will just be cublas
-      $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>)
+ target_link_libraries(
+ ${target}
+ PUBLIC raft::raft
+ ${RAFT_CTK_MATH_DEPENDENCIES} # TODO: Once `raft::resources` is used everywhere, this
+ # will just be cublas
+           $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
+ )
- #So consumers know when using libraft.so/libraft.a
+ # So consumers know when using libraft.so/libraft.a
target_compile_definitions(${target} PUBLIC "RAFT_COMPILED")
# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
index 4e91ee0690..4ec977700d 100644
--- a/cpp/bench/ann/src/common/benchmark.hpp
+++ b/cpp/bench/ann/src/common/benchmark.hpp
@@ -211,9 +211,10 @@ void bench_search(::benchmark::State& state,
try {
algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type),
dataset->base_set_size());
- } catch (const std::exception&) {
+ } catch (const std::exception& ex) {
state.SkipWithError("The algorithm '" + index.name +
- "' requires the base set, but it's not available.");
+ "' requires the base set, but it's not available. " +
+ "Exception: " + std::string(ex.what()));
return;
}
}
diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
index 231154ccfd..56885cce5c 100644
--- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu
+++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
@@ -30,19 +30,27 @@
namespace raft::bench::ann {
+template <typename T>
+void parse_base_build_param(const nlohmann::json& conf,
+                            typename raft::bench::ann::FaissGpu<T>::BuildParam& param)
+{
+ param.nlist = conf.at("nlist");
+ if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); }
+}
+
template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename raft::bench::ann::FaissGpuIVFFlat<T>::BuildParam& param)
{
- param.nlist = conf.at("nlist");
+ parse_base_build_param(conf, param);
}
template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename raft::bench::ann::FaissGpuIVFPQ<T>::BuildParam& param)
{
- param.nlist = conf.at("nlist");
- param.M = conf.at("M");
+ parse_base_build_param(conf, param);
+ param.M = conf.at("M");
if (conf.contains("usePrecomputed")) {
param.usePrecomputed = conf.at("usePrecomputed");
} else {
@@ -59,7 +67,7 @@ template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename raft::bench::ann::FaissGpuIVFSQ<T>::BuildParam& param)
{
- param.nlist = conf.at("nlist");
+ parse_base_build_param(conf, param);
param.quantizer_type = conf.at("quantizer_type");
}
diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h
index ec80e6cbfd..672c685b1f 100644
--- a/cpp/bench/ann/src/faiss/faiss_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h
@@ -18,6 +18,7 @@
#include "../common/ann_types.hpp"
+#include <faiss/gpu/GpuIndexIVF.h>
#include
#include
@@ -85,7 +86,23 @@ class FaissGpu : public ANN<T> {
float refine_ratio = 1.0;
};
- FaissGpu(Metric metric, int dim, int nlist);
+ struct BuildParam {
+ int nlist = 1;
+ int ratio = 2;
+ };
+
+  FaissGpu(Metric metric, int dim, const BuildParam& param)
+    : ANN<T>(metric, dim),
+      metric_type_(parse_metric_type(metric)),
+      nlist_{param.nlist},
+      training_sample_fraction_{1.0 / double(param.ratio)}
+  {
+    static_assert(std::is_same_v<T, float>, "faiss support only float type");
+    RAFT_CUDA_TRY(cudaGetDevice(&device_));
+    RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
+    faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
+  }
+
virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); }
void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final;
@@ -131,23 +148,35 @@ class FaissGpu : public ANN<T> {
int device_;
cudaEvent_t sync_{nullptr};
cudaStream_t faiss_default_stream_{nullptr};
+ double training_sample_fraction_;
};
-template <typename T>
-FaissGpu<T>::FaissGpu(Metric metric, int dim, int nlist)
-  : ANN<T>(metric, dim), metric_type_(parse_metric_type(metric)), nlist_(nlist)
-{
-  static_assert(std::is_same_v<T, float>, "faiss support only float type");
-  RAFT_CUDA_TRY(cudaGetDevice(&device_));
-  RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
-  faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
-}
-
template <typename T>
void FaissGpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
{
OmpSingleThreadScope omp_single_thread;
-
+  auto index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index_.get());
+ if (index_ivf != nullptr) {
+ // set the min/max training size for clustering to use the whole provided training set.
+ double trainset_size = training_sample_fraction_ * static_cast(nrow);
+ double points_per_centroid = trainset_size / static_cast(nlist_);
+ int max_ppc = std::ceil(points_per_centroid);
+ int min_ppc = std::floor(points_per_centroid);
+ if (min_ppc < index_ivf->cp.min_points_per_centroid) {
+ RAFT_LOG_WARN(
+ "The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d "
+ "points per cluster (n_lists = %d). This is smaller than the FAISS default "
+ "min_points_per_centroid = %d.",
+ static_cast(trainset_size),
+ nrow,
+ training_sample_fraction_,
+ min_ppc,
+ nlist_,
+ index_ivf->cp.min_points_per_centroid);
+ }
+ index_ivf->cp.max_points_per_centroid = max_ppc;
+ index_ivf->cp.min_points_per_centroid = min_ppc;
+ }
index_->train(nrow, dataset); // faiss::gpu::GpuIndexFlat::train() will do nothing
assert(index_->is_trained);
index_->add(nrow, dataset);
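To make the train-set sizing in the hunk above concrete, here is a worked example with assumed numbers (a 1M-row dataset, `ratio = 2`, `nlist = 1024`; none of these values come from this change):

```latex
\text{trainset\_size} = \frac{\text{nrow}}{\text{ratio}} = \frac{1{,}000{,}000}{2} = 500{,}000,
\qquad
\text{points\_per\_centroid} = \frac{\text{trainset\_size}}{\text{nlist}} = \frac{500{,}000}{1024} \approx 488.3
```

so `max_ppc = 489` and `min_ppc = 488`; both comfortably exceed FAISS's default `min_points_per_centroid` (39 at the time of writing), so the warning above stays silent.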
@@ -208,12 +237,9 @@ void FaissGpu<T>::load_(const std::string& file)
template <typename T>
class FaissGpuIVFFlat : public FaissGpu<T> {
public:
- struct BuildParam {
- int nlist;
- };
+  using typename FaissGpu<T>::BuildParam;
-  FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = this->device_;
@@ -234,15 +260,13 @@ class FaissGpuIVFFlat : public FaissGpu {
template <typename T>
class FaissGpuIVFPQ : public FaissGpu<T> {
public:
- struct BuildParam {
- int nlist;
+  struct BuildParam : public FaissGpu<T>::BuildParam {
int M;
bool useFloat16;
bool usePrecomputed;
};
-  FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFPQConfig config;
config.useFloat16LookupTables = param.useFloat16;
@@ -271,13 +295,11 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
template <typename T>
class FaissGpuIVFSQ : public FaissGpu<T> {
public:
- struct BuildParam {
- int nlist;
+  struct BuildParam : public FaissGpu<T>::BuildParam {
std::string quantizer_type;
};
-  FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::ScalarQuantizer::QuantizerType qtype;
if (param.quantizer_type == "fp16") {
@@ -310,7 +332,8 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
template <typename T>
class FaissGpuFlat : public FaissGpu<T> {
public:
-  FaissGpuFlat(Metric metric, int dim) : FaissGpu<T>(metric, dim, 0)
+  FaissGpuFlat(Metric metric, int dim)
+    : FaissGpu<T>(metric, dim, typename FaissGpu<T>::BuildParam{})
{
faiss::gpu::GpuIndexFlatConfig config;
config.device = this->device_;
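The refactor above hoists the shared `nlist`/`ratio` fields into a single `FaissGpu<T>::BuildParam` base so one `parse_base_build_param` can fill them for every index type. A minimal, self-contained sketch of the same idea (hypothetical names, not the benchmark's actual types):

```cpp
#include <iostream>

// Shared knobs, analogous to FaissGpu<T>::BuildParam in the diff above.
struct BaseBuildParam {
  int nlist = 1;  // number of IVF lists
  int ratio = 2;  // training set fraction = 1 / ratio
};

// A derived parameter struct inherits the shared fields and adds its own,
// analogous to FaissGpuIVFPQ<T>::BuildParam.
struct IVFPQBuildParam : BaseBuildParam {
  int M = 8;  // number of PQ sub-quantizers
};

// One helper can now populate the shared fields for any derived struct.
void parse_base(BaseBuildParam& p, int nlist) { p.nlist = nlist; }

int main()
{
  IVFPQBuildParam p;
  parse_base(p, 1024);  // fills the inherited fields
  p.M = 16;             // fills the PQ-specific field
  std::cout << "nlist=" << p.nlist << " ratio=" << p.ratio << " M=" << p.M << "\n";
}
```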
diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu
index 99481c2921..3b2e97062f 100644
--- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu
+++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu
@@ -33,8 +33,7 @@ template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename raft::bench::ann::Ggnn<T>::BuildParam& param)
{
- param.dataset_size = conf.at("dataset_size");
- param.k = conf.at("k");
+ param.k = conf.at("k");
if (conf.contains("k_build")) { param.k_build = conf.at("k_build"); }
if (conf.contains("segment_size")) { param.segment_size = conf.at("segment_size"); }
diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
index 74c7cddc3c..664ec511dd 100644
--- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
+++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
@@ -38,8 +38,6 @@ class Ggnn : public ANN<T> {
int num_layers{4}; // L
float tau{0.5};
int refine_iterations{2};
-
- size_t dataset_size;
int k; // GGNN requires to know k during building
};
@@ -182,12 +180,6 @@ GgnnImpl<T, measure, D, KBuild, KQuery, S>::GgnnImpl(Metric metric,
}
if (dim != D) { throw std::runtime_error("mis-matched dim"); }
-
- int device;
- RAFT_CUDA_TRY(cudaGetDevice(&device));
-
-  ggnn_ = std::make_unique<GGNNGPUInstance>(
-    device, build_param_.dataset_size, build_param_.num_layers, true, build_param_.tau);
}
template <typename T, DistanceMeasure measure, int D, int KBuild, int KQuery, int S>
@@ -195,11 +187,10 @@ void GgnnImpl<T, measure, D, KBuild, KQuery, S>::build(const T* dataset,
size_t nrow,
cudaStream_t stream)
{
- if (nrow != build_param_.dataset_size) {
- throw std::runtime_error(
- "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) +
- " , but nrow = " + std::to_string(nrow));
- }
+ int device;
+ RAFT_CUDA_TRY(cudaGetDevice(&device));
+  ggnn_ = std::make_unique<GGNNGPUInstance>(
+    device, nrow, build_param_.num_layers, true, build_param_.tau);
ggnn_->set_base_data(dataset);
ggnn_->set_stream(stream);
@@ -212,11 +203,6 @@ void GgnnImpl<T, measure, D, KBuild, KQuery, S>::build(const T* dataset,
template <typename T, DistanceMeasure measure, int D, int KBuild, int KQuery, int S>
void GgnnImpl<T, measure, D, KBuild, KQuery, S>::set_search_dataset(const T* dataset, size_t nrow)
{
- if (nrow != build_param_.dataset_size) {
- throw std::runtime_error(
- "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) +
- " , but nrow = " + std::to_string(nrow));
- }
ggnn_->set_base_data(dataset);
}
diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
index 5cd33ef94d..4d7b993aa1 100644
--- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -31,6 +31,8 @@
#include
#include
+#include <omp.h>
+
#include "../common/ann_types.hpp"
#include
@@ -164,13 +166,13 @@ class HnswLib : public ANN<T> {
struct BuildParam {
int M;
int ef_construction;
- int num_threads{1};
+ int num_threads = omp_get_num_procs();
};
  using typename ANN<T>::AnnSearchParam;
struct SearchParam : public AnnSearchParam {
int ef;
- int num_threads{1};
+ int num_threads = omp_get_num_procs();
};
HnswLib(Metric metric, int dim, const BuildParam& param);
diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu
index aa25d1532f..a9ff6c2922 100644
--- a/cpp/bench/ann/src/raft/raft_benchmark.cu
+++ b/cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -58,10 +58,7 @@ void parse_build_param(const nlohmann::json& conf,
{
param.n_lists = conf.at("nlist");
if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }
- if (conf.contains("ratio")) {
- param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio");
- std::cout << "kmeans_trainset_fraction " << param.kmeans_trainset_fraction;
- }
+ if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
}
template <typename T, typename IdxT>
@@ -82,6 +79,17 @@ void parse_build_param(const nlohmann::json& conf,
if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); }
if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); }
+ if (conf.contains("codebook_kind")) {
+ std::string kind = conf.at("codebook_kind");
+ if (kind == "cluster") {
+ param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER;
+ } else if (kind == "subspace") {
+ param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE;
+ } else {
+ throw std::runtime_error("codebook_kind: '" + kind +
+ "', should be either 'cluster' or 'subspace'");
+ }
+ }
}
template <typename T, typename IdxT>
@@ -139,6 +147,13 @@ void parse_build_param(const nlohmann::json& conf,
if (conf.contains("intermediate_graph_degree")) {
param.intermediate_graph_degree = conf.at("intermediate_graph_degree");
}
+ if (conf.contains("graph_build_algo")) {
+ if (conf.at("graph_build_algo") == "IVF_PQ") {
+ param.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ;
+ } else if (conf.at("graph_build_algo") == "NN_DESCENT") {
+ param.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT;
+ }
+ }
}
template <typename T, typename IdxT>
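For reference, the `graph_build_algo` parsing above maps a string from the benchmark's JSON config onto an enum. A minimal sketch of the same dispatch with nlohmann::json (the enum here is a stand-in, not RAFT's actual type):

```cpp
#include <nlohmann/json.hpp>
#include <iostream>

// Stand-in for raft::neighbors::cagra::graph_build_algo.
enum class graph_build_algo { IVF_PQ, NN_DESCENT };

int main()
{
  // An entry like the ones the ANN benchmark reads from its config file.
  nlohmann::json conf = {{"graph_build_algo", "NN_DESCENT"}};

  auto algo = graph_build_algo::IVF_PQ;  // default when the key is absent
  if (conf.contains("graph_build_algo")) {
    if (conf.at("graph_build_algo") == "IVF_PQ") {
      algo = graph_build_algo::IVF_PQ;
    } else if (conf.at("graph_build_algo") == "NN_DESCENT") {
      algo = graph_build_algo::NN_DESCENT;
    }
  }
  std::cout << (algo == graph_build_algo::NN_DESCENT ? "NN_DESCENT" : "IVF_PQ") << "\n";
}
```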
diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
index 1554c1f016..8f1e43a706 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
@@ -63,9 +63,14 @@ class RaftIvfPQ : public ANN<T> {
rmm::mr::set_current_device_resource(&mr_);
index_params_.metric = parse_metric_type(metric);
RAFT_CUDA_TRY(cudaGetDevice(&device_));
+ RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
}
- ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); }
+ ~RaftIvfPQ() noexcept
+ {
+ RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_));
+ rmm::mr::set_current_device_resource(mr_.get_upstream());
+ }
void build(const T* dataset, size_t nrow, cudaStream_t stream) final;
@@ -96,6 +101,7 @@ class RaftIvfPQ : public ANN {
// `mr_` must go first to make sure it dies last
  rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> mr_;
raft::device_resources handle_;
+ cudaEvent_t sync_{nullptr};
BuildParam index_params_;
raft::neighbors::ivf_pq::search_params search_params_;
  std::optional<raft::neighbors::ivf_pq::index<IdxT>> index_;
@@ -103,6 +109,12 @@ class RaftIvfPQ : public ANN {
int dimension_;
float refine_ratio_ = 1.0;
  raft::device_matrix_view<const T, IdxT> dataset_;
+
+ void stream_wait(cudaStream_t stream) const
+ {
+ RAFT_CUDA_TRY(cudaEventRecord(sync_, resource::get_cuda_stream(handle_)));
+ RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_));
+ }
};
template <typename T, typename IdxT>
@@ -121,12 +133,12 @@ void RaftIvfPQ<T, IdxT>::load(const std::string& file)
}
template <typename T, typename IdxT>
-void RaftIvfPQ<T, IdxT>::build(const T* dataset, size_t nrow, cudaStream_t)
+void RaftIvfPQ<T, IdxT>::build(const T* dataset, size_t nrow, cudaStream_t stream)
{
  auto dataset_v = raft::make_device_matrix_view<const T, IdxT>(dataset, IdxT(nrow), dim_);
index_.emplace(raft::runtime::neighbors::ivf_pq::build(handle_, index_params_, dataset_v));
- return;
+ stream_wait(stream);
}
template <typename T, typename IdxT>
@@ -176,16 +188,14 @@ void RaftIvfPQ<T, IdxT>::search(const T* queries,
neighbors_v,
distances_v,
index_->metric());
+ stream_wait(stream); // RAFT stream -> bench stream
} else {
      auto queries_host    = raft::make_host_matrix<T, IdxT>(batch_size, index_->dim());
      auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
      auto neighbors_host  = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
      auto distances_host  = raft::make_host_matrix<float, IdxT>(batch_size, k);
- raft::copy(queries_host.data_handle(),
- queries,
- queries_host.size(),
- resource::get_cuda_stream(handle_));
+ raft::copy(queries_host.data_handle(), queries, queries_host.size(), stream);
raft::copy(candidates_host.data_handle(),
candidates.data_handle(),
candidates_host.size(),
@@ -194,6 +204,10 @@ void RaftIvfPQ::search(const T* queries,
      auto dataset_v = raft::make_host_matrix_view<const T, IdxT>(
        dataset_.data_handle(), dataset_.extent(0), dataset_.extent(1));
+      // wait for the queries to copy to host in `stream` and for IVF-PQ::search to finish
+      RAFT_CUDA_TRY(cudaEventRecord(sync_, resource::get_cuda_stream(handle_)));
+      RAFT_CUDA_TRY(cudaEventSynchronize(sync_));
+      RAFT_CUDA_TRY(cudaEventRecord(sync_, stream));
+      RAFT_CUDA_TRY(cudaEventSynchronize(sync_));
raft::runtime::neighbors::refine(handle_,
dataset_v,
queries_host.view(),
@@ -202,14 +216,8 @@ void RaftIvfPQ::search(const T* queries,
distances_host.view(),
index_->metric());
- raft::copy(neighbors,
- (size_t*)neighbors_host.data_handle(),
- neighbors_host.size(),
- resource::get_cuda_stream(handle_));
- raft::copy(distances,
- distances_host.data_handle(),
- distances_host.size(),
- resource::get_cuda_stream(handle_));
+ raft::copy(neighbors, (size_t*)neighbors_host.data_handle(), neighbors_host.size(), stream);
+ raft::copy(distances, distances_host.data_handle(), distances_host.size(), stream);
}
} else {
auto queries_v =
@@ -219,8 +227,7 @@ void RaftIvfPQ::search(const T* queries,
raft::runtime::neighbors::ivf_pq::search(
handle_, search_params_, *index_, queries_v, neighbors_v, distances_v);
+ stream_wait(stream); // RAFT stream -> bench stream
}
- resource::sync_stream(handle_);
- return;
}
} // namespace raft::bench::ann
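The `stream_wait()` helper introduced above replaces the blocking `resource::sync_stream(handle_)` with device-side ordering between two streams. A minimal standalone sketch of the same event pattern (plain CUDA runtime calls, no RAFT error-checking macros):

```cpp
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
  cudaStream_t producer, consumer;
  cudaEvent_t sync;
  cudaStreamCreate(&producer);
  cudaStreamCreate(&consumer);
  cudaEventCreateWithFlags(&sync, cudaEventDisableTiming);

  // ... enqueue work on `producer` here (e.g. the IVF-PQ search) ...

  cudaEventRecord(sync, producer);         // mark the end of the producer's work
  cudaStreamWaitEvent(consumer, sync, 0);  // consumer waits on-device; the host is not blocked

  // ... enqueue dependent work on `consumer` here (e.g. the benchmark's result copies) ...

  cudaStreamSynchronize(consumer);
  cudaEventDestroy(sync);
  cudaStreamDestroy(producer);
  cudaStreamDestroy(consumer);
  printf("done\n");
  return 0;
}
```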
diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt
index e8d4739384..ca4b0f099d 100644
--- a/cpp/bench/prims/CMakeLists.txt
+++ b/cpp/bench/prims/CMakeLists.txt
@@ -77,6 +77,7 @@ if(BUILD_PRIMS_BENCH)
NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu
bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
)
+ ConfigureBench(NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/main.cpp)
ConfigureBench(
NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu
@@ -155,4 +156,5 @@ if(BUILD_PRIMS_BENCH)
LIB
EXPLICIT_INSTANTIATE_ONLY
)
+
endif()
diff --git a/cpp/bench/prims/core/bitset.cu b/cpp/bench/prims/core/bitset.cu
new file mode 100644
index 0000000000..5f44aa9af5
--- /dev/null
+++ b/cpp/bench/prims/core/bitset.cu
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <common/benchmark.hpp>
+#include <raft/core/bitset.cuh>
+#include <raft/core/device_mdarray.hpp>
+#include <raft/random/rng.cuh>
+
+namespace raft::bench::core {
+
+struct bitset_inputs {
+ uint32_t bitset_len;
+ uint32_t mask_len;
+ uint32_t query_len;
+}; // struct bitset_inputs
+
+template <typename bitset_t, typename index_t>
+struct bitset_bench : public fixture {
+ bitset_bench(const bitset_inputs& p)
+ : params(p),
+      mask{raft::make_device_vector<index_t, index_t>(res, p.mask_len)},
+      queries{raft::make_device_vector<index_t, index_t>(res, p.query_len)},
+      outputs{raft::make_device_vector<bool, index_t>(res, p.query_len)}
+ {
+ raft::random::RngState state{42};
+ raft::random::uniformInt(res, state, mask.view(), index_t{0}, index_t{p.bitset_len});
+ }
+
+ void run_benchmark(::benchmark::State& state) override
+ {
+ loop_on_state(state, [this]() {
+      auto my_bitset = raft::core::bitset<bitset_t, index_t>(
+        this->res, raft::make_const_mdspan(mask.view()), params.bitset_len);
+      my_bitset.test(res, raft::make_const_mdspan(queries.view()), outputs.view());
+ });
+ }
+
+ private:
+ raft::resources res;
+ bitset_inputs params;
+  raft::device_vector<index_t, index_t> mask, queries;
+  raft::device_vector<bool, index_t> outputs;
+}; // struct bitset
+
+const std::vector bitset_input_vecs{
+ {256 * 1024 * 1024, 64 * 1024 * 1024, 256 * 1024 * 1024}, // Standard Bench
+ {256 * 1024 * 1024, 64 * 1024 * 1024, 1024 * 1024 * 1024}, // Extra queries
+ {128 * 1024 * 1024, 1024 * 1024 * 1024, 256 * 1024 * 1024}, // Extra mask to test atomics impact
+};
+
+using Uint8_32  = bitset_bench<uint8_t, uint32_t>;
+using Uint16_64 = bitset_bench<uint16_t, uint64_t>;
+using Uint32_32 = bitset_bench<uint32_t, uint32_t>;
+using Uint32_64 = bitset_bench<uint32_t, uint64_t>;
+
+RAFT_BENCH_REGISTER(Uint8_32, "", bitset_input_vecs);
+RAFT_BENCH_REGISTER(Uint16_64, "", bitset_input_vecs);
+RAFT_BENCH_REGISTER(Uint32_32, "", bitset_input_vecs);
+RAFT_BENCH_REGISTER(Uint32_64, "", bitset_input_vecs);
+
+} // namespace raft::bench::core
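The benchmark above exercises `bitset::test`, which boils down to word/bit indexing into the underlying array. A small sketch of that arithmetic under the assumed layout (bit `i` lives in word `i / bits_per_word` at position `i % bits_per_word`):

```cpp
#include <cstdint>
#include <cstdio>

// Check whether bit `i` is set, assuming the layout described above.
bool test_bit(const uint32_t* words, uint32_t i)
{
  constexpr uint32_t bits_per_word = 8 * sizeof(uint32_t);  // 32
  return (words[i / bits_per_word] >> (i % bits_per_word)) & 1u;
}

int main()
{
  uint32_t words[2] = {0, 0};
  words[40 / 32] |= 1u << (40 % 32);  // set bit 40 (word 1, position 8)
  printf("bit 40: %d, bit 41: %d\n", test_bit(words, 40), test_bit(words, 41));
  return 0;
}
```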
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 1bff66cac4..d3994e59c5 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -19,6 +19,7 @@
#include
#include
+#include <raft/linalg/map.cuh>
#include
#include
#include
@@ -38,6 +39,19 @@
namespace raft::matrix {
using namespace raft::bench; // NOLINT
+template <typename KeyT>
+struct replace_with_mask {
+ KeyT replacement;
+ int64_t line_length;
+ int64_t spared_inputs;
+ constexpr auto inline operator()(int64_t offset, KeyT x, uint8_t mask) -> KeyT
+ {
+ auto i = offset % line_length;
+ // don't replace all the inputs, spare a few elements at the beginning of the input
+ return (mask && i >= spared_inputs) ? replacement : x;
+ }
+};
+
template <typename KeyT, typename IdxT, select::Algo Algo>
struct selection : public fixture {
explicit selection(const select::params& p)
@@ -67,6 +81,21 @@ struct selection : public fixture {
}
}
raft::random::uniform(handle, state, in_dists_.data(), in_dists_.size(), min_value, max_value);
+ if (p.frac_infinities > 0.0) {
+      rmm::device_uvector<uint8_t> mask_buf(p.batch_size * p.len, stream);
+      auto mask = make_device_vector_view<uint8_t>(mask_buf.data(), mask_buf.size());
+      raft::random::bernoulli(handle, state, mask, p.frac_infinities);
+      KeyT bound = p.select_min ? raft::upper_bound<KeyT>() : raft::lower_bound<KeyT>();
+      auto mask_in =
+        make_device_vector_view<const uint8_t>(mask_buf.data(), mask_buf.size());
+      auto dists_in  = make_device_vector_view<const KeyT>(in_dists_.data(), in_dists_.size());
+      auto dists_out = make_device_vector_view<KeyT>(in_dists_.data(), in_dists_.size());
+ raft::linalg::map_offset(handle,
+ dists_out,
+ replace_with_mask{bound, int64_t(p.len), int64_t(p.k / 2)},
+ dists_in,
+ mask_in);
+ }
}
void run_benchmark(::benchmark::State& state) override // NOLINT
@@ -75,8 +104,12 @@ struct selection : public fixture {
std::ostringstream label_stream;
label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
if (params_.use_same_leading_bits) { label_stream << "#same-leading-bits"; }
+ if (params_.frac_infinities > 0) { label_stream << "#infs-" << params_.frac_infinities; }
state.SetLabel(label_stream.str());
- loop_on_state(state, [this]() {
+ common::nvtx::range case_scope("%s - %s", state.name().c_str(), label_stream.str().c_str());
+ int iter = 0;
+ loop_on_state(state, [&iter, this]() {
+    common::nvtx::range lap_scope("lap-%d", iter++);
select::select_k_impl(handle,
Algo,
in_dists_.data(),
@@ -149,6 +182,35 @@ const std::vector<select::params> kInputs{
{10, 1000000, 64, true, false, true},
{10, 1000000, 128, true, false, true},
{10, 1000000, 256, true, false, true},
+
+ {10, 1000000, 1, true, false, false, true, 0.1},
+ {10, 1000000, 16, true, false, false, true, 0.1},
+ {10, 1000000, 64, true, false, false, true, 0.1},
+ {10, 1000000, 128, true, false, false, true, 0.1},
+ {10, 1000000, 256, true, false, false, true, 0.1},
+
+ {10, 1000000, 1, true, false, false, true, 0.9},
+ {10, 1000000, 16, true, false, false, true, 0.9},
+ {10, 1000000, 64, true, false, false, true, 0.9},
+ {10, 1000000, 128, true, false, false, true, 0.9},
+ {10, 1000000, 256, true, false, false, true, 0.9},
+ {1000, 10000, 1, true, false, false, true, 0.9},
+ {1000, 10000, 16, true, false, false, true, 0.9},
+ {1000, 10000, 64, true, false, false, true, 0.9},
+ {1000, 10000, 128, true, false, false, true, 0.9},
+ {1000, 10000, 256, true, false, false, true, 0.9},
+
+ {10, 1000000, 1, true, false, false, true, 1.0},
+ {10, 1000000, 16, true, false, false, true, 1.0},
+ {10, 1000000, 64, true, false, false, true, 1.0},
+ {10, 1000000, 128, true, false, false, true, 1.0},
+ {10, 1000000, 256, true, false, false, true, 1.0},
+ {1000, 10000, 1, true, false, false, true, 1.0},
+ {1000, 10000, 16, true, false, false, true, 1.0},
+ {1000, 10000, 64, true, false, false, true, 1.0},
+ {1000, 10000, 128, true, false, false, true, 1.0},
+ {1000, 10000, 256, true, false, false, true, 1.0},
+ {1000, 10000, 256, true, false, false, true, 0.999},
};
#define SELECTION_REGISTER(KeyT, IdxT, A) \
@@ -157,28 +219,28 @@ const std::vector<select::params> kInputs{
RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \
}
-SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT
SELECTION_REGISTER(double, uint32_t, kRadix8bits); // NOLINT
SELECTION_REGISTER(double, uint32_t, kRadix11bits); // NOLINT
SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass); // NOLINT
SELECTION_REGISTER(double, uint32_t, kWarpAuto); // NOLINT
-SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT
-SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT
-SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT
// For learning a heuristic of which selection algorithm to use, we
// have a couple of additional constraints when generating the dataset:
diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
index 866a0ebdfa..ade3a6e348 100644
--- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -438,7 +438,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY))
adjust_centers_kernel(MathT* centers, // [n_clusters, dim]
IdxT n_clusters,
IdxT dim,
- const T* dataset, // [n_rows, dim]
+ const T* dataset, // [n_rows, dim]
IdxT n_rows,
const LabelT* labels, // [n_rows]
const CounterT* cluster_sizes, // [n_clusters]
diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh
new file mode 100644
index 0000000000..6747c5fab0
--- /dev/null
+++ b/cpp/include/raft/core/bitset.cuh
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_container_policy.hpp>
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/linalg/map.cuh>
+#include <raft/util/device_atomics.cuh>
+
+namespace raft::core {
+/**
+ * @defgroup bitset Bitset
+ * @{
+ */
+/**
+ * @brief View of a RAFT Bitset.
+ *
+ * This lightweight structure stores a pointer to a bitset in device memory together with its length.
+ * It provides a test() device function to check if a given index is set in the bitset.
+ *
+ * @tparam bitset_t Underlying type of the bitset array. Default is uint32_t.
+ * @tparam index_t Indexing type used. Default is uint32_t.
+ */
+template <typename bitset_t = uint32_t, typename index_t = uint32_t>
+struct bitset_view {
+ static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8;
+
+ _RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, index_t bitset_len)
+ : bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len}
+ {
+ }
+ /**
+ * @brief Create a bitset view from a device vector view of the bitset.
+ *
+ * @param bitset_span Device vector view of the bitset
+ * @param bitset_len Number of bits in the bitset
+ */
+ _RAFT_HOST_DEVICE bitset_view(raft::device_vector_view<bitset_t, index_t> bitset_span,
+ index_t bitset_len)
+ : bitset_ptr_{bitset_span.data_handle()}, bitset_len_{bitset_len}
+ {
+ }
+ /**
+ * @brief Device function to test if a given index is set in the bitset.
+ *
+ * @param sample_index Single index to test
+ * @return bool True if the given index has not been unset in the bitset
+ */
+ inline _RAFT_DEVICE auto test(const index_t sample_index) const -> bool
+ {
+ const bitset_t bit_element = bitset_ptr_[sample_index / bitset_element_size];
+ const index_t bit_index = sample_index % bitset_element_size;
+ const bool is_bit_set = (bit_element & (bitset_t{1} << bit_index)) != 0;
+ return is_bit_set;
+ }
+
+ /**
+ * @brief Get the device pointer to the bitset.
+ */
+ inline _RAFT_HOST_DEVICE auto data_handle() -> bitset_t* { return bitset_ptr_; }
+ inline _RAFT_HOST_DEVICE auto data_handle() const -> const bitset_t* { return bitset_ptr_; }
+ /**
+ * @brief Get the number of bits of the bitset representation.
+ */
+ inline _RAFT_HOST_DEVICE auto size() const -> index_t { return bitset_len_; }
+
+ /**
+ * @brief Get the number of elements used by the bitset representation.
+ */
+ inline _RAFT_HOST_DEVICE auto n_elements() const -> index_t
+ {
+ return raft::ceildiv(bitset_len_, bitset_element_size);
+ }
+
+ inline auto to_mdspan() -> raft::device_vector_view<bitset_t, index_t>
+ {
+ return raft::make_device_vector_view(bitset_ptr_, n_elements());
+ }
+ inline auto to_mdspan() const -> raft::device_vector_view<const bitset_t, index_t>
+ {
+ return raft::make_device_vector_view(bitset_ptr_, n_elements());
+ }
+
+ private:
+ bitset_t* bitset_ptr_;
+ index_t bitset_len_;
+};
+
+/**
+ * @brief RAFT Bitset.
+ *
+ * This structure encapsulates a bitset in device memory. It provides a view() method to get a
+ * device-usable lightweight view of the bitset.
+ * Each index is represented by a single bit in the bitset. The total number of bytes used is
+ * ceil(bitset_len / 8), rounded up to a multiple of sizeof(bitset_t).
+ * @tparam bitset_t Underlying type of the bitset array. Default is uint32_t.
+ * @tparam index_t Indexing type used. Default is uint32_t.
+ */
+template <typename bitset_t = uint32_t, typename index_t = uint32_t>
+struct bitset {
+ static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8;
+
+ /**
+ * @brief Construct a new bitset object with a list of indices to unset.
+ *
+ * @param res RAFT resources
+ * @param mask_index List of indices to unset in the bitset
+ * @param bitset_len Length of the bitset
+ * @param default_value Default value to set the bits to. Default is true.
+ */
+ bitset(const raft::resources& res,
+ raft::device_vector_view<const index_t, index_t> mask_index,
+ index_t bitset_len,
+ bool default_value = true)
+ : bitset_{std::size_t(raft::ceildiv(bitset_len, bitset_element_size)),
+ raft::resource::get_cuda_stream(res)},
+ bitset_len_{bitset_len},
+ default_value_{default_value}
+ {
+ cudaMemsetAsync(bitset_.data(),
+ default_value ? 0xff : 0x00,
+ n_elements() * sizeof(bitset_t),
+ resource::get_cuda_stream(res));
+ set(res, mask_index, !default_value);
+ }
+
+ /**
+ * @brief Construct a new bitset object
+ *
+ * @param res RAFT resources
+ * @param bitset_len Length of the bitset
+ * @param default_value Default value to set the bits to. Default is true.
+ */
+ bitset(const raft::resources& res, index_t bitset_len, bool default_value = true)
+ : bitset_{std::size_t(raft::ceildiv(bitset_len, bitset_element_size)),
+ resource::get_cuda_stream(res)},
+ bitset_len_{bitset_len},
+ default_value_{default_value}
+ {
+ cudaMemsetAsync(bitset_.data(),
+ default_value ? 0xff : 0x00,
+ n_elements() * sizeof(bitset_t),
+ resource::get_cuda_stream(res));
+ }
+ // Disable copy constructor
+ bitset(const bitset&) = delete;
+ bitset(bitset&&) = default;
+ bitset& operator=(const bitset&) = delete;
+ bitset& operator=(bitset&&) = default;
+
+ /**
+ * @brief Create a device-usable view of the bitset.
+ *
+ * @return bitset_view
+ */
+ inline auto view() -> raft::core::bitset_view<bitset_t, index_t>
+ {
+ return bitset_view(to_mdspan(), bitset_len_);
+ }
+ [[nodiscard]] inline auto view() const -> raft::core::bitset_view<const bitset_t, index_t>
+ {
+ return bitset_view(to_mdspan(), bitset_len_);
+ }
+
+ /**
+ * @brief Get the device pointer to the bitset.
+ */
+ inline auto data_handle() -> bitset_t* { return bitset_.data(); }
+ inline auto data_handle() const -> const bitset_t* { return bitset_.data(); }
+ /**
+ * @brief Get the number of bits of the bitset representation.
+ */
+ inline auto size() const -> index_t { return bitset_len_; }
+
+ /**
+ * @brief Get the number of elements used by the bitset representation.
+ */
+ inline auto n_elements() const -> index_t
+ {
+ return raft::ceildiv(bitset_len_, bitset_element_size);
+ }
+
+ /** @brief Get an mdspan view of the current bitset */
+ inline auto to_mdspan() -> raft::device_vector_view<bitset_t, index_t>
+ {
+ return raft::make_device_vector_view(bitset_.data(), n_elements());
+ }
+ [[nodiscard]] inline auto to_mdspan() const -> raft::device_vector_view<const bitset_t, index_t>
+ {
+ return raft::make_device_vector_view(bitset_.data(), n_elements());
+ }
+
+ /** @brief Resize the bitset. If the requested size is larger, new memory is allocated and set to
+ * the default value. */
+ void resize(const raft::resources& res, index_t new_bitset_len)
+ {
+ auto old_size = raft::ceildiv(bitset_len_, bitset_element_size);
+ auto new_size = raft::ceildiv(new_bitset_len, bitset_element_size);
+ bitset_.resize(new_size);
+ bitset_len_ = new_bitset_len;
+ if (old_size < new_size) {
+ // If the new size is larger, set the new bits to the default value
+ cudaMemsetAsync(bitset_.data() + old_size,
+ default_value_ ? 0xff : 0x00,
+ (new_size - old_size) * sizeof(bitset_t),
+ resource::get_cuda_stream(res));
+ }
+ }
+
+ /**
+ * @brief Test a list of indices in a bitset.
+ *
+ * @tparam output_t Output type of the test. Default is bool.
+ * @param res RAFT resources
+ * @param queries List of indices to test
+ * @param output List of outputs
+ */
+ template <typename output_t = bool>
+ void test(const raft::resources& res,
+ raft::device_vector_view<const index_t, index_t> queries,
+ raft::device_vector_view<output_t, index_t> output) const
+ {
+ RAFT_EXPECTS(output.extent(0) == queries.extent(0), "Output and queries must be the same size");
+ auto bitset_view = view();
+ raft::linalg::map(
+ res,
+ output,
+ [bitset_view] __device__(index_t query) { return output_t(bitset_view.test(query)); },
+ queries);
+ }
+ /**
+ * @brief Set a list of indices in a bitset to set_value.
+ *
+ * @param res RAFT resources
+ * @param mask_index List of indices to set or unset in the bitset
+ * @param set_value Value to set the bits to (true or false)
+ */
+ void set(const raft::resources& res,
+ raft::device_vector_view<const index_t, index_t> mask_index,
+ bool set_value = false)
+ {
+ auto* bitset_ptr = this->data_handle();
+ thrust::for_each_n(resource::get_thrust_policy(res),
+ mask_index.data_handle(),
+ mask_index.extent(0),
+ [bitset_ptr, set_value] __device__(const index_t sample_index) {
+ const index_t bit_element = sample_index / bitset_element_size;
+ const index_t bit_index = sample_index % bitset_element_size;
+ const bitset_t bitmask = bitset_t{1} << bit_index;
+ if (set_value) {
+ atomicOr(bitset_ptr + bit_element, bitmask);
+ } else {
+ const bitset_t bitmask2 = ~bitmask;
+ atomicAnd(bitset_ptr + bit_element, bitmask2);
+ }
+ });
+ }
+ /**
+ * @brief Flip all the bits in a bitset.
+ *
+ * @param res RAFT resources
+ */
+ void flip(const raft::resources& res)
+ {
+ auto bitset_span = this->to_mdspan();
+ raft::linalg::map(
+ res,
+ bitset_span,
+ [] __device__(bitset_t element) { return bitset_t(~element); },
+ raft::make_const_mdspan(bitset_span));
+ }
+ /**
+ * @brief Reset the bits in a bitset.
+ *
+ * @param res RAFT resources
+ */
+ void reset(const raft::resources& res)
+ {
+ cudaMemsetAsync(bitset_.data(),
+ default_value_ ? 0xff : 0x00,
+ n_elements() * sizeof(bitset_t),
+ resource::get_cuda_stream(res));
+ }
+
+ private:
+ raft::device_uvector<bitset_t> bitset_;
+ index_t bitset_len_;
+ bool default_value_;
+};
+
+/** @} */
+} // end namespace raft::core
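// Editor's sketch (not part of the patch): typical usage of the bitset added above,
// assuming a live raft::resources handle; sizes are illustrative.
#include <raft/core/bitset.cuh>
#include <raft/core/device_mdarray.hpp>

void bitset_usage(raft::resources const& res)
{
  // A 1000-bit set, all bits initially set, with three indices unset up front.
  auto removed = raft::make_device_vector<uint32_t, uint32_t>(res, 3);
  // ... fill `removed` with the sample indices to filter out ...
  raft::core::bitset<uint32_t, uint32_t> bits(
    res, raft::make_const_mdspan(removed.view()), 1000);

  // Batched membership test: output[i] = view.test(queries[i]).
  auto queries = raft::make_device_vector<uint32_t, uint32_t>(res, 8);
  auto output  = raft::make_device_vector<bool, uint32_t>(res, 8);
  bits.test(res, raft::make_const_mdspan(queries.view()), output.view());

  bits.flip(res);   // invert every bit
  bits.reset(res);  // restore the default value everywhere
}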
diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
index 328080da1f..8e41aa96f3 100644
--- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
+++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
@@ -75,7 +75,7 @@ namespace numpy_serializer {
#if RAFT_SYSTEM_LITTLE_ENDIAN == 1
#define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR
-#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1
+#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1
#define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR
#endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1
diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp
index e734c99029..f077a49b77 100644
--- a/cpp/include/raft/core/detail/nvtx.hpp
+++ b/cpp/include/raft/core/detail/nvtx.hpp
@@ -193,7 +193,7 @@ inline void pop_range()
} // namespace raft::common::nvtx::detail
-#else // NVTX_ENABLED
+#else // NVTX_ENABLED
namespace raft::common::nvtx::detail {
diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp
index 2e0d1117a1..192d160d45 100644
--- a/cpp/include/raft/core/kvp.hpp
+++ b/cpp/include/raft/core/kvp.hpp
@@ -32,8 +32,8 @@ struct KeyValuePair {
typedef _Key Key; ///< Key data type
typedef _Value Value; ///< Value data type
- Key key; ///< Item key
- Value value; ///< Item value
+ Key key; ///< Item key
+ Value value; ///< Item value
/// Constructor
RAFT_INLINE_FUNCTION KeyValuePair() {}
diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp
index 2dc4eb1f9d..8e331293bf 100644
--- a/cpp/include/raft/core/resource/resource_types.hpp
+++ b/cpp/include/raft/core/resource/resource_types.hpp
@@ -42,7 +42,7 @@ enum resource_type {
THRUST_POLICY, // thrust execution policy
WORKSPACE_RESOURCE, // rmm device memory resource
- LAST_KEY // reserved for the last key
+ LAST_KEY // reserved for the last key
};
/**
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
index 10827a8778..f659ed256d 100644
--- a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
@@ -397,7 +397,7 @@ class EpilogueWithBroadcastCustom : public EpilogueBase
diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
index 68922943f4..f0f12acdb1 100644
--- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh
+++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
@@ -16,11 +16,11 @@
#pragma once
-#include <cstddef>                  // size_t
-#include <limits>                   // std::numeric_limits
-#include <raft/core/kvp.hpp>        // raft::KeyValuePair
-#include <raft/core/operators.hpp>  // raft::identity_op
-#include <raft/distance/detail/distance_ops/l2_exp.cuh>  // ops::l2_exp_distance_op
+#include <cstddef>  // size_t
+#include <limits>   // std::numeric_limits
+#include <raft/core/kvp.hpp>        // raft::KeyValuePair
+#include <raft/core/operators.hpp>  // raft::identity_op
+#include <raft/distance/detail/distance_ops/l2_exp.cuh>  // ops::l2_exp_distance_op
#include
#include <raft/distance/detail/pairwise_distance_base.cuh>  // PairwiseDistances
#include <raft/linalg/contractions.cuh>                     // Policy
diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh
index 5a33c9ce4a..55da634145 100644
--- a/cpp/include/raft/distance/detail/masked_distance_base.cuh
+++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh
@@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass {
} // tile_idx_n
} // idx_g
rowEpilog_op(tile_idx_m);
- } // tile_idx_m
+ } // tile_idx_m
}
private:
diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
index 58b5daa8ca..c6b09be31e 100644
--- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
@@ -18,7 +18,7 @@
#include <raft/util/cuda_utils.cuh>          // ceildiv
#include <raft/util/cuda_rt_essentials.hpp>  // RAFT_CUDA_TRY
-#include <cstddef> // size_t
+#include <cstddef>  // size_t
namespace raft {
namespace distance {
diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
index dd58ab4328..e1dc6f9b37 100644
--- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
@@ -45,7 +45,7 @@ void pairwise_matrix_dispatch(OpT distance_op,
cudaStream_t stream,
bool is_row_major) RAFT_EXPLICIT;
-}; // namespace raft::distance::detail
+}; // namespace raft::distance::detail
#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY
diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
index cd748b9e6b..951f8a0132 100644
--- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
+++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
@@ -57,8 +57,8 @@ namespace threadblock {
///
/// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator
///
-template
diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh
index 3f7f2b0a23..7171ba605f 100644
--- a/cpp/include/raft/distance/distance-ext.cuh
+++ b/cpp/include/raft/distance/distance-ext.cuh
@@ -140,8 +140,8 @@ void pairwise_distance(raft::resources const& handle,
raft::distance::DistanceType metric,
Type metric_arg = 2.0f) RAFT_EXPLICIT;
-}; // namespace distance
-}; // namespace raft
+}; // namespace distance
+}; // namespace raft
#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY
diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh
index 30f4a2d167..b2cd736c57 100644
--- a/cpp/include/raft/linalg/add.cuh
+++ b/cpp/include/raft/linalg/add.cuh
@@ -217,7 +217,7 @@ void add_scalar(raft::resources const& handle,
/** @} */ // end of group add
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh
index f6889e959b..03beb1d1d1 100644
--- a/cpp/include/raft/linalg/binary_op.cuh
+++ b/cpp/include/raft/linalg/binary_op.cuh
@@ -82,7 +82,7 @@ void binary_op(raft::resources const& handle, InType in1, InType in2, OutType ou
/** @} */ // end of group binary_op
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh
index 5609656234..afa58d73fc 100644
--- a/cpp/include/raft/linalg/coalesced_reduction.cuh
+++ b/cpp/include/raft/linalg/coalesced_reduction.cuh
@@ -160,7 +160,7 @@ void coalesced_reduction(raft::resources const& handle,
/** @} */ // end of group coalesced_reduction
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh
index 3b1e8c41c4..cb6488bedf 100644
--- a/cpp/include/raft/linalg/contractions.cuh
+++ b/cpp/include/raft/linalg/contractions.cuh
@@ -100,7 +100,7 @@ struct KernelPolicy {
SmemSize = 2 * SmemPage * sizeof(DataT),
}; // enum
-}; // struct KernelPolicy
+}; // struct KernelPolicy
template
struct ColKernelPolicy {
diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
index 5a7356a4c2..d15e343c9a 100644
--- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
+++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
@@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle,
template <>
inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT
int n,
- float* const A[], // NOLINT
+ float* const A[], // NOLINT
int lda,
int* P,
int* info,
@@ -564,7 +564,7 @@ inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT
template <>
inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT
int n,
- double* const A[], // NOLINT
+ double* const A[], // NOLINT
int lda,
int* P,
int* info,
diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh
index d617b065da..17ec5c3136 100644
--- a/cpp/include/raft/linalg/divide.cuh
+++ b/cpp/include/raft/linalg/divide.cuh
@@ -96,7 +96,7 @@ void divide_scalar(raft::resources const& handle,
/** @} */ // end of group add
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh
index 954bf19334..57f3b61388 100644
--- a/cpp/include/raft/linalg/eig.cuh
+++ b/cpp/include/raft/linalg/eig.cuh
@@ -220,7 +220,7 @@ void eig_jacobi(raft::resources const& handle,
/** @} */ // end of eig
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh
index 640964d018..610ea07f96 100644
--- a/cpp/include/raft/linalg/gemv.cuh
+++ b/cpp/include/raft/linalg/gemv.cuh
@@ -305,6 +305,6 @@ void gemv(raft::resources const& handle,
}
/** @} */ // end of gemv
-}; // namespace linalg
-}; // namespace raft
+}; // namespace linalg
+}; // namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh
index 20588cbe17..21575d7806 100644
--- a/cpp/include/raft/linalg/lstsq.cuh
+++ b/cpp/include/raft/linalg/lstsq.cuh
@@ -245,7 +245,7 @@ void lstsq_qr(raft::resources const& handle,
/** @} */ // end of lstsq
-}; // namespace linalg
-}; // namespace raft
+}; // namespace linalg
+}; // namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh
index e620d227eb..a65f6ed390 100644
--- a/cpp/include/raft/linalg/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/matrix_vector_op.cuh
@@ -240,7 +240,7 @@ void matrix_vector_op(raft::resources const& handle,
/** @} */ // end of group matrix_vector_op
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh
index d45f11524d..b59a0fcef7 100644
--- a/cpp/include/raft/linalg/mean_squared_error.cuh
+++ b/cpp/include/raft/linalg/mean_squared_error.cuh
@@ -79,7 +79,7 @@ void mean_squared_error(raft::resources const& handle,
/** @} */ // end of group mean_squared_error
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh
index 3ade108235..9973a3cc6c 100644
--- a/cpp/include/raft/linalg/multiply.cuh
+++ b/cpp/include/raft/linalg/multiply.cuh
@@ -98,7 +98,7 @@ void multiply_scalar(
/** @} */ // end of group multiply
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh
index 26ac1035ca..5c7dcbd5cf 100644
--- a/cpp/include/raft/linalg/power.cuh
+++ b/cpp/include/raft/linalg/power.cuh
@@ -154,7 +154,7 @@ void power_scalar(
/** @} */ // end of group add
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh
index a3d0ef71d0..3181dd0224 100644
--- a/cpp/include/raft/linalg/reduce.cuh
+++ b/cpp/include/raft/linalg/reduce.cuh
@@ -162,7 +162,7 @@ void reduce(raft::resources const& handle,
/** @} */ // end of group reduction
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
index 6eaf1e2ba7..5ed0fb7407 100644
--- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
@@ -113,7 +113,7 @@ void reduce_cols_by_key(
/** @} */ // end of group reduce_cols_by_key
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
index fa624b2191..7d93c3946f 100644
--- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
@@ -192,7 +192,7 @@ void reduce_rows_by_key(
/** @} */ // end of group reduce_rows_by_key
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh
index 2dece5b957..163f360481 100644
--- a/cpp/include/raft/linalg/rsvd.cuh
+++ b/cpp/include/raft/linalg/rsvd.cuh
@@ -876,7 +876,7 @@ void randomized_svd(const raft::resources& handle,
/** @} */ // end of group rsvd
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh
index 99754c4eb2..81b7ab7dec 100644
--- a/cpp/include/raft/linalg/sqrt.cuh
+++ b/cpp/include/raft/linalg/sqrt.cuh
@@ -84,7 +84,7 @@ void sqrt(raft::resources const& handle, InType in, OutType out)
/** @} */ // end of group add
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh
index f971d0e40b..c7ff000e00 100644
--- a/cpp/include/raft/linalg/strided_reduction.cuh
+++ b/cpp/include/raft/linalg/strided_reduction.cuh
@@ -171,7 +171,7 @@ void strided_reduction(raft::resources const& handle,
/** @} */ // end of group strided_reduction
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh
index 688e60a806..f4243f9dc1 100644
--- a/cpp/include/raft/linalg/subtract.cuh
+++ b/cpp/include/raft/linalg/subtract.cuh
@@ -223,7 +223,7 @@ void subtract_scalar(
/** @} */ // end of group subtract
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh
index 08f9462ba9..f7071de75b 100644
--- a/cpp/include/raft/linalg/svd.cuh
+++ b/cpp/include/raft/linalg/svd.cuh
@@ -416,7 +416,7 @@ void svd_reconstruction(raft::resources const& handle,
/** @} */ // end of group svd
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh
index f46133abd9..67b04c6791 100644
--- a/cpp/include/raft/linalg/ternary_op.cuh
+++ b/cpp/include/raft/linalg/ternary_op.cuh
@@ -83,7 +83,7 @@ void ternary_op(
/** @} */ // end of group ternary_op
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index afe1962223..1b46082fbe 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -103,7 +103,7 @@ auto transpose(raft::resources const& handle,
/** @} */ // end of group transpose
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh
index 47a432f415..5ebe27923a 100644
--- a/cpp/include/raft/linalg/unary_op.cuh
+++ b/cpp/include/raft/linalg/unary_op.cuh
@@ -125,7 +125,7 @@ void write_only_unary_op(const raft::resources& handle, OutType out, Lambda op)
/** @} */ // end of group unary_op
-}; // end namespace linalg
-}; // end namespace raft
+}; // end namespace linalg
+}; // end namespace raft
#endif
diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh
index 887741ad71..c94b2506d3 100644
--- a/cpp/include/raft/matrix/col_wise_sort.cuh
+++ b/cpp/include/raft/matrix/col_wise_sort.cuh
@@ -134,6 +134,6 @@ void sort_cols_per_row(Args... args)
/** @} */ // end of group col_wise_sort
-}; // end namespace raft::matrix
+}; // end namespace raft::matrix
#endif
\ No newline at end of file
diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh
index f934d7e3b4..870f0c3240 100644
--- a/cpp/include/raft/matrix/detail/select_k-ext.cuh
+++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh
@@ -16,8 +16,8 @@
#pragma once
-#include <cstdint>     // uint32_t
-#include <cuda_fp16.h> // __half
+#include <cstdint>      // uint32_t
+#include <cuda_fp16.h>  // __half
#include
#include <raft/util/raft_explicit.hpp>  // RAFT_EXPLICIT
#include <rmm/cuda_stream_view.hpp>     // rmm::cuda_stream_view
diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh
index dc86a04733..2927604e7d 100644
--- a/cpp/include/raft/matrix/detail/select_warpsort.cuh
+++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh
@@ -959,7 +959,7 @@ void calc_launch_parameter(
if (batch_size >= size_t(another_min_grid_size) // still have enough work
&& another_block_size < block_size // protect against an infinite loop
&& another_min_grid_size * another_block_size >
- min_grid_size * block_size // improve occupancy
+ min_grid_size * block_size // improve occupancy
) {
block_size = another_block_size;
min_grid_size = another_min_grid_size;
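// Editor's sketch (not part of the patch): the candidate launch shapes compared above
// come from the CUDA occupancy API; the kernel below is a stand-in for the warp-sort
// kernel used by calc_launch_parameter.
__global__ void dummy_kernel() {}

inline void occupancy_probe()
{
  int min_grid_size = 0;
  int block_size    = 0;
  // Suggests the block size that maximizes occupancy; the heuristic above retries with
  // smaller blocks and keeps a candidate only if min_grid_size * block_size grows.
  cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dummy_kernel, 0);
}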
diff --git a/cpp/include/raft/neighbors/ann_types.hpp b/cpp/include/raft/neighbors/ann_types.hpp
index 469d3c09d4..c17be4a8ff 100644
--- a/cpp/include/raft/neighbors/ann_types.hpp
+++ b/cpp/include/raft/neighbors/ann_types.hpp
@@ -49,4 +49,4 @@ struct search_params {};
/** @} */ // end group ann_types
-}; // namespace raft::neighbors::ann
+}; // namespace raft::neighbors::ann
diff --git a/cpp/include/raft/neighbors/brute_force-ext.cuh b/cpp/include/raft/neighbors/brute_force-ext.cuh
index 862db75866..b8c00616da 100644
--- a/cpp/include/raft/neighbors/brute_force-ext.cuh
+++ b/cpp/include/raft/neighbors/brute_force-ext.cuh
@@ -22,7 +22,8 @@
#include <raft/core/operators.hpp>           // raft::identity_op
#include <raft/core/resources.hpp>           // raft::resources
#include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
-#include <raft/util/raft_explicit.hpp> // RAFT_EXPLICIT
+#include <raft/neighbors/brute_force_types.hpp>
+#include <raft/util/raft_explicit.hpp>  // RAFT_EXPLICIT
#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
@@ -38,6 +39,19 @@ inline void knn_merge_parts(
size_t n_samples,
std::optional> translations = std::nullopt) RAFT_EXPLICIT;
+template <typename T, typename Accessor>
+index<T> build(raft::resources const& res,
+ mdspan<const T, matrix_extent<int64_t>, row_major, Accessor> dataset,
+ raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded,
+ T metric_arg = 0.0) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+void search(raft::resources const& res,
+ const index<T>& idx,
+ raft::device_matrix_view<const T, IdxT, row_major> queries,
+ raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+ raft::device_matrix_view<T, IdxT, row_major> distances) RAFT_EXPLICIT;
+
+namespace raft::neighbors::brute_force {
+extern template void search<float, int>(
+ raft::resources const& res,
+ const raft::neighbors::brute_force::index<float>& idx,
+ raft::device_matrix_view<const float, int, row_major> queries,
+ raft::device_matrix_view<int, int, row_major> neighbors,
+ raft::device_matrix_view<float, int, row_major> distances);
+
+extern template void search<float, int64_t>(
+ raft::resources const& res,
+ const raft::neighbors::brute_force::index<float>& idx,
+ raft::device_matrix_view<const float, int64_t, row_major> queries,
+ raft::device_matrix_view<int64_t, int64_t, row_major> neighbors,
+ raft::device_matrix_view<float, int64_t, row_major> distances);
+
+extern template raft::neighbors::brute_force::index<float> build(
+ raft::resources const& res,
+ raft::device_matrix_view<const float, int64_t, row_major> dataset,
+ raft::distance::DistanceType metric,
+ float metric_arg);
+} // namespace raft::neighbors::brute_force
+
#define instantiate_raft_neighbors_brute_force_fused_l2_knn( \
value_t, idx_t, idx_layout, query_layout) \
extern template void raft::neighbors::brute_force::fused_l2_knn( \
diff --git a/cpp/include/raft/neighbors/brute_force-inl.cuh b/cpp/include/raft/neighbors/brute_force-inl.cuh
index bc9e09e5b0..88439a738b 100644
--- a/cpp/include/raft/neighbors/brute_force-inl.cuh
+++ b/cpp/include/raft/neighbors/brute_force-inl.cuh
@@ -19,6 +19,7 @@
#include
#include
#include
+#include <raft/neighbors/brute_force_types.hpp>
#include
#include
@@ -280,6 +281,101 @@ void fused_l2_knn(raft::resources const& handle,
metric);
}
-/** @} */ // end group brute_force_knn
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * @tparam T data element type
+ *
+ * @param[in] res
+ * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
+ * @param[in] metric: distance metric to use. Euclidean (L2) is used by default
+ * @param[in] metric_arg: the value of `p` for Minkowski (l-p) distances. This
+ * is ignored if the metric is not Minkowski.
+ *
+ * @return the constructed brute force index
+ */
+template <typename T, typename Accessor>
+index<T> build(raft::resources const& res,
+ mdspan<const T, matrix_extent<int64_t>, row_major, Accessor> dataset,
+ raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded,
+ T metric_arg = 0.0)
+{
+ // certain distance metrics can benefit from pre-computing the norms for the index dataset,
+ // which lets us avoid calculating them at query time
+ std::optional<device_vector<T, int64_t>> norms;
+ if (metric == raft::distance::DistanceType::L2Expanded ||
+ metric == raft::distance::DistanceType::L2SqrtExpanded ||
+ metric == raft::distance::DistanceType::CosineExpanded) {
+ norms = make_device_vector<T, int64_t>(res, dataset.extent(0));
+ // cosine needs the L2 norm, whereas the L2 distances need the squared norm
+ if (metric == raft::distance::DistanceType::CosineExpanded) {
+ raft::linalg::norm(res,
+ dataset,
+ norms->view(),
+ raft::linalg::NormType::L2Norm,
+ raft::linalg::Apply::ALONG_ROWS,
+ raft::sqrt_op{});
+ } else {
+ raft::linalg::norm(res,
+ dataset,
+ norms->view(),
+ raft::linalg::NormType::L2Norm,
+ raft::linalg::Apply::ALONG_ROWS);
+ }
+ }
+
+ return index<T>(res, dataset, std::move(norms), metric, metric_arg);
+}
+/**
+ * @brief Brute Force search using the constructed index.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] res raft resources
+ * @param[in] idx brute force index
+ * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()]
+ * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset
+ * [n_queries, k]
+ * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
+ * k]
+ */
+template <typename T, typename IdxT>
+void search(raft::resources const& res,
+ const index<T>& idx,
+ raft::device_matrix_view<const T, IdxT, row_major> queries,
+ raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+ raft::device_matrix_view<T, IdxT, row_major> distances)
+{
+ RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), "Value of k must match for outputs");
+ RAFT_EXPECTS(idx.dataset().extent(1) == queries.extent(1),
+ "Number of columns in queries must match brute force index");
+
+ auto k = neighbors.extent(1);
+ auto d = idx.dataset().extent(1);
+
+ std::vector<T*> dataset = {const_cast<T*>(idx.dataset().data_handle())};
+ std::vector<IdxT> sizes = {IdxT(idx.dataset().extent(0))};
+ std::vector<T*> norms;
+ if (idx.has_norms()) { norms.push_back(const_cast<T*>(idx.norms().data_handle())); }
+
+ detail::brute_force_knn_impl(res,
+ dataset,
+ sizes,
+ d,
+ const_cast<T*>(queries.data_handle()),
+ queries.extent(0),
+ neighbors.data_handle(),
+ distances.data_handle(),
+ k,
+ true,
+ true,
+ nullptr,
+ idx.metric(),
+ idx.metric_arg(),
+ raft::identity_op(),
+ norms.size() ? &norms : nullptr);
+}
+/** @} */ // end group brute_force_knn
} // namespace raft::neighbors::brute_force
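// Editor's sketch (not part of the patch): end-to-end use of the build/search pair added
// above, assuming float data already resident on the device; extents are illustrative.
#include <raft/core/device_mdarray.hpp>
#include <raft/neighbors/brute_force.cuh>

void brute_force_usage(raft::resources const& res,
                       raft::device_matrix_view<const float, int64_t, raft::row_major> dataset,
                       raft::device_matrix_view<const float, int64_t, raft::row_major> queries)
{
  namespace bf = raft::neighbors::brute_force;

  // build() precomputes row norms when the metric benefits from them (expanded L2, cosine).
  auto idx = bf::build(res, dataset, raft::distance::DistanceType::L2Expanded);

  int64_t k      = 10;
  auto neighbors = raft::make_device_matrix<int64_t, int64_t>(res, queries.extent(0), k);
  auto distances = raft::make_device_matrix<float, int64_t>(res, queries.extent(0), k);
  bf::search(res, idx, queries, neighbors.view(), distances.view());
}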
diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp
new file mode 100644
index 0000000000..cc934b7a98
--- /dev/null
+++ b/cpp/include/raft/neighbors/brute_force_types.hpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "ann_types.hpp"
+#include <raft/core/device_mdarray.hpp>
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/error.hpp>
+#include <raft/core/host_mdarray.hpp>
+#include <raft/core/mdspan_types.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/distance/distance_types.hpp>
+
+#include <optional>
+
+namespace raft::neighbors::brute_force {
+/**
+ * @addtogroup brute_force
+ * @{
+ */
+
+/**
+ * @brief Brute Force index.
+ *
+ * The index stores the dataset and norms for the dataset in device memory.
+ *
+ * @tparam T data element type
+ */
+template <typename T>