Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add nn_descent #325

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
108 changes: 108 additions & 0 deletions src/rapids_singlecell/preprocessing/_kernels/_nn_descent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from __future__ import annotations

import cupy as cp

kernel_code_cos = r"""
extern "C" __global__
void computeDistances_Cosine(const float* pca,
float* out,
const unsigned int* pairs,
const long long int n_samples,
Comment on lines +9 to +10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const unsigned int* pairs,
const long long int n_samples,
const std::uint32_t* pairs,
const std::uint64_t n_samples,

and so on

const long long int n_pcs,
const long long int n_neighbors)
{
long long int i1 = blockDim.x * blockIdx.x + threadIdx.x;
if(i1 >= n_samples){
return;
}

float sum_i1 = 0.0f;
for (long long int d = 0; d < n_pcs; d++) {
sum_i1 += pca[i1 * n_pcs + d] * pca[i1 * n_pcs + d];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is https://docs.nvidia.com/cuda/cuda-math-api/cuda_math_api/group__CUDA__MATH__SINGLE.html?highlight=pow#_CPPv44powfff not feasible? Seems like two accesses to the same thing is not great, but would be very curious to find out if it actually is fine (i.e., how cheap this is).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same thing below re: data access.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the reason everyone does this is because there are too different functions for float and double. So its just easier to do x*x

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but you've declared the type above, no?

}
for (long long int j = 0; j < n_neighbors; j++){
long long int i2 = static_cast<long long>(pairs[i1 * n_neighbors + j]);
float dist = 0.0f;

float sum_i2 = 0.0f;
for (long long int d = 0; d < n_pcs; d++) {
dist += pca[i1 * n_pcs + d] * pca[i2 * n_pcs + d];
sum_i2 += pca[i2 * n_pcs + d] * pca[i2 * n_pcs + d];
}
out[i1 * n_neighbors + j] = 1-dist/ (sqrtf(sum_i1) * sqrtf(sum_i2));
}

}
"""

# Create the RawKernel object
calc_distance_kernel_cos = cp.RawKernel(
code=kernel_code_cos,
name="computeDistances_Cosine",
)

kernel_code = r"""
extern "C" __global__
void computeDistances(const float* pca,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this only work with pca? should it be called something else? it looks like just normal euclidean distance

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(this applies everywhere about argument names being pca etc.)

float* out,
const unsigned int* pairs,
const long long int n_samples,
const long long int n_pcs,
const long long int n_neighbors)
{
long long int i1 = blockDim.x * blockIdx.x + threadIdx.x;
if(i1 >= n_samples){
return;
}
for (long long int j = 0; j < n_neighbors; j++){
long long int i2 = static_cast<long long>(pairs[i1 * n_neighbors + j]);
float dist = 0.0f;
for (long long int d = 0; d < n_pcs; d++) {
float diff = pca[i1 * n_pcs + d] - pca[i2 * n_pcs + d];
dist += diff * diff;
}
out[i1 * n_neighbors + j] = dist;
}
}
"""

# Create the RawKernel object
calc_distance_kernel = cp.RawKernel(
code=kernel_code,
name="computeDistances",
)

kernel_code_inner = r"""
extern "C" __global__
void computeDistances_inner(const float* pca,
float* out,
const unsigned int* pairs,
const long long int n_samples,
const long long int n_pcs,
const long long int n_neighbors)
{
long long int i1 = blockDim.x * blockIdx.x + threadIdx.x;
if(i1 >= n_samples){
return;
}


for (long long int j = 0; j < n_neighbors; j++){
long long int i2 = static_cast<long long>(pairs[i1 * n_neighbors + j]);
float dist = 0.0f;

for (long long int d = 0; d < n_pcs; d++) {
dist += pca[i1 * n_pcs + d] * pca[i2 * n_pcs + d];

}
out[i1 * n_neighbors + j] = dist;
}

}
"""

# Create the RawKernel object
Copy link
Member

@flying-sheep flying-sheep Feb 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove these comments, they are obvious. As said: Comments should contain the “why”, the code should contain the “what”. ... = cp.RawKernel(...) already says “create the RawKernel object” loudly.

calc_distance_kernel_inner = cp.RawKernel(
code=kernel_code_inner,
name="computeDistances_inner", # name must match function name in code
)
120 changes: 92 additions & 28 deletions src/rapids_singlecell/preprocessing/_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import math
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any, Literal, get_args

import cupy as cp
import numpy as np
Expand All @@ -21,7 +21,9 @@
from anndata import AnnData

AnyRandom = None | int | np.random.RandomState
_Alogithms = Literal["brute", "ivfflat", "ivfpq", "cagra"]

_Algorithms_bbknn = Literal["brute", "cagra", "ivfflat", "ivfpq"]
_Algorithms = Literal["brute", "cagra", "ivfflat", "ivfpq", "nn_descent"]
_MetricsDense = Literal[
"l2",
"chebyshev",
Expand Down Expand Up @@ -107,34 +109,28 @@ def _cagra_knn(
build_kwargs = {}
search_kwargs = {}

build_params = cagra.IndexParams(metric="sqeuclidean", build_algo="nn_descent")
if metric == "euclidean":
metric_to_use = "sqeuclidean"
else:
metric_to_use = metric
build_params = cagra.IndexParams(
metric=metric_to_use, graph_degree=k, build_algo="nn_descent"
)
index = cagra.build(build_params, X, **build_kwargs)

n_samples = Y.shape[0]
all_neighbors = cp.zeros((n_samples, k), dtype=cp.int32)
all_distances = cp.zeros((n_samples, k), dtype=cp.float32)

batchsize = 65000
n_batches = math.ceil(n_samples / batchsize)
for batch in range(n_batches):
start_idx = batch * batchsize
stop_idx = min((batch + 1) * batchsize, n_samples)
batch_Y = Y[start_idx:stop_idx, :]

search_params = cagra.SearchParams()
distances, neighbors = cagra.search(
search_params, index, batch_Y, k, **search_kwargs
)
all_neighbors[start_idx:stop_idx, :] = cp.asarray(neighbors)
all_distances[start_idx:stop_idx, :] = cp.asarray(distances)
search_params = cagra.SearchParams()
distances, neighbors = cagra.search(search_params, index, Y, k, **search_kwargs)

if resources is not None:
resources.sync()

neighbors = cp.asarray(neighbors)
distances = cp.asarray(distances)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved

if metric == "euclidean":
all_distances = cp.sqrt(all_distances)
distances = cp.sqrt(distances)

return all_neighbors, all_distances
return neighbors, distances


def _ivf_flat_knn(
Expand Down Expand Up @@ -200,17 +196,66 @@ def _ivf_pq_knn(
return neighbors, distances


def _nn_descent_knn(
X: cp.ndarray, Y: cp.ndarray, k: int, metric: _Metrics, metric_kwds: Mapping
) -> tuple[cp.ndarray, cp.ndarray]:
from cuvs import __version__ as cuvs_version

if parse_version(cuvs_version) <= parse_version("24.12"):
raise ValueError(
"The 'nn_descent' algorithm is only available in cuvs >= 25.02. "
"Please update your cuvs installation."
)
from cuvs.neighbors import nn_descent

idxparams = nn_descent.IndexParams(
graph_degree=k, metric="sqeuclidean" if metric == "euclidean" else metric
)
idx = nn_descent.build(
idxparams,
dataset=X,
)
neighbors = cp.array(idx.graph).astype(cp.uint32)
if metric == "euclidean":
from ._kernels._nn_descent import calc_distance_kernel as dist_func
elif metric == "cosine":
from ._kernels._nn_descent import calc_distance_kernel_cos as dist_func
elif metric == "inner_product":
from ._kernels._nn_descent import calc_distance_kernel_inner as dist_func
grid_size = (X.shape[0] + 32 - 1) // 32
distances = cp.zeros((X.shape[0], neighbors.shape[1]), dtype=cp.float32)

dist_func(
(grid_size,),
(32,),
(X, distances, neighbors, X.shape[0], X.shape[1], neighbors.shape[1]),
)
if metric == "euclidean":
distances = cp.sqrt(distances)
if metric in ("cosine", "euclidean", "sqeuclidean"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can't be sqeuclidean?

Suggested change
if metric in ("cosine", "euclidean", "sqeuclidean"):
if metric in ("cosine", "euclidean"):

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confused logic? How is the possible? see above?

# Add self-neighbors and self-distances for distance metrics.
# This is not needed for inner_product, as it is a similarity metric.
add_self_neighbors = cp.arange(X.shape[0], dtype=cp.uint32)
neighbors = cp.concatenate(
(add_self_neighbors[:, None], neighbors[:, :-1]), axis=1
)
add_self_distances = cp.zeros((X.shape[0], 1), dtype=cp.float32)
distances = cp.concatenate((add_self_distances, distances[:, :-1]), axis=1)
Comment on lines +238 to +243
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment here explaining why you add self.

A valid comment would e.g. be something like

API xyz expects the kNN indices to contain a self column. The real distance metrics don‘t add this column since all self-distances are 0 in distance metrics, but inner_product, which isn’t an actual distance metric as it has non-0 values in the self column, does add it.

return neighbors, distances


KNN_ALGORITHMS = {
"brute": _brute_knn,
"cagra": _cagra_knn,
"ivfflat": _ivf_flat_knn,
"ivfpq": _ivf_pq_knn,
"nn_descent": _nn_descent_knn,
}


def _check_neighbors_X(
X: cp_sparse.spmatrix | sc_sparse.spmatrix | np.ndarray | cp.ndarray,
algorithm: _Alogithms,
algorithm: _Algorithms,
) -> cp_sparse.spmatrix | cp.ndarray:
"""Check and convert input X to the expected format based on algorithm.

Expand Down Expand Up @@ -244,7 +289,7 @@ def _check_neighbors_X(
return X_contiguous


def _check_metrics(algorithm: _Alogithms, metric: _Metrics) -> bool:
def _check_metrics(algorithm: _Algorithms, metric: _Metrics) -> bool:
"""Check if the provided metric is compatible with the chosen algorithm.

Parameters
Expand All @@ -261,15 +306,20 @@ def _check_metrics(algorithm: _Alogithms, metric: _Metrics) -> bool:
# 'brute' support all metrics, no need to check further.
return True
elif algorithm == "cagra":
if metric not in ["euclidean", "sqeuclidean"]:
if metric not in ["euclidean", "sqeuclidean", "inner_product"]:
raise ValueError(
"cagra only supports 'euclidean' and 'sqeuclidean' metrics."
"cagra only supports 'euclidean', 'inner_product' and 'sqeuclidean' metrics."
)
elif algorithm in ["ivfpq", "ivfflat"]:
if metric not in ["euclidean", "sqeuclidean", "inner_product"]:
raise ValueError(
f"{algorithm} only supports 'euclidean', 'sqeuclidean', and 'inner_product' metrics."
)
elif algorithm == "nn_descent":
if metric not in ["euclidean", "sqeuclidean", "cosine", "inner_product"]:
raise ValueError(
"nn_descent only supports 'euclidean', 'sqeuclidean', 'inner_product' and 'cosine' metrics."
)
else:
raise NotImplementedError(f"The {algorithm} algorithm is not implemented yet.")

Expand Down Expand Up @@ -335,7 +385,7 @@ def neighbors(
*,
use_rep: str | None = None,
random_state: AnyRandom = 0,
algorithm: _Alogithms = "brute",
algorithm: _Algorithms = "brute",
metric: _Metrics = "euclidean",
metric_kwds: Mapping[str, Any] = MappingProxyType({}),
key_added: str | None = None,
Expand Down Expand Up @@ -376,6 +426,8 @@ def neighbors(

* 'cagra': Employs the Compressed, Accurate Graph-based search to quickly find nearest neighbors by traversing a graph structure.

* 'nn_descent': Uses the NN-descent algorithm to approximate the k-nearest neighbors.

Please ensure that the chosen algorithm is compatible with your dataset and the specific requirements of your search problem.
metric
A known metric's name or a callable that returns a distance.
Expand Down Expand Up @@ -407,6 +459,13 @@ def neighbors(

"""
adata = adata.copy() if copy else adata

if algorithm not in get_args(_Algorithms):
raise ValueError(
f"Invalid algorithm '{algorithm}' for KNN. "
f"Valid options are: {get_args(_Algorithms)}."
)

if adata.is_view:
adata._init_as_actual(adata.copy())
X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
Expand Down Expand Up @@ -477,7 +536,7 @@ def bbknn(
batch_key: str | None = None,
use_rep: str | None = None,
random_state: AnyRandom = 0,
algorithm: _Alogithms = "brute",
algorithm: _Algorithms_bbknn = "brute",
metric: _Metrics = "euclidean",
metric_kwds: Mapping[str, Any] = MappingProxyType({}),
trim: int | None = None,
Expand Down Expand Up @@ -564,6 +623,11 @@ def bbknn(
if batch_key not in adata.obs:
raise ValueError(f"Batch key '{batch_key}' not present in `adata.obs`.")

if algorithm not in get_args(_Algorithms_bbknn):
raise ValueError(
f"Invalid algorithm '{algorithm}' for batch-balanced KNN. "
f"Valid options are: {get_args(_Algorithms_bbknn)}."
)
adata = adata.copy() if copy else adata
if adata.is_view:
adata._init_as_actual(adata.copy())
Expand Down
2 changes: 1 addition & 1 deletion tests/test_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
]


@pytest.mark.parametrize("algo", ["brute", "cagra", "ivfflat"])
@pytest.mark.parametrize("algo", ["brute", "cagra", "ivfflat", "nn_descent"])
def test_umap_connectivities_euclidean(algo):
adata = AnnData(X=X)
neighbors(adata, n_neighbors=3, algorithm=algo)
Expand Down
Loading