diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index e4169f6f8..2df090126 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -16,6 +16,7 @@ #pragma once #include "cuvs_cagra_wrapper.h" +#include #include #include @@ -85,13 +86,21 @@ class cuvs_cagra_hnswlib : public algo, public algo_gpu { template void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) { + auto start_time = std::chrono::high_resolution_clock::now(); cagra_build_.build(dataset, nrow); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + std::cout << "CAGRA build time: " << duration.count() << " ms" << std::endl; auto* cagra_index = cagra_build_.get_index(); auto host_dataset_view = raft::make_host_matrix_view(dataset, nrow, this->dim_); auto opt_dataset_view = std::optional>(std::move(host_dataset_view)); + start_time = std::chrono::high_resolution_clock::now(); hnsw_index_ = cuvs::neighbors::hnsw::from_cagra( handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view); + end_time = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(end_time - start_time); + std::cout << "HNSW conversion time: " << duration.count() << " ms" << std::endl; } template diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h index b7eda54b8..fd36c16b4 100644 --- a/cpp/include/cuvs/neighbors/hnsw.h +++ b/cpp/include/cuvs/neighbors/hnsw.h @@ -51,9 +51,8 @@ struct cuvsHnswIndexParams { /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/ int ef_construction; /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` - NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive - to parallelism, and increasing the number of threads can reduce 
the quality of the index. - */ + When the value is 0, the number of threads is automatically set to the maximum + number of threads available. */ int num_threads; }; @@ -65,389 +64,389 @@ typedef struct cuvsHnswIndexParams* cuvsHnswIndexParams_t; * @param[in] params cuvsHnswIndexParams_t to allocate * @return cuvsError_t */ -cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params); - -/** - * @brief De-allocate HNSW Index params - * - * @param[in] params - * @return cuvsError_t - */ -cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params); - -/** - * @} - */ + cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params); -/** - * @defgroup hnsw_c_index C API for hnswlib wrapper index - * @{ - */ - -/** - * @brief Struct to hold address of cuvs::neighbors::Hnsw::index and its active trained dtype - * - */ -typedef struct { - uintptr_t addr; - DLDataType dtype; + /** + * @brief De-allocate HNSW Index params + * + * @param[in] params + * @return cuvsError_t + */ + cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params); -} cuvsHnswIndex; + /** + * @} + */ -typedef cuvsHnswIndex* cuvsHnswIndex_t; + /** + * @defgroup hnsw_c_index C API for hnswlib wrapper index + * @{ + */ -/** - * @brief Allocate HNSW index - * - * @param[in] index cuvsHnswIndex_t to allocate - * @return HnswError_t - */ -cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index); + /** + * @brief Struct to hold address of cuvs::neighbors::Hnsw::index and its active trained dtype + * + */ + typedef struct { + uintptr_t addr; + DLDataType dtype; -/** - * @brief De-allocate HNSW index - * - * @param[in] index cuvsHnswIndex_t to de-allocate - */ -cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); + } cuvsHnswIndex; -/** - * @} - */ + typedef cuvsHnswIndex* cuvsHnswIndex_t; -/** - * @defgroup hnsw_c_extend_params Parameters for extending HNSW index - * @{ - */ + /** + * @brief Allocate HNSW index + * + * @param[in] index cuvsHnswIndex_t to allocate + *
@return HnswError_t + */ + cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index); -struct cuvsHnswExtendParams { - /** Number of CPU threads used to extend additional vectors */ - int num_threads; -}; + /** + * @brief De-allocate HNSW index + * + * @param[in] index cuvsHnswIndex_t to de-allocate + */ + cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); -typedef struct cuvsHnswExtendParams* cuvsHnswExtendParams_t; + /** + * @} + */ -/** - * @brief Allocate HNSW extend params, and populate with default values - * - * @param[in] params cuvsHnswExtendParams_t to allocate - * @return cuvsError_t - */ -cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params); + /** + * @defgroup hnsw_c_extend_params Parameters for extending HNSW index + * @{ + */ -/** - * @brief De-allocate HNSW extend params - * - * @param[in] params cuvsHnswExtendParams_t to de-allocate - * @return cuvsError_t - */ + struct cuvsHnswExtendParams { + /** Number of CPU threads used to extend additional vectors */ + int num_threads; + }; -cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params); + typedef struct cuvsHnswExtendParams* cuvsHnswExtendParams_t; -/** - * @} - */ + /** + * @brief Allocate HNSW extend params, and populate with default values + * + * @param[in] params cuvsHnswExtendParams_t to allocate + * @return cuvsError_t + */ + cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params); -/** - * @defgroup hnsw_c_index_load Load CAGRA index as hnswlib index - * @{ - */ + /** + * @brief De-allocate HNSW extend params + * + * @param[in] params cuvsHnswExtendParams_t to de-allocate + * @return cuvsError_t + */ -/** - * @brief Convert a CAGRA Index to an HNSW index. - * NOTE: When hierarchy is: - * 1. `NONE`: This method uses the filesystem to write the CAGRA index in - * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary - * file. 
The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as - * the format is not compatible with the original hnswlib. - * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The - * serialized index is also compatible with the original hnswlib library. - * - * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index - * @param[in] cagra_index cuvsCagraIndex_t to convert to HNSW index - * @param[out] hnsw_index cuvsHnswIndex_t to return the HNSW index - * - * @return cuvsError_t - * - * @code{.c} - * #include - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // create a CAGRA index with `cuvsCagraBuild` - * - * // Convert the CAGRA index to an HNSW index - * cuvsHnswIndex_t hnsw_index; - * cuvsHnswIndexCreate(&hnsw_index); - * cuvsHnswIndexParams_t hnsw_params; - * cuvsHnswIndexParamsCreate(&hnsw_params); - * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); - * - * // de-allocate `hnsw_params`, `hnsw_index` and `res` - * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); - * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); - * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); - * @endcode - */ -cuvsError_t cuvsHnswFromCagra(cuvsResources_t res, - cuvsHnswIndexParams_t params, - cuvsCagraIndex_t cagra_index, - cuvsHnswIndex_t hnsw_index); + cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params); -/** - * @} - */ + /** + * @} + */ -/** - * @defgroup hnsw_c_index_extend Extend HNSW index with additional vectors - * @{ - */ + /** + * @defgroup hnsw_c_index_load Load CAGRA index as hnswlib index + * @{ + */ -/** - * @brief Add new vectors to an HNSW index - * NOTE: The HNSW index can only be extended when the hierarchy is `CPU` - * when converting 
from a CAGRA index. - - * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cuvsHnswExtendParams_t used to extend Hnsw index - * @param[in] additional_dataset DLManagedTensor* additional dataset to extend the index - * @param[inout] index cuvsHnswIndex_t to extend - * - * @return cuvsError_t - * - * @code{.c} - * #include - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // create an index with `cuvsCagraBuild` - * - * // Convert the CAGRA index to an HNSW index - * cuvsHnswIndex_t hnsw_index; - * cuvsHnswIndexCreate(&hnsw_index); - * cuvsHnswIndexParams_t hnsw_params; - * cuvsHnswIndexParamsCreate(&hnsw_params); - * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); - * - * // Extend the HNSW index with additional vectors - * DLManagedTensor additional_dataset; - * cuvsHnswExtendParams_t extend_params; - * cuvsHnswExtendParamsCreate(&extend_params); - * cuvsHnswExtend(res, extend_params, additional_dataset, hnsw_index); - * - * // de-allocate `hnsw_params`, `hnsw_index`, `extend_params` and `res` - * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); - * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); - * cuvsError_t extend_params_destroy_status = cuvsHnswExtendParamsDestroy(extend_params); - * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); - * @endcode - */ - -cuvsError_t cuvsHnswExtend(cuvsResources_t res, - cuvsHnswExtendParams_t params, - DLManagedTensor* additional_dataset, - cuvsHnswIndex_t index); + /** + * @brief Convert a CAGRA Index to an HNSW index. + * NOTE: When hierarchy is: + * 1. `NONE`: This method uses the filesystem to write the CAGRA index in + * `/tmp/.bin` before reading it as an hnswlib index, then deleting the temporary + * file. 
The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, + * as the format is not compatible with the original hnswlib. + * 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The + * serialized index is also compatible with the original hnswlib library. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index + * @param[in] cagra_index cuvsCagraIndex_t to convert to HNSW index + * @param[out] hnsw_index cuvsHnswIndex_t to return the HNSW index + * + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create a CAGRA index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ + cuvsError_t cuvsHnswFromCagra(cuvsResources_t res, + cuvsHnswIndexParams_t params, + cuvsCagraIndex_t cagra_index, + cuvsHnswIndex_t hnsw_index); -/** - * @} - */ + /** + * @} + */ -/** - * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params - * @{ - */ + /** + * @defgroup hnsw_c_index_extend Extend HNSW index with additional vectors + * @{ + */ -struct cuvsHnswSearchParams { - int32_t ef; - int32_t num_threads; -}; + /** + * @brief Add new vectors to an HNSW index + * NOTE: The HNSW index can only be extended when the hierarchy is `CPU` + * when 
converting from a CAGRA index. + + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswExtendParams_t used to extend Hnsw index + * @param[in] additional_dataset DLManagedTensor* additional dataset to extend the index + * @param[inout] index cuvsHnswIndex_t to extend + * + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // Extend the HNSW index with additional vectors + * DLManagedTensor additional_dataset; + * cuvsHnswExtendParams_t extend_params; + * cuvsHnswExtendParamsCreate(&extend_params); + * cuvsHnswExtend(res, extend_params, additional_dataset, hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index`, `extend_params` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t extend_params_destroy_status = cuvsHnswExtendParamsDestroy(extend_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ + + cuvsError_t cuvsHnswExtend(cuvsResources_t res, + cuvsHnswExtendParams_t params, + DLManagedTensor* additional_dataset, + cuvsHnswIndex_t index); + + /** + * @} + */ -typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t; + /** + * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params + * @{ + */ -/** - * @brief Allocate HNSW search params, and populate with default values - * - * @param[in] params cuvsHnswSearchParams_t to allocate - * @return cuvsError_t - */ 
-cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params); + struct cuvsHnswSearchParams { + int32_t ef; + int32_t num_threads; + }; -/** - * @brief De-allocate HNSW search params - * - * @param[in] params cuvsHnswSearchParams_t to de-allocate - * @return cuvsError_t - */ -cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params); + typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t; -/** - * @} - */ + /** + * @brief Allocate HNSW search params, and populate with default values + * + * @param[in] params cuvsHnswSearchParams_t to allocate + * @return cuvsError_t + */ + cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params); -/** - * @defgroup hnsw_c_index_search C API for CUDA ANN Graph-based nearest neighbor search - * @{ - */ -/** - * @brief Search a HNSW index with a `DLManagedTensor` which has underlying - * `DLDeviceType` equal to `kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged`. - * It is also important to note that the HNSW Index must have been built - * with the same type of `queries`, such that `index.dtype.code == - * queries.dl_tensor.dtype.code` - * Supported types for input are: - * 1. `queries`: - * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` - * b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` - * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` - * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` - * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` - * NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in - * cuVS, as the format is not compatible with the original hnswlib. 
- * - * @code {.c} - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // Assume a populated `DLManagedTensor` type here - * DLManagedTensor dataset; - * DLManagedTensor queries; - * DLManagedTensor neighbors; - * - * // Create default search params - * cuvsHnswSearchParams_t params; - * cuvsError_t params_create_status = cuvsHnswSearchParamsCreate(&params); - * - * // Search the `index` built using `cuvsHnswFromCagra` - * cuvsError_t search_status = cuvsHnswSearch(res, params, index, &queries, &neighbors, - * &distances); - * - * // de-allocate `params` and `res` - * cuvsError_t params_destroy_status = cuvsHnswSearchParamsDestroy(params); - * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); - * @endcode - * - * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cuvsHnswSearchParams_t used to search Hnsw index - * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswFromCagra` - * @param[in] queries DLManagedTensor* queries dataset to search - * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries - * @param[out] distances DLManagedTensor* output `k` distances for queries - */ -cuvsError_t cuvsHnswSearch(cuvsResources_t res, - cuvsHnswSearchParams_t params, - cuvsHnswIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances); + /** + * @brief De-allocate HNSW search params + * + * @param[in] params cuvsHnswSearchParams_t to de-allocate + * @return cuvsError_t + */ + cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params); -/** - * @} - */ + /** + * @} + */ -/** - * @defgroup hnsw_c_index_search C API for CUDA ANN Graph-based nearest neighbor search - * @{ - */ + /** + * @defgroup hnsw_c_index_search C API for CUDA ANN Graph-based nearest neighbor search + * @{ + */ + /** + * @brief Search a HNSW index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to
`kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged`. + * It is also important to note that the HNSW Index must have been built + * with the same type of `queries`, such that `index.dtype.code == + * queries.dl_tensor.dtype.code` + * Supported types for input are: + * 1. `queries`: + * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` + * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` + * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in + * cuVS, as the format is not compatible with the original hnswlib. + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * + * // Create default search params + * cuvsHnswSearchParams_t params; + * cuvsError_t params_create_status = cuvsHnswSearchParamsCreate(&params); + * + * // Search the `index` built using `cuvsHnswFromCagra` + * cuvsError_t search_status = cuvsHnswSearch(res, params, index, &queries, &neighbors, + * &distances); + * + * // de-allocate `params` and `res` + * cuvsError_t params_destroy_status = cuvsHnswSearchParamsDestroy(params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswSearchParams_t used to search Hnsw index + * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswFromCagra` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances
DLManagedTensor* output `k` distances for queries + */ + cuvsError_t cuvsHnswSearch(cuvsResources_t res, + cuvsHnswSearchParams_t params, + cuvsHnswIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); + + /** + * @} + */ -/** - * @brief Serialize a CAGRA index to a file as an hnswlib index - * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the - * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. - * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib - * library. - * - * @param[in] res cuvsResources_t opaque C handle - * @param[in] filename the name of the file to save the index - * @param[in] index cuvsHnswIndex_t to serialize - * @return cuvsError_t - * - * @code{.c} - * #include - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // create an index with `cuvsCagraBuild` - * - * // Convert the CAGRA index to an HNSW index - * cuvsHnswIndex_t hnsw_index; - * cuvsHnswIndexCreate(&hnsw_index); - * cuvsHnswIndexParams_t hnsw_params; - * cuvsHnswIndexParamsCreate(&hnsw_params); - * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); - * - * // Serialize the HNSW index - * cuvsHnswSerialize(res, "/path/to/index", hnsw_index); - * - * // de-allocate `hnsw_params`, `hnsw_index` and `res` - * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); - * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); - * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); - * @endcode - */ -cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index); + /** + * @defgroup hnsw_c_serialize HNSW C-API serialize functions + * @{ + */ -/** - * Load hnswlib index from file which was serialized from a 
HNSW index. - * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and only be read by the - * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. - * Experimental, both the API and the serialization format are subject to change. - * - * @code{.c} - * #include - * #include - * #include - * - * // Create cuvsResources_t - * cuvsResources_t res; - * cuvsError_t res_create_status = cuvsResourcesCreate(&res); - * - * // create an index with `cuvsCagraBuild` - * cuvsCagraSerializeHnswlib(res, "/path/to/index", index); - * - * // Load the serialized CAGRA index from file as an hnswlib index - * // The index should have the same dtype as the one used to build CAGRA the index - * cuvsHnswIndex_t hnsw_index; - * cuvsHnswIndexCreate(&hnsw_index); - * cuvsHnsWIndexParams_t hnsw_params; - * cuvsHnswIndexParamsCreate(&hnsw_params); - * hnsw_params->hierarchy = NONE; - * hnsw_index->dtype = index->dtype; - * cuvsHnswDeserialize(res, hnsw_params, "/path/to/index", dim, metric hnsw_index); - * @endcode - * - * @param[in] res cuvsResources_t opaque C handle - * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index - * @param[in] filename the name of the file that stores the index - * @param[in] dim the dimension of the vectors in the index - * @param[in] metric the distance metric used to build the index - * @param[out] index HNSW index loaded disk - */ -cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, - cuvsHnswIndexParams_t params, - const char* filename, - int dim, - cuvsDistanceType metric, - cuvsHnswIndex_t index); -/** - * @} - */ + /** + * @brief Serialize a CAGRA index to a file as an hnswlib index + * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by + * the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original + * hnswlib. 
However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the + * original hnswlib library. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file to save the index + * @param[in] index cuvsHnswIndex_t to serialize + * @return cuvsError_t + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * + * // Convert the CAGRA index to an HNSW index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index); + * + * // Serialize the HNSW index + * cuvsHnswSerialize(res, "/path/to/index", hnsw_index); + * + * // de-allocate `hnsw_params`, `hnsw_index` and `res` + * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params); + * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + */ + cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index); + + /** + * Load hnswlib index from file which was serialized from a HNSW index. + * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original + * hnswlib. Experimental, both the API and the serialization format are subject to change. 
+ * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * cuvsCagraSerializeHnswlib(res, "/path/to/index", index); + * + * // Load the serialized CAGRA index from file as an hnswlib index + * // The index should have the same dtype as the one used to build the CAGRA index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * cuvsHnswIndexParams_t hnsw_params; + * cuvsHnswIndexParamsCreate(&hnsw_params); + * hnsw_params->hierarchy = NONE; + * hnsw_index->dtype = index->dtype; + * cuvsHnswDeserialize(res, hnsw_params, "/path/to/index", dim, metric, hnsw_index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index + * @param[in] filename the name of the file that stores the index + * @param[in] dim the dimension of the vectors in the index + * @param[in] metric the distance metric used to build the index + * @param[out] index HNSW index loaded from disk + */ + cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + cuvsHnswIndexParams_t params, + const char* filename, + int dim, + cuvsDistanceType metric, + cuvsHnswIndex_t index); + /** + * @} + */ #ifdef __cplusplus } diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index f0b433d8e..db58641c8 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -54,10 +54,10 @@ struct index_params : cuvs::neighbors::index_params { /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/ int ef_construction = 200; /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` - NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive - to parallelism, and increasing the number of threads can reduce the quality of
the index. + When the value is 0, the number of threads is automatically determined to the + maximum number of threads available. */ - int num_threads = 2; + int num_threads = 0; }; /**@}*/ diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index e129d23e8..75b1c9ce6 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -21,64 +21,12 @@ #include #include #include +#include #include #include namespace cuvs::neighbors::hnsw::detail { -// Multithreaded executor -// The helper function is copied from the hnswlib repository -// as for some reason, adding vectors to the hnswlib index does not -// work well with omp parallel for -template -inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) -{ - if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } - - if (numThreads == 1) { - for (size_t id = start; id < end; id++) { - fn(id, 0); - } - } else { - std::vector threads; - std::atomic current(start); - - // keep track of exceptions in threads - // https://stackoverflow.com/a/32428427/1713196 - std::exception_ptr lastException = nullptr; - std::mutex lastExceptMutex; - - for (size_t threadId = 0; threadId < numThreads; ++threadId) { - threads.push_back(std::thread([&, threadId] { - while (true) { - size_t id = current.fetch_add(1); - - if (id >= end) { break; } - - try { - fn(id, threadId); - } catch (...) { - std::unique_lock lastExcepLock(lastExceptMutex); - lastException = std::current_exception(); - /* - * This will work even when current is the largest value that - * size_t can fit, because fetch_add returns the previous value - * before the increment (what will result in overflow - * and produce 0 instead of current + 1). 
- */ - current = end; - break; - } - } - })); - } - for (auto& thread : threads) { - thread.join(); - } - if (lastException) { std::rethrow_exception(lastException); } - } -} - template struct hnsw_dist_t { using type = void; @@ -217,10 +165,13 @@ std::enable_if_t>> fro cagra_index.graph().extent(1) / 2, params.ef_construction); appr_algo->base_layer_init = false; // tell hnswlib to build upper layers only - ParallelFor(0, host_dataset_view.extent(0), params.num_threads, [&](size_t i, size_t threadId) { + auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads; + std::cout << "num_threads: " << num_threads << std::endl; +#pragma omp parallel for num_threads(num_threads) + for (int64_t i = 0; i < host_dataset_view.extent(0); i++) { appr_algo->addPoint((void*)(host_dataset_view.data_handle() + i * host_dataset_view.extent(1)), i); - }); + } appr_algo->base_layer_init = true; // reset to true to allow addition of new points // move cagra graph to host @@ -236,11 +187,13 @@ std::enable_if_t>> fro // copy cagra graph to hnswlib base layer #pragma omp parallel for for (size_t i = 0; i < static_cast(host_graph.extent(0)); ++i) { - auto ll_i = appr_algo->get_linklist0(i); + auto hnsw_internal_id = appr_algo->label_lookup_.find(i)->second; + auto ll_i = appr_algo->get_linklist0(hnsw_internal_id); appr_algo->setListCount(ll_i, host_graph.extent(1)); auto* data = (uint32_t*)(ll_i + 1); for (size_t j = 0; j < static_cast(host_graph.extent(1)); ++j) { - data[j] = host_graph(i, j); + auto neighbor_internal_id = appr_algo->label_lookup_.find(host_graph(i, j))->second; + data[j] = neighbor_internal_id; } } @@ -275,19 +228,15 @@ void extend(raft::resources const& res, const_cast(idx.get_index())); auto current_element_count = hnswlib_index->getCurrentElementCount(); auto new_element_count = additional_dataset.extent(0); - auto num_threads = params.num_threads == 0 ? 
std::thread::hardware_concurrency() - : static_cast(params.num_threads); + auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads; hnswlib_index->resizeIndex(current_element_count + new_element_count); - ParallelFor(current_element_count, - current_element_count + new_element_count, - num_threads, - [&](size_t i, size_t threadId) { - hnswlib_index->addPoint( - (void*)(additional_dataset.data_handle() + - (i - current_element_count) * additional_dataset.extent(1)), - i); - }); +#pragma omp parallel for num_threads(num_threads) + for (int64_t i = 0; i < additional_dataset.extent(0); i++) { + hnswlib_index->addPoint( + (void*)(additional_dataset.data_handle() + i * additional_dataset.extent(1)), + current_element_count + i); + } } template diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp index 0233a510a..628d87e00 100644 --- a/cpp/src/neighbors/hnsw_c.cpp +++ b/cpp/src/neighbors/hnsw_c.cpp @@ -123,7 +123,7 @@ extern "C" cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params) { return cuvs::core::translate_exceptions([=] { *params = new cuvsHnswIndexParams{ - .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 2}; + .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 0}; }); } diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx index 4c44350e8..72a3617bd 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -52,12 +52,10 @@ cdef class IndexParams: ef_construction : int, default = 200 (optional) Maximum number of candidate list size used during construction when hierarchy is `cpu`. - num_threads : int, default = 2 (optional) + num_threads : int, default = 0 (optional) Number of CPU threads used to increase construction parallelism - when hierarchy is `cpu`. 
- NOTE: Constructing the hierarchy when converting from a CAGRA graph - is highly sensitive to parallelism, and increasing the number of - threads can reduce the quality of the index. + when hierarchy is `cpu`. When the value is 0, the number of threads is + automatically determined to the maximum number of threads available. """ cdef cuvsHnswIndexParams* params @@ -71,7 +69,7 @@ cdef class IndexParams: def __init__(self, *, hierarchy="none", ef_construction=200, - num_threads=2): + num_threads=0): if hierarchy == "none": self.params.hierarchy = cuvsHnswHierarchy.NONE elif hierarchy == "cpu": diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 20f583ae8..b6ddf14df 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -54,7 +54,7 @@ def run_hnsw_build_search_test( assert index.trained - hnsw_params = hnsw.IndexParams(hierarchy=hierarchy, num_threads=1) + hnsw_params = hnsw.IndexParams(hierarchy=hierarchy) hnsw_index = hnsw.from_cagra(hnsw_params, index) queries = generate_data((n_queries, n_cols), dtype) @@ -135,7 +135,7 @@ def run_hnsw_extend_test( assert index.trained - hnsw_params = hnsw.IndexParams(hierarchy="cpu", num_threads=1) + hnsw_params = hnsw.IndexParams(hierarchy="cpu") hnsw_index = hnsw.from_cagra(hnsw_params, index) hnsw.extend(hnsw.ExtendParams(), hnsw_index, add_dataset) diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index 90a561bca..b02efaa8f 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -1,5 +1,6 @@ name: cuvs_cagra_hnswlib constraints: + build: cuvs_bench.config.algos.constraints.cuvs_cagra_build search: cuvs_bench.config.algos.constraints.hnswlib_search groups: base: @@ -7,8 +8,7 @@ groups: graph_degree: [32, 64, 96, 128] intermediate_graph_degree: [32, 64, 96, 128] 
graph_build_algo: ["NN_DESCENT"] - hierarchy: ["none", "cpu"] + hierarchy: ["cpu"] ef_construction: [64, 128, 256, 512] - num_threads: [2, 5, 10] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800]