diff --git a/knowhere/index/vector_index/IndexDiskANN.cpp b/knowhere/index/vector_index/IndexDiskANN.cpp index 230fef86e..11809b5ec 100644 --- a/knowhere/index/vector_index/IndexDiskANN.cpp +++ b/knowhere/index/vector_index/IndexDiskANN.cpp @@ -51,6 +51,7 @@ IndexDiskANN::IndexDiskANN(std::string index_prefix, MetricType metric_type, } namespace { +static constexpr float kCacheExpansionRate = 1.2; void CheckPreparation(bool is_prepared) { if (!is_prepared) { @@ -234,7 +235,7 @@ IndexDiskANN::Prepare(const Config& config) { // Load file from file manager. for (auto& filename : GetNecessaryFilenames(index_prefix_, metric_ == diskann::INNER_PRODUCT, - prep_conf.num_nodes_to_cache > 0 && !prep_conf.use_bfs_cache, prep_conf.warm_up)) { + prep_conf.search_cache_budget_gb > 0 && !prep_conf.use_bfs_cache, prep_conf.warm_up)) { if (!LoadFile(filename)) { return false; } @@ -271,16 +272,18 @@ IndexDiskANN::Prepare(const Config& config) { std::string warmup_query_file = diskann::get_sample_data_filename(index_prefix_); // load cache - if (prep_conf.num_nodes_to_cache > pq_flash_index_->get_num_points() / 3) { + auto num_nodes_to_cache = GetCachedNodeNum(prep_conf.search_cache_budget_gb, pq_flash_index_->get_data_dim(), + pq_flash_index_->get_max_degree()); + if (num_nodes_to_cache > pq_flash_index_->get_num_points() / 3) { KNOWHERE_THROW_MSG("Failed to generate cache, num_nodes_to_cache is larger than 1/3 of the total data number."); } - if (prep_conf.num_nodes_to_cache > 0) { + if (num_nodes_to_cache > 0) { std::vector node_list; - LOG_KNOWHERE_INFO_ << "Caching " << prep_conf.num_nodes_to_cache << " sample nodes around medoid(s)."; + LOG_KNOWHERE_INFO_ << "Caching " << num_nodes_to_cache << " sample nodes around medoid(s)."; if (prep_conf.use_bfs_cache) { auto gen_cache_successful = TryDiskANNCall([&]() -> bool { - pq_flash_index_->cache_bfs_levels(prep_conf.num_nodes_to_cache, node_list); + pq_flash_index_->cache_bfs_levels(num_nodes_to_cache, node_list); return true; }); @@ -290,8 +293,8 @@ IndexDiskANN::Prepare(const Config& config) { } } else { auto gen_cache_successful = TryDiskANNCall([&]() -> bool { - pq_flash_index_->generate_cache_list_from_sample_queries( - warmup_query_file, 15, 6, prep_conf.num_nodes_to_cache, prep_conf.num_threads, node_list); + pq_flash_index_->generate_cache_list_from_sample_queries(warmup_query_file, 15, 6, num_nodes_to_cache, + prep_conf.num_threads, node_list); return true; }); @@ -511,6 +514,15 @@ IndexDiskANN::AddFile(const std::string& filename) { return true; } +template +uint64_t +IndexDiskANN::GetCachedNodeNum(const float cache_dram_budget, const uint64_t data_dim, const uint64_t max_degree) { + uint32_t one_cached_node_budget = (max_degree + 1) * sizeof(unsigned) + sizeof(T) * data_dim; + auto num_nodes_to_cache = + static_cast(1024 * 1024 * 1024 * cache_dram_budget) / (one_cached_node_budget * kCacheExpansionRate); + return num_nodes_to_cache; +} + // Explicit template instantiation template class IndexDiskANN; template class IndexDiskANN; diff --git a/knowhere/index/vector_index/IndexDiskANN.h b/knowhere/index/vector_index/IndexDiskANN.h index d6ffba2bd..7944e0c1f 100644 --- a/knowhere/index/vector_index/IndexDiskANN.h +++ b/knowhere/index/vector_index/IndexDiskANN.h @@ -98,6 +98,9 @@ class IndexDiskANN : public VecIndex { bool AddFile(const std::string& filename); + uint64_t + GetCachedNodeNum(const float CacheDramBudget, const uint64_t data_dim, const uint64_t max_degree); + std::string index_prefix_; diskann::Metric metric_; diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.cpp b/knowhere/index/vector_index/IndexDiskANNConfig.cpp index 807186231..1d2624355 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.cpp +++ b/knowhere/index/vector_index/IndexDiskANNConfig.cpp @@ -29,7 +29,7 @@ static constexpr const char* kNumThreads = "num_threads"; static constexpr const char* kDiskPqBytes = "disk_pq_dims"; static constexpr const char* kAccelerateBuild = "accelerate_build"; -static constexpr const char* kNumNodesToCache = "num_nodes_to_cache"; +static constexpr const char* kCacheDramBudgetGb = "search_cache_budget_gb"; static constexpr const char* kWarmUp = "warm_up"; static constexpr const char* kUseBfsCache = "use_bfs_cache"; static constexpr const char* kAioMaxnr = "aio_maxnr"; @@ -68,8 +68,8 @@ static constexpr uint64_t kAioMaxnrMaxValue = 2 * kBeamwidthMaxValue; static constexpr uint64_t kAioMaxnrDefaultValue = 32; static constexpr uint32_t kLinuxAioMaxnrLimit = 65536; static constexpr uint32_t kSearchNumThreadsMinValue = 1; -static constexpr uint32_t kNumNodesToCacheMinValue = 0; -static constexpr std::optional kNumNodesToCacheMaxValue = std::nullopt; +static constexpr float kCacheDramBudgetGbMinValue = 0; +static constexpr std::optional kCacheDramBudgetGbMaxValue = std::nullopt; static constexpr std::optional kRadiusMinValue = std::nullopt; static constexpr std::optional kRadiusMaxValue = std::nullopt; static constexpr uint64_t kMinKMinValue = 1; @@ -169,8 +169,8 @@ from_json(const Config& config, DiskANNBuildConfig& build_conf) { build_conf.max_degree); CheckNumericParamAndSet(config, kSearchListSize, kBuildSearchListSizeMinValue, kBuildSearchListSizeMaxValue, build_conf.search_list_size); - CheckNumericParamAndSet(config, kPQCodeBudgetGb, kPQCodeBudgetGbMinValue, - kPQCodeBudgetGbMaxValue, build_conf.pq_code_budget_gb); + CheckNumericParamAndSet(config, kPQCodeBudgetGb, kPQCodeBudgetGbMinValue, kPQCodeBudgetGbMaxValue, + build_conf.pq_code_budget_gb); CheckNumericParamAndSet(config, kBuildDramBudgetGb, kBuildDramBudgetGbMinValue, kBuildDramBudgetGbMaxValue, build_conf.build_dram_budget_gb); CheckNumericParamAndSet(config, kNumThreads, kBuildNumThreadsMinValue, kBuildNumThreadsMaxValue, @@ -183,7 +183,7 @@ from_json(const Config& config, DiskANNBuildConfig& build_conf) { void to_json(Config& config, const DiskANNPrepareConfig& prep_conf) { config = Config{{kNumThreads, prep_conf.num_threads}, - {kNumNodesToCache, prep_conf.num_nodes_to_cache}, + {kCacheDramBudgetGb, prep_conf.search_cache_budget_gb}, {kWarmUp, prep_conf.warm_up}, {kUseBfsCache, prep_conf.use_bfs_cache}, {kAioMaxnr, prep_conf.aio_maxnr}}; @@ -200,8 +200,8 @@ from_json(const Config& config, DiskANNPrepareConfig& prep_conf) { auto num_thread_max_value = kLinuxAioMaxnrLimit / prep_conf.aio_maxnr; CheckNumericParamAndSet(config, kNumThreads, kSearchNumThreadsMinValue, num_thread_max_value, prep_conf.num_threads); - CheckNumericParamAndSet(config, kNumNodesToCache, kNumNodesToCacheMinValue, kNumNodesToCacheMaxValue, - prep_conf.num_nodes_to_cache); + CheckNumericParamAndSet(config, kCacheDramBudgetGb, kCacheDramBudgetGbMinValue, + kCacheDramBudgetGbMaxValue, prep_conf.search_cache_budget_gb); CheckNonNumbericParamAndSet(config, kWarmUp, prep_conf.warm_up); CheckNonNumbericParamAndSet(config, kUseBfsCache, prep_conf.use_bfs_cache); } diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.h b/knowhere/index/vector_index/IndexDiskANNConfig.h index 65010b6f9..f519cc4e3 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.h +++ b/knowhere/index/vector_index/IndexDiskANNConfig.h @@ -63,7 +63,7 @@ struct DiskANNPrepareConfig { uint32_t num_threads; // While serving the index, the entire graph is stored on SSD. For faster search performance, you can cache a few // frequently accessed nodes in memory. - uint32_t num_nodes_to_cache; + float search_cache_budget_gb; // Should we do warm-up before searching. bool warm_up; // Should we use the bfs strategy to cache. We have two cache strategies: 1. use sample queries to do searches and diff --git a/thirdparty/DiskANN/include/pq_flash_index.h b/thirdparty/DiskANN/include/pq_flash_index.h index 052554700..55ab641c3 100644 --- a/thirdparty/DiskANN/include/pq_flash_index.h +++ b/thirdparty/DiskANN/include/pq_flash_index.h @@ -114,6 +114,8 @@ namespace diskann { DISKANN_DLLEXPORT _u64 get_data_dim() const noexcept; + DISKANN_DLLEXPORT _u64 get_max_degree() const noexcept; + protected: DISKANN_DLLEXPORT void use_medoids_data_as_centroids(); DISKANN_DLLEXPORT void setup_thread_data(_u64 nthreads); diff --git a/thirdparty/DiskANN/src/pq_flash_index.cpp b/thirdparty/DiskANN/src/pq_flash_index.cpp index c00cb810c..97995040f 100644 --- a/thirdparty/DiskANN/src/pq_flash_index.cpp +++ b/thirdparty/DiskANN/src/pq_flash_index.cpp @@ -1275,6 +1275,11 @@ namespace diskann { return data_dim; } + template + _u64 PQFlashIndex::get_max_degree() const noexcept { + return max_degree; + } + #ifdef EXEC_ENV_OLS template char *PQFlashIndex::getHeaderBytes() { diff --git a/unittest/pycase/test_diskann.py b/unittest/pycase/test_diskann.py index 917009192..985b86365 100644 --- a/unittest/pycase/test_diskann.py +++ b/unittest/pycase/test_diskann.py @@ -38,7 +38,7 @@ def test_diskann(): }, "diskANN_prepare_config": { "num_threads": 16, - "num_nodes_to_cache": 100, + "search_cache_budget_gb": 0.00009, "warm_up": True, "use_bfs_cache": False }, diff --git a/unittest/test_diskann.cpp b/unittest/test_diskann.cpp index 7a95ef9b4..c001afd35 100644 --- a/unittest/test_diskann.cpp +++ b/unittest/test_diskann.cpp @@ -552,7 +552,7 @@ TEST_P(DiskANNTest, cached_warmup_test) { // search cache + warmup preparation knowhere::DiskANNPrepareConfig prep_conf_to_test = prep_conf; prep_conf_to_test.warm_up = true; - prep_conf_to_test.num_nodes_to_cache = 1000; + prep_conf_to_test.search_cache_budget_gb = 0.00001; knowhere::DiskANNPrepareConfig::Set(cfg, prep_conf_to_test); EXPECT_TRUE(diskann->Prepare(cfg)); @@ -736,10 +736,9 @@ TEST_P(DiskANNTest, build_config_test) { TEST_P(DiskANNTest, generate_cache_list_test) { knowhere::Config cfg; - uint32_t cached_nodes_num = kNumRows * 10; knowhere::DiskANNPrepareConfig prep_conf_to_test = prep_conf; prep_conf_to_test.use_bfs_cache = false; - prep_conf_to_test.num_nodes_to_cache = cached_nodes_num; + prep_conf_to_test.search_cache_budget_gb = 3.0; knowhere::DiskANNPrepareConfig::Set(cfg, prep_conf_to_test); EXPECT_THROW(diskann->Prepare(cfg), knowhere::KnowhereException);