From 63f3997b73ba869f167ca274eb048683cac29440 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Mon, 13 Jun 2022 17:18:06 +0800 Subject: [PATCH] Fix hnsw range search error (#214) Signed-off-by: yudong.cai --- knowhere/index/vector_index/IndexHNSW.cpp | 4 +++ .../benchmark/benchmark_knowhere_binary.cpp | 4 +-- .../benchmark_knowhere_binary_range.cpp | 4 +-- .../benchmark/benchmark_knowhere_float.cpp | 8 ++--- .../benchmark_knowhere_float_range.cpp | 6 ++-- .../benchmark/benchmark_knowhere_perf.cpp | 8 ++--- .../test_knowhere_range_hnsw.log | 36 +++++++++---------- 7 files changed, 37 insertions(+), 33 deletions(-) diff --git a/knowhere/index/vector_index/IndexHNSW.cpp b/knowhere/index/vector_index/IndexHNSW.cpp index 21941c76c..fc9c297c6 100644 --- a/knowhere/index/vector_index/IndexHNSW.cpp +++ b/knowhere/index/vector_index/IndexHNSW.cpp @@ -236,6 +236,10 @@ IndexHNSW::QueryByRange(const DatasetPtr& dataset, index_->setEf(GetIndexParamEf(config)); bool is_IP = (index_->metric_type_ == 1); // InnerProduct: 1 + if (!is_IP) { + radius *= radius; + } + std::vector> result_id_array(rows); std::vector> result_dist_array(rows); std::vector result_lims(rows + 1, 0); diff --git a/unittest/benchmark/benchmark_knowhere_binary.cpp b/unittest/benchmark/benchmark_knowhere_binary.cpp index 4f1f09de2..fbf314b99 100644 --- a/unittest/benchmark/benchmark_knowhere_binary.cpp +++ b/unittest/benchmark/benchmark_knowhere_binary.cpp @@ -29,7 +29,7 @@ class Benchmark_knowhere_binary : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall); + printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, t_diff, recall); } } printf("================================================================================\n"); @@ -54,7 +54,7 @@ class Benchmark_knowhere_binary : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff, + printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, t_diff, recall); } } diff --git a/unittest/benchmark/benchmark_knowhere_binary_range.cpp b/unittest/benchmark/benchmark_knowhere_binary_range.cpp index 81ffa78a8..9634de885 100644 --- a/unittest/benchmark/benchmark_knowhere_binary_range.cpp +++ b/unittest/benchmark/benchmark_knowhere_binary_range.cpp @@ -29,7 +29,7 @@ class Benchmark_knowhere_binary_range : public Benchmark_knowhere { auto lims = knowhere::GetDatasetLims(result); float recall = CalcRecall(ids, lims, nq); float accuracy = CalcAccuracy(ids, lims, nq); - printf(" nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy); + printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy); } printf("================================================================================\n"); printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), @@ -53,7 +53,7 @@ class Benchmark_knowhere_binary_range : public Benchmark_knowhere { auto lims = knowhere::GetDatasetLims(result); float recall = CalcRecall(ids, lims, nq); float accuracy = CalcAccuracy(ids, lims, nq); - printf(" nprobe = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", + printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nprobe, nq, t_diff, recall, accuracy); } } diff --git a/unittest/benchmark/benchmark_knowhere_float.cpp b/unittest/benchmark/benchmark_knowhere_float.cpp index 3c062a481..a99918ec7 100644 --- a/unittest/benchmark/benchmark_knowhere_float.cpp +++ b/unittest/benchmark/benchmark_knowhere_float.cpp @@ -29,7 +29,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall); + printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, t_diff, recall); } } printf("================================================================================\n"); @@ -54,7 +54,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff, + printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, t_diff, recall); } } @@ -82,7 +82,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" ef = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", ef, nq, k, t_diff, recall); + printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, t_diff, recall); } } } @@ -108,7 +108,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, nq, k); - printf(" search_k = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", sk, nq, k, t_diff, + printf(" search_k = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", sk, nq, k, t_diff, recall); } } diff --git a/unittest/benchmark/benchmark_knowhere_float_range.cpp b/unittest/benchmark/benchmark_knowhere_float_range.cpp index 99227c97c..410769e75 100644 --- a/unittest/benchmark/benchmark_knowhere_float_range.cpp +++ b/unittest/benchmark/benchmark_knowhere_float_range.cpp @@ -29,7 +29,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere { auto lims = knowhere::GetDatasetLims(result); float recall = CalcRecall(ids, lims, nq); float accuracy = CalcAccuracy(ids, lims, nq); - printf(" nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy); + printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy); } printf("================================================================================\n"); printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), @@ -53,7 +53,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere { auto lims = knowhere::GetDatasetLims(result); float recall = CalcRecall(ids, lims, nq); float accuracy = CalcAccuracy(ids, lims, nq); - printf(" nprobe = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", + printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nprobe, nq, t_diff, recall, accuracy); } } @@ -80,7 +80,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere { auto lims = knowhere::GetDatasetLims(result); float recall = CalcRecall(ids, lims, nq); float accuracy = CalcAccuracy(ids, lims, nq); - printf(" ef = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", + printf(" ef = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", ef, nq, t_diff, recall, accuracy); } } diff --git a/unittest/benchmark/benchmark_knowhere_perf.cpp b/unittest/benchmark/benchmark_knowhere_perf.cpp index b2d1e8c86..d3e33d991 100644 --- a/unittest/benchmark/benchmark_knowhere_perf.cpp +++ b/unittest/benchmark/benchmark_knowhere_perf.cpp @@ -30,7 +30,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, i, NQ_STEP_, k); - printf(" No.%4d: nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, i, i + NQ_STEP_, k, + printf(" No.%4d: nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, i, i + NQ_STEP_, k, t_diff, recall); } } @@ -57,7 +57,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, i, NQ_STEP_, k); - printf(" No.%4d: nprobe = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, + printf(" No.%4d: nprobe = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, nprobe, i, i + NQ_STEP_, k, t_diff, recall); } } @@ -86,7 +86,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, i, NQ_STEP_, k); - printf(" No.%4d: ef = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, ef, i, + printf(" No.%4d: ef = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, ef, i, i + NQ_STEP_, k, t_diff, recall); } } @@ -114,7 +114,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere { CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); auto ids = knowhere::GetDatasetIDs(result); float recall = CalcRecall(ids, i, NQ_STEP_, k); - printf(" No.%4d: search_k = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, sk, + printf(" No.%4d: search_k = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, sk, i, i + NQ_STEP_, k, t_diff, recall); } } diff --git a/unittest/benchmark/ref_log/benchmark_knowhere_float_range/test_knowhere_range_hnsw.log b/unittest/benchmark/ref_log/benchmark_knowhere_float_range/test_knowhere_range_hnsw.log index c98e65359..f8516701f 100644 --- a/unittest/benchmark/ref_log/benchmark_knowhere_float_range/test_knowhere_range_hnsw.log +++ b/unittest/benchmark/ref_log/benchmark_knowhere_float_range/test_knowhere_range_hnsw.log @@ -6,28 +6,28 @@ Note: Google Test filter = Benchmark_knowhere_float_range.TEST_HNSW [ RUN ] Benchmark_knowhere_float_range.TEST_HNSW [0.000 s] Loading HDF5 file: sift-128-euclidean-range.hdf5 [0.000 s] Loading train data -[0.186 s] Loading test data -[0.188 s] Loading ground truth data -2022-06-08 20:38:53,635 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO -2022-06-08 20:38:53,635 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 -[0.190 s] Creating CPU index "HNSW" -[0.190 s] Reading index file: sift-128-euclidean-range_HNSW_16_100_20.index -[0.190 s] Building all on 1000000 vectors -[50.130 s] Writing index file: sift-128-euclidean-range_HNSW_16_100_20.index +[0.195 s] Loading test data +[0.197 s] Loading ground truth data +2022-06-13 16:58:24,478 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-06-13 16:58:24,479 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.199 s] Creating CPU index "HNSW" +[0.199 s] Reading index file: sift-128-euclidean-range_HNSW_16_100_20.index +[0.199 s] Building all on 1000000 vectors +[50.409 s] Writing index file: sift-128-euclidean-range_HNSW_16_100_20.index -[50.895 s] sift-128-euclidean-range | HNSW | M=16 | efConstruction=100 +[51.170 s] sift-128-euclidean-range | HNSW | M=16 | efConstruction=100 ================================================================================ - ef = 16, nq = 10000, elapse = 0.7227s, R@ = 0.0000, A@ = -nan - ef = 32, nq = 10000, elapse = 1.0283s, R@ = 0.0000, A@ = -nan - ef = 64, nq = 10000, elapse = 1.7962s, R@ = 0.0000, A@ = -nan - ef = 128, nq = 10000, elapse = 3.2314s, R@ = 0.0000, A@ = -nan - ef = 256, nq = 10000, elapse = 5.7384s, R@ = 0.0000, A@ = -nan + ef = 16, nq = 10000, elapse = 1.462s, R@ = 0.9962, A@ = 1.0000 + ef = 32, nq = 10000, elapse = 1.756s, R@ = 0.9969, A@ = 1.0000 + ef = 64, nq = 10000, elapse = 2.496s, R@ = 0.9975, A@ = 1.0000 + ef = 128, nq = 10000, elapse = 3.871s, R@ = 0.9977, A@ = 1.0000 + ef = 256, nq = 10000, elapse = 6.338s, R@ = 0.9978, A@ = 1.0000 ================================================================================ -[63.768 s] Test 'sift-128-euclidean-range/HNSW' done +[68.047 s] Test 'sift-128-euclidean-range/HNSW' done -[ OK ] Benchmark_knowhere_float_range.TEST_HNSW (63816 ms) -[----------] 1 test from Benchmark_knowhere_float_range (63816 ms total) +[ OK ] Benchmark_knowhere_float_range.TEST_HNSW (68088 ms) +[----------] 1 test from Benchmark_knowhere_float_range (68088 ms total) [----------] Global test environment tear-down -[==========] 1 test from 1 test case ran. (63816 ms total) +[==========] 1 test from 1 test case ran. (68088 ms total) [ PASSED ] 1 test.