Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize benchmark for SCANN & HNSW #1010

Merged
merged 1 commit into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions benchmark/hdf5/benchmark_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_IDMAP) {
index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;

knowhere::Json conf = cfg_;
std::string index_file_name = get_index_name<knowhere::bin1>({});
std::vector<int32_t> params = {};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_idmap(conf);
}
Expand All @@ -187,7 +188,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_IVF_FLAT) {
knowhere::Json conf = cfg_;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::string index_file_name = get_index_name<knowhere::bin1>({nlist});
std::vector<int32_t> params = {nlist};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_ivf(conf);
}
Expand All @@ -201,7 +203,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_HNSW) {
conf[knowhere::indexparam::HNSW_M] = M;
for (auto efc : EFCONs_) {
conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
std::string index_file_name = get_index_name<knowhere::bin1>({M, efc});
std::vector<int32_t> params = {M, efc};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_hnsw(conf);
}
Expand Down
9 changes: 6 additions & 3 deletions benchmark/hdf5/benchmark_binary_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_IDMAP) {
index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;

knowhere::Json conf = cfg_;
std::string index_file_name = get_index_name<knowhere::bin1>({});
std::vector<int32_t> params = {};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_idmap(conf);
}
Expand All @@ -193,7 +194,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_IVF_FLAT) {
knowhere::Json conf = cfg_;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::string index_file_name = get_index_name<knowhere::bin1>({nlist});
std::vector<int32_t> params = {nlist};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_ivf(conf);
}
Expand All @@ -207,7 +209,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_HNSW) {
conf[knowhere::indexparam::HNSW_M] = M;
for (auto efc : EFCONs_) {
conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
std::string index_file_name = get_index_name<knowhere::bin1>({M, efc});
std::vector<int32_t> params = {M, efc};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_hnsw(conf);
}
Expand Down
183 changes: 152 additions & 31 deletions benchmark/hdf5/benchmark_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,48 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
test_scann(const knowhere::Json& cfg) {
auto conf = cfg;

const auto reorder_k = conf[knowhere::indexparam::REORDER_K].get<int32_t>();
const auto with_raw_data = conf[knowhere::indexparam::WITH_RAW_DATA].get<bool>();
auto nlist = conf[knowhere::indexparam::NLIST].get<int32_t>();
std::string data_type_str = get_data_type_name<T>();

printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, reorder_k=%d\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), nlist, reorder_k);
printf("\n[%0.3f s] %s | %s(%s) | nlist=%d\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str(), nlist);
printf("================================================================================\n");
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
for (auto reorder_k : SCANN_REORDER_Ks) {
conf[knowhere::indexparam::REORDER_K] = reorder_k;
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
for (auto k : TOPKs_) {
conf[knowhere::meta::TOPK] = k;
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" reorder_k = %4d, nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n",
reorder_k, nprobe, nq, k, TDIFF_, recall);
std::fflush(stdout);
}
}
}
}
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}

template <typename T>
void
test_hnsw(const knowhere::Json& cfg) {
auto conf = cfg;
auto M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();

std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efc=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efConstruction);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
Expand All @@ -128,8 +160,7 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, TDIFF_,
recall);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, TDIFF_, recall);
std::fflush(stdout);
}
}
Expand All @@ -140,17 +171,20 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {

template <typename T>
void
test_hnsw(const knowhere::Json& cfg) {
test_hnsw_refine(const knowhere::Json& cfg) {
auto conf = cfg;
auto M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();
auto hnsw_M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efc = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();

auto ef = EFs_[0];
conf[knowhere::indexparam::EF] = ef;

std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efConstruction);
printf("\n[%0.3f s] %s | %s(%s) | hnsw_M=%ld, efc=%ld, ef=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), hnsw_M, efc, ef);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
for (auto refine_k : HNSW_REFINE_Ks_) {
conf[knowhere::indexparam::HNSW_REFINE_K] = refine_k;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
Expand All @@ -159,7 +193,8 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, TDIFF_, recall);
printf(" refine_k = %3d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", refine_k, nq, k, TDIFF_,
recall);
std::fflush(stdout);
}
}
Expand Down Expand Up @@ -267,13 +302,14 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
const int32_t NBITS_ = 8;

// SCANN index params
const std::vector<int32_t> SCANN_REORDER_K = {256, 512, 1024};
const std::vector<bool> SCANN_WITH_RAW_DATA = {true};
const std::vector<int32_t> SCANN_REORDER_Ks = {128, 256, 512};

// HNSW index params
const std::vector<int32_t> HNSW_Ms_ = {16};
const std::vector<int32_t> EFCONs_ = {200};
const std::vector<int32_t> EFs_ = {128, 256, 512};
const std::vector<std::string> HNSW_SQ_TYPEs_ = {"SQ8", "FP16"};
const std::vector<int32_t> HNSW_REFINE_Ks_ = {1, 2, 4, 8, 16};

// DISKANN index params
const std::vector<int32_t> SEARCH_LISTs_ = {100, 200, 400};
Expand Down Expand Up @@ -383,23 +419,18 @@ TEST_F(Benchmark_float, TEST_SCANN) {

std::string index_file_name;
knowhere::Json conf = cfg_;
for (auto reorder_k : SCANN_REORDER_K) {
conf[knowhere::indexparam::REORDER_K] = reorder_k;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
for (const auto with_raw_data : SCANN_WITH_RAW_DATA) {
conf[knowhere::indexparam::WITH_RAW_DATA] = with_raw_data;
std::vector<int32_t> params = {nlist, reorder_k, with_raw_data};
conf[knowhere::indexparam::WITH_RAW_DATA] = true;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::vector<int32_t> params = {nlist};

TEST_SCANN(knowhere::fp32, params);
TEST_SCANN(knowhere::fp16, params);
TEST_SCANN(knowhere::bf16, params);
}
}
TEST_SCANN(knowhere::fp32, params);
TEST_SCANN(knowhere::fp16, params);
TEST_SCANN(knowhere::bf16, params);
}
}

TEST_F(Benchmark_float, TEST_HNSW) {
TEST_F(Benchmark_float, TEST_HNSW_FLAT) {
index_type_ = knowhere::IndexEnum::INDEX_HNSW;

#define TEST_HNSW(T, X) \
Expand All @@ -422,6 +453,96 @@ TEST_F(Benchmark_float, TEST_HNSW) {
}
}

// Benchmark for the HNSW_SQ index type: for every (M, efConstruction, sq_type)
// combination and for each of fp32 / fp16 / bf16, derive the index file name,
// call create_index and then run the refine_k sweep in test_hnsw_refine.
TEST_F(Benchmark_float, TEST_HNSW_SQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_SQ;

// A TEST_HNSW macro defined by an earlier test body in this translation unit may
// still be active; drop it first so this definition is never an incompatible
// macro redefinition (which compilers diagnose).
#undef TEST_HNSW
#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    std::string index_file_name;
    knowhere::Json conf = cfg_;

    // Refinement is enabled with the "FLAT" refine type for every run of this test.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";

    for (auto M : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = M;
        for (auto efc : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
            for (auto sq_type : HNSW_SQ_TYPEs_) {
                conf[knowhere::indexparam::SQ_TYPE] = sq_type;
                // sq_type is textual, so the string-based get_index_name overload is
                // used and the numeric parameters are stringified here.
                std::vector<std::string> params = {std::to_string(M), std::to_string(efc), sq_type};

                TEST_HNSW(knowhere::fp32, params);
                TEST_HNSW(knowhere::fp16, params);
                TEST_HNSW(knowhere::bf16, params);
            }
        }
    }
}

// Benchmark for the HNSW_PQ index type: for every (HNSW M, efConstruction, PQ m)
// combination and for each of fp32 / fp16 / bf16, derive the index file name,
// call create_index and then run the refine_k sweep in test_hnsw_refine.
TEST_F(Benchmark_float, TEST_HNSW_PQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_PQ;

// A TEST_HNSW macro defined by an earlier test body in this translation unit may
// still be active; drop it first so this definition is never an incompatible
// macro redefinition (which compilers diagnose).
#undef TEST_HNSW
#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    std::string index_file_name;
    knowhere::Json conf = cfg_;

    // Refinement is enabled with the "FLAT" refine type for every run of this test.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";
    conf[knowhere::indexparam::NBITS] = NBITS_;
    // indexparam::M is not preset here: it is assigned from Ms_ in the innermost
    // loop before every use, so an up-front value would never be read.
    for (auto hnsw_m : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = hnsw_m;
        for (auto efc : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
            for (auto pq_m : Ms_) {
                conf[knowhere::indexparam::M] = pq_m;
                std::vector<int32_t> params = {hnsw_m, efc, pq_m};

                TEST_HNSW(knowhere::fp32, params);
                TEST_HNSW(knowhere::fp16, params);
                TEST_HNSW(knowhere::bf16, params);
            }
        }
    }
}

// Benchmark for the HNSW_PRQ index type: for every (HNSW M, efConstruction)
// combination and for each of fp32 / fp16 / bf16, derive the index file name,
// call create_index and then run the refine_k sweep in test_hnsw_refine.
TEST_F(Benchmark_float, TEST_HNSW_PRQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_PRQ;

// A TEST_HNSW macro defined by an earlier test body in this translation unit may
// still be active; drop it first so this definition is never an incompatible
// macro redefinition (which compilers diagnose).
#undef TEST_HNSW
#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    std::string index_file_name;
    knowhere::Json conf = cfg_;

    // Fixed settings for every run of this test: refinement on with the "FLAT"
    // refine type, NBITS_ bits, M = 8 and PRQ_NUM = 2. Only HNSW_M and
    // EFCONSTRUCTION are swept, and only they appear in the index file name.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";
    conf[knowhere::indexparam::NBITS] = NBITS_;
    conf[knowhere::indexparam::M] = 8;
    conf[knowhere::indexparam::PRQ_NUM] = 2;
    for (auto M : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = M;
        for (auto efc : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
            std::vector<int32_t> params = {M, efc};

            TEST_HNSW(knowhere::fp32, params);
            TEST_HNSW(knowhere::fp16, params);
            TEST_HNSW(knowhere::bf16, params);
        }
    }
}

#ifdef KNOWHERE_WITH_DISKANN
TEST_F(Benchmark_float, TEST_DISKANN) {
index_type_ = knowhere::IndexEnum::INDEX_DISKANN;
Expand Down
14 changes: 12 additions & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
template <typename T>
static std::string
get_index_name(const std::string& ann_test_name, const std::string& index_type,
const std::vector<int32_t>& params) {
const std::vector<std::string>& params) {
std::string params_str = "";
for (size_t i = 0; i < params.size(); i++) {
params_str += "_" + std::to_string(params[i]);
params_str += "_" + params[i];
}
if constexpr (std::is_same_v<T, knowhere::fp32>) {
return ann_test_name + "_" + index_type + params_str + "_fp32" + ".index";
Expand All @@ -131,6 +131,16 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
// Convenience overload for purely numeric index parameters: each value is
// rendered as decimal text, in order, and the result is passed to the
// three-argument get_index_name together with ann_test_name_ and index_type_.
template <typename T>
std::string
get_index_name(const std::vector<int32_t>& params) {
    std::vector<std::string> rendered;
    for (size_t i = 0; i < params.size(); ++i) {
        rendered.emplace_back(std::to_string(params[i]));
    }
    return this->get_index_name<T>(ann_test_name_, index_type_, rendered);
}

// Overload for index parameters that are already textual: forwards them
// unchanged to the three-argument get_index_name together with this fixture's
// ann_test_name_ and index_type_.
template <typename T>
std::string
get_index_name(const std::vector<std::string>& params) {
    std::string index_name = this->get_index_name<T>(ann_test_name_, index_type_, params);
    return index_name;
}

Expand Down
16 changes: 12 additions & 4 deletions benchmark/hdf5/ref_logs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ test_binary_range_hnsw:

###################################################################################################
# Test Knowhere float index
test_float: test_float_brute_force test_float_idmap test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq test_float_scann test_float_hnsw test_float_diskann
test_float: test_float_brute_force test_float_idmap test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq test_float_scann \
test_float_hnsw_flat test_float_hnsw_sq test_float_hnsw_pq test_float_diskann
test_float_raft: test_float_raft_brute_force test_float_raft_ivf_flat test_float_raft_ivf_pq test_float_raft_cagra
test_float_ivf: test_float_ivf_flat test_float_ivf_pq
test_float_ivf: test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq
test_float_hnsw: test_float_hnsw_flat test_float_hnsw_sq test_float_hnsw_pq test_float_hnsw_prq

test_float_brute_force:
./benchmark_float --gtest_filter="Benchmark_float.TEST_BRUTE_FORCE" | tee test_float_brute_force.log
Expand All @@ -38,8 +40,14 @@ test_float_ivf_pq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_IVF_PQ" | tee test_float_ivf_pq.log
test_float_scann:
./benchmark_float --gtest_filter="Benchmark_float.TEST_SCANN" | tee test_float_scann.log
test_float_hnsw:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW" | tee test_float_hnsw.log
test_float_hnsw_flat:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_FLAT" | tee test_float_hnsw_flat.log
test_float_hnsw_sq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_SQ" | tee test_float_hnsw_sq.log
test_float_hnsw_pq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_PQ" | tee test_float_hnsw_pq.log
test_float_hnsw_prq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_PRQ" | tee test_float_hnsw_prq.log
test_float_diskann:
./benchmark_float --gtest_filter="Benchmark_float.TEST_DISKANN" | tee test_float_diskann.log

Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ constexpr const char* SEARCH_CACHE_BUDGET_GB = "search_cache_budget_gb";
constexpr const char* SEARCH_LIST_SIZE = "search_list_size";

// FAISS additional Params
// Config keys for HNSW refinement: enable flag, refine_k and refine type.
constexpr const char* HNSW_REFINE = "refine";
constexpr const char* HNSW_REFINE_K = "refine_k";
constexpr const char* HNSW_REFINE_TYPE = "refine_type";
constexpr const char* SQ_TYPE = "sq_type"; // for IVF_SQ and HNSW_SQ
constexpr const char* PRQ_NUM = "nrq"; // for PRQ, number of residual quantizers

Expand Down