Skip to content

Commit

Permalink
Optimize benchmark to show data type info (#1000)
Browse files Browse the repository at this point in the history
Signed-off-by: Cai Yudong <[email protected]>
  • Loading branch information
cydrain authored Dec 20, 2024
1 parent 9e514f3 commit 63ee72e
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 38 deletions.
60 changes: 53 additions & 7 deletions benchmark/hdf5/benchmark_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,50 @@
#include <vector>

#include "benchmark_knowhere.h"
#include "knowhere/comp/brute_force.h"
#include "knowhere/comp/index_param.h"
#include "knowhere/comp/knowhere_config.h"
#include "knowhere/comp/local_file_manager.h"
#include "knowhere/dataset.h"

class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
public:
// Benchmarks knowhere::BruteForce::Search<T> for every (nq, k) pair drawn from
// NQs_ x TOPKs_ and prints the elapsed time and recall of each run.
//
// @tparam T   element type passed to ConvertToDataTypeIfNeeded<T> and Search<T>.
// @param cfg  base search configuration; it is copied locally and only the
//             copy's TOPK entry is overwritten before each search.
template <typename T>
void
test_brute_force(const knowhere::Json& cfg) {
    // Horizontal rule printed above and below the per-run result lines.
    const char* const kSeparator =
        "================================================================================\n";

    auto search_cfg = cfg;
    const std::string dtype_name = get_data_type_name<T>();

    // The base dataset is prepared before the banner is printed, so the
    // timestamp shown in the banner is taken after this step.
    auto raw_base = knowhere::GenDataSet(nb_, dim_, xb_);
    auto base_ds = knowhere::ConvertToDataTypeIfNeeded<T>(raw_base);

    printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
           dtype_name.c_str());
    std::fputs(kSeparator, stdout);

    for (auto nq : NQs_) {
        // One query dataset per nq; it is reused for every k below.
        auto raw_query = knowhere::GenDataSet(nq, dim_, xq_);
        auto query_ds = knowhere::ConvertToDataTypeIfNeeded<T>(raw_query);

        for (auto topk : TOPKs_) {
            search_cfg[knowhere::meta::TOPK] = topk;

            // NOTE(review): TDIFF_ is presumably defined by CALC_TIME_SPAN — confirm in the macro's header.
            CALC_TIME_SPAN(auto result = knowhere::BruteForce::Search<T>(base_ds, query_ds, search_cfg, nullptr));

            float recall = CalcRecall(result.value()->GetIds(), nq, topk);
            printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, topk, TDIFF_, recall);
            // Flush after every line so results show up while the benchmark is still running.
            std::fflush(stdout);
        }
    }

    std::fputs(kSeparator, stdout);
    printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}

template <typename T>
void
test_idmap(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
Expand All @@ -50,8 +81,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
auto conf = cfg;
auto nlist = conf[knowhere::indexparam::NLIST].get<int64_t>();

printf("\n[%0.3f s] %s | %s | nlist=%ld\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
nlist);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | nlist=%ld\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str(), nlist);
printf("================================================================================\n");
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
Expand Down Expand Up @@ -80,8 +112,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
auto M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();

printf("\n[%0.3f s] %s | %s | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), M, efConstruction);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efConstruction);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
Expand All @@ -108,7 +141,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
test_diskann(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto search_list_size : SEARCH_LISTs_) {
conf["search_list_size"] = search_list_size;
Expand Down Expand Up @@ -137,7 +172,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
test_raft_cagra(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto itopk_size : ITOPK_SIZEs_) {
conf[knowhere::indexparam::ITOPK_SIZE] = itopk_size;
Expand Down Expand Up @@ -209,6 +246,15 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
const std::vector<int32_t> ITOPK_SIZEs_ = {128, 192, 256};
};

// Runs the brute-force benchmark once per element type (fp32, fp16, bf16),
// each time starting from the same copy of the fixture's cfg_.
TEST_F(Benchmark_float, TEST_BRUTE_FORCE) {
    // Label printed by test_brute_force in its banner and footer lines.
    index_type_ = "BruteForce";

    const knowhere::Json base_conf = cfg_;

    test_brute_force<knowhere::fp32>(base_conf);
    test_brute_force<knowhere::fp16>(base_conf);
    test_brute_force<knowhere::bf16>(base_conf);
}

TEST_F(Benchmark_float, TEST_IDMAP) {
index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP;

Expand Down
12 changes: 9 additions & 3 deletions benchmark/hdf5/benchmark_float_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
test_ivf(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto per : PERCENTs_) {
auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100);
Expand Down Expand Up @@ -65,7 +67,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
test_hnsw(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto per : PERCENTs_) {
auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100);
Expand Down Expand Up @@ -97,7 +101,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
test_diskann(const knowhere::Json& cfg) {
auto conf = cfg;

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str());
printf("================================================================================\n");
for (auto per : PERCENTs_) {
auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100);
Expand Down
35 changes: 22 additions & 13 deletions benchmark/hdf5/benchmark_float_qps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
float expected_recall = 1.0f;
conf[knowhere::meta::TOPK] = topk_;

printf("\n[%0.3f s] %s | %s | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
topk_, expected_recall);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), topk_, expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand All @@ -49,6 +50,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
test_ivf(const knowhere::Json& cfg) {
auto conf = cfg;
auto nlist = conf[knowhere::indexparam::NLIST].get<int32_t>();
std::string data_type_str = get_data_type_name<T>();

auto find_smallest_nprobe = [&](float expected_recall) -> int32_t {
conf[knowhere::meta::TOPK] = topk_;
Expand Down Expand Up @@ -83,8 +85,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
conf[knowhere::indexparam::NPROBE] = nprobe;
conf[knowhere::meta::TOPK] = topk_;

printf("\n[%0.3f s] %s | %s | nlist=%d, nprobe=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), nlist, nprobe, topk_, expected_recall);
printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, nprobe=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), nlist, nprobe, topk_,
expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand All @@ -100,6 +103,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
void
test_raft_cagra(const knowhere::Json& cfg) {
auto conf = cfg;
std::string data_type_str = get_data_type_name<T>();

auto find_smallest_max_iters = [&](float expected_recall) -> int32_t {
auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_);
Expand Down Expand Up @@ -134,8 +138,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
conf[knowhere::meta::TOPK] = topk_;
conf[knowhere::indexparam::MAX_ITERATIONS] = find_smallest_max_iters(expected_recall);

printf("\n[%0.3f s] %s | %s | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), topk_, expected_recall);
printf("\n[%0.3f s] %s | %s(%s) | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), topk_, expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand All @@ -153,6 +157,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
auto conf = cfg;
auto M = conf[knowhere::indexparam::HNSW_M].get<int32_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int32_t>();
std::string data_type_str = get_data_type_name<T>();

auto find_smallest_ef = [&](float expected_recall) -> int32_t {
conf[knowhere::meta::TOPK] = topk_;
Expand Down Expand Up @@ -187,8 +192,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
conf[knowhere::indexparam::EF] = ef;
conf[knowhere::meta::TOPK] = topk_;

printf("\n[%0.3f s] %s | %s | M=%d | efConstruction=%d, ef=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), M, efConstruction, ef, topk_, expected_recall);
printf("\n[%0.3f s] %s | %s(%s) | M=%d | efConstruction=%d, ef=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), M, efConstruction, ef, topk_,
expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand All @@ -208,6 +214,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
const auto reorder_k = conf[knowhere::indexparam::REORDER_K].get<int32_t>();
const auto with_raw_data = conf[knowhere::indexparam::WITH_RAW_DATA].get<bool>();
auto nlist = conf[knowhere::indexparam::NLIST].get<int32_t>();
std::string data_type_str = get_data_type_name<T>();

auto find_smallest_nprobe = [&](float expected_recall) -> int32_t {
conf[knowhere::meta::TOPK] = topk_;
Expand Down Expand Up @@ -244,9 +251,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
conf[knowhere::indexparam::NPROBE] = nprobe;
conf[knowhere::meta::TOPK] = topk_;

printf("\n[%0.3f s] %s | %s | nlist=%d, nprobe=%d, reorder_k=%d, with_raw_data=%d, k=%d, R@=%.4f\n",
get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), nlist, nprobe, reorder_k,
with_raw_data ? 1 : 0, topk_, expected_recall);
printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, nprobe=%d, reorder_k=%d, with_raw_data=%d, k=%d, R@=%.4f\n",
get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), nlist, nprobe,
reorder_k, with_raw_data ? 1 : 0, topk_, expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand All @@ -263,6 +270,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
void
test_diskann(const knowhere::Json& cfg) {
auto conf = cfg;
std::string data_type_str = get_data_type_name<T>();

auto find_smallest_search_list_size = [&](float expected_recall) -> int32_t {
conf[knowhere::meta::TOPK] = topk_;
Expand Down Expand Up @@ -298,8 +306,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
conf[knowhere::indexparam::SEARCH_LIST_SIZE] = search_list_size;
conf[knowhere::meta::TOPK] = topk_;

printf("\n[%0.3f s] %s | %s | search_list_size=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), search_list_size, topk_, expected_recall);
printf("\n[%0.3f s] %s | %s(%s) | search_list_size=%d, k=%d, R@=%.4f\n", get_time_diff(),
ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), search_list_size, topk_,
expected_recall);
printf("================================================================================\n");
for (auto thread_num : THREAD_NUMs_) {
CALC_TIME_SPAN(task<T>(conf, thread_num, nq_));
Expand Down
20 changes: 12 additions & 8 deletions benchmark/hdf5/benchmark_float_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto conf = cfg;
auto radius = conf.at(knowhere::meta::RADIUS).get<float>();

printf("\n[%0.3f s] %s | %s, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
radius);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str(), radius);
printf("================================================================================\n");
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
Expand All @@ -54,8 +55,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto nlist = conf[knowhere::indexparam::NLIST].get<int64_t>();
auto radius = conf.at(knowhere::meta::RADIUS).get<float>();

printf("\n[%0.3f s] %s | %s | nlist=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), nlist, radius);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | nlist=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), nlist, radius);
printf("================================================================================\n");
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
Expand Down Expand Up @@ -84,8 +86,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto efc = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();
auto radius = conf.at(knowhere::meta::RADIUS).get<float>();

printf("\n[%0.3f s] %s | %s | M=%ld | efc=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), M, efc, radius);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efc=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efc, radius);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
Expand Down Expand Up @@ -113,8 +116,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto conf = cfg;
auto radius = conf.at(knowhere::meta::RADIUS).get<float>();

printf("\n[%0.3f s] %s | %s, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
radius);
std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str(), radius);
printf("================================================================================\n");
for (auto search_list_size : SEARCH_LISTs_) {
conf["search_list_size"] = search_list_size;
Expand Down
Loading

0 comments on commit 63ee72e

Please sign in to comment.