Skip to content

Commit

Permalink
A candidate that uses Faiss 1.7.4 (early Sep 2023 version)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexanderguzhva committed Sep 20, 2023
1 parent 7b63839 commit 36097a2
Show file tree
Hide file tree
Showing 426 changed files with 46,268 additions and 12,295 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ venv/
**/knowhere/swigknowhere.py
wheelhouse/*

**/thirdparty/cardinal


*.bin

Expand Down
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ knowhere_option(WITH_BENCHMARK "Build with benchmark" OFF)
knowhere_option(WITH_COVERAGE "Build with coverage" OFF)
knowhere_option(WITH_CCACHE "Build with ccache" ON)
knowhere_option(WITH_PROFILER "Build with profiler" OFF)
knowhere_option(WITH_FAISS_TESTS "Build with Faiss unit tests" OFF)

if(KNOWHERE_VERSION)
message(STATUS "Building KNOWHERE version: ${KNOWHERE_VERSION}")
Expand Down Expand Up @@ -147,6 +148,10 @@ if(WITH_BENCHMARK)
add_subdirectory(benchmark)
endif()

# Add the tests/faiss subdirectory to the build only when the
# WITH_FAISS_TESTS option is enabled.
if(WITH_FAISS_TESTS)
add_subdirectory(tests/faiss)
endif()

install(TARGETS knowhere
DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/knowhere"
Expand Down
2 changes: 1 addition & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ endif()
# benchmark_test(<target> <file>)
#
# Builds the executable <target> from the source list <file>, links it against
# ${depend_libs}, ${unittest_libs} and `atomic`, and installs the resulting
# binary into the `unittest` destination.
macro(benchmark_test target file)
    set(FILE_SRCS ${file})
    add_executable(${target} ${FILE_SRCS})
    # One link call is enough: this list is a superset of the libraries that
    # were previously passed in a separate, atomic-less call.
    target_link_libraries(${target} ${depend_libs} ${unittest_libs} atomic)
    install(TARGETS ${target} DESTINATION unittest)
endmacro()

Expand Down
3 changes: 2 additions & 1 deletion benchmark/hdf5/benchmark_float_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@ TEST_F(Benchmark_float_bitset, TEST_DISKANN) {
std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
auto diskann_index_pack = knowhere::Pack(file_manager);

index_ = knowhere::IndexFactory::Instance().Create(index_type_, diskann_index_pack);
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version, diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.Build(*ds_ptr, conf);
Expand Down
3 changes: 2 additions & 1 deletion benchmark/hdf5/benchmark_float_range_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ TEST_F(Benchmark_float_range_bitset, TEST_DISKANN) {
std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
auto diskann_index_pack = knowhere::Pack(file_manager);

index_ = knowhere::IndexFactory::Instance().Create(index_type_, diskann_index_pack);
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version, diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.Build(*ds_ptr, conf);
Expand Down
7 changes: 5 additions & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "knowhere/config.h"
#include "knowhere/factory.h"
#include "knowhere/index.h"
#include "knowhere/version.h"

class Benchmark_knowhere : public Benchmark_hdf5 {
public:
Expand Down Expand Up @@ -93,8 +94,9 @@ class Benchmark_knowhere : public Benchmark_hdf5 {

knowhere::Index<knowhere::IndexNode>
create_index(const std::string& index_file_name, const knowhere::Json& conf) {
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
printf("[%.3f s] Creating index \"%s\"\n", get_time_diff(), index_type_.c_str());
index_ = knowhere::IndexFactory::Instance().Create(index_type_);
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version);

try {
printf("[%.3f s] Reading index file: %s\n", get_time_diff(), index_file_name.c_str());
Expand All @@ -112,11 +114,12 @@ class Benchmark_knowhere : public Benchmark_hdf5 {

knowhere::Index<knowhere::IndexNode>
create_golden_index(const knowhere::Json& conf) {
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
golden_index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP;

std::string golden_index_file_name = ann_test_name_ + "_" + golden_index_type_ + "_GOLDEN" + ".index";
printf("[%.3f s] Creating golden index \"%s\"\n", get_time_diff(), golden_index_type_.c_str());
golden_index_ = knowhere::IndexFactory::Instance().Create(golden_index_type_);
golden_index_ = knowhere::IndexFactory::Instance().Create(golden_index_type_, version);

try {
printf("[%.3f s] Reading golden index file: %s\n", get_time_diff(), golden_index_file_name.c_str());
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/builder/cpu/ubuntu20.04/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM ubuntu:20.04

ENV CMAKE_VERSION="v3.23"
ENV CMAKE_TAR="cmake-3.23.0-linux-x86_64.tar.gz"
ENV CMAKE_VERSION="v3.27"
ENV CMAKE_TAR="cmake-3.27.5-linux-x86_64.tar.gz"
RUN apt-get update && apt-get install -y --no-install-recommends wget curl g++ gcc ca-certificates\
make ccache python3-dev gfortran python3-setuptools swig libopenblas-dev pip \
&& apt-get remove --purge -y \
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/builder/gpu/ubuntu20.04/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM nvidia/cuda:11.6.0-devel-ubuntu20.04

ENV CMAKE_VERSION="v3.23"
ENV CMAKE_TAR="cmake-3.23.1-linux-x86_64.tar.gz"
ENV CMAKE_VERSION="v3.27"
ENV CMAKE_TAR="cmake-3.27.5-linux-x86_64.tar.gz"
RUN apt-get update && apt-get install -y --no-install-recommends wget curl g++ gcc ca-certificates\
make ccache python3-dev gfortran python3-setuptools swig libopenblas-dev pip \
&& apt-get remove --purge -y \
Expand Down
4 changes: 4 additions & 0 deletions cmake/libs/libfaiss.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ knowhere_file_glob(GLOB FAISS_AVX512_SRCS

list(REMOVE_ITEM FAISS_SRCS ${FAISS_AVX512_SRCS})

# disable RHNSW: collect thirdparty/faiss/faiss/impl/RHNSW.cpp into
# FAISS_RHNSW_SRCS and remove it from the FAISS_SRCS list.
knowhere_file_glob(GLOB FAISS_RHNSW_SRCS thirdparty/faiss/faiss/impl/RHNSW.cpp)
list(REMOVE_ITEM FAISS_SRCS ${FAISS_RHNSW_SRCS})

if(__X86_64)
set(UTILS_SRC src/simd/distances_ref.cc src/simd/hook.cc)
set(UTILS_SSE_SRC src/simd/distances_sse.cc)
Expand Down
7 changes: 6 additions & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class KnowhereConan(ConanFile):
"with_ut": [True, False],
"with_benchmark": [True, False],
"with_coverage": [True, False],
"with_faiss_tests": [True, False],
}
default_options = {
"shared": True,
Expand All @@ -47,6 +48,7 @@ class KnowhereConan(ConanFile):
"with_coverage": False,
"boost:without_test": True,
"fmt:header_only": True,
"with_faiss_tests": False,
}

exports_sources = (
Expand Down Expand Up @@ -81,7 +83,7 @@ def configure(self):
self.options.rm_safe("fPIC")

def requirements(self):
self.requires("boost/1.78.0")
self.requires("boost/1.83.0")
self.requires("glog/0.4.0")
self.requires("nlohmann_json/3.11.2")
self.requires("openssl/1.1.1t")
Expand All @@ -96,6 +98,8 @@ def requirements(self):
if self.options.with_benchmark:
self.requires("gtest/1.13.0")
self.requires("hdf5/1.14.0")
if self.options.with_faiss_tests:
self.requires("gtest/1.13.0")

@property
def _required_boost_components(self):
Expand Down Expand Up @@ -156,6 +160,7 @@ def generate(self):
tc.variables["WITH_UT"] = self.options.with_ut
tc.variables["WITH_BENCHMARK"] = self.options.with_benchmark
tc.variables["WITH_COVERAGE"] = self.options.with_coverage
tc.variables["WITH_FAISS_TESTS"] = self.options.with_faiss_tests
tc.generate()
deps = CMakeDeps(self)
deps.generate()
Expand Down
1 change: 1 addition & 0 deletions include/knowhere/bitsetview.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#define BITSET_H

#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>

Expand Down
4 changes: 4 additions & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ constexpr const char* INDEX_DISKANN = "DISKANN";
namespace meta {
constexpr const char* INDEX_TYPE = "index_type";
constexpr const char* METRIC_TYPE = "metric_type";
constexpr const char* DATA_PATH = "data_path";
constexpr const char* INDEX_PREFIX = "index_prefix";
constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version";
constexpr const char* RETRIEVE_FRIENDLY = "retrieve_friendly";
constexpr const char* DIM = "dim";
constexpr const char* TENSOR = "tensor";
constexpr const char* ROWS = "rows";
Expand Down
6 changes: 6 additions & 0 deletions include/knowhere/comp/knowhere_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ class KnowhereConfig {
static bool
SetAioContextPool(size_t num_ctx);

static void
SetBuildThreadPoolSize(size_t num_threads);

static void
SetSearchThreadPoolSize(size_t num_threads);

/**
* init GPU Resource
*/
Expand Down
10 changes: 10 additions & 0 deletions include/knowhere/comp/local_file_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@

#include <unordered_set>

// Pick the filesystem library this toolchain provides and expose it under the
// short alias `fs`: prefer the standard <filesystem>, fall back to
// <experimental/filesystem>, and stop the build if neither header exists.
#if __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
#elif __has_include(<experimental/filesystem>)
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#else
// This has to be a real preprocessor directive: a bare `error "..."` is parsed
// as an ill-formed declaration, so the intended message would never be shown.
#error "Missing the <filesystem> header."
#endif

#include "knowhere/file_manager.h"
namespace knowhere {
/**
Expand Down
16 changes: 16 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,13 +501,29 @@ class BaseConfig : public Config {
CFG_STRING metric_type;
CFG_INT k;
CFG_INT num_build_thread;
CFG_BOOL retrieve_friendly;
CFG_STRING data_path;
CFG_STRING index_prefix;
CFG_FLOAT radius;
CFG_FLOAT range_filter;
CFG_BOOL trace_visit;
CFG_BOOL enable_mmap;
CFG_BOOL for_tuning;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type).set_default("L2").description("metric type").for_train_and_search();
KNOWHERE_CONFIG_DECLARE_FIELD(retrieve_friendly)
.description("whether the index holds raw data for fast retrieval")
.set_default(false)
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(data_path)
.description("raw data path.")
.allow_empty_without_default()
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(index_prefix)
.description("path prefix to load or save index.")
.allow_empty_without_default()
.for_train()
.for_deserialize();
KNOWHERE_CONFIG_DECLARE_FIELD(k)
.set_default(10)
.description("search for top k similar vector.")
Expand Down
46 changes: 45 additions & 1 deletion include/knowhere/expected.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,57 @@ enum class Status {
hnsw_inner_error = 12,
malloc_error = 13,
diskann_inner_error = 14,
diskann_file_error = 15,
disk_file_error = 15,
invalid_value_in_json = 16,
arithmetic_overflow = 17,
raft_inner_error = 18,
invalid_binary_set = 19,
};

/**
 * Translate a knowhere::Status code into a short human-readable description.
 *
 * Any code that has no dedicated entry in the switch below is reported as
 * "unexpected status".
 */
inline std::string
Status2String(knowhere::Status status) {
    // Start from the fallback text; a matching case overrides it.
    const char* text = "unexpected status";
    switch (status) {
        case knowhere::Status::invalid_args:
            text = "invalid args";
            break;
        case knowhere::Status::invalid_param_in_json:
            text = "invalid param in json";
            break;
        case knowhere::Status::out_of_range_in_json:
            text = "out of range in json";
            break;
        case knowhere::Status::type_conflict_in_json:
            text = "type conflict in json";
            break;
        case knowhere::Status::invalid_metric_type:
            text = "invalid metric type";
            break;
        case knowhere::Status::empty_index:
            text = "empty index";
            break;
        case knowhere::Status::not_implemented:
            text = "not implemented";
            break;
        case knowhere::Status::index_not_trained:
            text = "index not trained";
            break;
        case knowhere::Status::index_already_trained:
            text = "index already trained";
            break;
        case knowhere::Status::faiss_inner_error:
            text = "faiss inner error";
            break;
        case knowhere::Status::hnsw_inner_error:
            text = "hnsw inner error";
            break;
        case knowhere::Status::malloc_error:
            text = "malloc error";
            break;
        case knowhere::Status::diskann_inner_error:
            text = "diskann inner error";
            break;
        case knowhere::Status::disk_file_error:
            text = "disk file error";
            break;
        case knowhere::Status::invalid_value_in_json:
            text = "invalid value in json";
            break;
        case knowhere::Status::arithmetic_overflow:
            text = "arithmetic overflow";
            break;
        case knowhere::Status::raft_inner_error:
            text = "raft inner error";
            break;
        case knowhere::Status::invalid_binary_set:
            text = "invalid binary set";
            break;
        default:
            break;
    }
    return text;
}

template <typename T>
class expected {
public:
Expand Down
6 changes: 3 additions & 3 deletions include/knowhere/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ namespace knowhere {
class IndexFactory {
public:
Index<IndexNode>
Create(const std::string& name, const Object& object = nullptr);
Create(const std::string& name, const std::string& version, const Object& object = nullptr);
const IndexFactory&
Register(const std::string& name, std::function<Index<IndexNode>(const Object&)> func);
Register(const std::string& name, std::function<Index<IndexNode>(const std::string& version, const Object&)> func);
static IndexFactory&
Instance();

private:
typedef std::map<std::string, std::function<Index<IndexNode>(const Object&)>> FuncMap;
typedef std::map<std::string, std::function<Index<IndexNode>(const std::string&, const Object&)>> FuncMap;
IndexFactory();
static FuncMap&
MapInstance();
Expand Down
10 changes: 10 additions & 0 deletions include/knowhere/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ class Index {
return *this;
}

/**
 * Return the `node` pointer held by this Index, allowing mutable access to
 * the pointee.
 *
 * NOTE(review): the declaration of `node` and its ownership rules are outside
 * this view — presumably the Index keeps ownership and callers must not
 * delete the result; confirm against the class definition.
 */
T1*
Node() {
return node;
}

/**
 * Const overload: return the `node` pointer held by this Index as a
 * pointer-to-const, for read-only access to the pointee.
 *
 * NOTE(review): the declaration of `node` and its ownership rules are outside
 * this view — presumably the Index keeps ownership; confirm against the class
 * definition.
 */
const T1*
Node() const {
return node;
}

template <typename T2>
Index<T2>
Cast() {
Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,7 @@ round_down(const T value, const T align) {
extern void
ConvertIVFFlatIfNeeded(const BinarySet& binset, const uint8_t* raw_data, const size_t raw_size);

bool
UseDiskLoad(const std::string& index_type, const std::string& /*version*/);

} // namespace knowhere
Loading

0 comments on commit 36097a2

Please sign in to comment.