From b3bd8b5177ceb78aa0652e3b020108adb321648d Mon Sep 17 00:00:00 2001
From: Buqian Zheng <zhengbuqian@gmail.com>
Date: Tue, 5 Nov 2024 20:02:49 +0800
Subject: [PATCH] improve sparse vector index mmap: to mmap almost everything

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
---
 include/knowhere/sparse_utils.h          |  80 ++++++
 src/index/sparse/sparse_index_node.cc    |  15 +-
 src/index/sparse/sparse_inverted_index.h | 324 ++++++++++++++++-------
 src/io/memory_io.h                       |  10 +
 tests/ut/test_sparse.cc                  | 115 +++++---
 5 files changed, 400 insertions(+), 144 deletions(-)

diff --git a/include/knowhere/sparse_utils.h b/include/knowhere/sparse_utils.h
index aca4b3238..9b13bb9cc 100644
--- a/include/knowhere/sparse_utils.h
+++ b/include/knowhere/sparse_utils.h
@@ -274,4 +274,84 @@
     std::vector<SparseIdVal<T>> pool_;
 };  // class MaxMinHeap
 
+// A std::vector-like container that uses a fixed-size piece of free memory
+// (typically from mmap) as its backing store.
+//
+// It supports a limited subset of std::vector's operations.
+//
+// Must be initialized with a valid pointer to memory before use. The memory
+// must stay valid during the lifetime of this object. After initialization,
+// MmapVector has space for byte_size / sizeof(T) elements, none of which are
+// initialized.
+//
+// Currently only used in sparse InvertedIndex. Move to other places if needed.
+template <typename T>
+class MmapVector {
+ public:
+    using value_type = T;
+    using size_type = size_t;
+
+    MmapVector() = default;
+
+    void
+    initialize(void* data, size_type byte_size) {
+        if (byte_size % sizeof(T) != 0) {
+            throw std::invalid_argument("MmapVector byte_size must be a multiple of element size");
+        }
+        mmap_data_ = data;
+        mmap_byte_size_ = byte_size;
+        mmap_element_count_ = 0;
+    }
+
+    size_type
+    capacity() const {
+        return mmap_byte_size_ / sizeof(T);
+    }
+
+    size_type
+    size() const {
+        return mmap_element_count_;
+    }
+
+    template <typename... Args>
+    T&
+    emplace_back(Args&&... args) {
+        if (size() == capacity()) {
+            throw std::out_of_range("emplace_back on a full MmapVector");
+        }
+        auto* elem = reinterpret_cast<T*>(mmap_data_) + mmap_element_count_++;
+        return *new (elem) T(std::forward<Args>(args)...);
+    }
+
+    T&
+    operator[](size_type i) {
+        return reinterpret_cast<T*>(mmap_data_)[i];
+    }
+
+    const T&
+    operator[](size_type i) const {
+        return reinterpret_cast<const T*>(mmap_data_)[i];
+    }
+
+    T&
+    at(size_type i) {
+        if (i >= mmap_element_count_) {
+            throw std::out_of_range("MmapVector index out of range");
+        }
+        return reinterpret_cast<T*>(mmap_data_)[i];
+    }
+
+    const T&
+    at(size_type i) const {
+        if (i >= mmap_element_count_) {
+            throw std::out_of_range("MmapVector index out of range");
+        }
+        return reinterpret_cast<const T*>(mmap_data_)[i];
+    }
+
+ private:
+    void* mmap_data_ = nullptr;
+    size_type mmap_byte_size_ = 0;
+    size_type mmap_element_count_ = 0;
+};
+
 }  // namespace knowhere::sparse
diff --git a/src/index/sparse/sparse_index_node.cc b/src/index/sparse/sparse_index_node.cc
index 28324c504..6e47b6c59 100644
--- a/src/index/sparse/sparse_index_node.cc
+++ b/src/index/sparse/sparse_index_node.cc
@@ -53,7 +53,7 @@
             return Status::invalid_metric_type;
         }
         auto drop_ratio_build = cfg.drop_ratio_build.value_or(0.0f);
-        auto index_or = CreateIndex(cfg);
+        auto index_or = CreateIndex<false>(cfg);
         if (!index_or.has_value()) {
             return index_or.error();
         }
@@ -206,12 +206,12 @@
             return Status::invalid_binary_set;
         }
         MemoryIOReader reader(binary->data.get(), binary->size);
-        auto index_or = CreateIndex(cfg);
+        auto index_or = CreateIndex<false>(cfg);
         if (!index_or.has_value()) {
             return index_or.error();
         }
         index_ = index_or.value();
-        return index_->Load(reader, false);
+        return index_->Load(reader);
     }
 
     Status
@@ -237,13 +237,13 @@
         if (madvise(map_, map_size_, MADV_RANDOM) != 0) {
            LOG_KNOWHERE_WARNING_ << "Failed to madvise file: " << strerror(errno);
         }
-        auto index_or = CreateIndex(cfg);
+        auto index_or = CreateIndex<true>(cfg);
         if (!index_or.has_value()) {
            return index_or.error();
         }
         index_ = index_or.value();
         MemoryIOReader map_reader((uint8_t*)map_, map_size_);
-        return index_->Load(map_reader, true);
+        return index_->Load(map_reader, map_flags);
     }
 
     static std::unique_ptr<BaseConfig>
@@ -278,10 +278,11 @@
     }
 
  private:
+    template <bool mmapped>
     expected<sparse::BaseInvertedIndex<T>*>
     CreateIndex(const SparseInvertedIndexConfig& cfg) const {
         if (IsMetricType(cfg.metric_type.value(), metric::BM25)) {
-            auto idx = new sparse::InvertedIndex<T, use_wand, true>();
+            auto idx = new sparse::InvertedIndex<T, use_wand, true, mmapped>();
             if (!cfg.bm25_k1.has_value() || !cfg.bm25_b.has_value() || !cfg.bm25_avgdl.has_value()) {
                 return expected<sparse::BaseInvertedIndex<T>*>::Err(
                     Status::invalid_args, "BM25 parameters k1, b, and avgdl must be set when building/loading");
@@ -293,7 +294,7 @@
             idx->SetBM25Params(k1, b, avgdl, max_score_ratio);
             return idx;
         } else {
-            return new sparse::InvertedIndex<T, use_wand>();
+            return new sparse::InvertedIndex<T, use_wand, false, mmapped>();
         }
     }
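A minimal usage sketch of the new container (illustrative only, not part of the patch; the backing buffer below is plain heap memory, which MmapVector treats the same as an mmap'ed region, and the include path is assumed):

    #include <vector>
    #include "knowhere/sparse_utils.h"

    void
    mmap_vector_example() {
        std::vector<char> backing(16 * sizeof(float));   // stand-in for an mmap'ed region
        knowhere::sparse::MmapVector<float> vec;
        vec.initialize(backing.data(), backing.size());  // capacity() == 16, size() == 0
        vec.emplace_back(1.5f);                          // placement-new into the backing buffer
        vec.emplace_back(2.5f);
        float v = vec.at(1);                             // bounds-checked; vec[1] is unchecked
        (void)v;
    }

Note that MmapVector never allocates or frees: ownership of the backing memory stays with the caller, which is what lets InvertedIndex place many containers inside one large mapping.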
diff --git a/src/index/sparse/sparse_inverted_index.h b/src/index/sparse/sparse_inverted_index.h
index 18264c2ec..57b04044b 100644
--- a/src/index/sparse/sparse_inverted_index.h
+++ b/src/index/sparse/sparse_inverted_index.h
@@ -12,6 +12,8 @@
 #ifndef SPARSE_INVERTED_INDEX_H
 #define SPARSE_INVERTED_INDEX_H
 
+#include <sys/mman.h>
+
 #include <algorithm>
 #include <cmath>
 #include <memory>
@@ -36,7 +38,7 @@
     Save(MemoryIOWriter& writer) = 0;
 
     virtual Status
-    Load(MemoryIOReader& reader, bool is_mmap) = 0;
+    Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE) = 0;
 
     virtual Status
     Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) = 0;
@@ -68,12 +70,25 @@
     n_cols() const = 0;
 };
 
-template <typename T, bool use_wand = false, bool bm25 = false>
+template <typename T, bool use_wand = false, bool bm25 = false, bool mmapped = false>
 class InvertedIndex : public BaseInvertedIndex<T> {
  public:
    explicit InvertedIndex() {
    }
 
+    ~InvertedIndex() override {
+        if constexpr (mmapped) {
+            if (map_ != nullptr) {
+                auto res = munmap(map_, map_size_);
+                if (res != 0) {
+                    LOG_KNOWHERE_ERROR_ << "Failed to munmap when deleting sparse InvertedIndex: " << strerror(errno);
+                }
+                map_ = nullptr;
+                map_size_ = 0;
+            }
+        }
+    }
+
     void
     SetBM25Params(float k1, float b, float avgdl, float max_score_ratio) {
         bm25_params_ = std::make_unique<BM25Params>(k1, b, avgdl, max_score_ratio);
@@ -120,25 +135,21 @@
     Status
     Save(MemoryIOWriter& writer) override {
         /**
-         * zero copy is not yet implemented, now serializing in a zero copy
-         * compatible way while still copying during deserialization.
-         *
          * Layout:
         *
-         * 1. int32_t rows, sign indicates whether to use wand
-         * 2. int32_t cols
-         * 3. for each row:
-         *     1. int32_t len
+         * 1. size_t rows
+         * 2. size_t cols
+         * 3. T value_threshold_
+         * 4. for each row:
+         *     1. size_t len
         *     2. for each non-zero value:
         *        1. table_t idx
         *        2. T val
-         * With zero copy deserization, each SparseRow object should
-         *     reference(not owning) the memory address of the first element.
         *
         * inverted_lut_ and max_score_in_dim_ not serialized, they will be
         * constructed dynamically during deserialization.
         *
-         * Data are densly packed in serialized bytes and no padding is added.
+         * Data are densely packed in serialized bytes and no padding is added.
         */
         std::shared_lock lock(mu_);
         writeBinaryPOD(writer, n_rows_internal());
@@ -156,7 +167,7 @@
     }
 
     Status
-    Load(MemoryIOReader& reader, bool is_mmap) override {
+    Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE) override {
         std::unique_lock lock(mu_);
         int64_t rows;
         readBinaryPOD(reader, rows);
         rows = std::abs(rows);
         readBinaryPOD(reader, max_dim_);
         readBinaryPOD(reader, value_threshold_);
+        if (value_threshold_ > 0) {
+            drop_during_build_ = true;
+        }
 
-        raw_data_.reserve(rows);
-        if constexpr (bm25) {
-            bm25_params_->row_sums.resize(rows);
+        if constexpr (mmapped) {
+            RETURN_IF_ERROR(PrepareMmap(reader, rows, map_flags));
+        } else {
+            raw_data_.reserve(rows);
+            if constexpr (bm25) {
+                bm25_params_->row_sums.reserve(rows);
+            }
         }
 
         for (int64_t i = 0; i < rows; ++i) {
             size_t count;
             readBinaryPOD(reader, count);
-            if (is_mmap) {
+            if constexpr (mmapped) {
                 raw_data_.emplace_back(count, reader.data() + reader.tellg(), false);
                 reader.advance(count * SparseRow<T>::element_size());
             } else {
@@ -187,68 +205,169 @@
             }
             add_row_to_index(raw_data_[i], i);
         }
-
         return Status::success;
     }
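To make the serialization layout above concrete, here is a hedged sketch of the byte arithmetic that PrepareMmap (below) works backwards from; the width of table_t is an assumption here:

    #include <cstddef>
    #include <cstdint>

    // Total serialized size of an index with `rows` rows and `nnz` total
    // non-zeros, per the Save() layout comment: header fields, one size_t
    // length per row, then densely packed (idx, val) pairs.
    template <typename T>
    size_t
    serialized_bytes(size_t rows, size_t nnz) {
        using table_t = uint32_t;  // assumption about knowhere::sparse::table_t
        return 2 * sizeof(size_t)                    // rows, cols
             + sizeof(T)                             // value_threshold_
             + rows * sizeof(size_t)                 // per-row len
             + nnz * (sizeof(table_t) + sizeof(T));  // (idx, val) pairs
    }

Inverting this formula is exactly how PrepareMmap recovers nnz from reader.remaining() after the header fields have been consumed.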
 
-    // Non zero drop ratio is only supported for static index, i.e. data should
-    // include all rows that'll be added to the index.
+    // The memory in reader must remain valid during the lifetime of this object.
     Status
-    Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) override {
-        if (drop_ratio_build == 0.0f) {
-            return Status::success;
-        }
-        // TODO: maybe i += 10 to down sample to speed up.
-        size_t amount = 0;
-        for (size_t i = 0; i < rows; ++i) {
-            amount += data[i].size();
-        }
-        if (amount == 0) {
-            return Status::success;
-        }
-        std::vector<T> vals;
-        vals.reserve(amount);
+    PrepareMmap(MemoryIOReader& reader, size_t rows, int map_flags) {
+        const auto initial_reader_location = reader.tellg();
+        const auto nnz = (reader.remaining() - (rows * sizeof(size_t))) / SparseRow<T>::element_size();
+
+        // count raw vector idx occurrences
+        std::unordered_map<table_t, size_t> idx_counts;
         for (size_t i = 0; i < rows; ++i) {
-            for (size_t j = 0; j < data[i].size(); ++j) {
-                vals.push_back(fabs(data[i][j].val));
+            size_t row_nnz;
+            readBinaryPOD(reader, row_nnz);
+            if (row_nnz == 0) {
+                continue;
+            }
+            for (size_t j = 0; j < row_nnz; ++j) {
+                table_t idx;
+                readBinaryPOD(reader, idx);
+                idx_counts[idx]++;
+                // skip value
+                reader.advance(sizeof(T));
             }
         }
-        auto pos = vals.begin() + static_cast<size_t>(drop_ratio_build * vals.size());
-        // pos may be vals.end() if drop_ratio_build is 1.0, in that case we use
-        // the largest value as the threshold.
-        if (pos == vals.end()) {
-            pos--;
+        // reset reader to the beginning
+        reader.seekg(initial_reader_location);
+
+        auto raw_data_size = rows * sizeof(typename decltype(raw_data_)::value_type);
+        auto inverted_lut_size = idx_counts.size() * sizeof(typename decltype(inverted_lut_)::value_type);
+        // due to drop_ratio_build, the number of non-zero values actually added to the luts may be
+        // less than nnz. but since the memory is mmapped, it is ok to still allocate some extra space
+        // for those dropped values.
+        auto luts_size = nnz * sizeof(typename decltype(inverted_lut_)::value_type::value_type);
+        auto max_score_in_dim_size = idx_counts.size() * sizeof(typename decltype(max_score_in_dim_)::value_type);
+        size_t row_sums_size = 0;
+
+        map_size_ = raw_data_size + inverted_lut_size + luts_size;
+        if constexpr (use_wand) {
+            map_size_ += max_score_in_dim_size;
+        }
+        if constexpr (bm25) {
+            row_sums_size = rows * sizeof(typename decltype(bm25_params_->row_sums)::value_type);
+            map_size_ += row_sums_size;
         }
-        std::nth_element(vals.begin(), pos, vals.end());
 
-        std::unique_lock lock(mu_);
-        value_threshold_ = *pos;
-        drop_during_build_ = true;
-        return Status::success;
-    }
+        // clear MAP_SHARED flag as we want to create an anonymous mmap and will not share it with other processes.
+        map_flags &= ~MAP_SHARED;
 
-    Status
-    Add(const SparseRow<T>* data, size_t rows, int64_t dim) override {
-        std::unique_lock lock(mu_);
-        auto current_rows = n_rows_internal();
-        if (current_rows > 0 && drop_during_build_) {
-            LOG_KNOWHERE_ERROR_ << "Not allowed to add data to a built index with drop_ratio_build > 0.";
-            return Status::invalid_args;
+        // an anonymous mmap guarantees that the memory is zero-initialized.
+        map_ = static_cast<char*>(
+            mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, map_flags | MAP_ANON | MAP_PRIVATE, -1, 0));
+        if (map_ == MAP_FAILED) {
+            LOG_KNOWHERE_ERROR_ << "Failed to create anonymous mmap when loading sparse InvertedIndex: "
+                                << strerror(errno) << ", size: " << map_size_;
+            return Status::disk_file_error;
         }
-        if ((size_t)dim > max_dim_) {
-            max_dim_ = dim;
+        if (madvise(map_, map_size_, MADV_RANDOM) != 0) {
+            LOG_KNOWHERE_WARNING_ << "Failed to madvise mmap when loading sparse InvertedIndex: " << strerror(errno);
+        }
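PrepareMmap sizes every container up front and backs them all with a single anonymous mapping. A standalone sketch of the same pointer-bump carving pattern (names and region sizes are hypothetical, not the patch's):

    #include <sys/mman.h>
    #include <cstddef>
    #include <stdexcept>

    // Hand out consecutive, disjoint regions of one mapping.
    inline char*
    carve(char*& ptr, size_t bytes) {
        char* region = ptr;
        ptr += bytes;  // the next carve starts where this region ends
        return region;
    }

    void
    layout_example(size_t raw_data_bytes, size_t lut_bytes) {
        size_t total = raw_data_bytes + lut_bytes;
        void* base = mmap(nullptr, total, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (base == MAP_FAILED) {
            throw std::runtime_error("mmap failed");
        }
        char* ptr = static_cast<char*>(base);
        char* raw_data_region = carve(ptr, raw_data_bytes);  // e.g. raw_data_
        char* lut_region = carve(ptr, lut_bytes);            // e.g. inverted_lut_
        (void)raw_data_region;
        (void)lut_region;
        munmap(base, total);  // in the index this happens in the destructor
    }

One mapping instead of many keeps the bookkeeping (and the destructor's single munmap) trivial, and the anonymous mapping's zero-initialization means no explicit clearing is needed.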
+
+        char* ptr = map_;
+
+        // initialize the containers' memory.
+        raw_data_.initialize(ptr, raw_data_size);
+        ptr += raw_data_size;
+        inverted_lut_.initialize(ptr, inverted_lut_size);
+        ptr += inverted_lut_size;
+
+        if constexpr (use_wand) {
+            max_score_in_dim_.initialize(ptr, max_score_in_dim_size);
+            ptr += max_score_in_dim_size;
         }
-        raw_data_.insert(raw_data_.end(), data, data + rows);
         if constexpr (bm25) {
-            bm25_params_->row_sums.resize(current_rows + rows);
+            bm25_params_->row_sums.initialize(ptr, row_sums_size);
+            ptr += row_sums_size;
         }
-        for (size_t i = 0; i < rows; ++i) {
-            add_row_to_index(data[i], current_rows + i);
+
+        size_t dim_id = 0;
+        for (const auto& [idx, count] : idx_counts) {
+            dim_map_[idx] = dim_id;
+            auto& lut = inverted_lut_.emplace_back();
+            auto lut_size = count * sizeof(SparseIdVal<T>);
+            lut.initialize(ptr, lut_size);
+            ptr += lut_size;
+            if constexpr (use_wand) {
+                max_score_in_dim_.emplace_back(0);
+            }
+            ++dim_id;
         }
+        // in mmap mode, next_dim_id_ should never be used, but we still assign it for consistency.
+        next_dim_id_ = dim_id;
+
         return Status::success;
     }
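A toy illustration of the dim_map_ remapping the code above relies on: raw dimension ids are sparse and unbounded, while mapped ids are dense and index directly into flat containers (names here are hypothetical):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct DimRemapper {
        std::unordered_map<uint32_t, uint32_t> dim_map;  // raw dim -> dense id
        std::vector<float> max_score_in_dim;             // one slot per dense id

        uint32_t
        dim_id_of(uint32_t raw_dim) {
            auto [it, inserted] = dim_map.try_emplace(raw_dim, static_cast<uint32_t>(dim_map.size()));
            if (inserted) {
                max_score_in_dim.push_back(0.0f);
            }
            return it->second;
        }
    };

This remapping is what lets the per-dimension LUTs and max scores live in flat arrays (Vector) rather than in hash maps keyed by raw dimension.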
 
+    // Non-zero drop ratio is only supported for static index, i.e. data should
+    // include all rows that'll be added to the index.
+    Status
+    Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) override {
+        if constexpr (mmapped) {
+            throw std::invalid_argument("mmapped InvertedIndex does not support Train");
+        } else {
+            if (drop_ratio_build == 0.0f) {
+                return Status::success;
+            }
+            // TODO: maybe i += 10 to down sample to speed up.
+            size_t amount = 0;
+            for (size_t i = 0; i < rows; ++i) {
+                amount += data[i].size();
+            }
+            if (amount == 0) {
+                return Status::success;
+            }
+            std::vector<T> vals;
+            vals.reserve(amount);
+            for (size_t i = 0; i < rows; ++i) {
+                for (size_t j = 0; j < data[i].size(); ++j) {
+                    vals.push_back(fabs(data[i][j].val));
+                }
+            }
+            auto pos = vals.begin() + static_cast<size_t>(drop_ratio_build * vals.size());
+            // pos may be vals.end() if drop_ratio_build is 1.0, in that case we use
+            // the largest value as the threshold.
+            if (pos == vals.end()) {
+                pos--;
+            }
+            std::nth_element(vals.begin(), pos, vals.end());
+
+            std::unique_lock lock(mu_);
+            value_threshold_ = *pos;
+            drop_during_build_ = true;
+            return Status::success;
+        }
+    }
+
+    Status
+    Add(const SparseRow<T>* data, size_t rows, int64_t dim) override {
+        if constexpr (mmapped) {
+            throw std::invalid_argument("mmapped InvertedIndex does not support Add");
+        } else {
+            std::unique_lock lock(mu_);
+            auto current_rows = n_rows_internal();
+            if (current_rows > 0 && drop_during_build_) {
+                LOG_KNOWHERE_ERROR_ << "Not allowed to add data to a built index with drop_ratio_build > 0.";
+                return Status::invalid_args;
+            }
+            if ((size_t)dim > max_dim_) {
+                max_dim_ = dim;
+            }
+
+            raw_data_.insert(raw_data_.end(), data, data + rows);
+            if constexpr (bm25) {
+                bm25_params_->row_sums.reserve(current_rows + rows);
+            }
+            for (size_t i = 0; i < rows; ++i) {
+                add_row_to_index(data[i], current_rows + i);
+            }
+            return Status::success;
+        }
+    }
 
     void
     Search(const SparseRow<T>& query, size_t k, float drop_ratio_search, float* distances, label_t* labels,
            size_t refine_factor, const BitsetView& bitset, const DocValueComputer<T>& computer) const override {
@@ -318,22 +437,28 @@
 
     [[nodiscard]] size_t
     size() const override {
-        // TODO:
         std::shared_lock lock(mu_);
         size_t res = sizeof(*this);
-        res += sizeof(SparseRow<T>) * n_rows_internal();
-        for (auto& row : raw_data_) {
-            res += row.memory_usage();
+        for (size_t i = 0; i < raw_data_.size(); ++i) {
+            res += raw_data_[i].memory_usage();
         }
+        res += dim_map_.size() *
+               (sizeof(typename decltype(dim_map_)::key_type) + sizeof(typename decltype(dim_map_)::mapped_type));
 
-        res += (sizeof(table_t) + sizeof(std::vector<SparseIdVal<T>>)) * inverted_lut_.size();
-        for (const auto& [idx, lut] : inverted_lut_) {
-            res += sizeof(SparseIdVal<T>) * lut.capacity();
-        }
-        if constexpr (use_wand) {
-            res += (sizeof(table_t) + sizeof(T)) * max_score_in_dim_.size();
+        if constexpr (mmapped) {
+            return res + map_size_;
+        } else {
+            res += sizeof(typename decltype(raw_data_)::value_type) * raw_data_.capacity();
+
+            res += sizeof(typename decltype(inverted_lut_)::value_type) * inverted_lut_.capacity();
+            for (size_t i = 0; i < inverted_lut_.size(); ++i) {
+                res += sizeof(typename decltype(inverted_lut_)::value_type::value_type) * inverted_lut_[i].capacity();
+            }
+            if constexpr (use_wand) {
+                res += sizeof(typename decltype(max_score_in_dim_)::value_type) * max_score_in_dim_.capacity();
+            }
+            return res;
         }
-        return res;
     }
 
     [[nodiscard]] size_t
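The retained in-memory Train above derives value_threshold_ as the drop_ratio_build-th percentile of absolute values using std::nth_element, which is O(n) on average and avoids a full sort. A self-contained sketch of the same selection (assumes a non-empty input, as Train's early returns guarantee):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Illustrative version of Train's threshold selection.
    float
    percentile_threshold(std::vector<float> vals, float drop_ratio) {
        for (auto& v : vals) {
            v = std::fabs(v);
        }
        auto pos = vals.begin() + static_cast<size_t>(drop_ratio * vals.size());
        if (pos == vals.end()) {  // drop_ratio == 1.0: fall back to the largest value
            --pos;
        }
        std::nth_element(vals.begin(), pos, vals.end());
        return *pos;  // values with |v| below this may be dropped at build time
    }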
@@ -367,12 +492,12 @@
             if (v < q_threshold || i >= n_cols_internal()) {
                 continue;
             }
-            auto lut_it = inverted_lut_.find(i);
-            if (lut_it == inverted_lut_.end()) {
+            auto dim_id = dim_map_.find(i);
+            if (dim_id == dim_map_.end()) {
                 continue;
             }
+            auto& lut = inverted_lut_[dim_id->second];
             // TODO: improve with SIMD
-            auto& lut = lut_it->second;
             for (size_t j = 0; j < lut.size(); j++) {
                 auto [doc_id, val] = lut[j];
                 T val_sum = bm25 ? bm25_params_->row_sums.at(doc_id) : 0;
@@ -464,20 +589,17 @@
     search_wand(const SparseRow<T>& q_vec, T q_threshold, MaxMinHeap<T>& heap, const BitsetView& bitset,
                 const DocValueComputer<T>& computer) const {
         auto q_dim = q_vec.size();
-        std::vector<std::shared_ptr<Cursor<std::vector<SparseIdVal<T>>>>> cursors(q_dim);
+        std::vector<std::shared_ptr<Cursor<T>>> cursors(q_dim);
         auto valid_q_dim = 0;
         for (size_t i = 0; i < q_dim; ++i) {
             auto [idx, val] = q_vec[i];
-            if (std::abs(val) < q_threshold || idx >= n_cols_internal()) {
+            auto dim_id = dim_map_.find(idx);
+            if (dim_id == dim_map_.end() || std::abs(val) < q_threshold) {
                 continue;
             }
-            auto lut_it = inverted_lut_.find(idx);
-            if (lut_it == inverted_lut_.end()) {
-                continue;
-            }
-            auto& lut = lut_it->second;
-            cursors[valid_q_dim++] = std::make_shared<Cursor<std::vector<SparseIdVal<T>>>>(
-                lut, n_rows_internal(), max_score_in_dim_.find(idx)->second * val, val, bitset);
+            auto& lut = inverted_lut_[dim_id->second];
+            cursors[valid_q_dim++] = std::make_shared<Cursor<T>>(
+                lut, n_rows_internal(), max_score_in_dim_[dim_id->second] * val, val, bitset);
         }
         if (valid_q_dim == 0) {
             return;
         }
@@ -580,30 +702,42 @@
             if (val == 0 || (drop_during_build_ && fabs(val) < value_threshold_)) {
                 continue;
             }
-            if (inverted_lut_.find(idx) == inverted_lut_.end()) {
-                inverted_lut_[idx];
+            auto dim_it = dim_map_.find(idx);
+            if (dim_it == dim_map_.end()) {
+                if constexpr (mmapped) {
+                    throw std::runtime_error("unexpected vector dimension in mmapped InvertedIndex");
+                }
+                dim_it = dim_map_.insert({idx, next_dim_id_++}).first;
+                inverted_lut_.emplace_back();
                 if constexpr (use_wand) {
-                    max_score_in_dim_[idx] = 0;
+                    max_score_in_dim_.emplace_back(0);
                 }
             }
-            inverted_lut_[idx].emplace_back(id, val);
+            inverted_lut_[dim_it->second].emplace_back(id, val);
             if constexpr (use_wand) {
                 auto score = val;
                 if constexpr (bm25) {
                     score = bm25_params_->max_score_ratio * bm25_params_->wand_max_score_computer(val, row_sum);
                 }
-                max_score_in_dim_[idx] = std::max(max_score_in_dim_[idx], score);
+                max_score_in_dim_[dim_it->second] = std::max(max_score_in_dim_[dim_it->second], score);
             }
         }
         if constexpr (bm25) {
-            bm25_params_->row_sums.at(id) = row_sum;
+            bm25_params_->row_sums.emplace_back(row_sum);
         }
     }
 
-    std::vector<SparseRow<T>> raw_data_;
+    // key is raw sparse vector dim/idx, value is the mapped dim/idx id in the index.
+    std::unordered_map<table_t, uint32_t> dim_map_;
+
+    template <typename U>
+    using Vector = std::conditional_t<mmapped, MmapVector<U>, std::vector<U>>;
+
+    // operations used: reserve, [], size, emplace_back
+    Vector<SparseRow<T>> raw_data_;
     mutable std::shared_mutex mu_;
 
-    std::unordered_map<table_t, std::vector<SparseIdVal<T>>> inverted_lut_;
+    Vector<Vector<SparseIdVal<T>>> inverted_lut_;
     // If we want to drop small values during build, we must first train the
     // index with all the data to compute value_threshold_.
     bool drop_during_build_ = false;
@@ -611,15 +745,19 @@
     // will not be added to inverted_lut_. value_threshold_ is set to the
     // drop_ratio_build-th percentile of all absolute values in the index.
     T value_threshold_ = 0.0f;
-    std::unordered_map<table_t, T> max_score_in_dim_;
+    Vector<T> max_score_in_dim_;
     size_t max_dim_ = 0;
+    uint32_t next_dim_id_ = 0;
+
+    char* map_ = nullptr;
+    size_t map_size_ = 0;
 
     struct BM25Params {
         float k1;
         float b;
         // row_sums is used to cache the sum of values of each row, which
         // corresponds to the document length of each doc in the BM25 formula.
-        std::vector<float> row_sums;
+        Vector<float> row_sums;
 
         // below are used only for WAND index.
         float max_score_ratio;
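The single Vector alias is what lets one InvertedIndex implementation serve both modes: each member container resolves to MmapVector or std::vector at compile time. A standalone illustration of the pattern (stub types, not the real classes):

    #include <type_traits>
    #include <vector>

    // Stand-in for knowhere::sparse::MmapVector, just to show the alias.
    template <typename T>
    struct MmapVectorStub {};

    template <bool Mmapped>
    struct Containers {
        template <typename U>
        using Vector = std::conditional_t<Mmapped, MmapVectorStub<U>, std::vector<U>>;

        Vector<float> scores;  // picks its backing store at compile time
    };

    static_assert(std::is_same_v<Containers<false>::Vector<int>, std::vector<int>>);
    static_assert(std::is_same_v<Containers<true>::Vector<int>, MmapVectorStub<int>>);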
diff --git a/src/io/memory_io.h b/src/io/memory_io.h
index a4837c759..461c6aaba 100644
--- a/src/io/memory_io.h
+++ b/src/io/memory_io.h
@@ -150,6 +150,16 @@
     reset() {
         rp_ = 0;
     }
+
+    void
+    seekg(size_t pos) {
+        rp_ = pos;
+    }
+
+    size_t
+    remaining() const {
+        return total_ - rp_;
+    }
 };
 
 }  // namespace knowhere
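remaining() and seekg() exist for PrepareMmap's two-pass read: size everything from the tail of the buffer, then rewind. A hedged sketch of that access pattern (the include path and the element_size parameter are assumptions):

    #include <cstddef>
    #include "io/memory_io.h"

    // Count total non-zeros in the serialized payload, then rewind so the
    // caller can re-read the rows from the same position (mirrors PrepareMmap).
    size_t
    count_nnz_and_rewind(knowhere::MemoryIOReader& reader, size_t rows, size_t element_size) {
        const auto start = reader.tellg();
        const auto nnz = (reader.remaining() - rows * sizeof(size_t)) / element_size;
        // ... a counting pass over the rows would go here ...
        reader.seekg(start);
        return nnz;
    }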
diff --git a/tests/ut/test_sparse.cc b/tests/ut/test_sparse.cc
index 4c4d2f95c..88e79d2c9 100644
--- a/tests/ut/test_sparse.cc
+++ b/tests/ut/test_sparse.cc
@@ -21,6 +21,17 @@
 #include "knowhere/index/index_factory.h"
 #include "utils.h"
 
+void
+WriteBinaryToFile(const std::string& filename, const knowhere::BinaryPtr binary) {
+    auto data = binary->data.get();
+    auto size = binary->size;
+    // if the file already exists, remove it
+    std::remove(filename.c_str());
+    std::ofstream out(filename, std::ios::binary);
+    out.write((const char*)data, size);
+    out.close();
+}
+
 TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
     auto [nb, dim, doc_sparsity, query_sparsity] = GENERATE(table<int32_t, int32_t, float, float>({
         // 300 dim, avg doc nnz 12, avg query nnz 9
@@ -122,14 +133,7 @@
         knowhere::BinarySet bs;
         REQUIRE(idx.Serialize(bs) == knowhere::Status::success);
         if (use_mmap) {
-            auto binary = bs.GetByName(idx.Type());
-            auto data = binary->data.get();
-            auto size = binary->size;
-            // if tmp_file already exists, remove it
-            std::remove(tmp_file);
-            std::ofstream out(tmp_file, std::ios::binary);
-            out.write((const char*)data, size);
-            out.close();
+            WriteBinaryToFile(tmp_file, bs.GetByName(idx.Type()));
             REQUIRE(idx.DeserializeFromFile(tmp_file, json) == knowhere::Status::success);
         } else {
             REQUIRE(idx.Deserialize(bs, json) == knowhere::Status::success);
@@ -343,47 +347,61 @@
         make_tuple(knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX, sparse_inverted_index_gen),
         make_tuple(knowhere::IndexEnum::INDEX_SPARSE_WAND, sparse_inverted_index_gen),
     }));
-    auto idx = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
-    auto cfg_json = gen().dump();
-    CAPTURE(name, cfg_json);
-    knowhere::Json json = knowhere::Json::parse(cfg_json);
+    auto use_mmap = GENERATE(true, false);
+    auto tmp_file = "/tmp/knowhere_sparse_inverted_index_test";
+    {
+        auto idx = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
+        auto cfg_json = gen().dump();
+        CAPTURE(name, cfg_json);
+        knowhere::Json json = knowhere::Json::parse(cfg_json);
 
-    auto ids_ds = GenIdsDataSet(nb, nq);
-    REQUIRE(idx.Type() == name);
-    auto res = idx.Build(train_ds, json);
-    REQUIRE(idx.HasRawData(metric) == knowhere::IndexStaticFaced<knowhere::fp32>::HasRawData(name, version, json));
-    if (!idx.HasRawData(metric)) {
-        return;
-    }
-    REQUIRE(res == knowhere::Status::success);
-    knowhere::BinarySet bs;
-    idx.Serialize(bs);
+        auto ids_ds = GenIdsDataSet(nb, nq);
+        REQUIRE(idx.Type() == name);
+        auto res = idx.Build(train_ds, json);
+        REQUIRE(idx.HasRawData(metric) ==
+                knowhere::IndexStaticFaced<knowhere::fp32>::HasRawData(name, version, json));
+        if (!idx.HasRawData(metric)) {
+            return;
+        }
+        REQUIRE(res == knowhere::Status::success);
+        knowhere::BinarySet bs;
+        idx.Serialize(bs);
 
-    auto idx_new = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
-    idx_new.Deserialize(bs);
+        auto idx_new = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
+        if (use_mmap) {
+            WriteBinaryToFile(tmp_file, bs.GetByName(idx.Type()));
+            REQUIRE(idx_new.DeserializeFromFile(tmp_file, json) == knowhere::Status::success);
+        } else {
+            REQUIRE(idx_new.Deserialize(bs, json) == knowhere::Status::success);
+        }
 
-    auto retrieve_task = [&]() {
-        auto results = idx_new.GetVectorByIds(ids_ds);
-        REQUIRE(results.has_value());
-        auto xb = (knowhere::sparse::SparseRow<float>*)train_ds->GetTensor();
-        auto res_data = (knowhere::sparse::SparseRow<float>*)results.value()->GetTensor();
-        for (int i = 0; i < nq; ++i) {
-            const auto id = ids_ds->GetIds()[i];
-            const auto& truth_row = xb[id];
-            const auto& res_row = res_data[i];
-            REQUIRE(truth_row.size() == res_row.size());
-            for (size_t j = 0; j < truth_row.size(); ++j) {
-                REQUIRE(truth_row[j] == res_row[j]);
+        auto retrieve_task = [&]() {
+            auto results = idx_new.GetVectorByIds(ids_ds);
+            REQUIRE(results.has_value());
+            auto xb = (knowhere::sparse::SparseRow<float>*)train_ds->GetTensor();
+            auto res_data = (knowhere::sparse::SparseRow<float>*)results.value()->GetTensor();
+            for (int i = 0; i < nq; ++i) {
+                const auto id = ids_ds->GetIds()[i];
+                const auto& truth_row = xb[id];
+                const auto& res_row = res_data[i];
+                REQUIRE(truth_row.size() == res_row.size());
+                for (size_t j = 0; j < truth_row.size(); ++j) {
+                    REQUIRE(truth_row[j] == res_row[j]);
+                }
             }
-        }
-    };
+        };
 
-    std::vector<std::future<void>> retrieve_task_list;
-    for (int i = 0; i < 20; i++) {
-        retrieve_task_list.push_back(std::async(std::launch::async, [&] { return retrieve_task(); }));
+        std::vector<std::future<void>> retrieve_task_list;
+        for (int i = 0; i < 20; i++) {
+            retrieve_task_list.push_back(std::async(std::launch::async, [&] { return retrieve_task(); }));
+        }
+        for (auto& task : retrieve_task_list) {
+            task.wait();
+        }
+        // end of scope: idx/idx_new are destroyed here, which munmaps the file
     }
-    for (auto& task : retrieve_task_list) {
-        task.wait();
+    if (use_mmap) {
+        REQUIRE(std::remove(tmp_file) == 0);
     }
 }
@@ -446,7 +464,16 @@
 
     knowhere::BinarySet bs;
     REQUIRE(idx.Serialize(bs) == knowhere::Status::success);
-    REQUIRE(idx.Deserialize(bs, json) == knowhere::Status::success);
+
+    auto use_mmap = GENERATE(false);
+    auto tmp_file = "/tmp/knowhere_sparse_inverted_index_test";
+
+    if (use_mmap) {
+        WriteBinaryToFile(tmp_file, bs.GetByName(idx.Type()));
+        REQUIRE(idx.DeserializeFromFile(tmp_file, json) == knowhere::Status::success);
+    } else {
+        REQUIRE(idx.Deserialize(bs, json) == knowhere::Status::success);
+    }
 
     const knowhere::Json conf = {
         {knowhere::meta::METRIC_TYPE, metric}, {knowhere::meta::TOPK, topk}, {knowhere::meta::BM25_K1, 1.2},
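Taken together, the mmap path exercised by these tests is a round trip of the following shape (condensed and illustrative; idx, name, version, json and tmp_file are as in the test cases above):

    knowhere::BinarySet bs;
    REQUIRE(idx.Serialize(bs) == knowhere::Status::success);
    WriteBinaryToFile(tmp_file, bs.GetByName(idx.Type()));

    auto idx_mmap = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
    REQUIRE(idx_mmap.DeserializeFromFile(tmp_file, json) == knowhere::Status::success);
    // searches are now served from the mappings built during deserialization;
    // once idx_mmap is destroyed and its mappings released, tmp_file can be removed.

The extra scope around the GetVectorByIds test exists precisely for that last point: the index must be destroyed, and its mapping of the file released, before the temporary file is deleted.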