Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sparse mmap mode to use a disk file as backing mmap memory #935

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/index/sparse/sparse_index_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ class SparseInvertedIndexNode : public IndexNode {
}
index_ = index_or.value();
MemoryIOReader map_reader((uint8_t*)map_, map_size_);
return index_->Load(map_reader, map_flags);
auto supplement_target_filename = filename + ".knowhere_sparse_index_supplement";
return index_->Load(map_reader, map_flags, supplement_target_filename);
}

static std::unique_ptr<BaseConfig>
Expand Down
48 changes: 39 additions & 9 deletions src/index/sparse/sparse_inverted_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
#ifndef SPARSE_INVERTED_INDEX_H
#define SPARSE_INVERTED_INDEX_H

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <cmath>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <queue>
#include <unordered_map>
Expand All @@ -37,8 +41,10 @@ class BaseInvertedIndex {
virtual Status
Save(MemoryIOWriter& writer) = 0;

// supplement_target_filename: in mmap mode, an extra file is needed to back the mmapped index data structure.
// this file is created during loading and unlinked right away; its storage is released once the file descriptor is closed in the destructor.
virtual Status
Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE) = 0;
Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE, const std::string& supplement_target_filename = "") = 0;

virtual Status
Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) = 0;
Expand Down Expand Up @@ -86,6 +92,11 @@ class InvertedIndex : public BaseInvertedIndex<T> {
map_ = nullptr;
map_byte_size_ = 0;
}
if (map_fd_ != -1) {
// the file was already unlinked during loading, so closing the file descriptor lets the OS reclaim its storage.
close(map_fd_);
map_fd_ = -1;
}
}
}

Expand Down Expand Up @@ -167,7 +178,8 @@ class InvertedIndex : public BaseInvertedIndex<T> {
}

Status
Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE) override {
Load(MemoryIOReader& reader, int map_flags = MAP_PRIVATE,
const std::string& supplement_target_filename = "") override {
std::unique_lock<std::shared_mutex> lock(mu_);
int64_t rows;
readBinaryPOD(reader, rows);
Expand All @@ -182,7 +194,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {
}

if constexpr (mmapped) {
RETURN_IF_ERROR(PrepareMmap(reader, rows, map_flags));
RETURN_IF_ERROR(PrepareMmap(reader, rows, map_flags, supplement_target_filename));
} else {
raw_data_.reserve(rows);
if constexpr (bm25) {
Expand All @@ -209,7 +221,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {

// memory in reader must be guaranteed to be valid during the lifetime of this object.
Status
PrepareMmap(MemoryIOReader& reader, size_t rows, int map_flags) {
PrepareMmap(MemoryIOReader& reader, size_t rows, int map_flags, const std::string& supplement_target_filename) {
const auto initial_reader_location = reader.tellg();
const auto nnz = (reader.remaining() - (rows * sizeof(size_t))) / SparseRow<T>::element_size();

Expand Down Expand Up @@ -250,15 +262,32 @@ class InvertedIndex : public BaseInvertedIndex<T> {
map_byte_size_ += row_sums_byte_size;
}

// clear MAP_SHARED flag as we want to create an anonymous mmap and will not share it with other processes.
std::ofstream temp_file(supplement_target_filename, std::ios::binary | std::ios::trunc);
if (!temp_file) {
LOG_KNOWHERE_ERROR_ << "Failed to create mmap file when loading sparse InvertedIndex: " << strerror(errno);
return Status::disk_file_error;
}
temp_file.close();

std::filesystem::resize_file(supplement_target_filename, map_byte_size_);

map_fd_ = open(supplement_target_filename.c_str(), O_RDWR);
if (map_fd_ == -1) {
LOG_KNOWHERE_ERROR_ << "Failed to open mmap file when loading sparse InvertedIndex: " << strerror(errno);
return Status::disk_file_error;
}
// the file disappears from the filesystem immediately, but its contents are not actually deleted
// until the file descriptor is closed in the destructor.
std::filesystem::remove(supplement_target_filename);

// clear MAP_SHARED flag as we will not share it with other processes.
map_flags &= ~MAP_SHARED;

// anonymous mmap guarantees that the memory is zero-initialized.
map_ = static_cast<char*>(
mmap(nullptr, map_byte_size_, PROT_READ | PROT_WRITE, map_flags | MAP_ANON | MAP_PRIVATE, -1, 0));
mmap(nullptr, map_byte_size_, PROT_READ | PROT_WRITE, map_flags | MAP_PRIVATE, map_fd_, 0));
if (map_ == MAP_FAILED) {
LOG_KNOWHERE_ERROR_ << "Failed to create anonymous mmap when loading sparse InvertedIndex: "
<< strerror(errno) << ", size: " << map_byte_size_;
LOG_KNOWHERE_ERROR_ << "Failed to create mmap when loading sparse InvertedIndex: " << strerror(errno)
<< ", size: " << map_byte_size_ << " on file: " << supplement_target_filename;
return Status::disk_file_error;
}
if (madvise(map_, map_byte_size_, MADV_RANDOM) != 0) {
Expand Down Expand Up @@ -750,6 +779,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {

char* map_ = nullptr;
size_t map_byte_size_ = 0;
int map_fd_ = -1;

struct BM25Params {
float k1;
Expand Down
Loading