Skip to content

Commit

Permalink
[Enhancement] Support gc for cloud native persistent index files (Sta…
Browse files Browse the repository at this point in the history
…rRocks#51684)

Signed-off-by: luohaha <[email protected]>
  • Loading branch information
luohaha authored Oct 10, 2024
1 parent fe227d6 commit 8a8687e
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 22 deletions.
4 changes: 4 additions & 0 deletions be/src/storage/lake/filenames.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ inline bool is_tablet_metadata_lock(std::string_view file_name) {
return HasSuffixString(file_name, ".lock");
}

// Returns true when |file_name| ends with the ".sst" suffix.
inline bool is_sst(std::string_view file_name) {
    const bool has_sst_suffix = HasSuffixString(file_name, ".sst");
    return has_sst_suffix;
}

// Builds the file name of a tablet metadata file from |tablet_id| and |version|.
// Each value is formatted as upper-case hexadecimal, zero-padded to 16 characters;
// the two fields are joined by '_' and followed by the ".meta" suffix.
inline std::string tablet_metadata_filename(int64_t tablet_id, int64_t version) {
    std::string metadata_name = fmt::format("{:016X}_{:016X}.meta", tablet_id, version);
    return metadata_name;
}
Expand Down
53 changes: 33 additions & 20 deletions be/src/storage/lake/vacuum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,11 @@ Status delete_tablets_impl(TabletManager* tablet_mgr, const std::string& root_di
RETURN_IF_ERROR(deleter.delete_file(join_path(data_dir, f.name())));
}
}
if (latest_metadata->sstable_meta().sstables_size() > 0) {
for (const auto& sst : latest_metadata->sstable_meta().sstables()) {
RETURN_IF_ERROR(deleter.delete_file(join_path(data_dir, sst.filename())));
}
}
}

for (auto version : versions) {
Expand Down Expand Up @@ -716,26 +721,27 @@ static StatusOr<std::map<std::string, DirEntry>> list_data_files(FileSystem* fs,
int64_t total_files = 0;
int64_t total_bytes = 0;
const auto now = std::time(nullptr);
RETURN_IF_ERROR_WITH_WARN(ignore_not_found(fs->iterate_dir2(segment_root_location,
[&](DirEntry entry) {
total_files++;
total_bytes += entry.size.value_or(0);

if (!is_segment(entry.name)) { // Only segment files
return true;
}
if (!entry.mtime.has_value()) {
LOG(WARNING) << "Fail to get modified time of "
<< entry.name;
return true;
}

if (now >= entry.mtime.value() + expired_seconds) {
data_files.emplace(entry.name, entry);
}
return true;
})),
"Failed to list " + segment_root_location);
RETURN_IF_ERROR_WITH_WARN(
ignore_not_found(fs->iterate_dir2(segment_root_location,
[&](DirEntry entry) {
total_files++;
total_bytes += entry.size.value_or(0);

if (!is_segment(entry.name) &&
!is_sst(entry.name)) { // Only segment files and sst
return true;
}
if (!entry.mtime.has_value()) {
LOG(WARNING) << "Fail to get modified time of " << entry.name;
return true;
}

if (now >= entry.mtime.value() + expired_seconds) {
data_files.emplace(entry.name, entry);
}
return true;
})),
"Failed to list " + segment_root_location);
LOG(INFO) << "Listed all data files, total files: " << total_files << ", total bytes: " << total_bytes
<< ", candidate files: " << data_files.size();
return data_files;
Expand All @@ -762,6 +768,12 @@ static StatusOr<std::map<std::string, DirEntry>> find_orphan_data_files(FileSyst
data_files_in_metadatas.emplace(segment);
}
};
// For every sstable listed in |sst_meta|: drop its file name from |data_files|
// and record it in |data_files_in_metadatas|, since it is referenced by metadata.
auto check_sst_meta = [&](const PersistentIndexSstableMetaPB& sst_meta) {
    for (const auto& sstable : sst_meta.sstables()) {
        const auto& sst_name = sstable.filename();
        data_files.erase(sst_name);
        data_files_in_metadatas.emplace(sst_name);
    }
};

if (audit_ostream) {
audit_ostream << "Total meta files: " << meta_files.size() << std::endl;
Expand All @@ -783,6 +795,7 @@ static StatusOr<std::map<std::string, DirEntry>> find_orphan_data_files(FileSyst
for (const auto& rowset : metadata->rowsets()) {
check_rowset(rowset);
}
check_sst_meta(metadata->sstable_meta());
++progress;
if (audit_ostream) {
audit_ostream << '(' << progress << '/' << meta_files.size() << ") " << name << '\n'
Expand Down
26 changes: 24 additions & 2 deletions be/test/storage/lake/vacuum_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class LakeVacuumTest : public TestBase, testing::WithParamInterface<VacuumTestAr
full_path = join_path(join_path(kTestDir, kMetadataDirectoryName), name);
} else if (is_txn_log(name) || is_txn_slog(name) || is_txn_vlog(name) || is_combined_txn_log(name)) {
full_path = join_path(join_path(kTestDir, kTxnLogDirectoryName), name);
} else if (is_segment(name) || is_delvec(name) || is_del(name)) {
} else if (is_segment(name) || is_delvec(name) || is_del(name) || is_sst(name)) {
full_path = join_path(join_path(kTestDir, kSegmentDirectoryName), name);
} else {
CHECK(false) << name;
Expand Down Expand Up @@ -697,6 +697,7 @@ TEST_P(LakeVacuumTest, test_delete_tablets_02) {
create_data_file("00000000000359e4_a542395a-bff5-48a7-a3a7-2ed05691b58c.dat");
create_data_file("00000000000459e4_3d9c9edb-a69d-4a06-9093-a9f557e4c3b0.dat");
create_data_file("00000000000459e3_9ae981b3-7d4b-49e9-9723-d7f752686154.delvec");
create_data_file("0000000000011111_9ae981b3-7d4b-49e9-9723-d7f752686154.sst");

ASSERT_OK(_tablet_mgr->put_tablet_metadata(json_to_pb<TabletMetadataPB>(R"DEL(
{
Expand Down Expand Up @@ -776,6 +777,13 @@ TEST_P(LakeVacuumTest, test_delete_tablets_02) {
}
]
},
"sstable_meta": {
"sstables": [
{
"filename": "0000000000011111_9ae981b3-7d4b-49e9-9723-d7f752686154.sst"
}
]
},
"prev_garbage_version": 3
}
)DEL")));
Expand All @@ -796,6 +804,7 @@ TEST_P(LakeVacuumTest, test_delete_tablets_02) {
EXPECT_FALSE(file_exist("00000000000359e4_a542395a-bff5-48a7-a3a7-2ed05691b58c.dat"));
EXPECT_FALSE(file_exist("00000000000459e4_3d9c9edb-a69d-4a06-9093-a9f557e4c3b0.dat"));
EXPECT_FALSE(file_exist("00000000000459e3_9ae981b3-7d4b-49e9-9723-d7f752686154.delvec"));
EXPECT_FALSE(file_exist("0000000000011111_9ae981b3-7d4b-49e9-9723-d7f752686154.sst"));
}
{
DeleteTabletRequest request;
Expand Down Expand Up @@ -1243,6 +1252,8 @@ TEST_P(LakeVacuumTest, test_datafile_gc) {

create_data_file("00000000000259e4_27dc159f-6bfc-4a3a-9d9c-c97c10bb2e1d.dat");
create_data_file("00000000000259e4_a542395a-bff5-48a7-a3a7-2ed05691b58c.dat");
create_data_file("0000000000011111_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst");
create_data_file("0000000000022222_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst");

ASSERT_OK(_tablet_mgr->put_tablet_metadata(json_to_pb<TabletMetadataPB>(R"DEL(
{
Expand All @@ -1263,17 +1274,28 @@ TEST_P(LakeVacuumTest, test_datafile_gc) {
],
"data_size": 4096
}
]
],
"sstable_meta": {
"sstables": [
{
"filename": "0000000000022222_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst"
}
]
}
}
)DEL")));

ASSERT_OK(datafile_gc(kTestDir, join_path(kTestDir, "audit.log"), 0, false));
EXPECT_TRUE(file_exist("00000000000259e4_27dc159f-6bfc-4a3a-9d9c-c97c10bb2e1d.dat"));
EXPECT_TRUE(file_exist("00000000000259e4_a542395a-bff5-48a7-a3a7-2ed05691b58c.dat"));
EXPECT_TRUE(file_exist("0000000000011111_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst"));
EXPECT_TRUE(file_exist("0000000000022222_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst"));

ASSERT_OK(datafile_gc(kTestDir, "", 0, true));
EXPECT_TRUE(file_exist("00000000000259e4_27dc159f-6bfc-4a3a-9d9c-c97c10bb2e1d.dat"));
EXPECT_FALSE(file_exist("00000000000259e4_a542395a-bff5-48a7-a3a7-2ed05691b58c.dat"));
EXPECT_FALSE(file_exist("0000000000011111_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst"));
EXPECT_TRUE(file_exist("0000000000022222_a542395a-bff5-48a7-a3a7-2ed05691b58c.sst"));
}

TEST_P(LakeVacuumTest, test_vacuum_combined_txn_log) {
Expand Down

0 comments on commit 8a8687e

Please sign in to comment.