From 47282aab4f9845e6cffb9a5e119eb51cbd3ea2e3 Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Thu, 18 Nov 2021 16:41:02 +0100 Subject: [PATCH 1/8] Initial rough test case to reproduce in test suite. refs: #279 --- arki/dataset/iseg/maintenance-test.cc | 105 ++++++++++++++++++-------- 1 file changed, 72 insertions(+), 33 deletions(-) diff --git a/arki/dataset/iseg/maintenance-test.cc b/arki/dataset/iseg/maintenance-test.cc index aef2bbadc..66fa84515 100644 --- a/arki/dataset/iseg/maintenance-test.cc +++ b/arki/dataset/iseg/maintenance-test.cc @@ -5,6 +5,7 @@ #include "arki/metadata/collection.h" #include "arki/types/source/blob.h" #include "arki/utils/sys.h" +#include "arki/utils/string.h" using namespace arki; using namespace arki::tests; @@ -15,6 +16,26 @@ namespace { using namespace arki::dataset::maintenance_test; +struct Fixture : public DatasetTest { + using DatasetTest::DatasetTest; + + void test_setup() + { + DatasetTest::test_setup(R"( + type=iseg + step=daily + )"); + } +}; + +class MaintTests : public FixtureTestCase +{ + using FixtureTestCase::FixtureTestCase; + + void register_tests() override; +} test("arki_dataset_iseg_maintenance"); + + template class CheckTests : public CheckTest { @@ -26,6 +47,19 @@ class CheckTests : public CheckTest bool can_delete_data() const override { return true; } }; +CheckTests test_iseg_check_grib("arki_dataset_iseg_check_grib", "grib", "type=iseg\nformat=grib\n"); +CheckTests test_iseg_check_grib_dir("arki_dataset_iseg_check_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +CheckTests test_iseg_check_grib_zip("arki_dataset_iseg_check_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +CheckTests test_iseg_check_bufr("arki_dataset_iseg_check_bufr", "bufr", "type=iseg\nformat=bufr\n"); +CheckTests test_iseg_check_bufr_dir("arki_dataset_iseg_check_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +CheckTests 
test_iseg_check_bufr_zip("arki_dataset_iseg_check_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +CheckTests test_iseg_check_vm2("arki_dataset_iseg_check_vm2", "vm2", "type=iseg\nformat=vm2\n"); +CheckTests test_iseg_check_vm2_dir("arki_dataset_iseg_check_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +CheckTests test_iseg_check_vm2_zip("arki_dataset_iseg_check_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +CheckTests test_iseg_check_odimh5_dir("arki_dataset_iseg_check_odimh5", "odimh5", "type=iseg\nformat=odimh5\n"); +CheckTests test_iseg_check_odimh5_zip("arki_dataset_iseg_check_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); + + template class FixTests : public FixTest { @@ -37,6 +71,19 @@ class FixTests : public FixTest bool can_delete_data() const override { return true; } }; +FixTests test_iseg_fix_grib("arki_dataset_iseg_fix_grib", "grib", "type=iseg\nformat=grib\n"); +FixTests test_iseg_fix_grib_dir("arki_dataset_iseg_fix_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +FixTests test_iseg_fix_grib_zip("arki_dataset_iseg_fix_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +FixTests test_iseg_fix_bufr("arki_dataset_iseg_fix_bufr", "bufr", "type=iseg\nformat=bufr\n"); +FixTests test_iseg_fix_bufr_dir("arki_dataset_iseg_fix_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +FixTests test_iseg_fix_bufr_zip("arki_dataset_iseg_fix_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +FixTests test_iseg_fix_vm2("arki_dataset_iseg_fix_vm2", "vm2", "type=iseg\nformat=vm2\n"); +FixTests test_iseg_fix_vm2_dir("arki_dataset_iseg_fix_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +FixTests test_iseg_fix_vm2_zip("arki_dataset_iseg_fix_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +FixTests 
test_iseg_fix_odimh5_dir("arki_dataset_iseg_fix_odimh5", "odimh5", "type=iseg\nformat=odimh5\n"); +FixTests test_iseg_fix_odimh5_zip("arki_dataset_iseg_fix_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); + + template class RepackTests : public RepackTest { @@ -50,6 +97,18 @@ class RepackTests : public RepackTest bool can_delete_data() const override { return true; } }; +RepackTests test_iseg_repack_grib("arki_dataset_iseg_repack_grib", "grib", "type=iseg\nformat=grib\n"); +RepackTests test_iseg_repack_grib_dir("arki_dataset_iseg_repack_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_grib_zip("arki_dataset_iseg_repack_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_bufr("arki_dataset_iseg_repack_bufr", "bufr", "type=iseg\nformat=bufr\n"); +RepackTests test_iseg_repack_bufr_dir("arki_dataset_iseg_repack_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_bufr_zip("arki_dataset_iseg_repack_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_vm2("arki_dataset_iseg_repack_vm2", "vm2", "type=iseg\nformat=vm2\n"); +RepackTests test_iseg_repack_vm2_dir("arki_dataset_iseg_repack_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_vm2_zip("arki_dataset_iseg_repack_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); +RepackTests test_iseg_repack_odimh5_dir("arki_dataset_iseg_repack_odimh5", "odimh5", "type=iseg\nformat=odimh5\n"); +RepackTests test_iseg_repack_odimh5_zip("arki_dataset_iseg_repack_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); + template void RepackTests::register_tests() @@ -73,41 +132,21 @@ void RepackTests::register_tests() }); } -CheckTests test_iseg_check_grib("arki_dataset_iseg_check_grib", "grib", "type=iseg\nformat=grib\n"); -CheckTests 
test_iseg_check_grib_dir("arki_dataset_iseg_check_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_grib_zip("arki_dataset_iseg_check_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_bufr("arki_dataset_iseg_check_bufr", "bufr", "type=iseg\nformat=bufr\n"); -CheckTests test_iseg_check_bufr_dir("arki_dataset_iseg_check_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_bufr_zip("arki_dataset_iseg_check_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_vm2("arki_dataset_iseg_check_vm2", "vm2", "type=iseg\nformat=vm2\n"); -CheckTests test_iseg_check_vm2_dir("arki_dataset_iseg_check_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_vm2_zip("arki_dataset_iseg_check_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -CheckTests test_iseg_check_odimh5_dir("arki_dataset_iseg_check_odimh5", "odimh5", "type=iseg\nformat=odimh5\n"); -CheckTests test_iseg_check_odimh5_zip("arki_dataset_iseg_check_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); -FixTests test_iseg_fix_grib("arki_dataset_iseg_fix_grib", "grib", "type=iseg\nformat=grib\n"); -FixTests test_iseg_fix_grib_dir("arki_dataset_iseg_fix_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -FixTests test_iseg_fix_grib_zip("arki_dataset_iseg_fix_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -FixTests test_iseg_fix_bufr("arki_dataset_iseg_fix_bufr", "bufr", "type=iseg\nformat=bufr\n"); -FixTests test_iseg_fix_bufr_dir("arki_dataset_iseg_fix_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -FixTests test_iseg_fix_bufr_zip("arki_dataset_iseg_fix_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -FixTests 
test_iseg_fix_vm2("arki_dataset_iseg_fix_vm2", "vm2", "type=iseg\nformat=vm2\n"); -FixTests test_iseg_fix_vm2_dir("arki_dataset_iseg_fix_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -FixTests test_iseg_fix_vm2_zip("arki_dataset_iseg_fix_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -FixTests test_iseg_fix_odimh5_dir("arki_dataset_iseg_fix_odimh5", "odimh5", "type=iseg\nformat=odimh5\n"); -FixTests test_iseg_fix_odimh5_zip("arki_dataset_iseg_fix_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); +void MaintTests::register_tests() +{ + add_method("repack_unaligned", [&](Fixture& f) { + f.cfg->set("format", "odimh5"); + sys::makedirs(str::joinpath(f.ds_root, "2021", "11-17.odimh5")); -RepackTests test_iseg_repack_grib("arki_dataset_iseg_repack_grib", "grib", "type=iseg\nformat=grib\n"); -RepackTests test_iseg_repack_grib_dir("arki_dataset_iseg_repack_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_grib_zip("arki_dataset_iseg_repack_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_bufr("arki_dataset_iseg_repack_bufr", "bufr", "type=iseg\nformat=bufr\n"); -RepackTests test_iseg_repack_bufr_dir("arki_dataset_iseg_repack_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_bufr_zip("arki_dataset_iseg_repack_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_vm2("arki_dataset_iseg_repack_vm2", "vm2", "type=iseg\nformat=vm2\n"); -RepackTests test_iseg_repack_vm2_dir("arki_dataset_iseg_repack_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_vm2_zip("arki_dataset_iseg_repack_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR); -RepackTests test_iseg_repack_odimh5_dir("arki_dataset_iseg_repack_odimh5", "odimh5", 
"type=iseg\nformat=odimh5\n"); -RepackTests test_iseg_repack_odimh5_zip("arki_dataset_iseg_repack_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n"); + { + auto checker(f.makeSegmentedChecker()); + ReporterExpected e; + e.report.emplace_back("testds", "repack", "2 files ok"); + wassert(actual(checker.get()).repack(e, true)); + } + }); +} } From 6d3b1763ccca5bef0ed196df100bd6b5a87bbbe8 Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Fri, 19 Nov 2021 13:10:58 +0100 Subject: [PATCH 2/8] Reproduced issue in dir segment. refs: #279 --- arki/segment/dir-test.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arki/segment/dir-test.cc b/arki/segment/dir-test.cc index e58f992d1..bae52f664 100644 --- a/arki/segment/dir-test.cc +++ b/arki/segment/dir-test.cc @@ -196,6 +196,31 @@ this->add_method("append", [](Fixture& f) { wassert(actual(mdc1[1]).is_similar(mdc[2])); }); +// Check behaviour of an empty directory (#279) +this->add_method("empty_dir", [](Fixture& f) { + if (sys::isdir(relpath)) + sys::rmtree_ifexists(relpath); + else + sys::unlink_ifexists(relpath); + + sys::makedirs(relpath); + + // It can be read as an empty segment + { + metadata::TestCollection mdc1; + wassert(mdc1.scan_from_file(relpath, false)); + wassert(actual(mdc1.size()) == 0u); + } + + // TODO: check + { + auto checker = Segment::detect_checker(f.td.format, ".", relpath, sys::abspath(relpath)); + wassert(actual(checker->size()) == 0u); + wassert_true(checker->exists_on_disk()); + wassert_true(checker->is_empty()); + } +}); + } } From 649b0b1bb873a468d4c3122cc5bf570ad0983775 Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Fri, 19 Nov 2021 13:31:20 +0100 Subject: [PATCH 3/8] Test and fix behaviour with empty directories that are not directory segments (lacking .sequence file). 
refs: #279 --- arki/dataset/iseg/checker.cc | 8 ++++++++ arki/dataset/iseg/maintenance-test.cc | 9 +++++---- arki/segment.h | 7 ++++++- arki/segment/dir-test.cc | 6 +++--- arki/segment/dir.cc | 12 ++++++++++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/arki/dataset/iseg/checker.cc b/arki/dataset/iseg/checker.cc index a357ab3e1..65bf462fe 100644 --- a/arki/dataset/iseg/checker.cc +++ b/arki/dataset/iseg/checker.cc @@ -496,6 +496,10 @@ void Checker::segments_tracked_filtered(const Matcher& matcher, std::functionexists_on_disk()) + return; dest(segment); }); } @@ -511,6 +515,10 @@ void Checker::segments_untracked_filtered(const Matcher& matcher, std::function< dataset().step().list_segments(squery, [&](std::string&& relpath) { if (sys::stat(str::joinpath(dataset().path, relpath + ".index"))) return; CheckerSegment segment(*this, relpath); + // See #279: directory segments that are empty directories are found by + // a filesystem scan, but are not considered segments + if (!segment.segment->exists_on_disk()) + return; dest(segment); }); } diff --git a/arki/dataset/iseg/maintenance-test.cc b/arki/dataset/iseg/maintenance-test.cc index 66fa84515..9702021eb 100644 --- a/arki/dataset/iseg/maintenance-test.cc +++ b/arki/dataset/iseg/maintenance-test.cc @@ -135,14 +135,15 @@ void RepackTests::register_tests() void MaintTests::register_tests() { - add_method("repack_unaligned", [&](Fixture& f) { - f.cfg->set("format", "odimh5"); - sys::makedirs(str::joinpath(f.ds_root, "2021", "11-17.odimh5")); + add_method("empty_dir_segment", [&](Fixture& f) { + // See #279 + f.cfg->set("format", "odimh5"); + sys::makedirs(str::joinpath(f.ds_root, "2021", "11-17.odimh5")); { auto checker(f.makeSegmentedChecker()); ReporterExpected e; - e.report.emplace_back("testds", "repack", "2 files ok"); + e.report.emplace_back("testds", "repack", "0 files ok"); wassert(actual(checker.get()).repack(e, true)); } }); diff --git a/arki/segment.h b/arki/segment.h index 
aa8efab32..278e59cca 100644 --- a/arki/segment.h +++ b/arki/segment.h @@ -292,7 +292,12 @@ class Checker : public std::enable_shared_from_this /// Check if the segment exists on disk virtual bool exists_on_disk() = 0; - /// Return true if the segment does not contain any data + /** + * Return true if the segment does not contain any data. + * + * Return false if the segment contains data, or if the segment does not + * exist or is not a valid segment. + */ virtual bool is_empty() = 0; /** diff --git a/arki/segment/dir-test.cc b/arki/segment/dir-test.cc index bae52f664..511e73c85 100644 --- a/arki/segment/dir-test.cc +++ b/arki/segment/dir-test.cc @@ -212,12 +212,12 @@ this->add_method("empty_dir", [](Fixture& f) { wassert(actual(mdc1.size()) == 0u); } - // TODO: check + // Verify what are the results of check { auto checker = Segment::detect_checker(f.td.format, ".", relpath, sys::abspath(relpath)); wassert(actual(checker->size()) == 0u); - wassert_true(checker->exists_on_disk()); - wassert_true(checker->is_empty()); + wassert_false(checker->exists_on_disk()); + wassert_false(checker->is_empty()); } }); diff --git a/arki/segment/dir.cc b/arki/segment/dir.cc index 75bba5753..1636dcd35 100644 --- a/arki/segment/dir.cc +++ b/arki/segment/dir.cc @@ -432,14 +432,22 @@ bool BaseChecker::is_empty() { if (!sys::isdir(this->segment().abspath)) return false; sys::Path dir(this->segment().abspath); + + // If we just have an empty directory, do not consider it as a valid + // segment + bool has_sequence = false; for (sys::Path::iterator i = dir.begin(); i != dir.end(); ++i) { if (strcmp(i->d_name, ".") == 0) continue; if (strcmp(i->d_name, "..") == 0) continue; - if (strcmp(i->d_name, ".sequence") == 0) continue; + if (strcmp(i->d_name, ".sequence") == 0) + { + has_sequence = true; + continue; + } return false; } - return true; + return has_sequence; } template From d5ec2331b3003e6088699e00b6998c880e7cc4a9 Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Fri, 19 Nov 2021 
13:47:21 +0100 Subject: [PATCH 4/8] Reproduced for all datasets. refs: #279 --- arki/dataset/iseg/maintenance-test.cc | 12 ------------ arki/dataset/maintenance-test.cc | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/arki/dataset/iseg/maintenance-test.cc b/arki/dataset/iseg/maintenance-test.cc index 9702021eb..e09942ef6 100644 --- a/arki/dataset/iseg/maintenance-test.cc +++ b/arki/dataset/iseg/maintenance-test.cc @@ -135,18 +135,6 @@ void RepackTests::register_tests() void MaintTests::register_tests() { - add_method("empty_dir_segment", [&](Fixture& f) { - // See #279 - f.cfg->set("format", "odimh5"); - sys::makedirs(str::joinpath(f.ds_root, "2021", "11-17.odimh5")); - - { - auto checker(f.makeSegmentedChecker()); - ReporterExpected e; - e.report.emplace_back("testds", "repack", "0 files ok"); - wassert(actual(checker.get()).repack(e, true)); - } - }); } } diff --git a/arki/dataset/maintenance-test.cc b/arki/dataset/maintenance-test.cc index 51753553c..d803ce13a 100644 --- a/arki/dataset/maintenance-test.cc +++ b/arki/dataset/maintenance-test.cc @@ -358,6 +358,21 @@ void CheckTest::register_tests() wassert(f.query_results({0, 2})); }); + this->add_method("empty_dir_segment", R"( + - a directory segment without a .sequence file is not considered a + segment, only a spurious empty directory + )", [&](Fixture& f) { + // See #279 + sys::makedirs(f.test_relpath); + + { + auto checker(f.makeSegmentedChecker()); + ReporterExpected e; + e.report.emplace_back("testds", "repack", "0 files ok"); + wassert(actual(checker.get()).repack(e, true)); + } + }); + if (can_delete_data() && TestFixture::segment_can_delete_data()) { this->add_method("check_new", R"( From c0400854ff189b52b11351773630c4473be1ca1e Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Fri, 19 Nov 2021 13:56:01 +0100 Subject: [PATCH 5/8] Fixed test. 
refs: #279 --- arki/dataset/maintenance-test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arki/dataset/maintenance-test.cc b/arki/dataset/maintenance-test.cc index d803ce13a..66fe69ceb 100644 --- a/arki/dataset/maintenance-test.cc +++ b/arki/dataset/maintenance-test.cc @@ -368,7 +368,7 @@ void CheckTest::register_tests() { auto checker(f.makeSegmentedChecker()); ReporterExpected e; - e.report.emplace_back("testds", "repack", "0 files ok"); + e.report.emplace_back("testds", "repack", "3 files ok"); wassert(actual(checker.get()).repack(e, true)); } }); From 588f466b5dee0206db200ea8e9d5d339bc9b0efb Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Fri, 19 Nov 2021 13:56:15 +0100 Subject: [PATCH 6/8] Fixed reporting of repackable size for .zip datasets. refs: #279 --- arki/segment/dir.cc | 2 +- arki/segment/zip.cc | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arki/segment/dir.cc b/arki/segment/dir.cc index 1636dcd35..07b05d49b 100644 --- a/arki/segment/dir.cc +++ b/arki/segment/dir.cc @@ -116,7 +116,7 @@ struct CheckBackend : public AppendCheckBackend size_t actual_end(off_t offset, size_t size) const override { return offset + 1; } size_t offset_end() const override { return scanner.max_sequence + 1; } - size_t compute_unindexed_space(const std::vector indexed_spans) const + size_t compute_unindexed_space(const std::vector indexed_spans) const override { // When this is called, all elements found in the index have already // been removed from scanner. 
We can just then add up what's left of diff --git a/arki/segment/zip.cc b/arki/segment/zip.cc index 3f683a5a4..d1a7ed9ea 100644 --- a/arki/segment/zip.cc +++ b/arki/segment/zip.cc @@ -100,6 +100,16 @@ struct CheckBackend : public AppendCheckBackend size_t actual_start(off_t offset, size_t size) const override { return offset - 1; } size_t actual_end(off_t offset, size_t size) const override { return offset; } size_t offset_end() const override { return max_sequence; } + size_t compute_unindexed_space(const std::vector indexed_spans) const override + { + // When this is called, all elements found in the index have already + // been removed from scanner. We can just then add up what's left of + // sizes in scanner + size_t res = 0; + for (const auto& i: on_disk) + res += i.second; + return res; + } State check_source(const types::source::Blob& source) override { From 5ea83f80d2c29b6dfdf2d7a5b3ab2e5a8b8505fe Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Mon, 22 Nov 2021 12:30:14 +0100 Subject: [PATCH 7/8] Document behaviour of exists_on_disk for dir segments. refs: #279 --- arki/segment/dir.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arki/segment/dir.cc b/arki/segment/dir.cc index 07b05d49b..3b4d99c4d 100644 --- a/arki/segment/dir.cc +++ b/arki/segment/dir.cc @@ -423,6 +423,13 @@ void HoleWriter::write_file(Metadata& md, NamedFileDescriptor& fd) template bool BaseChecker::exists_on_disk() { + /** + * To consider the segment an existing dir segment, it needs to be a + * directory that contains a .sequence file. 
+ * + * Just an empty directory is considered not enough, to leave space for + * implementing different formats of directory-based segments + */ if (!sys::isdir(this->segment().abspath)) return false; return sys::exists(str::joinpath(this->segment().abspath, ".sequence")); } From 785ca972757ab2b71e1d93f3292d55571f25c0b6 Mon Sep 17 00:00:00 2001 From: Enrico Zini Date: Mon, 22 Nov 2021 12:44:48 +0100 Subject: [PATCH 8/8] Don't filter with exists_on_disk segments that were found as present in the index. refs: #279 --- arki/dataset/iseg/checker.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arki/dataset/iseg/checker.cc b/arki/dataset/iseg/checker.cc index 65bf462fe..a9bdf9d83 100644 --- a/arki/dataset/iseg/checker.cc +++ b/arki/dataset/iseg/checker.cc @@ -496,10 +496,6 @@ void Checker::segments_tracked_filtered(const Matcher& matcher, std::functionexists_on_disk()) - return; dest(segment); }); }