Skip to content

Commit

Permalink
Fixed inconsistent handling of empty directories as dir segments. Fixes
Browse files Browse the repository at this point in the history
…: #279
  • Loading branch information
spanezz committed Nov 22, 2021
2 parents 4383221 + 785ca97 commit 57c4f37
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 38 deletions.
4 changes: 4 additions & 0 deletions arki/dataset/iseg/checker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,10 @@ void Checker::segments_untracked_filtered(const Matcher& matcher, std::function<
dataset().step().list_segments(squery, [&](std::string&& relpath) {
if (sys::stat(str::joinpath(dataset().path, relpath + ".index"))) return;
CheckerSegment segment(*this, relpath);
// See #279: directory segments that are empty directories are found by
// a filesystem scan, but are not considered segments
if (!segment.segment->exists_on_disk())
return;
dest(segment);
});
}
Expand Down
96 changes: 62 additions & 34 deletions arki/dataset/iseg/maintenance-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "arki/metadata/collection.h"
#include "arki/types/source/blob.h"
#include "arki/utils/sys.h"
#include "arki/utils/string.h"

using namespace arki;
using namespace arki::tests;
Expand All @@ -15,6 +16,26 @@ namespace {

using namespace arki::dataset::maintenance_test;

struct Fixture : public DatasetTest {
using DatasetTest::DatasetTest;

void test_setup()
{
DatasetTest::test_setup(R"(
type=iseg
step=daily
)");
}
};

class MaintTests : public FixtureTestCase<Fixture>
{
using FixtureTestCase::FixtureTestCase;

void register_tests() override;
} test("arki_dataset_iseg_maintenance");


template<typename TestFixture>
class CheckTests : public CheckTest<TestFixture>
{
Expand All @@ -26,6 +47,19 @@ class CheckTests : public CheckTest<TestFixture>
bool can_delete_data() const override { return true; }
};

CheckTests<FixtureConcat> test_iseg_check_grib("arki_dataset_iseg_check_grib", "grib", "type=iseg\nformat=grib\n");
CheckTests<FixtureDir> test_iseg_check_grib_dir("arki_dataset_iseg_check_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_grib_zip("arki_dataset_iseg_check_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureConcat> test_iseg_check_bufr("arki_dataset_iseg_check_bufr", "bufr", "type=iseg\nformat=bufr\n");
CheckTests<FixtureDir> test_iseg_check_bufr_dir("arki_dataset_iseg_check_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_bufr_zip("arki_dataset_iseg_check_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureConcat> test_iseg_check_vm2("arki_dataset_iseg_check_vm2", "vm2", "type=iseg\nformat=vm2\n");
CheckTests<FixtureDir> test_iseg_check_vm2_dir("arki_dataset_iseg_check_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_vm2_zip("arki_dataset_iseg_check_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureDir> test_iseg_check_odimh5_dir("arki_dataset_iseg_check_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
CheckTests<FixtureZip> test_iseg_check_odimh5_zip("arki_dataset_iseg_check_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");


template<typename TestFixture>
class FixTests : public FixTest<TestFixture>
{
Expand All @@ -37,6 +71,19 @@ class FixTests : public FixTest<TestFixture>
bool can_delete_data() const override { return true; }
};

FixTests<FixtureConcat> test_iseg_fix_grib("arki_dataset_iseg_fix_grib", "grib", "type=iseg\nformat=grib\n");
FixTests<FixtureDir> test_iseg_fix_grib_dir("arki_dataset_iseg_fix_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_grib_zip("arki_dataset_iseg_fix_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureConcat> test_iseg_fix_bufr("arki_dataset_iseg_fix_bufr", "bufr", "type=iseg\nformat=bufr\n");
FixTests<FixtureDir> test_iseg_fix_bufr_dir("arki_dataset_iseg_fix_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_bufr_zip("arki_dataset_iseg_fix_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureConcat> test_iseg_fix_vm2("arki_dataset_iseg_fix_vm2", "vm2", "type=iseg\nformat=vm2\n");
FixTests<FixtureDir> test_iseg_fix_vm2_dir("arki_dataset_iseg_fix_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_vm2_zip("arki_dataset_iseg_fix_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureDir> test_iseg_fix_odimh5_dir("arki_dataset_iseg_fix_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
FixTests<FixtureZip> test_iseg_fix_odimh5_zip("arki_dataset_iseg_fix_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");


template<typename TestFixture>
class RepackTests : public RepackTest<TestFixture>
{
Expand All @@ -50,6 +97,18 @@ class RepackTests : public RepackTest<TestFixture>
bool can_delete_data() const override { return true; }
};

RepackTests<FixtureConcat> test_iseg_repack_grib("arki_dataset_iseg_repack_grib", "grib", "type=iseg\nformat=grib\n");
RepackTests<FixtureDir> test_iseg_repack_grib_dir("arki_dataset_iseg_repack_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_grib_zip("arki_dataset_iseg_repack_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureConcat> test_iseg_repack_bufr("arki_dataset_iseg_repack_bufr", "bufr", "type=iseg\nformat=bufr\n");
RepackTests<FixtureDir> test_iseg_repack_bufr_dir("arki_dataset_iseg_repack_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_bufr_zip("arki_dataset_iseg_repack_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureConcat> test_iseg_repack_vm2("arki_dataset_iseg_repack_vm2", "vm2", "type=iseg\nformat=vm2\n");
RepackTests<FixtureDir> test_iseg_repack_vm2_dir("arki_dataset_iseg_repack_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_vm2_zip("arki_dataset_iseg_repack_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureDir> test_iseg_repack_odimh5_dir("arki_dataset_iseg_repack_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
RepackTests<FixtureZip> test_iseg_repack_odimh5_zip("arki_dataset_iseg_repack_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");


template<typename TestFixture>
void RepackTests<TestFixture>::register_tests()
Expand All @@ -73,41 +132,10 @@ void RepackTests<TestFixture>::register_tests()
});
}

CheckTests<FixtureConcat> test_iseg_check_grib("arki_dataset_iseg_check_grib", "grib", "type=iseg\nformat=grib\n");
CheckTests<FixtureDir> test_iseg_check_grib_dir("arki_dataset_iseg_check_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_grib_zip("arki_dataset_iseg_check_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureConcat> test_iseg_check_bufr("arki_dataset_iseg_check_bufr", "bufr", "type=iseg\nformat=bufr\n");
CheckTests<FixtureDir> test_iseg_check_bufr_dir("arki_dataset_iseg_check_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_bufr_zip("arki_dataset_iseg_check_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureConcat> test_iseg_check_vm2("arki_dataset_iseg_check_vm2", "vm2", "type=iseg\nformat=vm2\n");
CheckTests<FixtureDir> test_iseg_check_vm2_dir("arki_dataset_iseg_check_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureZip> test_iseg_check_vm2_zip("arki_dataset_iseg_check_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
CheckTests<FixtureDir> test_iseg_check_odimh5_dir("arki_dataset_iseg_check_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
CheckTests<FixtureZip> test_iseg_check_odimh5_zip("arki_dataset_iseg_check_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");

FixTests<FixtureConcat> test_iseg_fix_grib("arki_dataset_iseg_fix_grib", "grib", "type=iseg\nformat=grib\n");
FixTests<FixtureDir> test_iseg_fix_grib_dir("arki_dataset_iseg_fix_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_grib_zip("arki_dataset_iseg_fix_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureConcat> test_iseg_fix_bufr("arki_dataset_iseg_fix_bufr", "bufr", "type=iseg\nformat=bufr\n");
FixTests<FixtureDir> test_iseg_fix_bufr_dir("arki_dataset_iseg_fix_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_bufr_zip("arki_dataset_iseg_fix_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureConcat> test_iseg_fix_vm2("arki_dataset_iseg_fix_vm2", "vm2", "type=iseg\nformat=vm2\n");
FixTests<FixtureDir> test_iseg_fix_vm2_dir("arki_dataset_iseg_fix_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureZip> test_iseg_fix_vm2_zip("arki_dataset_iseg_fix_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
FixTests<FixtureDir> test_iseg_fix_odimh5_dir("arki_dataset_iseg_fix_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
FixTests<FixtureZip> test_iseg_fix_odimh5_zip("arki_dataset_iseg_fix_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");

RepackTests<FixtureConcat> test_iseg_repack_grib("arki_dataset_iseg_repack_grib", "grib", "type=iseg\nformat=grib\n");
RepackTests<FixtureDir> test_iseg_repack_grib_dir("arki_dataset_iseg_repack_grib_dirs", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_grib_zip("arki_dataset_iseg_repack_grib_zip", "grib", "type=iseg\nformat=grib\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureConcat> test_iseg_repack_bufr("arki_dataset_iseg_repack_bufr", "bufr", "type=iseg\nformat=bufr\n");
RepackTests<FixtureDir> test_iseg_repack_bufr_dir("arki_dataset_iseg_repack_bufr_dirs", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_bufr_zip("arki_dataset_iseg_repack_bufr_zip", "bufr", "type=iseg\nformat=bufr\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureConcat> test_iseg_repack_vm2("arki_dataset_iseg_repack_vm2", "vm2", "type=iseg\nformat=vm2\n");
RepackTests<FixtureDir> test_iseg_repack_vm2_dir("arki_dataset_iseg_repack_vm2_dirs", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureZip> test_iseg_repack_vm2_zip("arki_dataset_iseg_repack_vm2_zip", "vm2", "type=iseg\nformat=vm2\n", DatasetTest::TEST_FORCE_DIR);
RepackTests<FixtureDir> test_iseg_repack_odimh5_dir("arki_dataset_iseg_repack_odimh5", "odimh5", "type=iseg\nformat=odimh5\n");
RepackTests<FixtureZip> test_iseg_repack_odimh5_zip("arki_dataset_iseg_repack_odimh5_zip", "odimh5", "type=iseg\nformat=odimh5\n");
void MaintTests::register_tests()
{
}

}

Expand Down
15 changes: 15 additions & 0 deletions arki/dataset/maintenance-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,21 @@ void CheckTest<TestFixture>::register_tests()
wassert(f.query_results({0, 2}));
});

this->add_method("empty_dir_segment", R"(
- a directory segment without a .sequence file is not considered a
segment, only a spurious empty directory
)", [&](Fixture& f) {
// See #279
sys::makedirs(f.test_relpath);

{
auto checker(f.makeSegmentedChecker());
ReporterExpected e;
e.report.emplace_back("testds", "repack", "3 files ok");
wassert(actual(checker.get()).repack(e, true));
}
});

if (can_delete_data() && TestFixture::segment_can_delete_data())
{
this->add_method("check_new", R"(
Expand Down
7 changes: 6 additions & 1 deletion arki/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,12 @@ class Checker : public std::enable_shared_from_this<Checker>
/// Check if the segment exists on disk
virtual bool exists_on_disk() = 0;

/// Return true if the segment does not contain any data
/**
* Return true if the segment does not contain any data.
*
* Return false if the segment contains data, or if the segment does not
* exist or is not a valid segment.
*/
virtual bool is_empty() = 0;

/**
Expand Down
25 changes: 25 additions & 0 deletions arki/segment/dir-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,31 @@ this->add_method("append", [](Fixture& f) {
wassert(actual(mdc1[1]).is_similar(mdc[2]));
});

// Check behaviour of an empty directory (#279)
this->add_method("empty_dir", [](Fixture& f) {
if (sys::isdir(relpath))
sys::rmtree_ifexists(relpath);
else
sys::unlink_ifexists(relpath);

sys::makedirs(relpath);

// It can be read as an empty segment
{
metadata::TestCollection mdc1;
wassert(mdc1.scan_from_file(relpath, false));
wassert(actual(mdc1.size()) == 0u);
}

// Verify what are the results of check
{
auto checker = Segment::detect_checker(f.td.format, ".", relpath, sys::abspath(relpath));
wassert(actual(checker->size()) == 0u);
wassert_false(checker->exists_on_disk());
wassert_false(checker->is_empty());
}
});

}

}
Expand Down
21 changes: 18 additions & 3 deletions arki/segment/dir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ struct CheckBackend : public AppendCheckBackend

size_t actual_end(off_t offset, size_t size) const override { return offset + 1; }
size_t offset_end() const override { return scanner.max_sequence + 1; }
size_t compute_unindexed_space(const std::vector<Span> indexed_spans) const
size_t compute_unindexed_space(const std::vector<Span> indexed_spans) const override
{
// When this is called, all elements found in the index have already
// been removed from scanner. We can just then add up what's left of
Expand Down Expand Up @@ -423,6 +423,13 @@ void HoleWriter::write_file(Metadata& md, NamedFileDescriptor& fd)
template<typename Segment>
bool BaseChecker<Segment>::exists_on_disk()
{
/**
* To consider the segment an existing dir segment, it needs to be a
* directory that contains a .sequence file.
*
* Just an empty directory is considered not enough, to leave space for
* implementing different formats of directory-based segments
*/
if (!sys::isdir(this->segment().abspath)) return false;
return sys::exists(str::joinpath(this->segment().abspath, ".sequence"));
}
Expand All @@ -432,14 +439,22 @@ bool BaseChecker<Segment>::is_empty()
{
if (!sys::isdir(this->segment().abspath)) return false;
sys::Path dir(this->segment().abspath);

// If we just have an empty directory, do not consider it as a valid
// segment
bool has_sequence = false;
for (sys::Path::iterator i = dir.begin(); i != dir.end(); ++i)
{
if (strcmp(i->d_name, ".") == 0) continue;
if (strcmp(i->d_name, "..") == 0) continue;
if (strcmp(i->d_name, ".sequence") == 0) continue;
if (strcmp(i->d_name, ".sequence") == 0)
{
has_sequence = true;
continue;
}
return false;
}
return true;
return has_sequence;
}

template<typename Segment>
Expand Down
10 changes: 10 additions & 0 deletions arki/segment/zip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@ struct CheckBackend : public AppendCheckBackend
size_t actual_start(off_t offset, size_t size) const override { return offset - 1; }
size_t actual_end(off_t offset, size_t size) const override { return offset; }
size_t offset_end() const override { return max_sequence; }
size_t compute_unindexed_space(const std::vector<Span> indexed_spans) const override
{
// When this is called, all elements found in the index have already
// been removed from scanner. We can just then add up what's left of
// sizes in scanner
size_t res = 0;
for (const auto& i: on_disk)
res += i.second;
return res;
}

State check_source(const types::source::Blob& source) override
{
Expand Down

0 comments on commit 57c4f37

Please sign in to comment.