From 5446712736eb4c1c1e7d029b41c34b500ed93be6 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 23 Nov 2024 16:51:42 +0100 Subject: [PATCH] refactor: factor out `metadata_builder` from `scanner` --- cmake/libdwarfs.cmake | 1 + .../writer/internal/global_entry_data.h | 8 +- .../dwarfs/writer/internal/metadata_builder.h | 134 +++++++++ include/dwarfs/writer/metadata_options.h | 59 ++++ include/dwarfs/writer/scanner_options.h | 19 +- src/writer/internal/metadata_builder.cpp | 284 ++++++++++++++++++ src/writer/scanner.cpp | 193 ++---------- test/dwarfs_benchmark.cpp | 22 +- test/dwarfs_test.cpp | 48 +-- tools/src/mkdwarfs_main.cpp | 75 ++--- 10 files changed, 576 insertions(+), 267 deletions(-) create mode 100644 include/dwarfs/writer/internal/metadata_builder.h create mode 100644 include/dwarfs/writer/metadata_options.h create mode 100644 src/writer/internal/metadata_builder.cpp diff --git a/cmake/libdwarfs.cmake b/cmake/libdwarfs.cmake index 36ebb0c6..e307df81 100644 --- a/cmake/libdwarfs.cmake +++ b/cmake/libdwarfs.cmake @@ -116,6 +116,7 @@ add_library( src/writer/internal/inode_element_view.cpp src/writer/internal/inode_manager.cpp src/writer/internal/inode_ordering.cpp + src/writer/internal/metadata_builder.cpp src/writer/internal/metadata_freezer.cpp src/writer/internal/nilsimsa.cpp src/writer/internal/progress.cpp diff --git a/include/dwarfs/writer/internal/global_entry_data.h b/include/dwarfs/writer/internal/global_entry_data.h index 3601ea7d..c5923fa8 100644 --- a/include/dwarfs/writer/internal/global_entry_data.h +++ b/include/dwarfs/writer/internal/global_entry_data.h @@ -32,7 +32,7 @@ namespace dwarfs { -struct scanner_options; +struct metadata_options; namespace writer::internal { @@ -44,8 +44,8 @@ class global_entry_data { enum class timestamp_type { ATIME, MTIME, CTIME }; - global_entry_data(scanner_options const& options) - : options_(options) {} + global_entry_data(metadata_options const& options) + : options_{options} {} void add_uid(uid_type uid); void add_gid(gid_type gid); @@ -111,7 +111,7 @@ class global_entry_data { gid_type next_gid_index_{0}; mode_type next_mode_index_{0}; uint64_t timestamp_base_{std::numeric_limits::max()}; - scanner_options const& options_; + metadata_options const& options_; }; } // namespace writer::internal diff --git a/include/dwarfs/writer/internal/metadata_builder.h b/include/dwarfs/writer/internal/metadata_builder.h new file mode 100644 index 00000000..566016a6 --- /dev/null +++ b/include/dwarfs/writer/internal/metadata_builder.h @@ -0,0 +1,134 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace dwarfs { + +class logger; + +namespace writer { +struct metadata_options; +} + +namespace thrift::metadata { +class metadata; +} // namespace thrift::metadata + +namespace writer::internal { + +class inode_manager; +class block_manager; +class dir; + +class metadata_builder { + public: + metadata_builder(logger& lgr, metadata_options const& options); + ~metadata_builder(); + + void set_devices(std::vector devices) { + impl_->set_devices(std::move(devices)); + } + + void set_symlink_table_size(size_t size) { + impl_->set_symlink_table_size(size); + } + + void set_block_size(uint32_t block_size) { + impl_->set_block_size(block_size); + } + + void set_total_fs_size(uint64_t total_fs_size) { + impl_->set_total_fs_size(total_fs_size); + } + + void set_total_hardlink_size(uint64_t total_hardlink_size) { + impl_->set_total_hardlink_size(total_hardlink_size); + } + + void set_shared_files_table(std::vector shared_files) { + impl_->set_shared_files_table(std::move(shared_files)); + } + + void set_category_names(std::vector category_names) { + impl_->set_category_names(std::move(category_names)); + } + + void set_block_categories(std::vector block_categories) { + impl_->set_block_categories(std::move(block_categories)); + } + + void add_symlink_table_entry(size_t index, uint32_t entry) { + impl_->add_symlink_table_entry(index, entry); + } + + void gather_chunks(inode_manager const& im, block_manager const& bm, + size_t chunk_count) { + impl_->gather_chunks(im, bm, chunk_count); + } + + void gather_entries(std::span dirs, global_entry_data& ge_data, + uint32_t num_inodes) { + impl_->gather_entries(dirs, ge_data, num_inodes); + } + + thrift::metadata::metadata const& build(global_entry_data& ge_data) { + return impl_->build(ge_data); + } + + class impl { + public: + virtual ~impl() = default; + + virtual void set_devices(std::vector devices) = 0; + virtual void set_symlink_table_size(size_t size) = 0; + virtual void set_block_size(uint32_t block_size) = 0; + virtual void set_total_fs_size(uint64_t total_fs_size) = 0; + virtual void set_total_hardlink_size(uint64_t total_hardlink_size) = 0; + virtual void set_shared_files_table(std::vector shared_files) = 0; + virtual void + set_category_names(std::vector category_names) = 0; + virtual void + set_block_categories(std::vector block_categories) = 0; + virtual void add_symlink_table_entry(size_t index, uint32_t entry) = 0; + virtual void gather_chunks(inode_manager const& im, block_manager const& bm, + size_t chunk_count) = 0; + virtual void + gather_entries(std::span dirs, global_entry_data& ge_data, + uint32_t num_inodes) = 0; + + virtual thrift::metadata::metadata const& + build(global_entry_data& ge_data) = 0; + }; + + private: + std::unique_ptr impl_; +}; + +} // namespace writer::internal + +} // namespace dwarfs diff --git a/include/dwarfs/writer/metadata_options.h b/include/dwarfs/writer/metadata_options.h new file mode 100644 index 00000000..ad8fbeed --- /dev/null +++ b/include/dwarfs/writer/metadata_options.h @@ -0,0 +1,59 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace dwarfs::writer { + +class entry_interface; + +struct metadata_options { + std::optional uid; + std::optional gid; + std::optional timestamp; + bool keep_all_times{false}; + uint32_t time_resolution_sec{1}; + bool pack_chunk_table{false}; + bool pack_directories{false}; + bool pack_shared_files_table{false}; + bool plain_names_table{false}; + bool pack_names{false}; + bool pack_names_index{false}; + bool plain_symlinks_table{false}; + bool pack_symlinks{false}; + bool pack_symlinks_index{false}; + bool force_pack_string_tables{false}; + bool no_create_timestamp{false}; + size_t inode_size_cache_min_chunk_count{128}; +}; + +} // namespace dwarfs::writer diff --git a/include/dwarfs/writer/scanner_options.h b/include/dwarfs/writer/scanner_options.h index f0f0e581..46b0241b 100644 --- a/include/dwarfs/writer/scanner_options.h +++ b/include/dwarfs/writer/scanner_options.h @@ -31,6 +31,7 @@ #include #include #include +#include namespace dwarfs::writer { @@ -38,33 +39,17 @@ class entry_interface; struct scanner_options { std::optional file_hash_algorithm{"xxh3-128"}; - std::optional uid; - std::optional gid; - std::optional timestamp; - bool keep_all_times{false}; bool remove_empty_dirs{false}; bool with_devices{false}; bool with_specials{false}; - uint32_t time_resolution_sec{1}; inode_options inode; - bool pack_chunk_table{false}; - bool pack_directories{false}; - bool pack_shared_files_table{false}; - bool plain_names_table{false}; - bool pack_names{false}; - bool pack_names_index{false}; - bool plain_symlinks_table{false}; - bool pack_symlinks{false}; - bool pack_symlinks_index{false}; - bool force_pack_string_tables{false}; - bool no_create_timestamp{false}; std::optional> debug_filter_function; size_t num_segmenter_workers{1}; bool enable_history{true}; std::optional> command_line_arguments; history_config history; - size_t inode_size_cache_min_chunk_count{128}; + metadata_options metadata; }; } // namespace dwarfs::writer diff --git a/src/writer/internal/metadata_builder.cpp b/src/writer/internal/metadata_builder.cpp new file mode 100644 index 00000000..c3599bdc --- /dev/null +++ b/src/writer/internal/metadata_builder.cpp @@ -0,0 +1,284 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace dwarfs::writer::internal { + +namespace { + +using namespace dwarfs::internal; + +template +class metadata_builder_ final : public metadata_builder::impl { + public: + explicit metadata_builder_(logger& lgr, metadata_options const& options) + : LOG_PROXY_INIT(lgr) + , options_{options} {} + + void set_devices(std::vector devices) override { + md_.devices() = std::move(devices); + } + + void set_symlink_table_size(size_t size) override { + md_.symlink_table()->resize(size); + } + + void set_block_size(uint32_t block_size) override { + md_.block_size() = block_size; + } + + void set_total_fs_size(uint64_t total_fs_size) override { + md_.total_fs_size() = total_fs_size; + } + + void set_total_hardlink_size(uint64_t total_hardlink_size) override { + md_.total_hardlink_size() = total_hardlink_size; + } + + void set_shared_files_table(std::vector shared_files) override { + md_.shared_files_table() = std::move(shared_files); + } + + void set_category_names(std::vector category_names) override { + md_.category_names() = std::move(category_names); + } + + void set_block_categories(std::vector block_categories) override { + md_.block_categories() = std::move(block_categories); + } + + void add_symlink_table_entry(size_t index, uint32_t entry) override { + DWARFS_NOTHROW(md_.symlink_table()->at(index)) = entry; + } + + void gather_chunks(inode_manager const& im, block_manager const& bm, + size_t chunk_count) override; + + void gather_entries(std::span dirs, global_entry_data& ge_data, + uint32_t num_inodes) override; + + thrift::metadata::metadata const& build(global_entry_data& ge_data) override; + + private: + LOG_PROXY_DECL(LoggerPolicy); + thrift::metadata::metadata md_; + feature_set features_; + metadata_options const& options_; +}; + +template +void metadata_builder_::gather_chunks(inode_manager const& im, + block_manager const& bm, + size_t chunk_count) { + md_.chunk_table()->resize(im.count() + 1); + + auto& size_cache = md_.reg_file_size_cache().emplace(); + size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count; + + // TODO: we should be able to start this once all blocks have been + // submitted for compression + + md_.chunks().value().reserve(chunk_count); + + im.for_each_inode_in_order([&](std::shared_ptr const& ino) { + auto const total_chunks = md_.chunks()->size(); + DWARFS_NOTHROW(md_.chunk_table()->at(ino->num())) = total_chunks; + if (!ino->append_chunks_to(md_.chunks().value())) { + std::ostringstream oss; + for (auto fp : ino->all()) { + oss << "\n " << fp->path_as_string(); + } + LOG_ERROR << "inconsistent fragments in inode " << ino->num() + << ", the following files will be empty:" << oss.str(); + } + auto num_inode_chunks = md_.chunks()->size() - total_chunks; + if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) { + LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num() + << " with " << num_inode_chunks << " chunks"; + size_cache.lookup()->emplace(ino->num(), ino->size()); + } + }); + + bm.map_logical_blocks(md_.chunks().value()); + + // insert dummy inode to help determine number of chunks per inode + DWARFS_NOTHROW(md_.chunk_table()->at(im.count())) = md_.chunks()->size(); + + LOG_DEBUG << "total number of unique files: " << im.count(); + LOG_DEBUG << "total number of chunks: " << md_.chunks()->size(); +} + +template +void metadata_builder_::gather_entries(std::span dirs, + global_entry_data& ge_data, + uint32_t num_inodes) { + md_.dir_entries() = std::vector(); + md_.inodes()->resize(num_inodes); + md_.directories()->reserve(dirs.size() + 1); + + for (auto p : dirs) { + if (!p->has_parent()) { + p->set_entry_index(md_.dir_entries()->size()); + p->pack_entry(md_, ge_data); + } + + p->pack(md_, ge_data); + } + + thrift::metadata::directory dummy; + dummy.parent_entry() = 0; + dummy.first_entry() = md_.dir_entries()->size(); + dummy.self_entry() = 0; + md_.directories()->push_back(dummy); +} + +template +thrift::metadata::metadata const& +metadata_builder_::build(global_entry_data& ge_data) { + LOG_VERBOSE << "building metadata"; + + thrift::metadata::fs_options fsopts; + fsopts.mtime_only() = !options_.keep_all_times; + if (options_.time_resolution_sec > 1) { + fsopts.time_resolution_sec() = options_.time_resolution_sec; + } + fsopts.packed_chunk_table() = options_.pack_chunk_table; + fsopts.packed_directories() = options_.pack_directories; + fsopts.packed_shared_files_table() = options_.pack_shared_files_table; + + if (options_.pack_directories) { + // pack directories + uint32_t last_first_entry = 0; + + for (auto& d : md_.directories().value()) { + d.parent_entry() = 0; // this will be recovered + d.self_entry() = 0; // this will be recovered + auto delta = d.first_entry().value() - last_first_entry; + last_first_entry = d.first_entry().value(); + d.first_entry() = delta; + } + } + + if (options_.pack_chunk_table) { + // delta-compress chunk table + std::adjacent_difference(md_.chunk_table()->begin(), + md_.chunk_table()->end(), + md_.chunk_table()->begin()); + } + + if (options_.pack_shared_files_table) { + if (!md_.shared_files_table()->empty()) { + auto& sf = md_.shared_files_table().value(); + DWARFS_CHECK(std::is_sorted(sf.begin(), sf.end()), + "shared files vector not sorted"); + std::vector compressed; + compressed.reserve(sf.back() + 1); + + uint32_t count = 0; + uint32_t index = 0; + for (auto i : sf) { + if (i == index) { + ++count; + } else { + ++index; + DWARFS_CHECK(i == index, "inconsistent shared files vector"); + DWARFS_CHECK(count >= 2, "unique file in shared files vector"); + compressed.emplace_back(count - 2); + count = 1; + } + } + + compressed.emplace_back(count - 2); + + DWARFS_CHECK(compressed.size() == sf.back() + 1, + "unexpected compressed vector size"); + + sf.swap(compressed); + } + } + + if (options_.plain_names_table) { + md_.names() = ge_data.get_names(); + } else { + auto ti = LOG_TIMED_INFO; + md_.compact_names() = string_table::pack( + ge_data.get_names(), string_table::pack_options( + options_.pack_names, options_.pack_names_index, + options_.force_pack_string_tables)); + ti << "saving names table..."; + } + + if (options_.plain_symlinks_table) { + md_.symlinks() = ge_data.get_symlinks(); + } else { + auto ti = LOG_TIMED_INFO; + md_.compact_symlinks() = string_table::pack( + ge_data.get_symlinks(), + string_table::pack_options(options_.pack_symlinks, + options_.pack_symlinks_index, + options_.force_pack_string_tables)); + ti << "saving symlinks table..."; + } + + md_.uids() = ge_data.get_uids(); + md_.gids() = ge_data.get_gids(); + md_.modes() = ge_data.get_modes(); + md_.timestamp_base() = ge_data.get_timestamp_base(); + md_.options() = fsopts; + md_.features() = features_.get(); + md_.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID; + if (!options_.no_create_timestamp) { + md_.create_timestamp() = std::time(nullptr); + } + md_.preferred_path_separator() = + static_cast(std::filesystem::path::preferred_separator); + + return md_; +} + +} // namespace + +metadata_builder::metadata_builder(logger& lgr, metadata_options const& options) + : impl_{ + make_unique_logging_object( + lgr, options)} {} + +metadata_builder::~metadata_builder() = default; + +} // namespace dwarfs::writer::internal diff --git a/src/writer/scanner.cpp b/src/writer/scanner.cpp index d83b6ce8..15a0d984 100644 --- a/src/writer/scanner.cpp +++ b/src/writer/scanner.cpp @@ -58,8 +58,6 @@ #include #include -#include -#include #include #include #include @@ -71,11 +69,10 @@ #include #include #include +#include #include #include -#include - namespace dwarfs::writer { namespace internal { @@ -188,24 +185,7 @@ class save_directories_visitor : public visitor_base { void visit(dir* p) override { directories_.at(p->inode_num().value()) = p; } - void pack(thrift::metadata::metadata& mv2, global_entry_data& ge_data) { - for (auto p : directories_) { - if (!p->has_parent()) { - p->set_entry_index(mv2.dir_entries()->size()); - p->pack_entry(mv2, ge_data); - } - - p->pack(mv2, ge_data); - } - - thrift::metadata::directory dummy; - dummy.parent_entry() = 0; - dummy.first_entry() = mv2.dir_entries()->size(); - dummy.self_entry() = 0; - mv2.directories()->push_back(dummy); - - directories_.clear(); - } + std::span get_directories() { return directories_; } private: std::vector directories_; @@ -230,36 +210,6 @@ class save_shared_files_visitor : public visitor_base { } } - void pack_shared_files() { - if (!shared_files_.empty()) { - DWARFS_CHECK(std::is_sorted(shared_files_.begin(), shared_files_.end()), - "shared files vector not sorted"); - std::vector compressed; - compressed.reserve(shared_files_.back() + 1); - - uint32_t count = 0; - uint32_t index = 0; - for (auto i : shared_files_) { - if (i == index) { - ++count; - } else { - ++index; - DWARFS_CHECK(i == index, "inconsistent shared files vector"); - DWARFS_CHECK(count >= 2, "unique file in shared files vector"); - compressed.emplace_back(count - 2); - count = 1; - } - } - - compressed.emplace_back(count - 2); - - DWARFS_CHECK(compressed.size() == shared_files_.back() + 1, - "unexpected compressed vector size"); - - shared_files_.swap(compressed); - } - } - std::vector& get_shared_files() { return shared_files_; } private: @@ -730,17 +680,14 @@ void scanner_::scan( } } - global_entry_data ge_data(options_); - thrift::metadata::metadata mv2; - feature_set features; - - mv2.symlink_table()->resize(first_file_inode - first_link_inode); + global_entry_data ge_data(options_.metadata); + metadata_builder mdb(LOG_GET_LOGGER, options_.metadata); LOG_INFO << "assigning device inodes..."; uint32_t first_pipe_inode = first_device_inode; device_set_inode_visitor devsiv(first_pipe_inode); root->accept(devsiv); - mv2.devices() = std::move(devsiv.device_ids()); + mdb.set_devices(std::move(devsiv.device_ids())); LOG_INFO << "assigning pipe/socket inodes..."; uint32_t last_inode = first_pipe_inode; @@ -749,6 +696,8 @@ void scanner_::scan( LOG_INFO << "building metadata..."; + mdb.set_symlink_table_size(first_file_inode - first_link_inode); + wg_.add_job([&] { LOG_INFO << "saving names and symlinks..."; names_and_symlinks_visitor nlv(ge_data); @@ -760,9 +709,9 @@ void scanner_::scan( root->walk([&](entry* ep) { ep->update(ge_data); if (auto lp = dynamic_cast(ep)) { - DWARFS_NOTHROW(mv2.symlink_table()->at(ep->inode_num().value() - - first_link_inode)) = - ge_data.get_symlink_table_entry(lp->linkname()); + mdb.add_symlink_table_entry( + ep->inode_num().value() - first_link_inode, + ge_data.get_symlink_table_entry(lp->linkname())); } }); }); @@ -893,124 +842,18 @@ void scanner_::scan( prog.run_sync([&] { root->set_name(std::string()); }); LOG_INFO << "saving chunks..."; - mv2.chunk_table()->resize(im.count() + 1); - - auto& size_cache = mv2.reg_file_size_cache().emplace(); - size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count; - - // TODO: we should be able to start this once all blocks have been - // submitted for compression - mv2.chunks().value().reserve(prog.chunk_count); - im.for_each_inode_in_order([&](std::shared_ptr const& ino) { - auto const total_chunks = mv2.chunks()->size(); - DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks; - if (!ino->append_chunks_to(mv2.chunks().value())) { - std::ostringstream oss; - for (auto fp : ino->all()) { - oss << "\n " << fp->path_as_string(); - } - LOG_ERROR << "inconsistent fragments in inode " << ino->num() - << ", the following files will be empty:" << oss.str(); - } - auto num_inode_chunks = mv2.chunks()->size() - total_chunks; - if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) { - LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num() - << " with " << num_inode_chunks << " chunks"; - size_cache.lookup()->emplace(ino->num(), ino->size()); - } - }); - - blockmgr->map_logical_blocks(mv2.chunks().value()); - - // insert dummy inode to help determine number of chunks per inode - DWARFS_NOTHROW(mv2.chunk_table()->at(im.count())) = mv2.chunks()->size(); - - LOG_DEBUG << "total number of unique files: " << im.count(); - LOG_DEBUG << "total number of chunks: " << mv2.chunks()->size(); + mdb.gather_chunks(im, *blockmgr, prog.chunk_count); LOG_INFO << "saving directories..."; - mv2.dir_entries() = std::vector(); - mv2.inodes()->resize(last_inode); - mv2.directories()->reserve(first_link_inode + 1); save_directories_visitor sdv(first_link_inode); root->accept(sdv); - sdv.pack(mv2, ge_data); - - if (options_.pack_directories) { - // pack directories - uint32_t last_first_entry = 0; - - for (auto& d : mv2.directories().value()) { - d.parent_entry() = 0; // this will be recovered - d.self_entry() = 0; // this will be recovered - auto delta = d.first_entry().value() - last_first_entry; - last_first_entry = d.first_entry().value(); - d.first_entry() = delta; - } - } - - if (options_.pack_chunk_table) { - // delta-compress chunk table - std::adjacent_difference(mv2.chunk_table()->begin(), - mv2.chunk_table()->end(), - mv2.chunk_table()->begin()); - } + mdb.gather_entries(sdv.get_directories(), ge_data, last_inode); LOG_INFO << "saving shared files table..."; save_shared_files_visitor ssfv(first_file_inode, first_device_inode, fs.num_unique()); root->accept(ssfv); - if (options_.pack_shared_files_table) { - ssfv.pack_shared_files(); - } - mv2.shared_files_table() = std::move(ssfv.get_shared_files()); - - thrift::metadata::fs_options fsopts; - fsopts.mtime_only() = !options_.keep_all_times; - if (options_.time_resolution_sec > 1) { - fsopts.time_resolution_sec() = options_.time_resolution_sec; - } - fsopts.packed_chunk_table() = options_.pack_chunk_table; - fsopts.packed_directories() = options_.pack_directories; - fsopts.packed_shared_files_table() = options_.pack_shared_files_table; - - if (options_.plain_names_table) { - mv2.names() = ge_data.get_names(); - } else { - auto ti = LOG_TIMED_INFO; - mv2.compact_names() = string_table::pack( - ge_data.get_names(), string_table::pack_options( - options_.pack_names, options_.pack_names_index, - options_.force_pack_string_tables)); - ti << "saving names table..."; - } - - if (options_.plain_symlinks_table) { - mv2.symlinks() = ge_data.get_symlinks(); - } else { - auto ti = LOG_TIMED_INFO; - mv2.compact_symlinks() = string_table::pack( - ge_data.get_symlinks(), - string_table::pack_options(options_.pack_symlinks, - options_.pack_symlinks_index, - options_.force_pack_string_tables)); - ti << "saving symlinks table..."; - } - - mv2.uids() = ge_data.get_uids(); - mv2.gids() = ge_data.get_gids(); - mv2.modes() = ge_data.get_modes(); - mv2.timestamp_base() = ge_data.get_timestamp_base(); - mv2.block_size() = segmenter_factory_.get_block_size(); - mv2.total_fs_size() = prog.original_size; - mv2.total_hardlink_size() = prog.hardlink_size; - mv2.options() = fsopts; - mv2.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID; - if (!options_.no_create_timestamp) { - mv2.create_timestamp() = std::time(nullptr); - } - mv2.preferred_path_separator() = - static_cast(std::filesystem::path::preferred_separator); + mdb.set_shared_files_table(std::move(ssfv.get_shared_files())); if (auto catmgr = options_.inode.categorizer_mgr) { std::unordered_map::scan( written_categories.begin(), [&](auto const& cat) { return category_indices.at(cat); }); - mv2.category_names() = std::move(category_names); - mv2.block_categories() = std::move(written_categories); + mdb.set_category_names(std::move(category_names)); + mdb.set_block_categories(std::move(written_categories)); } - mv2.features() = features.get(); + mdb.set_block_size(segmenter_factory_.get_block_size()); + mdb.set_total_fs_size(prog.original_size); + mdb.set_total_hardlink_size(prog.hardlink_size); - auto [schema, data] = metadata_freezer::freeze(mv2); + auto [schema, data] = metadata_freezer::freeze(mdb.build(ge_data)); LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size()); diff --git a/test/dwarfs_benchmark.cpp b/test/dwarfs_benchmark.cpp index 2c1f2a53..74bbad7e 100644 --- a/test/dwarfs_benchmark.cpp +++ b/test/dwarfs_benchmark.cpp @@ -115,17 +115,17 @@ make_filesystem(::benchmark::State const* state, options.with_devices = true; options.with_specials = true; - options.keep_all_times = false; - options.pack_chunk_table = true; - options.pack_directories = state ? state->range(0) : true; - options.pack_shared_files_table = true; - options.pack_names = state ? state->range(2) : true; - options.pack_names_index = state ? state->range(3) : true; - options.pack_symlinks = state ? state->range(2) : true; - options.pack_symlinks_index = state ? state->range(3) : true; - options.force_pack_string_tables = true; - options.plain_names_table = state ? state->range(1) : false; - options.plain_symlinks_table = state ? state->range(1) : false; + options.metadata.keep_all_times = false; + options.metadata.pack_chunk_table = true; + options.metadata.pack_directories = state ? state->range(0) : true; + options.metadata.pack_shared_files_table = true; + options.metadata.pack_names = state ? state->range(2) : true; + options.metadata.pack_names_index = state ? state->range(3) : true; + options.metadata.pack_symlinks = state ? state->range(2) : true; + options.metadata.pack_symlinks_index = state ? state->range(3) : true; + options.metadata.force_pack_string_tables = true; + options.metadata.plain_names_table = state ? state->range(1) : false; + options.metadata.plain_symlinks_table = state ? state->range(1) : false; test::test_logger lgr; diff --git a/test/dwarfs_test.cpp b/test/dwarfs_test.cpp index 58f32a19..6ff4de28 100644 --- a/test/dwarfs_test.cpp +++ b/test/dwarfs_test.cpp @@ -154,28 +154,28 @@ void basic_end_to_end_test( options.with_devices = with_devices; options.with_specials = with_specials; options.inode.fragment_order.set_default(order_opts); - options.keep_all_times = keep_all_times; - options.pack_chunk_table = pack_chunk_table; - options.pack_directories = pack_directories; - options.pack_shared_files_table = pack_shared_files_table; - options.pack_names = pack_names; - options.pack_names_index = pack_names_index; - options.pack_symlinks = pack_symlinks; - options.pack_symlinks_index = pack_symlinks_index; - options.force_pack_string_tables = true; - options.plain_names_table = plain_names_table; - options.plain_symlinks_table = plain_symlinks_table; + options.metadata.keep_all_times = keep_all_times; + options.metadata.pack_chunk_table = pack_chunk_table; + options.metadata.pack_directories = pack_directories; + options.metadata.pack_shared_files_table = pack_shared_files_table; + options.metadata.pack_names = pack_names; + options.metadata.pack_names_index = pack_names_index; + options.metadata.pack_symlinks = pack_symlinks; + options.metadata.pack_symlinks_index = pack_symlinks_index; + options.metadata.force_pack_string_tables = true; + options.metadata.plain_names_table = plain_names_table; + options.metadata.plain_symlinks_table = plain_symlinks_table; if (set_uid) { - options.uid = 0; + options.metadata.uid = 0; } if (set_gid) { - options.gid = 0; + options.metadata.gid = 0; } if (set_time) { - options.timestamp = 4711; + options.metadata.timestamp = 4711; } test::test_logger lgr; @@ -695,14 +695,14 @@ TEST_P(packing_test, regression_empty_fs) { cfg.blockhash_window_size = 8; cfg.block_size_bits = 10; - options.pack_chunk_table = pack_chunk_table; - options.pack_directories = pack_directories; - options.pack_shared_files_table = pack_shared_files_table; - options.pack_names = pack_names; - options.pack_names_index = pack_names_index; - options.pack_symlinks = pack_symlinks; - options.pack_symlinks_index = pack_symlinks_index; - options.force_pack_string_tables = true; + options.metadata.pack_chunk_table = pack_chunk_table; + options.metadata.pack_directories = pack_directories; + options.metadata.pack_shared_files_table = pack_shared_files_table; + options.metadata.pack_names = pack_names; + options.metadata.pack_names_index = pack_names_index; + options.metadata.pack_symlinks = pack_symlinks; + options.metadata.pack_symlinks_index = pack_symlinks_index; + options.metadata.force_pack_string_tables = true; test::test_logger lgr; @@ -914,7 +914,7 @@ TEST_P(file_scanner, inode_ordering) { opts.file_hash_algorithm = file_hash_algo; opts.inode.fragment_order.set_default(order_opts); - opts.no_create_timestamp = true; + opts.metadata.no_create_timestamp = true; auto input = std::make_shared(); #if defined(DWARFS_TEST_RUNNING_ON_ASAN) || defined(DWARFS_TEST_RUNNING_ON_TSAN) @@ -1960,7 +1960,7 @@ TEST(filesystem, inode_size_cache) { } writer::scanner_options options; - options.inode_size_cache_min_chunk_count = 32; + options.metadata.inode_size_cache_min_chunk_count = 32; writer::segmenter::config cfg; cfg.block_size_bits = 16; diff --git a/tools/src/mkdwarfs_main.cpp b/tools/src/mkdwarfs_main.cpp index e6eef550..2cd23844 100644 --- a/tools/src/mkdwarfs_main.cpp +++ b/tools/src/mkdwarfs_main.cpp @@ -626,13 +626,13 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { po::value(&chmod_str), "recursively apply permission changes") ("no-create-timestamp", - po::value(&options.no_create_timestamp)->zero_tokens(), + po::value(&options.metadata.no_create_timestamp)->zero_tokens(), "don't add create timestamp to file system") ("set-time", po::value(×tamp), "set timestamp for whole file system (unixtime or 'now')") ("keep-all-times", - po::value(&options.keep_all_times)->zero_tokens(), + po::value(&options.metadata.keep_all_times)->zero_tokens(), "save atime and ctime in addition to mtime") ("time-resolution", po::value(&time_resolution)->default_value("sec"), @@ -974,24 +974,25 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { } if (vm.count("set-owner")) { - options.uid = uid; + options.metadata.uid = uid; } if (vm.count("set-group")) { - options.gid = gid; + options.metadata.gid = gid; } if (vm.count("set-time")) { if (timestamp == "now") { - options.timestamp = std::time(nullptr); + options.metadata.timestamp = std::time(nullptr); } else if (auto val = try_to(timestamp)) { - options.timestamp = *val; + options.metadata.timestamp = *val; } else { try { auto tp = parse_time_point(timestamp); - options.timestamp = std::chrono::duration_cast( - tp.time_since_epoch()) - .count(); + options.metadata.timestamp = + std::chrono::duration_cast( + tp.time_since_epoch()) + .count(); } catch (std::exception const& e) { iol.err << "error: " << e.what() << "\n"; return 1; @@ -1001,10 +1002,10 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { if (auto it = time_resolutions.find(time_resolution); it != time_resolutions.end()) { - options.time_resolution_sec = it->second; + options.metadata.time_resolution_sec = it->second; } else if (auto val = try_to(time_resolution)) { - options.time_resolution_sec = *val; - if (options.time_resolution_sec == 0) { + options.metadata.time_resolution_sec = *val; + if (options.metadata.time_resolution_sec == 0) { iol.err << "error: the argument to '--time-resolution' must be nonzero\n"; return 1; } @@ -1016,45 +1017,45 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { if (!pack_metadata.empty() and pack_metadata != "none") { if (pack_metadata == "auto") { - options.force_pack_string_tables = false; - options.pack_chunk_table = false; - options.pack_directories = false; - options.pack_shared_files_table = false; - options.pack_names = true; - options.pack_names_index = false; - options.pack_symlinks = true; - options.pack_symlinks_index = false; + options.metadata.force_pack_string_tables = false; + options.metadata.pack_chunk_table = false; + options.metadata.pack_directories = false; + options.metadata.pack_shared_files_table = false; + options.metadata.pack_names = true; + options.metadata.pack_names_index = false; + options.metadata.pack_symlinks = true; + options.metadata.pack_symlinks_index = false; } else { auto pack_opts = split_to>(pack_metadata, ','); for (auto const& opt : pack_opts) { if (opt == "chunk_table") { - options.pack_chunk_table = true; + options.metadata.pack_chunk_table = true; } else if (opt == "directories") { - options.pack_directories = true; + options.metadata.pack_directories = true; } else if (opt == "shared_files") { - options.pack_shared_files_table = true; + options.metadata.pack_shared_files_table = true; } else if (opt == "names") { - options.pack_names = true; + options.metadata.pack_names = true; } else if (opt == "names_index") { - options.pack_names_index = true; + options.metadata.pack_names_index = true; } else if (opt == "symlinks") { - options.pack_symlinks = true; + options.metadata.pack_symlinks = true; } else if (opt == "symlinks_index") { - options.pack_symlinks_index = true; + options.metadata.pack_symlinks_index = true; } else if (opt == "force") { - options.force_pack_string_tables = true; + options.metadata.force_pack_string_tables = true; } else if (opt == "plain") { - options.plain_names_table = true; - options.plain_symlinks_table = true; + options.metadata.plain_names_table = true; + options.metadata.plain_symlinks_table = true; } else if (opt == "all") { - options.pack_chunk_table = true; - options.pack_directories = true; - options.pack_shared_files_table = true; - options.pack_names = true; - options.pack_names_index = true; - options.pack_symlinks = true; - options.pack_symlinks_index = true; + options.metadata.pack_chunk_table = true; + options.metadata.pack_directories = true; + options.metadata.pack_shared_files_table = true; + options.metadata.pack_names = true; + options.metadata.pack_names_index = true; + options.metadata.pack_symlinks = true; + options.metadata.pack_symlinks_index = true; } else { iol.err << "error: the argument ('" << opt << "') to '--pack-metadata' is invalid\n";