From c767b0b93ec683a25485dfe46f166c03c9508cf5 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 28 Nov 2020 02:06:41 +0100 Subject: [PATCH] metadata_v2: more cleaning up --- include/dwarfs/entry.h | 28 ++---- include/dwarfs/fstypes.h | 42 +-------- src/dwarfs.cpp | 24 +++-- src/dwarfs/entry.cpp | 156 +++++-------------------------- src/dwarfs/filesystem_writer.cpp | 3 +- src/dwarfs/fstypes.cpp | 8 -- src/dwarfs/inode_manager.cpp | 3 +- src/dwarfsbench.cpp | 3 +- src/mkdwarfs.cpp | 8 +- test/dwarfs.cpp | 35 +++---- 10 files changed, 61 insertions(+), 249 deletions(-) diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h index 0496162b..dd6faea1 100644 --- a/include/dwarfs/entry.h +++ b/include/dwarfs/entry.h @@ -38,6 +38,7 @@ namespace dwarfs { +// TODO: clean up struct global_entry_data { global_entry_data(bool no_time) : no_time_(no_time) {} @@ -151,9 +152,6 @@ class entry : public file_interface { virtual size_t total_size() const; virtual void walk(std::function const& f); virtual void walk(std::function const& f) const; - void pack(dir_entry& de) const; - void pack(dir_entry_ug& de) const; - void pack(dir_entry_ug_time& de) const; void pack(thrift::metadata::entry& entry_v2, global_entry_data const& data) const; void update(global_entry_data& data) const; @@ -161,7 +159,6 @@ class entry : public file_interface { virtual uint32_t inode_num() const = 0; protected: - virtual void pack_specific(dir_entry& de) const = 0; virtual void scan(os_access& os, const std::string& p, progress& prog) = 0; private: @@ -187,7 +184,6 @@ class file : public entry { uint32_t similarity_hash() const { return similarity_hash_; } protected: - void pack_specific(dir_entry& de) const override; void scan(os_access& os, const std::string& p, progress& prog) override; private: @@ -210,21 +206,13 @@ class dir : public entry { void sort(); void set_offset(size_t offset); void set_inode(uint32_t inode); - virtual size_t packed_size() const = 0; - virtual void - pack(uint8_t* buf, - std::function const& offset_cb) - const = 0; - virtual void pack(thrift::metadata::metadata& mv2, - global_entry_data const& data) const = 0; - virtual size_t packed_entry_size() const = 0; - virtual void pack_entry(uint8_t* buf) const = 0; - virtual void pack_entry(thrift::metadata::metadata& mv2, - global_entry_data const& data) const = 0; + void + pack(thrift::metadata::metadata& mv2, global_entry_data const& data) const; + void pack_entry(thrift::metadata::metadata& mv2, + global_entry_data const& data) const; uint32_t inode_num() const override { return inode_; } protected: - void pack_specific(dir_entry& de) const override; void scan(os_access& os, const std::string& p, progress& prog) override; using entry_ptr = std::shared_ptr; @@ -246,7 +234,6 @@ class link : public entry { uint32_t inode_num() const override { return inode_; } protected: - void pack_specific(dir_entry& de) const override; void scan(os_access& os, const std::string& p, progress& prog) override; private: @@ -257,15 +244,12 @@ class link : public entry { class entry_factory { public: - static std::shared_ptr - create(bool no_owner = false, bool no_time = false, - bool with_similarity = false); + static std::unique_ptr create(bool with_similarity = false); virtual ~entry_factory() = default; virtual std::shared_ptr create(os_access& os, const std::string& name, std::shared_ptr parent = std::shared_ptr()) = 0; - virtual dir_entry_type de_type() const = 0; }; } // namespace dwarfs diff --git a/include/dwarfs/fstypes.h b/include/dwarfs/fstypes.h index 4554f3fe..6dbee246 100644 --- a/include/dwarfs/fstypes.h +++ b/include/dwarfs/fstypes.h @@ -53,6 +53,7 @@ class block_range { std::shared_ptr block_; }; +// TODO: move elsewhere struct iovec_read_buf { // This covers more than 95% of reads static constexpr size_t inline_storage = 16; @@ -75,12 +76,6 @@ enum class section_type : uint16_t { // Frozen metadata. }; -enum class dir_entry_type : uint8_t { - DIR_ENTRY = 0, // filesystem uses dir_entry - DIR_ENTRY_UG = 1, // filesystem uses dir_entry_ug - DIR_ENTRY_UG_TIME = 2 // filesystem uses dir_entry_ug_time -}; - struct file_header { char magic[6]; // "DWARFS" uint8_t major; // major version @@ -97,41 +92,6 @@ struct section_header { void dump(std::ostream& os) const; }; -struct dir_entry { // 128 bits (16 bytes) / entry - uint32_t name_offset; - uint16_t name_size; - uint16_t mode; - uint32_t inode; // dirs start at 1, then links, then files - union { - uint32_t file_size; // for files only - uint32_t offset; // for dirs, offset to directory, - } u; // for links, offset to content in link table -}; - -struct dir_entry_ug { // 160 bits (20 bytes) / entry - dir_entry de; - uint16_t owner; - uint16_t group; -}; - -struct dir_entry_ug_time { // 256 bits (32 bytes) / entry - dir_entry_ug ug; - uint32_t atime; // yeah, I know... in a few years we can switch to 64 bits - uint32_t mtime; - uint32_t ctime; -}; - -struct directory { - uint32_t count; - uint32_t self; - uint32_t parent; - union { - dir_entry entries[1]; - dir_entry_ug entries_ug[1]; - dir_entry_ug_time entries_ug_time[1]; - } u; -}; - std::string get_compression_name(compression_type type); std::string get_section_name(section_type type); diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp index ebdc806d..45120d2f 100644 --- a/src/dwarfs.cpp +++ b/src/dwarfs.cpp @@ -81,9 +81,9 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) { bco.max_bytes = opts.cachesize; bco.num_workers = opts.workers; bco.decompress_ratio = opts.decompress_ratio; - s_fs = - std::make_shared(s_lgr, std::make_shared(opts.fsimage), - bco, &opts.stat_defaults, FUSE_ROOT_ID); + s_fs = std::make_shared( + s_lgr, std::make_shared(opts.fsimage), bco, &opts.stat_defaults, + FUSE_ROOT_ID); } void op_destroy(void* /*userdata*/) { @@ -263,17 +263,15 @@ void op_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, } err = -rv; -} -catch (const dwarfs::error& e) { - std::cerr << "ERROR: " << e.what() << std::endl; - err = e.get_errno(); -} -catch (const std::exception& e) { - std::cerr << "ERROR: " << e.what() << std::endl; - err = EIO; -} + } catch (const dwarfs::error& e) { + std::cerr << "ERROR: " << e.what() << std::endl; + err = e.get_errno(); + } catch (const std::exception& e) { + std::cerr << "ERROR: " << e.what() << std::endl; + err = EIO; + } -fuse_reply_err(req, err); + fuse_reply_err(req, err); } // namespace dwarfs void op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, diff --git a/src/dwarfs/entry.cpp b/src/dwarfs/entry.cpp index e5af2622..22d2e0c1 100644 --- a/src/dwarfs/entry.cpp +++ b/src/dwarfs/entry.cpp @@ -75,66 +75,6 @@ void global_entry_data::index(std::unordered_map& map) { from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; }; } -template -class dir_ : public dir { - public: - using dir::dir; - - size_t packed_entry_size() const override { return sizeof(DirEntryType); } - - void pack_entry(uint8_t* buf) const override { - DirEntryType* de = reinterpret_cast(buf); - entry::pack(*de); - } - - void pack_entry(thrift::metadata::metadata& mv2, - global_entry_data const& data) const override { - mv2.entry_index.at(inode_num()) = mv2.entries.size(); - mv2.entries.emplace_back(); - entry::pack(mv2.entries.back(), data); - } - - size_t packed_size() const override { - return offsetof(directory, u) + sizeof(DirEntryType) * entries_.size(); - } - - void pack(uint8_t* buf, - std::function const& offset_cb) - const override { - directory* p = reinterpret_cast(buf); - DirEntryType* de = reinterpret_cast(&p->u); - - p->count = entries_.size(); - - p->self = offset_; - p->parent = has_parent() - ? std::dynamic_pointer_cast(parent())->offset_ - : offset_; - - for (entry_ptr const& e : entries_) { - e->pack(*de); - offset_cb(e.get(), offset_ + (reinterpret_cast(de) - buf)); - ++de; - } - } - - void pack(thrift::metadata::metadata& mv2, - global_entry_data const& data) const override { - thrift::metadata::directory dir; - dir.parent_inode = - has_parent() ? std::dynamic_pointer_cast(parent())->inode_num() - : 0; - dir.first_entry = mv2.entries.size(); - dir.entry_count = entries_.size(); - mv2.directories.push_back(dir); - for (entry_ptr const& e : entries_) { - mv2.entry_index.at(e->inode_num()) = mv2.entries.size(); - mv2.entries.emplace_back(); - e->pack(mv2.entries.back(), data); - } - } -}; - entry::entry(const std::string& name, std::shared_ptr parent, const struct ::stat& st) : name_(name) @@ -192,29 +132,6 @@ void entry::walk(std::function const& f) { f(this); } void entry::walk(std::function const& f) const { f(this); } -void entry::pack(dir_entry& de) const { - de.name_offset = name_offset_; - de.name_size = folly::to(name_.size()); - de.mode = stat_.st_mode & 0xFFFF; - - pack_specific(de); -} - -void entry::pack(dir_entry_ug& de) const { - de.owner = stat_.st_uid; - de.group = stat_.st_gid; - - pack(de.de); -} - -void entry::pack(dir_entry_ug_time& de) const { - de.atime = stat_.st_atime; - de.mtime = stat_.st_mtime; - de.ctime = stat_.st_ctime; - - pack(de.ug); -} - void entry::update(global_entry_data& data) const { data.add_uid(stat_.st_uid); data.add_gid(stat_.st_gid); @@ -256,11 +173,6 @@ uint32_t file::inode_num() const { return inode_->num(); } void file::accept(entry_visitor& v, bool) { v.visit(this); } -void file::pack_specific(dir_entry& de) const { - de.inode = inode_->num(); - de.u.file_size = folly::to(inode_->size()); -} - void file::scan(os_access& os, const std::string& p, progress& prog) { assert(SHA_DIGEST_LENGTH == hash_.size()); @@ -335,12 +247,29 @@ void dir::set_offset(size_t offset) { offset_ = folly::to(offset); } void dir::set_inode(uint32_t inode) { inode_ = inode; } -void dir::pack_specific(dir_entry& de) const { - de.inode = inode_; - de.u.offset = offset_; +void dir::scan(os_access&, const std::string&, progress&) {} + +void dir::pack_entry(thrift::metadata::metadata& mv2, + global_entry_data const& data) const { + mv2.entry_index.at(inode_num()) = mv2.entries.size(); + mv2.entries.emplace_back(); + entry::pack(mv2.entries.back(), data); } -void dir::scan(os_access&, const std::string&, progress&) {} +void dir::pack(thrift::metadata::metadata& mv2, + global_entry_data const& data) const { + thrift::metadata::directory d; + d.parent_inode = + has_parent() ? std::dynamic_pointer_cast(parent())->inode_num() : 0; + d.first_entry = mv2.entries.size(); + d.entry_count = entries_.size(); + mv2.directories.push_back(d); + for (entry_ptr const& e : entries_) { + mv2.entry_index.at(e->inode_num()) = mv2.entries.size(); + mv2.entries.emplace_back(); + e->pack(mv2.entries.back(), data); + } +} entry::type_t link::type() const { return E_LINK; } @@ -352,17 +281,11 @@ void link::set_inode(uint32_t inode) { inode_ = inode; } void link::accept(entry_visitor& v, bool) { v.visit(this); } -void link::pack_specific(dir_entry& de) const { - de.inode = inode_; - de.u.offset = offset_; -} - void link::scan(os_access& os, const std::string& p, progress& prog) { link_ = os.readlink(p, size()); prog.original_size += size(); } -template class entry_factory_ : public entry_factory { public: entry_factory_(bool with_similarity) @@ -379,7 +302,7 @@ class entry_factory_ : public entry_factory { return std::make_shared(name, std::move(parent), st, with_similarity_); } else if (S_ISDIR(st.st_mode)) { - return std::make_shared>(name, std::move(parent), st); + return std::make_shared(name, std::move(parent), st); } else if (S_ISLNK(st.st_mode)) { return std::make_shared(name, std::move(parent), st); } else { @@ -389,42 +312,11 @@ class entry_factory_ : public entry_factory { return std::shared_ptr(); } - dir_entry_type de_type() const override; - private: const bool with_similarity_; }; -template <> -dir_entry_type entry_factory_::de_type() const { - return dir_entry_type::DIR_ENTRY; -} - -template <> -dir_entry_type entry_factory_::de_type() const { - return dir_entry_type::DIR_ENTRY_UG; -} - -template <> -dir_entry_type entry_factory_::de_type() const { - return dir_entry_type::DIR_ENTRY_UG_TIME; -} - -std::shared_ptr -entry_factory::create(bool no_owner, bool no_time, bool with_similarity) { - if (no_owner) { - if (!no_time) { - throw std::runtime_error("no_owner implies no_time"); - } - - // no owner/time information - return std::make_shared>(with_similarity); - } else if (no_time) { - // no time information - return std::make_shared>(with_similarity); - } else { - // the full monty - return std::make_shared>(with_similarity); - } +std::unique_ptr entry_factory::create(bool with_similarity) { + return std::make_unique(with_similarity); } } // namespace dwarfs diff --git a/src/dwarfs/filesystem_writer.cpp b/src/dwarfs/filesystem_writer.cpp index 645689f4..43d895f5 100644 --- a/src/dwarfs/filesystem_writer.cpp +++ b/src/dwarfs/filesystem_writer.cpp @@ -228,8 +228,7 @@ void filesystem_writer_::writer_thread() { fsb->wait_until_compressed(); - log_.debug() << get_section_name(fsb->type()) - << " compressed from " + log_.debug() << get_section_name(fsb->type()) << " compressed from " << size_with_unit(fsb->uncompressed_size()) << " to " << size_with_unit(fsb->size()); diff --git a/src/dwarfs/fstypes.cpp b/src/dwarfs/fstypes.cpp index 117aa50f..0ba1866a 100644 --- a/src/dwarfs/fstypes.cpp +++ b/src/dwarfs/fstypes.cpp @@ -39,14 +39,6 @@ const std::map sections{ #undef SECTION_TYPE_ }; -// TODO: remove -const std::map dir_entries{ -#define DIR_ENTRY_TYPE_(x) {dir_entry_type::x, #x} - DIR_ENTRY_TYPE_(DIR_ENTRY), DIR_ENTRY_TYPE_(DIR_ENTRY_UG), - DIR_ENTRY_TYPE_(DIR_ENTRY_UG_TIME) -#undef DIR_ENTRY_TYPE_ -}; - const std::map compressions{ #define COMPRESSION_TYPE_(x) {compression_type::x, #x} COMPRESSION_TYPE_(NONE), COMPRESSION_TYPE_(LZMA), COMPRESSION_TYPE_(ZSTD), diff --git a/src/dwarfs/inode_manager.cpp b/src/dwarfs/inode_manager.cpp index 2563e39b..dc55c8f8 100644 --- a/src/dwarfs/inode_manager.cpp +++ b/src/dwarfs/inode_manager.cpp @@ -77,8 +77,7 @@ class inode_manager_ : public inode_manager { return file_; } - void - append_chunks_to(std::vector& vec) const override { + void append_chunks_to(std::vector& vec) const override { vec.insert(vec.end(), chunks_.begin(), chunks_.end()); } diff --git a/src/dwarfsbench.cpp b/src/dwarfsbench.cpp index f7af4558..becd9ee8 100644 --- a/src/dwarfsbench.cpp +++ b/src/dwarfsbench.cpp @@ -85,7 +85,8 @@ int dwarfsbench(int argc, char** argv) { bco.num_workers = num_workers; bco.decompress_ratio = folly::to(decompress_ratio_str); - dwarfs::filesystem_v2 fs(lgr, std::make_shared(filesystem), bco); + dwarfs::filesystem_v2 fs(lgr, std::make_shared(filesystem), + bco); worker_group wg("reader", num_readers); diff --git a/src/mkdwarfs.cpp b/src/mkdwarfs.cpp index 4ee24b69..8a9ab50b 100644 --- a/src/mkdwarfs.cpp +++ b/src/mkdwarfs.cpp @@ -458,16 +458,16 @@ int mkdwarfs(int argc, char** argv) { if (recompress) { auto ti = log.timed_info(); - filesystem_v2::rewrite(lgr, prog, std::make_shared(path), fsw); + filesystem_v2::rewrite(lgr, prog, std::make_shared(path), + fsw); wg_writer.wait(); ti << "filesystem rewritten"; } else { options.no_time = no_time; scanner s(lgr, wg_scanner, cfg, - entry_factory::create(no_owner, no_owner || no_time, - options.file_order == - file_order_mode::SIMILARITY), + entry_factory::create(options.file_order == + file_order_mode::SIMILARITY), std::make_shared(), script, options); { diff --git a/test/dwarfs.cpp b/test/dwarfs.cpp index c48125fb..8f8de3ef 100644 --- a/test/dwarfs.cpp +++ b/test/dwarfs.cpp @@ -163,8 +163,8 @@ using namespace dwarfs; namespace { void basic_end_to_end_test(const std::string& compressor, - unsigned block_size_bits, file_order_mode file_order, - bool no_owner, bool no_time) { + unsigned block_size_bits, + file_order_mode file_order) { block_manager::config cfg; scanner_options options; @@ -181,8 +181,7 @@ void basic_end_to_end_test(const std::string& compressor, lgr.set_policy(); scanner s(lgr, wg, cfg, - entry_factory::create(no_owner, no_time, - file_order == file_order_mode::SIMILARITY), + entry_factory::create(file_order == file_order_mode::SIMILARITY), std::make_shared(), std::make_shared(), options); @@ -233,30 +232,18 @@ std::vector const compressions{"null", } // namespace class basic : public testing::TestWithParam< - std::tuple> {}; + std::tuple> {}; TEST_P(basic, end_to_end) { - bool no_owner = false, no_time = false; - - switch (std::get<3>(GetParam())) { - case 1: - no_time = true; - break; - case 2: - no_owner = no_time = true; - break; - default: - break; - } - basic_end_to_end_test(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam()), no_owner, no_time); + std::get<2>(GetParam())); } INSTANTIATE_TEST_SUITE_P( dwarfs, basic, - ::testing::Combine( - ::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28), - ::testing::Values(file_order_mode::NONE, file_order_mode::PATH, - file_order_mode::SCRIPT, file_order_mode::SIMILARITY), - ::testing::Values(0, 1, 2))); + ::testing::Combine(::testing::ValuesIn(compressions), + ::testing::Values(12, 15, 20, 28), + ::testing::Values(file_order_mode::NONE, + file_order_mode::PATH, + file_order_mode::SCRIPT, + file_order_mode::SIMILARITY)));