From 6788ba0d69d620b98ff61880d00ba4383114b3ad Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 5 Dec 2020 22:40:36 +0100 Subject: [PATCH] Rework scripting abstractions, improve inode interface --- include/dwarfs/entry.h | 18 +++++- .../{file_vector.h => entry_interface.h} | 27 ++++++--- include/dwarfs/inode.h | 18 ++++-- include/dwarfs/{file_interface.h => object.h} | 12 +--- include/dwarfs/progress.h | 4 +- include/dwarfs/script.h | 46 +++------------ src/dwarfs/block_manager.cpp | 4 +- src/dwarfs/console_writer.cpp | 2 +- src/dwarfs/entry.cpp | 57 +++++++++++++++---- src/dwarfs/inode_manager.cpp | 35 +++++------- src/dwarfs/scanner.cpp | 27 ++++----- test/dwarfs.cpp | 9 ++- 12 files changed, 145 insertions(+), 114 deletions(-) rename include/dwarfs/{file_vector.h => entry_interface.h} (55%) rename include/dwarfs/{file_interface.h => object.h} (77%) diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h index e2bbab9d..2607bfe5 100644 --- a/include/dwarfs/entry.h +++ b/include/dwarfs/entry.h @@ -32,7 +32,7 @@ #include -#include "dwarfs/file_interface.h" +#include "dwarfs/entry_interface.h" namespace dwarfs { @@ -61,7 +61,7 @@ class entry_visitor { virtual void visit(dir* p) = 0; }; -class entry : public file_interface { +class entry : public entry_interface { public: enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER }; @@ -86,6 +86,20 @@ class entry : public file_interface { virtual void scan(os_access& os, progress& prog) = 0; const struct ::stat& status() const { return stat_; } + // more methods from entry_interface + uint16_t get_permissions() const override; + void set_permissions(uint16_t perm) override; + uint16_t get_uid() const override; + void set_uid(uint16_t uid) override; + uint16_t get_gid() const override; + void set_gid(uint16_t gid) override; + uint64_t get_atime() const override; + void set_atime(uint64_t atime) override; + uint64_t get_mtime() const override; + void set_mtime(uint64_t mtime) override; + uint64_t 
get_ctime() const override; + void set_ctime(uint64_t ctime) override; + private: std::string name_; std::weak_ptr parent_; diff --git a/include/dwarfs/file_vector.h b/include/dwarfs/entry_interface.h similarity index 55% rename from include/dwarfs/file_vector.h rename to include/dwarfs/entry_interface.h index 78640910..1a1d1d8a 100644 --- a/include/dwarfs/file_vector.h +++ b/include/dwarfs/entry_interface.h @@ -21,17 +21,30 @@ #pragma once -#include +#include + +#include "dwarfs/object.h" namespace dwarfs { -class file_vector { +class entry_interface : public object { public: - virtual ~file_vector() = default; - virtual const file_interface* operator[](size_t i) const = 0; + virtual std::string path() const = 0; + virtual std::string const& name() const = 0; + virtual std::string type_string() const = 0; virtual size_t size() const = 0; - virtual void - sort(std::function const& less) = 0; + + virtual uint16_t get_permissions() const = 0; + virtual void set_permissions(uint16_t perm) = 0; + virtual uint16_t get_uid() const = 0; + virtual void set_uid(uint16_t uid) = 0; + virtual uint16_t get_gid() const = 0; + virtual void set_gid(uint16_t gid) = 0; + virtual uint64_t get_atime() const = 0; + virtual void set_atime(uint64_t atime) = 0; + virtual uint64_t get_mtime() const = 0; + virtual void set_mtime(uint64_t mtime) = 0; + virtual uint64_t get_ctime() const = 0; + virtual void set_ctime(uint64_t ctime) = 0; }; } // namespace dwarfs diff --git a/include/dwarfs/inode.h b/include/dwarfs/inode.h index 4a72b030..8a157e39 100644 --- a/include/dwarfs/inode.h +++ b/include/dwarfs/inode.h @@ -19,10 +19,14 @@ * along with dwarfs. If not, see . 
*/ +#pragma once + #include #include -#include "dwarfs/fstypes.h" +#include + +#include "dwarfs/object.h" namespace dwarfs { @@ -31,17 +35,21 @@ struct chunk; } class file; -class file_interface; -class inode : public file_interface { +class inode : public object { public: - virtual void set_file(const file* f) = 0; + using files_vector = folly::small_vector; + + virtual void set_files(files_vector&& fv) = 0; virtual void set_num(uint32_t num) = 0; virtual uint32_t num() const = 0; virtual uint32_t similarity_hash() const = 0; - virtual const file_interface* any() const = 0; // TODO + virtual size_t size() const = 0; + virtual file const* any() const = 0; + virtual files_vector const& files() const = 0; virtual void add_chunk(size_t block, size_t offset, size_t size) = 0; virtual void append_chunks_to(std::vector& vec) const = 0; }; + } // namespace dwarfs diff --git a/include/dwarfs/file_interface.h b/include/dwarfs/object.h similarity index 77% rename from include/dwarfs/file_interface.h rename to include/dwarfs/object.h index 3936b7ef..ea3a2146 100644 --- a/include/dwarfs/file_interface.h +++ b/include/dwarfs/object.h @@ -21,17 +21,11 @@ #pragma once -#include - namespace dwarfs { -class file_interface { +class object { public: - virtual ~file_interface() = default; - - virtual std::string path() const = 0; - virtual const std::string& name() const = 0; - virtual std::string type_string() const = 0; - virtual size_t size() const = 0; + virtual ~object() = default; }; + } // namespace dwarfs diff --git a/include/dwarfs/progress.h b/include/dwarfs/progress.h index 29cffce6..9c900bf9 100644 --- a/include/dwarfs/progress.h +++ b/include/dwarfs/progress.h @@ -33,6 +33,8 @@ namespace dwarfs { +class object; + class progress { public: using status_function_type = @@ -51,7 +53,7 @@ class progress { std::string status(size_t max_len) const; - std::atomic current{nullptr}; + std::atomic current{nullptr}; std::atomic files_found{0}; std::atomic files_scanned{0}; 
std::atomic dirs_found{0}; diff --git a/include/dwarfs/script.h b/include/dwarfs/script.h index cef06f99..d15719a2 100644 --- a/include/dwarfs/script.h +++ b/include/dwarfs/script.h @@ -21,54 +21,24 @@ #pragma once -#include #include #include -#include "dwarfs/file_interface.h" -#include "dwarfs/file_vector.h" - namespace dwarfs { -namespace detail { - -template -class file_vector_ : public file_vector { - public: - file_vector_(std::vector>& vec) - : vec_(vec) {} - - const file_interface* operator[](size_t i) const override { - return vec_[i].get(); - } - - size_t size() const override { return vec_.size(); } - - void - sort(std::function const& - less) override { - std::sort(vec_.begin(), vec_.end(), - [&](const std::shared_ptr& a, const std::shared_ptr& b) { - return less(a.get(), b.get()); - }); - } - - private: - std::vector>& vec_; -}; -} // namespace detail +class entry_interface; +class inode; class script { public: + using inode_ptr = std::shared_ptr; + using inode_vector = std::vector; + virtual ~script() = default; - virtual bool filter(file_interface const& fi) const = 0; - virtual void order(file_vector& fvi) const = 0; - template - void order(std::vector>& vec) const { - detail::file_vector_ fv(vec); - order(fv); - } + virtual bool filter(entry_interface const& ei) = 0; + virtual void transform(entry_interface& ei) = 0; + virtual void order(inode_vector& iv) = 0; }; } // namespace dwarfs diff --git a/src/dwarfs/block_manager.cpp b/src/dwarfs/block_manager.cpp index 059591ff..99d22f8b 100644 --- a/src/dwarfs/block_manager.cpp +++ b/src/dwarfs/block_manager.cpp @@ -33,7 +33,7 @@ #include "dwarfs/block_manager.h" #include "dwarfs/cyclic_hash.h" -#include "dwarfs/file_interface.h" +#include "dwarfs/entry.h" #include "dwarfs/filesystem_writer.h" #include "dwarfs/inode.h" #include "dwarfs/inode_hasher.h" @@ -320,7 +320,7 @@ void block_manager_::add_data(const std::shared_ptr& ino, template void block_manager_::add_inode(std::shared_ptr ino) { - const 
file_interface* e = ino->any(); + auto e = ino->any(); size_t size = e->size(); if (size > 0) { diff --git a/src/dwarfs/console_writer.cpp b/src/dwarfs/console_writer.cpp index 9a32dd7f..ffb92187 100644 --- a/src/dwarfs/console_writer.cpp +++ b/src/dwarfs/console_writer.cpp @@ -28,7 +28,7 @@ #include "dwarfs/console_writer.h" #include "dwarfs/entry.h" -#include "dwarfs/file_interface.h" +#include "dwarfs/entry_interface.h" #include "dwarfs/inode.h" #include "dwarfs/progress.h" #include "dwarfs/util.h" diff --git a/src/dwarfs/entry.cpp b/src/dwarfs/entry.cpp index ba0101ad..3dc873f8 100644 --- a/src/dwarfs/entry.cpp +++ b/src/dwarfs/entry.cpp @@ -26,6 +26,8 @@ #include +#include + #include "dwarfs/entry.h" #include "dwarfs/global_entry_data.h" #include "dwarfs/inode.h" @@ -65,21 +67,25 @@ std::string entry::path() const { } std::string entry::type_string() const { - // TODO: this type stuff is a mess, see if we really need it - switch (type()) { - case E_FILE: + auto mode = stat_.st_mode; + + if (S_ISREG(mode)) { return "file"; - case E_LINK: + } else if (S_ISDIR(mode)) { + return "directory"; + } else if (S_ISLNK(mode)) { return "link"; - case E_DIR: - return "dir"; - case E_DEVICE: - return "device"; - case E_OTHER: - return "pipe/socket"; - default: - throw std::runtime_error("invalid file type"); + } else if (S_ISCHR(mode)) { + return "chardev"; + } else if (S_ISBLK(mode)) { + return "blockdev"; + } else if (S_ISFIFO(mode)) { + return "fifo"; + } else if (S_ISSOCK(mode)) { + return "socket"; } + + throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode)); } void entry::walk(std::function const& f) { f(this); } @@ -109,6 +115,33 @@ void entry::pack(thrift::metadata::entry& entry_v2, entry::type_t file::type() const { return E_FILE; } +uint16_t entry::get_permissions() const { return stat_.st_mode & 07777; } + +void entry::set_permissions(uint16_t perm) { + stat_.st_mode &= ~07777; + stat_.st_mode |= perm & 07777; +} + +uint16_t entry::get_uid() const {
return stat_.st_uid; } + +void entry::set_uid(uint16_t uid) { stat_.st_uid = uid; } + +uint16_t entry::get_gid() const { return stat_.st_gid; } + +void entry::set_gid(uint16_t gid) { stat_.st_gid = gid; } + +uint64_t entry::get_atime() const { return stat_.st_atime; } + +void entry::set_atime(uint64_t atime) { stat_.st_atime = atime; } + +uint64_t entry::get_mtime() const { return stat_.st_mtime; } + +void entry::set_mtime(uint64_t mtime) { stat_.st_mtime = mtime; } + +uint64_t entry::get_ctime() const { return stat_.st_ctime; } + +void entry::set_ctime(uint64_t ctime) { stat_.st_ctime = ctime; } + std::string_view file::hash() const { return std::string_view(&hash_[0], hash_.size()); } diff --git a/src/dwarfs/inode_manager.cpp b/src/dwarfs/inode_manager.cpp index 357fbc52..984d26da 100644 --- a/src/dwarfs/inode_manager.cpp +++ b/src/dwarfs/inode_manager.cpp @@ -28,7 +28,6 @@ #include #include "dwarfs/entry.h" -#include "dwarfs/file_interface.h" #include "dwarfs/inode.h" #include "dwarfs/inode_manager.h" #include "dwarfs/script.h" @@ -46,19 +45,17 @@ class inode_manager_ : public inode_manager { void set_num(uint32_t num) override { num_ = num; } uint32_t num() const override { return num_; } uint32_t similarity_hash() const override { - if (!file_) { + if (files_.empty()) { throw std::runtime_error("inode has no file"); } - return file_->similarity_hash(); + return files_.front()->similarity_hash(); } - size_t size() const override { return any()->size(); } - - void set_file(const file* f) override { - if (file_) { - throw std::runtime_error("file already set for inode"); + void set_files(files_vector&& fv) override { + if (!files_.empty()) { + throw std::runtime_error("files already set for inode"); } - file_ = f; + files_ = std::move(fv); } void add_chunk(size_t block, size_t offset, size_t size) override { @@ -69,17 +66,15 @@ class inode_manager_ : public inode_manager { chunks_.push_back(c); } - std::string path() const override { return any()->path(); } +
size_t size() const override { return any()->size(); } - const std::string& name() const override { return any()->name(); } + files_vector const& files() const override { return files_; } - std::string type_string() const override { return any()->type_string(); } - - const file_interface* any() const override { - if (!file_) { + file const* any() const override { + if (files_.empty()) { throw std::runtime_error("inode has no file"); } - return file_; + return files_.front(); } void append_chunks_to(std::vector& vec) const override { @@ -88,7 +83,7 @@ class inode_manager_ : public inode_manager { private: uint32_t num_{std::numeric_limits::max()}; - file const* file_{nullptr}; + files_vector files_; std::vector chunks_; }; @@ -137,9 +132,9 @@ class inode_manager_ : public inode_manager { auto ash = a->similarity_hash(); auto bsh = b->similarity_hash(); return ash < bsh || - (ash == bsh && - (a->size() > b->size() || - (a->size() == b->size() && a->path() < b->path()))); + (ash == bsh && (a->size() > b->size() || + (a->size() == b->size() && + a->any()->path() < b->any()->path()))); }); } diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 19527c71..73cc4359 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -103,25 +103,23 @@ class file_deduplication_visitor : public visitor_base { }); } - auto first = files.front(); - { - auto inode = im.create_inode(); - first->set_inode(inode); - inode->set_file(first); + auto inode = im.create_inode(); + + for (auto fp : files) { + fp->set_inode(inode); } - if (files.size() > 1) { - for (auto i = begin(files) + 1; i != end(files); ++i) { - (*i)->set_inode(first->get_inode()); - prog.duplicate_files++; - prog.saved_by_deduplication += (*i)->size(); - } + if (auto dupes = files.size() - 1; dupes > 0) { + prog.duplicate_files += dupes; + prog.saved_by_deduplication += dupes * files.front()->size(); } + + inode->set_files(std::move(files)); } } private: - std::unordered_map, folly::Hash> hash_; + 
std::unordered_map hash_; }; class dir_set_inode_visitor : public visitor_base { @@ -237,12 +235,11 @@ class save_directories_visitor : public visitor_base { }; std::string status_string(progress const& p, size_t width) { - file_interface const* cp = - reinterpret_cast(p.current.load()); + auto cp = p.current.load(); std::string label, path; if (cp) { - if (auto e = dynamic_cast(cp)) { + if (auto e = dynamic_cast(cp)) { label = "scanning: "; path = e->path(); } else if (auto i = dynamic_cast(cp)) { diff --git a/test/dwarfs.cpp b/test/dwarfs.cpp index 385c1f23..08ce9246 100644 --- a/test/dwarfs.cpp +++ b/test/dwarfs.cpp @@ -164,9 +164,14 @@ class os_access_mock : public os_access { class script_mock : public script { public: - bool filter(file_interface const& /*fi*/) const override { return true; } - void order(file_vector& /*fvi*/) const override { + bool filter(entry_interface const& /*ei*/) override { return true; } + + void transform(entry_interface& /*ei*/) override { + // do nothing + } + + void order(inode_vector& /*iv*/) override { // do nothing } };