Rework scripting abstractions, improve inode interface

This commit is contained in:
Marcus Holland-Moritz 2020-12-05 22:40:36 +01:00
parent 056e11295d
commit 6788ba0d69
12 changed files with 145 additions and 114 deletions

View File

@ -32,7 +32,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include "dwarfs/file_interface.h" #include "dwarfs/entry_interface.h"
namespace dwarfs { namespace dwarfs {
@ -61,7 +61,7 @@ class entry_visitor {
virtual void visit(dir* p) = 0; virtual void visit(dir* p) = 0;
}; };
class entry : public file_interface { class entry : public entry_interface {
public: public:
enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER }; enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER };
@ -86,6 +86,20 @@ class entry : public file_interface {
virtual void scan(os_access& os, progress& prog) = 0; virtual void scan(os_access& os, progress& prog) = 0;
const struct ::stat& status() const { return stat_; } const struct ::stat& status() const { return stat_; }
// Overrides of the remaining entry_interface accessors. Their definitions
// operate on the fields of stat_.

// Mode bits selected by the mask 07777 (the getter returns
// stat_.st_mode & 07777).
uint16_t get_permissions() const override;
void set_permissions(uint16_t perm) override;
// Owner user id (stat_.st_uid), exposed through a 16-bit interface.
uint16_t get_uid() const override;
void set_uid(uint16_t uid) override;
// Owner group id (stat_.st_gid), exposed through a 16-bit interface.
uint16_t get_gid() const override;
void set_gid(uint16_t gid) override;
// Access time; the getter returns stat_.st_atime.
uint64_t get_atime() const override;
void set_atime(uint64_t atime) override;
// Modification time; the getter returns stat_.st_mtime.
uint64_t get_mtime() const override;
void set_mtime(uint64_t mtime) override;
// Status-change time; the getter returns stat_.st_ctime.
uint64_t get_ctime() const override;
void set_ctime(uint64_t ctime) override;
private: private:
std::string name_; std::string name_;
std::weak_ptr<entry> parent_; std::weak_ptr<entry> parent_;

View File

@ -21,17 +21,30 @@
#pragma once #pragma once
#include <functional> #include <string>
#include "dwarfs/object.h"
namespace dwarfs { namespace dwarfs {
class file_vector { class entry_interface : public object {
public: public:
virtual ~file_vector() = default; virtual std::string path() const = 0;
virtual const file_interface* operator[](size_t i) const = 0; virtual std::string const& name() const = 0;
virtual std::string type_string() const = 0;
virtual size_t size() const = 0; virtual size_t size() const = 0;
virtual void
sort(std::function<bool(const file_interface* a, virtual uint16_t get_permissions() const = 0;
const file_interface* b)> const& less) = 0; virtual void set_permissions(uint16_t perm) = 0;
virtual uint16_t get_uid() const = 0;
virtual void set_uid(uint16_t uid) = 0;
virtual uint16_t get_gid() const = 0;
virtual void set_gid(uint16_t gid) = 0;
virtual uint64_t get_atime() const = 0;
virtual void set_atime(uint64_t atime) = 0;
virtual uint64_t get_mtime() const = 0;
virtual void set_mtime(uint64_t mtime) = 0;
virtual uint64_t get_ctime() const = 0;
virtual void set_ctime(uint64_t ctime) = 0;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -19,10 +19,14 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>. * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/ */
#pragma once
#include <iosfwd> #include <iosfwd>
#include <vector> #include <vector>
#include "dwarfs/fstypes.h" #include <folly/small_vector.h>
#include "dwarfs/object.h"
namespace dwarfs { namespace dwarfs {
@ -31,17 +35,21 @@ struct chunk;
} }
class file; class file;
class file_interface;
class inode : public file_interface { class inode : public object {
public: public:
virtual void set_file(const file* f) = 0; using files_vector = folly::small_vector<file*, 1>;
virtual void set_files(files_vector&& fv) = 0;
virtual void set_num(uint32_t num) = 0; virtual void set_num(uint32_t num) = 0;
virtual uint32_t num() const = 0; virtual uint32_t num() const = 0;
virtual uint32_t similarity_hash() const = 0; virtual uint32_t similarity_hash() const = 0;
virtual const file_interface* any() const = 0; // TODO virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& files() const = 0;
virtual void add_chunk(size_t block, size_t offset, size_t size) = 0; virtual void add_chunk(size_t block, size_t offset, size_t size) = 0;
virtual void virtual void
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0; append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -21,17 +21,11 @@
#pragma once #pragma once
#include <string>
namespace dwarfs { namespace dwarfs {
class file_interface { class object {
public: public:
virtual ~file_interface() = default; virtual ~object() = default;
virtual std::string path() const = 0;
virtual const std::string& name() const = 0;
virtual std::string type_string() const = 0;
virtual size_t size() const = 0;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -33,6 +33,8 @@
namespace dwarfs { namespace dwarfs {
class object;
class progress { class progress {
public: public:
using status_function_type = using status_function_type =
@ -51,7 +53,7 @@ class progress {
std::string status(size_t max_len) const; std::string status(size_t max_len) const;
std::atomic<void*> current{nullptr}; std::atomic<object const*> current{nullptr};
std::atomic<size_t> files_found{0}; std::atomic<size_t> files_found{0};
std::atomic<size_t> files_scanned{0}; std::atomic<size_t> files_scanned{0};
std::atomic<size_t> dirs_found{0}; std::atomic<size_t> dirs_found{0};

View File

@ -21,54 +21,24 @@
#pragma once #pragma once
#include <algorithm>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "dwarfs/file_interface.h"
#include "dwarfs/file_vector.h"
namespace dwarfs { namespace dwarfs {
namespace detail { class entry_interface;
class inode;
template <class T>
class file_vector_ : public file_vector {
public:
file_vector_(std::vector<std::shared_ptr<T>>& vec)
: vec_(vec) {}
const file_interface* operator[](size_t i) const override {
return vec_[i].get();
}
size_t size() const override { return vec_.size(); }
void
sort(std::function<bool(const file_interface*, const file_interface*)> const&
less) override {
std::sort(vec_.begin(), vec_.end(),
[&](const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) {
return less(a.get(), b.get());
});
}
private:
std::vector<std::shared_ptr<T>>& vec_;
};
} // namespace detail
class script { class script {
public: public:
using inode_ptr = std::shared_ptr<inode>;
using inode_vector = std::vector<inode_ptr>;
virtual ~script() = default; virtual ~script() = default;
virtual bool filter(file_interface const& fi) const = 0;
virtual void order(file_vector& fvi) const = 0;
template <typename T> virtual bool filter(entry_interface const& ei) = 0;
void order(std::vector<std::shared_ptr<T>>& vec) const { virtual void transform(entry_interface& ei) = 0;
detail::file_vector_<T> fv(vec); virtual void order(inode_vector& iv) = 0;
order(fv);
}
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -33,7 +33,7 @@
#include "dwarfs/block_manager.h" #include "dwarfs/block_manager.h"
#include "dwarfs/cyclic_hash.h" #include "dwarfs/cyclic_hash.h"
#include "dwarfs/file_interface.h" #include "dwarfs/entry.h"
#include "dwarfs/filesystem_writer.h" #include "dwarfs/filesystem_writer.h"
#include "dwarfs/inode.h" #include "dwarfs/inode.h"
#include "dwarfs/inode_hasher.h" #include "dwarfs/inode_hasher.h"
@ -320,7 +320,7 @@ void block_manager_<LoggerPolicy>::add_data(const std::shared_ptr<inode>& ino,
template <typename LoggerPolicy> template <typename LoggerPolicy>
void block_manager_<LoggerPolicy>::add_inode(std::shared_ptr<inode> ino) { void block_manager_<LoggerPolicy>::add_inode(std::shared_ptr<inode> ino) {
const file_interface* e = ino->any(); auto e = ino->any();
size_t size = e->size(); size_t size = e->size();
if (size > 0) { if (size > 0) {

View File

@ -28,7 +28,7 @@
#include "dwarfs/console_writer.h" #include "dwarfs/console_writer.h"
#include "dwarfs/entry.h" #include "dwarfs/entry.h"
#include "dwarfs/file_interface.h" #include "dwarfs/entry_interface.h"
#include "dwarfs/inode.h" #include "dwarfs/inode.h"
#include "dwarfs/progress.h" #include "dwarfs/progress.h"
#include "dwarfs/util.h" #include "dwarfs/util.h"

View File

@ -26,6 +26,8 @@
#include <openssl/sha.h> #include <openssl/sha.h>
#include <fmt/format.h>
#include "dwarfs/entry.h" #include "dwarfs/entry.h"
#include "dwarfs/global_entry_data.h" #include "dwarfs/global_entry_data.h"
#include "dwarfs/inode.h" #include "dwarfs/inode.h"
@ -65,21 +67,25 @@ std::string entry::path() const {
} }
std::string entry::type_string() const { std::string entry::type_string() const {
// TODO: this type stuff is a mess, see if we really need it auto mode = stat_.st_mode;
switch (type()) {
case E_FILE: if (S_ISREG(mode)) {
return "file"; return "file";
case E_LINK: } else if (S_ISDIR(mode)) {
return "directory";
} else if (S_ISLNK(mode)) {
return "link"; return "link";
case E_DIR: } else if (S_ISCHR(mode)) {
return "dir"; return "chardev";
case E_DEVICE: } else if (S_ISBLK(mode)) {
return "device"; return "blockdev";
case E_OTHER: } else if (S_ISFIFO(mode)) {
return "pipe/socket"; return "fifo";
default: } else if (S_ISSOCK(mode)) {
throw std::runtime_error("invalid file type"); return "socket";
} }
throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode));
} }
void entry::walk(std::function<void(entry*)> const& f) { f(this); } void entry::walk(std::function<void(entry*)> const& f) { f(this); }
@ -109,6 +115,33 @@ void entry::pack(thrift::metadata::entry& entry_v2,
entry::type_t file::type() const { return E_FILE; } entry::type_t file::type() const { return E_FILE; }
// Returns the low twelve bits of the stat mode (mask 07777).
uint16_t entry::get_permissions() const {
  const auto mode = stat_.st_mode;
  return mode & 07777;
}
// Replaces the low twelve mode bits (mask 07777) of stat_.st_mode with
// those of `perm`, leaving every other mode bit untouched.
//
// `perm` is masked before being merged: a uint16_t can carry bits above
// 07777, and OR-ing those in unmasked would alter the remaining bits of
// st_mode (which is where the S_IS*() file type tests look).
void entry::set_permissions(uint16_t perm) {
  stat_.st_mode &= ~07777;
  stat_.st_mode |= perm & 07777;
}
// Returns the owner user id stored in stat_, converted to the 16-bit
// return type.
// NOTE(review): uid_t is presumably wider than 16 bits on the target
// platforms, in which case large ids are truncated here - confirm.
uint16_t entry::get_uid() const {
  return static_cast<uint16_t>(stat_.st_uid);
}
// Stores `uid` as the owner user id in stat_.
void entry::set_uid(uint16_t uid) {
  stat_.st_uid = uid;
}
// Returns the owner group id stored in stat_, converted to the 16-bit
// return type.
// NOTE(review): gid_t is presumably wider than 16 bits on the target
// platforms, in which case large ids are truncated here - confirm.
uint16_t entry::get_gid() const {
  return static_cast<uint16_t>(stat_.st_gid);
}
// Stores `gid` as the owner group id in stat_.
void entry::set_gid(uint16_t gid) {
  stat_.st_gid = gid;
}
// Returns the access time recorded in stat_.
uint64_t entry::get_atime() const {
  const auto accessed = stat_.st_atime;
  return accessed;
}
// Overwrites the access time recorded in stat_ with `atime`.
void entry::set_atime(uint64_t atime) {
  stat_.st_atime = atime;
}
// Returns the modification time recorded in stat_.
uint64_t entry::get_mtime() const {
  const auto modified = stat_.st_mtime;
  return modified;
}
// Overwrites the modification time recorded in stat_ with `mtime`.
//
// Writes st_mtime so that the value is what get_mtime() reads back; the
// previous code assigned to st_atime, which clobbered the access time and
// left the modification time unchanged.
void entry::set_mtime(uint64_t mtime) {
  stat_.st_mtime = mtime;
}
// Returns the status-change time recorded in stat_.
uint64_t entry::get_ctime() const {
  const auto changed = stat_.st_ctime;
  return changed;
}
// Overwrites the status-change time recorded in stat_ with `ctime`.
//
// Writes st_ctime so that the value is what get_ctime() reads back; the
// previous code assigned to st_atime, which clobbered the access time and
// left the status-change time unchanged.
void entry::set_ctime(uint64_t ctime) {
  stat_.st_ctime = ctime;
}
std::string_view file::hash() const { std::string_view file::hash() const {
return std::string_view(&hash_[0], hash_.size()); return std::string_view(&hash_[0], hash_.size());
} }

View File

@ -28,7 +28,6 @@
#include <vector> #include <vector>
#include "dwarfs/entry.h" #include "dwarfs/entry.h"
#include "dwarfs/file_interface.h"
#include "dwarfs/inode.h" #include "dwarfs/inode.h"
#include "dwarfs/inode_manager.h" #include "dwarfs/inode_manager.h"
#include "dwarfs/script.h" #include "dwarfs/script.h"
@ -46,19 +45,17 @@ class inode_manager_ : public inode_manager {
void set_num(uint32_t num) override { num_ = num; } void set_num(uint32_t num) override { num_ = num; }
uint32_t num() const override { return num_; } uint32_t num() const override { return num_; }
uint32_t similarity_hash() const override { uint32_t similarity_hash() const override {
if (!file_) { if (files_.empty()) {
throw std::runtime_error("inode has no file"); throw std::runtime_error("inode has no file");
} }
return file_->similarity_hash(); return files_.front()->similarity_hash();
} }
size_t size() const override { return any()->size(); } void set_files(files_vector&& fv) override {
if (!files_.empty()) {
void set_file(const file* f) override { throw std::runtime_error("files already set for inode");
if (file_) {
throw std::runtime_error("file already set for inode");
} }
file_ = f; files_ = std::move(fv);
} }
void add_chunk(size_t block, size_t offset, size_t size) override { void add_chunk(size_t block, size_t offset, size_t size) override {
@ -69,17 +66,15 @@ class inode_manager_ : public inode_manager {
chunks_.push_back(c); chunks_.push_back(c);
} }
std::string path() const override { return any()->path(); } size_t size() const override { return any()->size(); }
const std::string& name() const override { return any()->name(); } files_vector const& files() const override { return files_; }
std::string type_string() const override { return any()->type_string(); } file const* any() const override {
if (files_.empty()) {
const file_interface* any() const override {
if (!file_) {
throw std::runtime_error("inode has no file"); throw std::runtime_error("inode has no file");
} }
return file_; return files_.front();
} }
void append_chunks_to(std::vector<chunk_type>& vec) const override { void append_chunks_to(std::vector<chunk_type>& vec) const override {
@ -88,7 +83,7 @@ class inode_manager_ : public inode_manager {
private: private:
uint32_t num_{std::numeric_limits<uint32_t>::max()}; uint32_t num_{std::numeric_limits<uint32_t>::max()};
file const* file_{nullptr}; files_vector files_;
std::vector<chunk_type> chunks_; std::vector<chunk_type> chunks_;
}; };
@ -137,9 +132,9 @@ class inode_manager_ : public inode_manager {
auto ash = a->similarity_hash(); auto ash = a->similarity_hash();
auto bsh = b->similarity_hash(); auto bsh = b->similarity_hash();
return ash < bsh || return ash < bsh ||
(ash == bsh && (ash == bsh && (a->size() > b->size() ||
(a->size() > b->size() || (a->size() == b->size() &&
(a->size() == b->size() && a->path() < b->path()))); a->any()->path() < b->any()->path())));
}); });
} }

View File

@ -103,25 +103,23 @@ class file_deduplication_visitor : public visitor_base {
}); });
} }
auto first = files.front(); auto inode = im.create_inode();
{
auto inode = im.create_inode(); for (auto fp : files) {
first->set_inode(inode); fp->set_inode(inode);
inode->set_file(first);
} }
if (files.size() > 1) { if (auto dupes = files.size() - 1; dupes > 0) {
for (auto i = begin(files) + 1; i != end(files); ++i) { prog.duplicate_files += dupes;
(*i)->set_inode(first->get_inode()); prog.saved_by_deduplication += dupes * files.front()->size();
prog.duplicate_files++;
prog.saved_by_deduplication += (*i)->size();
}
} }
inode->set_files(std::move(files));
} }
} }
private: private:
std::unordered_map<std::string_view, std::vector<file*>, folly::Hash> hash_; std::unordered_map<std::string_view, inode::files_vector, folly::Hash> hash_;
}; };
class dir_set_inode_visitor : public visitor_base { class dir_set_inode_visitor : public visitor_base {
@ -237,12 +235,11 @@ class save_directories_visitor : public visitor_base {
}; };
std::string status_string(progress const& p, size_t width) { std::string status_string(progress const& p, size_t width) {
file_interface const* cp = auto cp = p.current.load();
reinterpret_cast<file_interface const*>(p.current.load());
std::string label, path; std::string label, path;
if (cp) { if (cp) {
if (auto e = dynamic_cast<entry const*>(cp)) { if (auto e = dynamic_cast<entry_interface const*>(cp)) {
label = "scanning: "; label = "scanning: ";
path = e->path(); path = e->path();
} else if (auto i = dynamic_cast<inode const*>(cp)) { } else if (auto i = dynamic_cast<inode const*>(cp)) {

View File

@ -164,9 +164,14 @@ class os_access_mock : public os_access {
class script_mock : public script { class script_mock : public script {
public: public:
bool filter(file_interface const& /*fi*/) const override { return true; }
void order(file_vector& /*fvi*/) const override { bool filter(entry_interface const& /*ei*/) override { return true; }
void transform(entry_interface& /*ei*/) override {
// do nothing
}
void order(inode_vector& /*iv*/) override {
// do nothing // do nothing
} }
}; };