Rework scripting abstractions, improve inode interface

This commit is contained in:
Marcus Holland-Moritz 2020-12-05 22:40:36 +01:00
parent 056e11295d
commit 6788ba0d69
12 changed files with 145 additions and 114 deletions

View File

@ -32,7 +32,7 @@
#include <sys/stat.h>
#include "dwarfs/file_interface.h"
#include "dwarfs/entry_interface.h"
namespace dwarfs {
@ -61,7 +61,7 @@ class entry_visitor {
virtual void visit(dir* p) = 0;
};
class entry : public file_interface {
class entry : public entry_interface {
public:
enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER };
@ -86,6 +86,20 @@ class entry : public file_interface {
virtual void scan(os_access& os, progress& prog) = 0;
const struct ::stat& status() const { return stat_; }
// more methods from entry_interface
uint16_t get_permissions() const override;
void set_permissions(uint16_t perm) override;
uint16_t get_uid() const override;
void set_uid(uint16_t uid) override;
uint16_t get_gid() const override;
void set_gid(uint16_t gid) override;
uint64_t get_atime() const override;
void set_atime(uint64_t atime) override;
uint64_t get_mtime() const override;
void set_mtime(uint64_t mtime) override;
uint64_t get_ctime() const override;
void set_ctime(uint64_t ctime) override;
private:
std::string name_;
std::weak_ptr<entry> parent_;

View File

@ -21,17 +21,30 @@
#pragma once
#include <functional>
#include <string>
#include "dwarfs/object.h"
namespace dwarfs {
class file_vector {
class entry_interface : public object {
public:
virtual ~file_vector() = default;
virtual const file_interface* operator[](size_t i) const = 0;
virtual std::string path() const = 0;
virtual std::string const& name() const = 0;
virtual std::string type_string() const = 0;
virtual size_t size() const = 0;
virtual void
sort(std::function<bool(const file_interface* a,
const file_interface* b)> const& less) = 0;
// Attribute accessors. Each getter is paired with a setter that replaces the
// corresponding attribute of the entry; `entry` implements all of them on top
// of its `struct stat` member.

// Permission bits of the entry.
virtual uint16_t get_permissions() const = 0;
virtual void set_permissions(uint16_t perm) = 0;
// Owner user id.
// NOTE(review): the interface is 16 bits wide while POSIX uid_t is commonly
// 32 bits -- confirm that narrowing is intended.
virtual uint16_t get_uid() const = 0;
virtual void set_uid(uint16_t uid) = 0;
// Owner group id.
// NOTE(review): same 16-bit width caveat as for the user id -- confirm.
virtual uint16_t get_gid() const = 0;
virtual void set_gid(uint16_t gid) = 0;
// Access time; presumably seconds since the epoch, as in st_atime -- TODO
// confirm the unit against callers.
virtual uint64_t get_atime() const = 0;
virtual void set_atime(uint64_t atime) = 0;
// Modification time; same presumed unit as the access time.
virtual uint64_t get_mtime() const = 0;
virtual void set_mtime(uint64_t mtime) = 0;
// Status change time; same presumed unit as the access time.
virtual uint64_t get_ctime() const = 0;
virtual void set_ctime(uint64_t ctime) = 0;
};
} // namespace dwarfs

View File

@ -19,10 +19,14 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <iosfwd>
#include <vector>
#include "dwarfs/fstypes.h"
#include <folly/small_vector.h>
#include "dwarfs/object.h"
namespace dwarfs {
@ -31,17 +35,21 @@ struct chunk;
}
class file;
class file_interface;
class inode : public file_interface {
class inode : public object {
public:
virtual void set_file(const file* f) = 0;
using files_vector = folly::small_vector<file*, 1>;
virtual void set_files(files_vector&& fv) = 0;
virtual void set_num(uint32_t num) = 0;
virtual uint32_t num() const = 0;
virtual uint32_t similarity_hash() const = 0;
virtual const file_interface* any() const = 0; // TODO
virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& files() const = 0;
virtual void add_chunk(size_t block, size_t offset, size_t size) = 0;
virtual void
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
};
} // namespace dwarfs

View File

@ -21,17 +21,11 @@
#pragma once
#include <string>
namespace dwarfs {
class file_interface {
class object {
public:
virtual ~file_interface() = default;
virtual std::string path() const = 0;
virtual const std::string& name() const = 0;
virtual std::string type_string() const = 0;
virtual size_t size() const = 0;
virtual ~object() = default;
};
} // namespace dwarfs

View File

@ -33,6 +33,8 @@
namespace dwarfs {
class object;
class progress {
public:
using status_function_type =
@ -51,7 +53,7 @@ class progress {
std::string status(size_t max_len) const;
std::atomic<void*> current{nullptr};
std::atomic<object const*> current{nullptr};
std::atomic<size_t> files_found{0};
std::atomic<size_t> files_scanned{0};
std::atomic<size_t> dirs_found{0};

View File

@ -21,54 +21,24 @@
#pragma once
#include <algorithm>
#include <memory>
#include <vector>
#include "dwarfs/file_interface.h"
#include "dwarfs/file_vector.h"
namespace dwarfs {
namespace detail {
template <class T>
class file_vector_ : public file_vector {
public:
file_vector_(std::vector<std::shared_ptr<T>>& vec)
: vec_(vec) {}
const file_interface* operator[](size_t i) const override {
return vec_[i].get();
}
size_t size() const override { return vec_.size(); }
void
sort(std::function<bool(const file_interface*, const file_interface*)> const&
less) override {
std::sort(vec_.begin(), vec_.end(),
[&](const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) {
return less(a.get(), b.get());
});
}
private:
std::vector<std::shared_ptr<T>>& vec_;
};
} // namespace detail
class entry_interface;
class inode;
class script {
public:
using inode_ptr = std::shared_ptr<inode>;
using inode_vector = std::vector<inode_ptr>;
virtual ~script() = default;
virtual bool filter(file_interface const& fi) const = 0;
virtual void order(file_vector& fvi) const = 0;
template <typename T>
void order(std::vector<std::shared_ptr<T>>& vec) const {
detail::file_vector_<T> fv(vec);
order(fv);
}
virtual bool filter(entry_interface const& ei) = 0;
virtual void transform(entry_interface& ei) = 0;
virtual void order(inode_vector& iv) = 0;
};
} // namespace dwarfs

View File

@ -33,7 +33,7 @@
#include "dwarfs/block_manager.h"
#include "dwarfs/cyclic_hash.h"
#include "dwarfs/file_interface.h"
#include "dwarfs/entry.h"
#include "dwarfs/filesystem_writer.h"
#include "dwarfs/inode.h"
#include "dwarfs/inode_hasher.h"
@ -320,7 +320,7 @@ void block_manager_<LoggerPolicy>::add_data(const std::shared_ptr<inode>& ino,
template <typename LoggerPolicy>
void block_manager_<LoggerPolicy>::add_inode(std::shared_ptr<inode> ino) {
const file_interface* e = ino->any();
auto e = ino->any();
size_t size = e->size();
if (size > 0) {

View File

@ -28,7 +28,7 @@
#include "dwarfs/console_writer.h"
#include "dwarfs/entry.h"
#include "dwarfs/file_interface.h"
#include "dwarfs/entry_interface.h"
#include "dwarfs/inode.h"
#include "dwarfs/progress.h"
#include "dwarfs/util.h"

View File

@ -26,6 +26,8 @@
#include <openssl/sha.h>
#include <fmt/format.h>
#include "dwarfs/entry.h"
#include "dwarfs/global_entry_data.h"
#include "dwarfs/inode.h"
@ -65,21 +67,25 @@ std::string entry::path() const {
}
std::string entry::type_string() const {
// TODO: this type stuff is a mess, see if we really need it
switch (type()) {
case E_FILE:
auto mode = stat_.st_mode;
if (S_ISREG(mode)) {
return "file";
case E_LINK:
} else if (S_ISDIR(mode)) {
return "directory";
} else if (S_ISLNK(mode)) {
return "link";
case E_DIR:
return "dir";
case E_DEVICE:
return "device";
case E_OTHER:
return "pipe/socket";
default:
throw std::runtime_error("invalid file type");
} else if (S_ISCHR(mode)) {
return "chardev";
} else if (S_ISBLK(mode)) {
return "blockdev";
} else if (S_ISFIFO(mode)) {
return "fifo";
} else if (S_ISSOCK(mode)) {
return "socket";
}
throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode));
}
void entry::walk(std::function<void(entry*)> const& f) { f(this); }
@ -109,6 +115,33 @@ void entry::pack(thrift::metadata::entry& entry_v2,
entry::type_t file::type() const { return E_FILE; }
// Permission bits of this entry: st_mode restricted to the 07777 mask, i.e.
// without the file-type bits.
uint16_t entry::get_permissions() const {
  constexpr auto permission_mask = 07777;
  return static_cast<uint16_t>(stat_.st_mode & permission_mask);
}
// Replace the permission bits (mask 07777) of this entry's st_mode with
// `perm`, leaving the file-type bits untouched.
//
// `perm` is masked before it is merged: uint16_t can carry bits above 07777,
// and OR-ing those in unmasked would silently change the file type encoded in
// st_mode.
void entry::set_permissions(uint16_t perm) {
  constexpr auto permission_mask = 07777;
  stat_.st_mode &= ~permission_mask;
  stat_.st_mode |= perm & permission_mask;
}
// Owner user id from this entry's stat record, converted to the 16-bit
// interface type.
uint16_t entry::get_uid() const {
  return static_cast<uint16_t>(stat_.st_uid);
}
// Overwrite the owner user id in this entry's stat record.
void entry::set_uid(uint16_t uid) {
  stat_.st_uid = uid;
}
// Owner group id from this entry's stat record, converted to the 16-bit
// interface type.
uint16_t entry::get_gid() const {
  return static_cast<uint16_t>(stat_.st_gid);
}
// Overwrite the owner group id in this entry's stat record.
void entry::set_gid(uint16_t gid) {
  stat_.st_gid = gid;
}
// Access time (st_atime) from this entry's stat record.
uint64_t entry::get_atime() const {
  return static_cast<uint64_t>(stat_.st_atime);
}
// Overwrite the access time (st_atime) in this entry's stat record.
void entry::set_atime(uint64_t atime) {
  stat_.st_atime = atime;
}
// Modification time (st_mtime) from this entry's stat record.
uint64_t entry::get_mtime() const {
  return static_cast<uint64_t>(stat_.st_mtime);
}
// Overwrite the modification time (st_mtime) in this entry's stat record.
// The value must land in st_mtime, the field get_mtime() reads; writing it to
// st_atime would leave the mtime unchanged and clobber the access time.
void entry::set_mtime(uint64_t mtime) { stat_.st_mtime = mtime; }
// Status change time (st_ctime) from this entry's stat record.
uint64_t entry::get_ctime() const {
  return static_cast<uint64_t>(stat_.st_ctime);
}
// Overwrite the status change time (st_ctime) in this entry's stat record.
// The value must land in st_ctime, the field get_ctime() reads; writing it to
// st_atime would leave the ctime unchanged and clobber the access time.
void entry::set_ctime(uint64_t ctime) { stat_.st_ctime = ctime; }
std::string_view file::hash() const {
return std::string_view(&hash_[0], hash_.size());
}

View File

@ -28,7 +28,6 @@
#include <vector>
#include "dwarfs/entry.h"
#include "dwarfs/file_interface.h"
#include "dwarfs/inode.h"
#include "dwarfs/inode_manager.h"
#include "dwarfs/script.h"
@ -46,19 +45,17 @@ class inode_manager_ : public inode_manager {
void set_num(uint32_t num) override { num_ = num; }
uint32_t num() const override { return num_; }
uint32_t similarity_hash() const override {
if (!file_) {
if (files_.empty()) {
throw std::runtime_error("inode has no file");
}
return file_->similarity_hash();
return files_.front()->similarity_hash();
}
size_t size() const override { return any()->size(); }
void set_file(const file* f) override {
if (file_) {
throw std::runtime_error("file already set for inode");
void set_files(files_vector&& fv) override {
if (!files_.empty()) {
throw std::runtime_error("files already set for inode");
}
file_ = f;
files_ = std::move(fv);
}
void add_chunk(size_t block, size_t offset, size_t size) override {
@ -69,17 +66,15 @@ class inode_manager_ : public inode_manager {
chunks_.push_back(c);
}
std::string path() const override { return any()->path(); }
size_t size() const override { return any()->size(); }
const std::string& name() const override { return any()->name(); }
files_vector const& files() const override { return files_; }
std::string type_string() const override { return any()->type_string(); }
const file_interface* any() const override {
if (!file_) {
file const* any() const override {
if (files_.empty()) {
throw std::runtime_error("inode has no file");
}
return file_;
return files_.front();
}
void append_chunks_to(std::vector<chunk_type>& vec) const override {
@ -88,7 +83,7 @@ class inode_manager_ : public inode_manager {
private:
uint32_t num_{std::numeric_limits<uint32_t>::max()};
file const* file_{nullptr};
files_vector files_;
std::vector<chunk_type> chunks_;
};
@ -137,9 +132,9 @@ class inode_manager_ : public inode_manager {
auto ash = a->similarity_hash();
auto bsh = b->similarity_hash();
return ash < bsh ||
(ash == bsh &&
(a->size() > b->size() ||
(a->size() == b->size() && a->path() < b->path())));
(ash == bsh && (a->size() > b->size() ||
(a->size() == b->size() &&
a->any()->path() < b->any()->path())));
});
}

View File

@ -103,25 +103,23 @@ class file_deduplication_visitor : public visitor_base {
});
}
auto first = files.front();
{
auto inode = im.create_inode();
first->set_inode(inode);
inode->set_file(first);
auto inode = im.create_inode();
for (auto fp : files) {
fp->set_inode(inode);
}
if (files.size() > 1) {
for (auto i = begin(files) + 1; i != end(files); ++i) {
(*i)->set_inode(first->get_inode());
prog.duplicate_files++;
prog.saved_by_deduplication += (*i)->size();
}
if (auto dupes = files.size() - 1; dupes > 0) {
prog.duplicate_files += dupes;
prog.saved_by_deduplication += dupes * files.front()->size();
}
inode->set_files(std::move(files));
}
}
private:
std::unordered_map<std::string_view, std::vector<file*>, folly::Hash> hash_;
std::unordered_map<std::string_view, inode::files_vector, folly::Hash> hash_;
};
class dir_set_inode_visitor : public visitor_base {
@ -237,12 +235,11 @@ class save_directories_visitor : public visitor_base {
};
std::string status_string(progress const& p, size_t width) {
file_interface const* cp =
reinterpret_cast<file_interface const*>(p.current.load());
auto cp = p.current.load();
std::string label, path;
if (cp) {
if (auto e = dynamic_cast<entry const*>(cp)) {
if (auto e = dynamic_cast<entry_interface const*>(cp)) {
label = "scanning: ";
path = e->path();
} else if (auto i = dynamic_cast<inode const*>(cp)) {

View File

@ -164,9 +164,14 @@ class os_access_mock : public os_access {
class script_mock : public script {
public:
bool filter(file_interface const& /*fi*/) const override { return true; }
void order(file_vector& /*fvi*/) const override {
bool filter(entry_interface const& /*ei*/) override { return true; }
void transform(entry_interface& /*ei*/) override {
// do nothing
}
void order(inode_vector& /*iv*/) override {
// do nothing
}
};