mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 12:28:13 -04:00
Rework scripting abstractions, improve inode interface
This commit is contained in:
parent
056e11295d
commit
6788ba0d69
@ -32,7 +32,7 @@
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/entry_interface.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -61,7 +61,7 @@ class entry_visitor {
|
||||
virtual void visit(dir* p) = 0;
|
||||
};
|
||||
|
||||
class entry : public file_interface {
|
||||
class entry : public entry_interface {
|
||||
public:
|
||||
enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER };
|
||||
|
||||
@ -86,6 +86,20 @@ class entry : public file_interface {
|
||||
virtual void scan(os_access& os, progress& prog) = 0;
|
||||
const struct ::stat& status() const { return stat_; }
|
||||
|
||||
// more methods from entry_interface
|
||||
uint16_t get_permissions() const override;
|
||||
void set_permissions(uint16_t perm) override;
|
||||
uint16_t get_uid() const override;
|
||||
void set_uid(uint16_t uid) override;
|
||||
uint16_t get_gid() const override;
|
||||
void set_gid(uint16_t gid) override;
|
||||
uint64_t get_atime() const override;
|
||||
void set_atime(uint64_t atime) override;
|
||||
uint64_t get_mtime() const override;
|
||||
void set_mtime(uint64_t mtime) override;
|
||||
uint64_t get_ctime() const override;
|
||||
void set_ctime(uint64_t ctime) override;
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
std::weak_ptr<entry> parent_;
|
||||
|
@ -21,17 +21,30 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "dwarfs/object.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class file_vector {
|
||||
class entry_interface : public object {
|
||||
public:
|
||||
virtual ~file_vector() = default;
|
||||
virtual const file_interface* operator[](size_t i) const = 0;
|
||||
virtual std::string path() const = 0;
|
||||
virtual std::string const& name() const = 0;
|
||||
virtual std::string type_string() const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
virtual void
|
||||
sort(std::function<bool(const file_interface* a,
|
||||
const file_interface* b)> const& less) = 0;
|
||||
|
||||
virtual uint16_t get_permissions() const = 0;
|
||||
virtual void set_permissions(uint16_t perm) = 0;
|
||||
virtual uint16_t get_uid() const = 0;
|
||||
virtual void set_uid(uint16_t uid) = 0;
|
||||
virtual uint16_t get_gid() const = 0;
|
||||
virtual void set_gid(uint16_t gid) = 0;
|
||||
virtual uint64_t get_atime() const = 0;
|
||||
virtual void set_atime(uint64_t atime) = 0;
|
||||
virtual uint64_t get_mtime() const = 0;
|
||||
virtual void set_mtime(uint64_t mtime) = 0;
|
||||
virtual uint64_t get_ctime() const = 0;
|
||||
virtual void set_ctime(uint64_t ctime) = 0;
|
||||
};
|
||||
} // namespace dwarfs
|
@ -19,10 +19,14 @@
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/fstypes.h"
|
||||
#include <folly/small_vector.h>
|
||||
|
||||
#include "dwarfs/object.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -31,17 +35,21 @@ struct chunk;
|
||||
}
|
||||
|
||||
class file;
|
||||
class file_interface;
|
||||
|
||||
class inode : public file_interface {
|
||||
class inode : public object {
|
||||
public:
|
||||
virtual void set_file(const file* f) = 0;
|
||||
using files_vector = folly::small_vector<file*, 1>;
|
||||
|
||||
virtual void set_files(files_vector&& fv) = 0;
|
||||
virtual void set_num(uint32_t num) = 0;
|
||||
virtual uint32_t num() const = 0;
|
||||
virtual uint32_t similarity_hash() const = 0;
|
||||
virtual const file_interface* any() const = 0; // TODO
|
||||
virtual size_t size() const = 0;
|
||||
virtual file const* any() const = 0;
|
||||
virtual files_vector const& files() const = 0;
|
||||
virtual void add_chunk(size_t block, size_t offset, size_t size) = 0;
|
||||
virtual void
|
||||
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -21,17 +21,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class file_interface {
|
||||
class object {
|
||||
public:
|
||||
virtual ~file_interface() = default;
|
||||
|
||||
virtual std::string path() const = 0;
|
||||
virtual const std::string& name() const = 0;
|
||||
virtual std::string type_string() const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
virtual ~object() = default;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
@ -33,6 +33,8 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class object;
|
||||
|
||||
class progress {
|
||||
public:
|
||||
using status_function_type =
|
||||
@ -51,7 +53,7 @@ class progress {
|
||||
|
||||
std::string status(size_t max_len) const;
|
||||
|
||||
std::atomic<void*> current{nullptr};
|
||||
std::atomic<object const*> current{nullptr};
|
||||
std::atomic<size_t> files_found{0};
|
||||
std::atomic<size_t> files_scanned{0};
|
||||
std::atomic<size_t> dirs_found{0};
|
||||
|
@ -21,54 +21,24 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/file_vector.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <class T>
|
||||
class file_vector_ : public file_vector {
|
||||
public:
|
||||
file_vector_(std::vector<std::shared_ptr<T>>& vec)
|
||||
: vec_(vec) {}
|
||||
|
||||
const file_interface* operator[](size_t i) const override {
|
||||
return vec_[i].get();
|
||||
}
|
||||
|
||||
size_t size() const override { return vec_.size(); }
|
||||
|
||||
void
|
||||
sort(std::function<bool(const file_interface*, const file_interface*)> const&
|
||||
less) override {
|
||||
std::sort(vec_.begin(), vec_.end(),
|
||||
[&](const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) {
|
||||
return less(a.get(), b.get());
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<T>>& vec_;
|
||||
};
|
||||
} // namespace detail
|
||||
class entry_interface;
|
||||
class inode;
|
||||
|
||||
class script {
|
||||
public:
|
||||
using inode_ptr = std::shared_ptr<inode>;
|
||||
using inode_vector = std::vector<inode_ptr>;
|
||||
|
||||
virtual ~script() = default;
|
||||
|
||||
virtual bool filter(file_interface const& fi) const = 0;
|
||||
virtual void order(file_vector& fvi) const = 0;
|
||||
|
||||
template <typename T>
|
||||
void order(std::vector<std::shared_ptr<T>>& vec) const {
|
||||
detail::file_vector_<T> fv(vec);
|
||||
order(fv);
|
||||
}
|
||||
virtual bool filter(entry_interface const& ei) = 0;
|
||||
virtual void transform(entry_interface& ei) = 0;
|
||||
virtual void order(inode_vector& iv) = 0;
|
||||
};
|
||||
} // namespace dwarfs
|
||||
|
@ -33,7 +33,7 @@
|
||||
|
||||
#include "dwarfs/block_manager.h"
|
||||
#include "dwarfs/cyclic_hash.h"
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/entry.h"
|
||||
#include "dwarfs/filesystem_writer.h"
|
||||
#include "dwarfs/inode.h"
|
||||
#include "dwarfs/inode_hasher.h"
|
||||
@ -320,7 +320,7 @@ void block_manager_<LoggerPolicy>::add_data(const std::shared_ptr<inode>& ino,
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void block_manager_<LoggerPolicy>::add_inode(std::shared_ptr<inode> ino) {
|
||||
const file_interface* e = ino->any();
|
||||
auto e = ino->any();
|
||||
size_t size = e->size();
|
||||
|
||||
if (size > 0) {
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
#include "dwarfs/console_writer.h"
|
||||
#include "dwarfs/entry.h"
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/entry_interface.h"
|
||||
#include "dwarfs/inode.h"
|
||||
#include "dwarfs/progress.h"
|
||||
#include "dwarfs/util.h"
|
||||
|
@ -26,6 +26,8 @@
|
||||
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "dwarfs/entry.h"
|
||||
#include "dwarfs/global_entry_data.h"
|
||||
#include "dwarfs/inode.h"
|
||||
@ -65,21 +67,25 @@ std::string entry::path() const {
|
||||
}
|
||||
|
||||
std::string entry::type_string() const {
|
||||
// TODO: this type stuff is a mess, see if we really need it
|
||||
switch (type()) {
|
||||
case E_FILE:
|
||||
auto mode = stat_.st_mode;
|
||||
|
||||
if (S_ISREG(mode)) {
|
||||
return "file";
|
||||
case E_LINK:
|
||||
} else if (S_ISDIR(mode)) {
|
||||
return "directory";
|
||||
} else if (S_ISLNK(mode)) {
|
||||
return "link";
|
||||
case E_DIR:
|
||||
return "dir";
|
||||
case E_DEVICE:
|
||||
return "device";
|
||||
case E_OTHER:
|
||||
return "pipe/socket";
|
||||
default:
|
||||
throw std::runtime_error("invalid file type");
|
||||
} else if (S_ISCHR(mode)) {
|
||||
return "chardev";
|
||||
} else if (S_ISBLK(mode)) {
|
||||
return "blockdev";
|
||||
} else if (S_ISFIFO(mode)) {
|
||||
return "fifo";
|
||||
} else if (S_ISSOCK(mode)) {
|
||||
return "socket";
|
||||
}
|
||||
|
||||
throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode));
|
||||
}
|
||||
|
||||
// Default traversal for an entry: invoke the callback exactly once, passing
// this entry itself.
void entry::walk(std::function<void(entry*)> const& f) {
  f(this);
}
|
||||
@ -109,6 +115,33 @@ void entry::pack(thrift::metadata::entry& entry_v2,
|
||||
|
||||
// A `file` entry always reports the E_FILE type tag.
entry::type_t file::type() const {
  return E_FILE;
}
|
||||
|
||||
// Returns the low 12 bits (mask 07777) of the cached st_mode; all other mode
// bits are stripped from the result.
uint16_t entry::get_permissions() const {
  auto const permission_bits = stat_.st_mode & 07777;
  return permission_bits;
}
|
||||
|
||||
// Replaces the low 12 bits (mask 07777) of the cached st_mode with `perm`.
//
// `perm` is masked with 07777 before being merged: a uint16_t is wide enough
// to carry bits above the permission range, and OR-ing those in unmasked
// would silently alter the remaining st_mode bits of this entry.
void entry::set_permissions(uint16_t perm) {
  stat_.st_mode &= ~07777;
  stat_.st_mode |= (perm & 07777);
}
|
||||
|
||||
// Returns the cached st_uid converted to uint16_t.
// NOTE(review): if st_uid is wider than 16 bits on the target platform, larger
// ids are truncated by this conversion - confirm this is intended.
uint16_t entry::get_uid() const {
  return static_cast<uint16_t>(stat_.st_uid);
}
|
||||
|
||||
// Overwrites the cached st_uid with `uid`.
void entry::set_uid(uint16_t uid) {
  stat_.st_uid = uid;
}
|
||||
|
||||
// Returns the cached st_gid converted to uint16_t.
// NOTE(review): if st_gid is wider than 16 bits on the target platform, larger
// ids are truncated by this conversion - confirm this is intended.
uint16_t entry::get_gid() const {
  return static_cast<uint16_t>(stat_.st_gid);
}
|
||||
|
||||
// Overwrites the cached st_gid with `gid`.
void entry::set_gid(uint16_t gid) {
  stat_.st_gid = gid;
}
|
||||
|
||||
// Returns the cached st_atime converted to uint64_t.
uint64_t entry::get_atime() const {
  return static_cast<uint64_t>(stat_.st_atime);
}
|
||||
|
||||
// Overwrites the cached st_atime with `atime`.
void entry::set_atime(uint64_t atime) {
  stat_.st_atime = atime;
}
|
||||
|
||||
// Returns the cached st_mtime converted to uint64_t.
uint64_t entry::get_mtime() const {
  return static_cast<uint64_t>(stat_.st_mtime);
}
|
||||
|
||||
// Overwrites the cached st_mtime with `mtime`, so that a subsequent
// get_mtime() observes the new value. st_atime is left untouched.
void entry::set_mtime(uint64_t mtime) {
  stat_.st_mtime = mtime;
}
|
||||
|
||||
// Returns the cached st_ctime converted to uint64_t.
uint64_t entry::get_ctime() const {
  return static_cast<uint64_t>(stat_.st_ctime);
}
|
||||
|
||||
// Overwrites the cached st_ctime with `ctime`, so that a subsequent
// get_ctime() observes the new value. st_atime is left untouched.
void entry::set_ctime(uint64_t ctime) {
  stat_.st_ctime = ctime;
}
|
||||
|
||||
std::string_view file::hash() const {
|
||||
return std::string_view(&hash_[0], hash_.size());
|
||||
}
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/entry.h"
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/inode.h"
|
||||
#include "dwarfs/inode_manager.h"
|
||||
#include "dwarfs/script.h"
|
||||
@ -46,19 +45,17 @@ class inode_manager_ : public inode_manager {
|
||||
void set_num(uint32_t num) override { num_ = num; }
|
||||
uint32_t num() const override { return num_; }
|
||||
uint32_t similarity_hash() const override {
|
||||
if (!file_) {
|
||||
if (files_.empty()) {
|
||||
throw std::runtime_error("inode has no file");
|
||||
}
|
||||
return file_->similarity_hash();
|
||||
return files_.front()->similarity_hash();
|
||||
}
|
||||
|
||||
size_t size() const override { return any()->size(); }
|
||||
|
||||
void set_file(const file* f) override {
|
||||
if (file_) {
|
||||
throw std::runtime_error("file already set for inode");
|
||||
void set_files(files_vector&& fv) override {
|
||||
if (!files_.empty()) {
|
||||
throw std::runtime_error("files already set for inode");
|
||||
}
|
||||
file_ = f;
|
||||
files_ = std::move(fv);
|
||||
}
|
||||
|
||||
void add_chunk(size_t block, size_t offset, size_t size) override {
|
||||
@ -69,17 +66,15 @@ class inode_manager_ : public inode_manager {
|
||||
chunks_.push_back(c);
|
||||
}
|
||||
|
||||
std::string path() const override { return any()->path(); }
|
||||
size_t size() const override { return any()->size(); }
|
||||
|
||||
const std::string& name() const override { return any()->name(); }
|
||||
files_vector const& files() const override { return files_; }
|
||||
|
||||
std::string type_string() const override { return any()->type_string(); }
|
||||
|
||||
const file_interface* any() const override {
|
||||
if (!file_) {
|
||||
file const* any() const override {
|
||||
if (files_.empty()) {
|
||||
throw std::runtime_error("inode has no file");
|
||||
}
|
||||
return file_;
|
||||
return files_.front();
|
||||
}
|
||||
|
||||
void append_chunks_to(std::vector<chunk_type>& vec) const override {
|
||||
@ -88,7 +83,7 @@ class inode_manager_ : public inode_manager {
|
||||
|
||||
private:
|
||||
uint32_t num_{std::numeric_limits<uint32_t>::max()};
|
||||
file const* file_{nullptr};
|
||||
files_vector files_;
|
||||
std::vector<chunk_type> chunks_;
|
||||
};
|
||||
|
||||
@ -137,9 +132,9 @@ class inode_manager_ : public inode_manager {
|
||||
auto ash = a->similarity_hash();
|
||||
auto bsh = b->similarity_hash();
|
||||
return ash < bsh ||
|
||||
(ash == bsh &&
|
||||
(a->size() > b->size() ||
|
||||
(a->size() == b->size() && a->path() < b->path())));
|
||||
(ash == bsh && (a->size() > b->size() ||
|
||||
(a->size() == b->size() &&
|
||||
a->any()->path() < b->any()->path())));
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -103,25 +103,23 @@ class file_deduplication_visitor : public visitor_base {
|
||||
});
|
||||
}
|
||||
|
||||
auto first = files.front();
|
||||
{
|
||||
auto inode = im.create_inode();
|
||||
first->set_inode(inode);
|
||||
inode->set_file(first);
|
||||
auto inode = im.create_inode();
|
||||
|
||||
for (auto fp : files) {
|
||||
fp->set_inode(inode);
|
||||
}
|
||||
|
||||
if (files.size() > 1) {
|
||||
for (auto i = begin(files) + 1; i != end(files); ++i) {
|
||||
(*i)->set_inode(first->get_inode());
|
||||
prog.duplicate_files++;
|
||||
prog.saved_by_deduplication += (*i)->size();
|
||||
}
|
||||
if (auto dupes = files.size() - 1; dupes > 0) {
|
||||
prog.duplicate_files += dupes;
|
||||
prog.saved_by_deduplication += dupes * files.front()->size();
|
||||
}
|
||||
|
||||
inode->set_files(std::move(files));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string_view, std::vector<file*>, folly::Hash> hash_;
|
||||
std::unordered_map<std::string_view, inode::files_vector, folly::Hash> hash_;
|
||||
};
|
||||
|
||||
class dir_set_inode_visitor : public visitor_base {
|
||||
@ -237,12 +235,11 @@ class save_directories_visitor : public visitor_base {
|
||||
};
|
||||
|
||||
std::string status_string(progress const& p, size_t width) {
|
||||
file_interface const* cp =
|
||||
reinterpret_cast<file_interface const*>(p.current.load());
|
||||
auto cp = p.current.load();
|
||||
std::string label, path;
|
||||
|
||||
if (cp) {
|
||||
if (auto e = dynamic_cast<entry const*>(cp)) {
|
||||
if (auto e = dynamic_cast<entry_interface const*>(cp)) {
|
||||
label = "scanning: ";
|
||||
path = e->path();
|
||||
} else if (auto i = dynamic_cast<inode const*>(cp)) {
|
||||
|
@ -164,9 +164,14 @@ class os_access_mock : public os_access {
|
||||
|
||||
class script_mock : public script {
|
||||
public:
|
||||
bool filter(file_interface const& /*fi*/) const override { return true; }
|
||||
|
||||
void order(file_vector& /*fvi*/) const override {
|
||||
bool filter(entry_interface const& /*ei*/) override { return true; }
|
||||
|
||||
void transform(entry_interface& /*ei*/) override {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
void order(inode_vector& /*iv*/) override {
|
||||
// do nothing
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user