mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 11:59:48 -04:00
metadata_v2: more cleaning up
This commit is contained in:
parent
5858c1c53d
commit
c767b0b93e
@ -38,6 +38,7 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
// TODO: clean up
|
||||
struct global_entry_data {
|
||||
global_entry_data(bool no_time)
|
||||
: no_time_(no_time) {}
|
||||
@ -151,9 +152,6 @@ class entry : public file_interface {
|
||||
virtual size_t total_size() const;
|
||||
virtual void walk(std::function<void(entry*)> const& f);
|
||||
virtual void walk(std::function<void(const entry*)> const& f) const;
|
||||
void pack(dir_entry& de) const;
|
||||
void pack(dir_entry_ug& de) const;
|
||||
void pack(dir_entry_ug_time& de) const;
|
||||
void
|
||||
pack(thrift::metadata::entry& entry_v2, global_entry_data const& data) const;
|
||||
void update(global_entry_data& data) const;
|
||||
@ -161,7 +159,6 @@ class entry : public file_interface {
|
||||
virtual uint32_t inode_num() const = 0;
|
||||
|
||||
protected:
|
||||
virtual void pack_specific(dir_entry& de) const = 0;
|
||||
virtual void scan(os_access& os, const std::string& p, progress& prog) = 0;
|
||||
|
||||
private:
|
||||
@ -187,7 +184,6 @@ class file : public entry {
|
||||
uint32_t similarity_hash() const { return similarity_hash_; }
|
||||
|
||||
protected:
|
||||
void pack_specific(dir_entry& de) const override;
|
||||
void scan(os_access& os, const std::string& p, progress& prog) override;
|
||||
|
||||
private:
|
||||
@ -210,21 +206,13 @@ class dir : public entry {
|
||||
void sort();
|
||||
void set_offset(size_t offset);
|
||||
void set_inode(uint32_t inode);
|
||||
virtual size_t packed_size() const = 0;
|
||||
virtual void
|
||||
pack(uint8_t* buf,
|
||||
std::function<void(const entry* e, size_t offset)> const& offset_cb)
|
||||
const = 0;
|
||||
virtual void pack(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const = 0;
|
||||
virtual size_t packed_entry_size() const = 0;
|
||||
virtual void pack_entry(uint8_t* buf) const = 0;
|
||||
virtual void pack_entry(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const = 0;
|
||||
void
|
||||
pack(thrift::metadata::metadata& mv2, global_entry_data const& data) const;
|
||||
void pack_entry(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const;
|
||||
uint32_t inode_num() const override { return inode_; }
|
||||
|
||||
protected:
|
||||
void pack_specific(dir_entry& de) const override;
|
||||
void scan(os_access& os, const std::string& p, progress& prog) override;
|
||||
|
||||
using entry_ptr = std::shared_ptr<entry>;
|
||||
@ -246,7 +234,6 @@ class link : public entry {
|
||||
uint32_t inode_num() const override { return inode_; }
|
||||
|
||||
protected:
|
||||
void pack_specific(dir_entry& de) const override;
|
||||
void scan(os_access& os, const std::string& p, progress& prog) override;
|
||||
|
||||
private:
|
||||
@ -257,15 +244,12 @@ class link : public entry {
|
||||
|
||||
class entry_factory {
|
||||
public:
|
||||
static std::shared_ptr<entry_factory>
|
||||
create(bool no_owner = false, bool no_time = false,
|
||||
bool with_similarity = false);
|
||||
static std::unique_ptr<entry_factory> create(bool with_similarity = false);
|
||||
|
||||
virtual ~entry_factory() = default;
|
||||
|
||||
virtual std::shared_ptr<entry>
|
||||
create(os_access& os, const std::string& name,
|
||||
std::shared_ptr<entry> parent = std::shared_ptr<entry>()) = 0;
|
||||
virtual dir_entry_type de_type() const = 0;
|
||||
};
|
||||
} // namespace dwarfs
|
||||
|
@ -53,6 +53,7 @@ class block_range {
|
||||
std::shared_ptr<cached_block const> block_;
|
||||
};
|
||||
|
||||
// TODO: move elsewhere
|
||||
struct iovec_read_buf {
|
||||
// This covers more than 95% of reads
|
||||
static constexpr size_t inline_storage = 16;
|
||||
@ -75,12 +76,6 @@ enum class section_type : uint16_t {
|
||||
// Frozen metadata.
|
||||
};
|
||||
|
||||
enum class dir_entry_type : uint8_t {
|
||||
DIR_ENTRY = 0, // filesystem uses dir_entry
|
||||
DIR_ENTRY_UG = 1, // filesystem uses dir_entry_ug
|
||||
DIR_ENTRY_UG_TIME = 2 // filesystem uses dir_entry_ug_time
|
||||
};
|
||||
|
||||
struct file_header {
|
||||
char magic[6]; // "DWARFS"
|
||||
uint8_t major; // major version
|
||||
@ -97,41 +92,6 @@ struct section_header {
|
||||
void dump(std::ostream& os) const;
|
||||
};
|
||||
|
||||
struct dir_entry { // 128 bits (16 bytes) / entry
|
||||
uint32_t name_offset;
|
||||
uint16_t name_size;
|
||||
uint16_t mode;
|
||||
uint32_t inode; // dirs start at 1, then links, then files
|
||||
union {
|
||||
uint32_t file_size; // for files only
|
||||
uint32_t offset; // for dirs, offset to directory,
|
||||
} u; // for links, offset to content in link table
|
||||
};
|
||||
|
||||
struct dir_entry_ug { // 160 bits (20 bytes) / entry
|
||||
dir_entry de;
|
||||
uint16_t owner;
|
||||
uint16_t group;
|
||||
};
|
||||
|
||||
struct dir_entry_ug_time { // 256 bits (32 bytes) / entry
|
||||
dir_entry_ug ug;
|
||||
uint32_t atime; // yeah, I know... in a few years we can switch to 64 bits
|
||||
uint32_t mtime;
|
||||
uint32_t ctime;
|
||||
};
|
||||
|
||||
struct directory {
|
||||
uint32_t count;
|
||||
uint32_t self;
|
||||
uint32_t parent;
|
||||
union {
|
||||
dir_entry entries[1];
|
||||
dir_entry_ug entries_ug[1];
|
||||
dir_entry_ug_time entries_ug_time[1];
|
||||
} u;
|
||||
};
|
||||
|
||||
std::string get_compression_name(compression_type type);
|
||||
|
||||
std::string get_section_name(section_type type);
|
||||
|
@ -81,9 +81,9 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
|
||||
bco.max_bytes = opts.cachesize;
|
||||
bco.num_workers = opts.workers;
|
||||
bco.decompress_ratio = opts.decompress_ratio;
|
||||
s_fs =
|
||||
std::make_shared<filesystem_v2>(s_lgr, std::make_shared<mmap>(opts.fsimage),
|
||||
bco, &opts.stat_defaults, FUSE_ROOT_ID);
|
||||
s_fs = std::make_shared<filesystem_v2>(
|
||||
s_lgr, std::make_shared<mmap>(opts.fsimage), bco, &opts.stat_defaults,
|
||||
FUSE_ROOT_ID);
|
||||
}
|
||||
|
||||
void op_destroy(void* /*userdata*/) {
|
||||
@ -263,17 +263,15 @@ void op_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
||||
}
|
||||
|
||||
err = -rv;
|
||||
}
|
||||
catch (const dwarfs::error& e) {
|
||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||
err = e.get_errno();
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||
err = EIO;
|
||||
}
|
||||
} catch (const dwarfs::error& e) {
|
||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||
err = e.get_errno();
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||
err = EIO;
|
||||
}
|
||||
|
||||
fuse_reply_err(req, err);
|
||||
fuse_reply_err(req, err);
|
||||
} // namespace dwarfs
|
||||
|
||||
void op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
||||
|
@ -75,66 +75,6 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
|
||||
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
|
||||
}
|
||||
|
||||
template <typename DirEntryType>
|
||||
class dir_ : public dir {
|
||||
public:
|
||||
using dir::dir;
|
||||
|
||||
size_t packed_entry_size() const override { return sizeof(DirEntryType); }
|
||||
|
||||
void pack_entry(uint8_t* buf) const override {
|
||||
DirEntryType* de = reinterpret_cast<DirEntryType*>(buf);
|
||||
entry::pack(*de);
|
||||
}
|
||||
|
||||
void pack_entry(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const override {
|
||||
mv2.entry_index.at(inode_num()) = mv2.entries.size();
|
||||
mv2.entries.emplace_back();
|
||||
entry::pack(mv2.entries.back(), data);
|
||||
}
|
||||
|
||||
size_t packed_size() const override {
|
||||
return offsetof(directory, u) + sizeof(DirEntryType) * entries_.size();
|
||||
}
|
||||
|
||||
void pack(uint8_t* buf,
|
||||
std::function<void(const entry* e, size_t offset)> const& offset_cb)
|
||||
const override {
|
||||
directory* p = reinterpret_cast<directory*>(buf);
|
||||
DirEntryType* de = reinterpret_cast<DirEntryType*>(&p->u);
|
||||
|
||||
p->count = entries_.size();
|
||||
|
||||
p->self = offset_;
|
||||
p->parent = has_parent()
|
||||
? std::dynamic_pointer_cast<dir_>(parent())->offset_
|
||||
: offset_;
|
||||
|
||||
for (entry_ptr const& e : entries_) {
|
||||
e->pack(*de);
|
||||
offset_cb(e.get(), offset_ + (reinterpret_cast<uint8_t*>(de) - buf));
|
||||
++de;
|
||||
}
|
||||
}
|
||||
|
||||
void pack(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const override {
|
||||
thrift::metadata::directory dir;
|
||||
dir.parent_inode =
|
||||
has_parent() ? std::dynamic_pointer_cast<dir_>(parent())->inode_num()
|
||||
: 0;
|
||||
dir.first_entry = mv2.entries.size();
|
||||
dir.entry_count = entries_.size();
|
||||
mv2.directories.push_back(dir);
|
||||
for (entry_ptr const& e : entries_) {
|
||||
mv2.entry_index.at(e->inode_num()) = mv2.entries.size();
|
||||
mv2.entries.emplace_back();
|
||||
e->pack(mv2.entries.back(), data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
entry::entry(const std::string& name, std::shared_ptr<entry> parent,
|
||||
const struct ::stat& st)
|
||||
: name_(name)
|
||||
@ -192,29 +132,6 @@ void entry::walk(std::function<void(entry*)> const& f) { f(this); }
|
||||
|
||||
void entry::walk(std::function<void(const entry*)> const& f) const { f(this); }
|
||||
|
||||
void entry::pack(dir_entry& de) const {
|
||||
de.name_offset = name_offset_;
|
||||
de.name_size = folly::to<uint16_t>(name_.size());
|
||||
de.mode = stat_.st_mode & 0xFFFF;
|
||||
|
||||
pack_specific(de);
|
||||
}
|
||||
|
||||
void entry::pack(dir_entry_ug& de) const {
|
||||
de.owner = stat_.st_uid;
|
||||
de.group = stat_.st_gid;
|
||||
|
||||
pack(de.de);
|
||||
}
|
||||
|
||||
void entry::pack(dir_entry_ug_time& de) const {
|
||||
de.atime = stat_.st_atime;
|
||||
de.mtime = stat_.st_mtime;
|
||||
de.ctime = stat_.st_ctime;
|
||||
|
||||
pack(de.ug);
|
||||
}
|
||||
|
||||
void entry::update(global_entry_data& data) const {
|
||||
data.add_uid(stat_.st_uid);
|
||||
data.add_gid(stat_.st_gid);
|
||||
@ -256,11 +173,6 @@ uint32_t file::inode_num() const { return inode_->num(); }
|
||||
|
||||
void file::accept(entry_visitor& v, bool) { v.visit(this); }
|
||||
|
||||
void file::pack_specific(dir_entry& de) const {
|
||||
de.inode = inode_->num();
|
||||
de.u.file_size = folly::to<uint32_t>(inode_->size());
|
||||
}
|
||||
|
||||
void file::scan(os_access& os, const std::string& p, progress& prog) {
|
||||
assert(SHA_DIGEST_LENGTH == hash_.size());
|
||||
|
||||
@ -335,12 +247,29 @@ void dir::set_offset(size_t offset) { offset_ = folly::to<uint32_t>(offset); }
|
||||
|
||||
void dir::set_inode(uint32_t inode) { inode_ = inode; }
|
||||
|
||||
void dir::pack_specific(dir_entry& de) const {
|
||||
de.inode = inode_;
|
||||
de.u.offset = offset_;
|
||||
void dir::scan(os_access&, const std::string&, progress&) {}
|
||||
|
||||
void dir::pack_entry(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const {
|
||||
mv2.entry_index.at(inode_num()) = mv2.entries.size();
|
||||
mv2.entries.emplace_back();
|
||||
entry::pack(mv2.entries.back(), data);
|
||||
}
|
||||
|
||||
void dir::scan(os_access&, const std::string&, progress&) {}
|
||||
void dir::pack(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const {
|
||||
thrift::metadata::directory d;
|
||||
d.parent_inode =
|
||||
has_parent() ? std::dynamic_pointer_cast<dir>(parent())->inode_num() : 0;
|
||||
d.first_entry = mv2.entries.size();
|
||||
d.entry_count = entries_.size();
|
||||
mv2.directories.push_back(d);
|
||||
for (entry_ptr const& e : entries_) {
|
||||
mv2.entry_index.at(e->inode_num()) = mv2.entries.size();
|
||||
mv2.entries.emplace_back();
|
||||
e->pack(mv2.entries.back(), data);
|
||||
}
|
||||
}
|
||||
|
||||
entry::type_t link::type() const { return E_LINK; }
|
||||
|
||||
@ -352,17 +281,11 @@ void link::set_inode(uint32_t inode) { inode_ = inode; }
|
||||
|
||||
void link::accept(entry_visitor& v, bool) { v.visit(this); }
|
||||
|
||||
void link::pack_specific(dir_entry& de) const {
|
||||
de.inode = inode_;
|
||||
de.u.offset = offset_;
|
||||
}
|
||||
|
||||
void link::scan(os_access& os, const std::string& p, progress& prog) {
|
||||
link_ = os.readlink(p, size());
|
||||
prog.original_size += size();
|
||||
}
|
||||
|
||||
template <typename DirEntryType>
|
||||
class entry_factory_ : public entry_factory {
|
||||
public:
|
||||
entry_factory_(bool with_similarity)
|
||||
@ -379,7 +302,7 @@ class entry_factory_ : public entry_factory {
|
||||
return std::make_shared<file>(name, std::move(parent), st,
|
||||
with_similarity_);
|
||||
} else if (S_ISDIR(st.st_mode)) {
|
||||
return std::make_shared<dir_<DirEntryType>>(name, std::move(parent), st);
|
||||
return std::make_shared<dir>(name, std::move(parent), st);
|
||||
} else if (S_ISLNK(st.st_mode)) {
|
||||
return std::make_shared<link>(name, std::move(parent), st);
|
||||
} else {
|
||||
@ -389,42 +312,11 @@ class entry_factory_ : public entry_factory {
|
||||
return std::shared_ptr<entry>();
|
||||
}
|
||||
|
||||
dir_entry_type de_type() const override;
|
||||
|
||||
private:
|
||||
const bool with_similarity_;
|
||||
};
|
||||
|
||||
template <>
|
||||
dir_entry_type entry_factory_<dir_entry>::de_type() const {
|
||||
return dir_entry_type::DIR_ENTRY;
|
||||
}
|
||||
|
||||
template <>
|
||||
dir_entry_type entry_factory_<dir_entry_ug>::de_type() const {
|
||||
return dir_entry_type::DIR_ENTRY_UG;
|
||||
}
|
||||
|
||||
template <>
|
||||
dir_entry_type entry_factory_<dir_entry_ug_time>::de_type() const {
|
||||
return dir_entry_type::DIR_ENTRY_UG_TIME;
|
||||
}
|
||||
|
||||
std::shared_ptr<entry_factory>
|
||||
entry_factory::create(bool no_owner, bool no_time, bool with_similarity) {
|
||||
if (no_owner) {
|
||||
if (!no_time) {
|
||||
throw std::runtime_error("no_owner implies no_time");
|
||||
}
|
||||
|
||||
// no owner/time information
|
||||
return std::make_shared<entry_factory_<dir_entry>>(with_similarity);
|
||||
} else if (no_time) {
|
||||
// no time information
|
||||
return std::make_shared<entry_factory_<dir_entry_ug>>(with_similarity);
|
||||
} else {
|
||||
// the full monty
|
||||
return std::make_shared<entry_factory_<dir_entry_ug_time>>(with_similarity);
|
||||
}
|
||||
std::unique_ptr<entry_factory> entry_factory::create(bool with_similarity) {
|
||||
return std::make_unique<entry_factory_>(with_similarity);
|
||||
}
|
||||
} // namespace dwarfs
|
||||
|
@ -228,8 +228,7 @@ void filesystem_writer_<LoggerPolicy>::writer_thread() {
|
||||
|
||||
fsb->wait_until_compressed();
|
||||
|
||||
log_.debug() << get_section_name(fsb->type())
|
||||
<< " compressed from "
|
||||
log_.debug() << get_section_name(fsb->type()) << " compressed from "
|
||||
<< size_with_unit(fsb->uncompressed_size()) << " to "
|
||||
<< size_with_unit(fsb->size());
|
||||
|
||||
|
@ -39,14 +39,6 @@ const std::map<section_type, std::string> sections{
|
||||
#undef SECTION_TYPE_
|
||||
};
|
||||
|
||||
// TODO: remove
|
||||
const std::map<dir_entry_type, std::string> dir_entries{
|
||||
#define DIR_ENTRY_TYPE_(x) {dir_entry_type::x, #x}
|
||||
DIR_ENTRY_TYPE_(DIR_ENTRY), DIR_ENTRY_TYPE_(DIR_ENTRY_UG),
|
||||
DIR_ENTRY_TYPE_(DIR_ENTRY_UG_TIME)
|
||||
#undef DIR_ENTRY_TYPE_
|
||||
};
|
||||
|
||||
const std::map<compression_type, std::string> compressions{
|
||||
#define COMPRESSION_TYPE_(x) {compression_type::x, #x}
|
||||
COMPRESSION_TYPE_(NONE), COMPRESSION_TYPE_(LZMA), COMPRESSION_TYPE_(ZSTD),
|
||||
|
@ -77,8 +77,7 @@ class inode_manager_ : public inode_manager {
|
||||
return file_;
|
||||
}
|
||||
|
||||
void
|
||||
append_chunks_to(std::vector<chunk_type>& vec) const override {
|
||||
void append_chunks_to(std::vector<chunk_type>& vec) const override {
|
||||
vec.insert(vec.end(), chunks_.begin(), chunks_.end());
|
||||
}
|
||||
|
||||
|
@ -85,7 +85,8 @@ int dwarfsbench(int argc, char** argv) {
|
||||
bco.num_workers = num_workers;
|
||||
bco.decompress_ratio = folly::to<double>(decompress_ratio_str);
|
||||
|
||||
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem), bco);
|
||||
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
|
||||
bco);
|
||||
|
||||
worker_group wg("reader", num_readers);
|
||||
|
||||
|
@ -458,16 +458,16 @@ int mkdwarfs(int argc, char** argv) {
|
||||
|
||||
if (recompress) {
|
||||
auto ti = log.timed_info();
|
||||
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw);
|
||||
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path),
|
||||
fsw);
|
||||
wg_writer.wait();
|
||||
ti << "filesystem rewritten";
|
||||
} else {
|
||||
options.no_time = no_time;
|
||||
|
||||
scanner s(lgr, wg_scanner, cfg,
|
||||
entry_factory::create(no_owner, no_owner || no_time,
|
||||
options.file_order ==
|
||||
file_order_mode::SIMILARITY),
|
||||
entry_factory::create(options.file_order ==
|
||||
file_order_mode::SIMILARITY),
|
||||
std::make_shared<os_access_posix>(), script, options);
|
||||
|
||||
{
|
||||
|
@ -163,8 +163,8 @@ using namespace dwarfs;
|
||||
namespace {
|
||||
|
||||
void basic_end_to_end_test(const std::string& compressor,
|
||||
unsigned block_size_bits, file_order_mode file_order,
|
||||
bool no_owner, bool no_time) {
|
||||
unsigned block_size_bits,
|
||||
file_order_mode file_order) {
|
||||
block_manager::config cfg;
|
||||
scanner_options options;
|
||||
|
||||
@ -181,8 +181,7 @@ void basic_end_to_end_test(const std::string& compressor,
|
||||
lgr.set_policy<prod_logger_policy>();
|
||||
|
||||
scanner s(lgr, wg, cfg,
|
||||
entry_factory::create(no_owner, no_time,
|
||||
file_order == file_order_mode::SIMILARITY),
|
||||
entry_factory::create(file_order == file_order_mode::SIMILARITY),
|
||||
std::make_shared<test::os_access_mock>(),
|
||||
std::make_shared<test::script_mock>(), options);
|
||||
|
||||
@ -233,30 +232,18 @@ std::vector<std::string> const compressions{"null",
|
||||
} // namespace
|
||||
|
||||
class basic : public testing::TestWithParam<
|
||||
std::tuple<std::string, unsigned, file_order_mode, int>> {};
|
||||
std::tuple<std::string, unsigned, file_order_mode>> {};
|
||||
|
||||
TEST_P(basic, end_to_end) {
|
||||
bool no_owner = false, no_time = false;
|
||||
|
||||
switch (std::get<3>(GetParam())) {
|
||||
case 1:
|
||||
no_time = true;
|
||||
break;
|
||||
case 2:
|
||||
no_owner = no_time = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
basic_end_to_end_test(std::get<0>(GetParam()), std::get<1>(GetParam()),
|
||||
std::get<2>(GetParam()), no_owner, no_time);
|
||||
std::get<2>(GetParam()));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
dwarfs, basic,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28),
|
||||
::testing::Values(file_order_mode::NONE, file_order_mode::PATH,
|
||||
file_order_mode::SCRIPT, file_order_mode::SIMILARITY),
|
||||
::testing::Values(0, 1, 2)));
|
||||
::testing::Combine(::testing::ValuesIn(compressions),
|
||||
::testing::Values(12, 15, 20, 28),
|
||||
::testing::Values(file_order_mode::NONE,
|
||||
file_order_mode::PATH,
|
||||
file_order_mode::SCRIPT,
|
||||
file_order_mode::SIMILARITY)));
|
||||
|
Loading…
x
Reference in New Issue
Block a user