metadata_v2: more cleaning up

This commit is contained in:
Marcus Holland-Moritz 2020-11-28 02:06:41 +01:00
parent 5858c1c53d
commit c767b0b93e
10 changed files with 61 additions and 249 deletions

View File

@ -38,6 +38,7 @@
namespace dwarfs {
// TODO: clean up
struct global_entry_data {
global_entry_data(bool no_time)
: no_time_(no_time) {}
@ -151,9 +152,6 @@ class entry : public file_interface {
virtual size_t total_size() const;
virtual void walk(std::function<void(entry*)> const& f);
virtual void walk(std::function<void(const entry*)> const& f) const;
void pack(dir_entry& de) const;
void pack(dir_entry_ug& de) const;
void pack(dir_entry_ug_time& de) const;
void
pack(thrift::metadata::entry& entry_v2, global_entry_data const& data) const;
void update(global_entry_data& data) const;
@ -161,7 +159,6 @@ class entry : public file_interface {
virtual uint32_t inode_num() const = 0;
protected:
virtual void pack_specific(dir_entry& de) const = 0;
virtual void scan(os_access& os, const std::string& p, progress& prog) = 0;
private:
@ -187,7 +184,6 @@ class file : public entry {
uint32_t similarity_hash() const { return similarity_hash_; }
protected:
void pack_specific(dir_entry& de) const override;
void scan(os_access& os, const std::string& p, progress& prog) override;
private:
@ -210,21 +206,13 @@ class dir : public entry {
void sort();
void set_offset(size_t offset);
void set_inode(uint32_t inode);
virtual size_t packed_size() const = 0;
virtual void
pack(uint8_t* buf,
std::function<void(const entry* e, size_t offset)> const& offset_cb)
const = 0;
virtual void pack(thrift::metadata::metadata& mv2,
global_entry_data const& data) const = 0;
virtual size_t packed_entry_size() const = 0;
virtual void pack_entry(uint8_t* buf) const = 0;
virtual void pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const = 0;
void
pack(thrift::metadata::metadata& mv2, global_entry_data const& data) const;
void pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const;
uint32_t inode_num() const override { return inode_; }
protected:
void pack_specific(dir_entry& de) const override;
void scan(os_access& os, const std::string& p, progress& prog) override;
using entry_ptr = std::shared_ptr<entry>;
@ -246,7 +234,6 @@ class link : public entry {
uint32_t inode_num() const override { return inode_; }
protected:
void pack_specific(dir_entry& de) const override;
void scan(os_access& os, const std::string& p, progress& prog) override;
private:
@ -257,15 +244,12 @@ class link : public entry {
class entry_factory {
public:
static std::shared_ptr<entry_factory>
create(bool no_owner = false, bool no_time = false,
bool with_similarity = false);
static std::unique_ptr<entry_factory> create(bool with_similarity = false);
virtual ~entry_factory() = default;
virtual std::shared_ptr<entry>
create(os_access& os, const std::string& name,
std::shared_ptr<entry> parent = std::shared_ptr<entry>()) = 0;
virtual dir_entry_type de_type() const = 0;
};
} // namespace dwarfs

View File

@ -53,6 +53,7 @@ class block_range {
std::shared_ptr<cached_block const> block_;
};
// TODO: move elsewhere
struct iovec_read_buf {
// This covers more than 95% of reads
static constexpr size_t inline_storage = 16;
@ -75,12 +76,6 @@ enum class section_type : uint16_t {
// Frozen metadata.
};
enum class dir_entry_type : uint8_t {
DIR_ENTRY = 0, // filesystem uses dir_entry
DIR_ENTRY_UG = 1, // filesystem uses dir_entry_ug
DIR_ENTRY_UG_TIME = 2 // filesystem uses dir_entry_ug_time
};
struct file_header {
char magic[6]; // "DWARFS"
uint8_t major; // major version
@ -97,41 +92,6 @@ struct section_header {
void dump(std::ostream& os) const;
};
struct dir_entry { // 128 bits (16 bytes) / entry
uint32_t name_offset;
uint16_t name_size;
uint16_t mode;
uint32_t inode; // dirs start at 1, then links, then files
union {
uint32_t file_size; // for files only
uint32_t offset; // for dirs, offset to directory,
} u; // for links, offset to content in link table
};
struct dir_entry_ug { // 160 bits (20 bytes) / entry
dir_entry de;
uint16_t owner;
uint16_t group;
};
struct dir_entry_ug_time { // 256 bits (32 bytes) / entry
dir_entry_ug ug;
uint32_t atime; // yeah, I know... in a few years we can switch to 64 bits
uint32_t mtime;
uint32_t ctime;
};
struct directory {
uint32_t count;
uint32_t self;
uint32_t parent;
union {
dir_entry entries[1];
dir_entry_ug entries_ug[1];
dir_entry_ug_time entries_ug_time[1];
} u;
};
std::string get_compression_name(compression_type type);
std::string get_section_name(section_type type);

View File

@ -81,9 +81,9 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
bco.max_bytes = opts.cachesize;
bco.num_workers = opts.workers;
bco.decompress_ratio = opts.decompress_ratio;
s_fs =
std::make_shared<filesystem_v2>(s_lgr, std::make_shared<mmap>(opts.fsimage),
bco, &opts.stat_defaults, FUSE_ROOT_ID);
s_fs = std::make_shared<filesystem_v2>(
s_lgr, std::make_shared<mmap>(opts.fsimage), bco, &opts.stat_defaults,
FUSE_ROOT_ID);
}
void op_destroy(void* /*userdata*/) {
@ -263,17 +263,15 @@ void op_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
}
err = -rv;
}
catch (const dwarfs::error& e) {
std::cerr << "ERROR: " << e.what() << std::endl;
err = e.get_errno();
}
catch (const std::exception& e) {
std::cerr << "ERROR: " << e.what() << std::endl;
err = EIO;
}
} catch (const dwarfs::error& e) {
std::cerr << "ERROR: " << e.what() << std::endl;
err = e.get_errno();
} catch (const std::exception& e) {
std::cerr << "ERROR: " << e.what() << std::endl;
err = EIO;
}
fuse_reply_err(req, err);
fuse_reply_err(req, err);
} // namespace dwarfs
void op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,

View File

@ -75,66 +75,6 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
}
template <typename DirEntryType>
class dir_ : public dir {
public:
using dir::dir;
size_t packed_entry_size() const override { return sizeof(DirEntryType); }
void pack_entry(uint8_t* buf) const override {
DirEntryType* de = reinterpret_cast<DirEntryType*>(buf);
entry::pack(*de);
}
void pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const override {
mv2.entry_index.at(inode_num()) = mv2.entries.size();
mv2.entries.emplace_back();
entry::pack(mv2.entries.back(), data);
}
size_t packed_size() const override {
return offsetof(directory, u) + sizeof(DirEntryType) * entries_.size();
}
void pack(uint8_t* buf,
std::function<void(const entry* e, size_t offset)> const& offset_cb)
const override {
directory* p = reinterpret_cast<directory*>(buf);
DirEntryType* de = reinterpret_cast<DirEntryType*>(&p->u);
p->count = entries_.size();
p->self = offset_;
p->parent = has_parent()
? std::dynamic_pointer_cast<dir_>(parent())->offset_
: offset_;
for (entry_ptr const& e : entries_) {
e->pack(*de);
offset_cb(e.get(), offset_ + (reinterpret_cast<uint8_t*>(de) - buf));
++de;
}
}
void pack(thrift::metadata::metadata& mv2,
global_entry_data const& data) const override {
thrift::metadata::directory dir;
dir.parent_inode =
has_parent() ? std::dynamic_pointer_cast<dir_>(parent())->inode_num()
: 0;
dir.first_entry = mv2.entries.size();
dir.entry_count = entries_.size();
mv2.directories.push_back(dir);
for (entry_ptr const& e : entries_) {
mv2.entry_index.at(e->inode_num()) = mv2.entries.size();
mv2.entries.emplace_back();
e->pack(mv2.entries.back(), data);
}
}
};
entry::entry(const std::string& name, std::shared_ptr<entry> parent,
const struct ::stat& st)
: name_(name)
@ -192,29 +132,6 @@ void entry::walk(std::function<void(entry*)> const& f) { f(this); }
void entry::walk(std::function<void(const entry*)> const& f) const { f(this); }
void entry::pack(dir_entry& de) const {
de.name_offset = name_offset_;
de.name_size = folly::to<uint16_t>(name_.size());
de.mode = stat_.st_mode & 0xFFFF;
pack_specific(de);
}
void entry::pack(dir_entry_ug& de) const {
de.owner = stat_.st_uid;
de.group = stat_.st_gid;
pack(de.de);
}
void entry::pack(dir_entry_ug_time& de) const {
de.atime = stat_.st_atime;
de.mtime = stat_.st_mtime;
de.ctime = stat_.st_ctime;
pack(de.ug);
}
void entry::update(global_entry_data& data) const {
data.add_uid(stat_.st_uid);
data.add_gid(stat_.st_gid);
@ -256,11 +173,6 @@ uint32_t file::inode_num() const { return inode_->num(); }
void file::accept(entry_visitor& v, bool) { v.visit(this); }
void file::pack_specific(dir_entry& de) const {
de.inode = inode_->num();
de.u.file_size = folly::to<uint32_t>(inode_->size());
}
void file::scan(os_access& os, const std::string& p, progress& prog) {
assert(SHA_DIGEST_LENGTH == hash_.size());
@ -335,12 +247,29 @@ void dir::set_offset(size_t offset) { offset_ = folly::to<uint32_t>(offset); }
void dir::set_inode(uint32_t inode) { inode_ = inode; }
void dir::pack_specific(dir_entry& de) const {
de.inode = inode_;
de.u.offset = offset_;
void dir::scan(os_access&, const std::string&, progress&) {}
void dir::pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const {
mv2.entry_index.at(inode_num()) = mv2.entries.size();
mv2.entries.emplace_back();
entry::pack(mv2.entries.back(), data);
}
void dir::scan(os_access&, const std::string&, progress&) {}
void dir::pack(thrift::metadata::metadata& mv2,
global_entry_data const& data) const {
thrift::metadata::directory d;
d.parent_inode =
has_parent() ? std::dynamic_pointer_cast<dir>(parent())->inode_num() : 0;
d.first_entry = mv2.entries.size();
d.entry_count = entries_.size();
mv2.directories.push_back(d);
for (entry_ptr const& e : entries_) {
mv2.entry_index.at(e->inode_num()) = mv2.entries.size();
mv2.entries.emplace_back();
e->pack(mv2.entries.back(), data);
}
}
entry::type_t link::type() const { return E_LINK; }
@ -352,17 +281,11 @@ void link::set_inode(uint32_t inode) { inode_ = inode; }
void link::accept(entry_visitor& v, bool) { v.visit(this); }
void link::pack_specific(dir_entry& de) const {
de.inode = inode_;
de.u.offset = offset_;
}
void link::scan(os_access& os, const std::string& p, progress& prog) {
link_ = os.readlink(p, size());
prog.original_size += size();
}
template <typename DirEntryType>
class entry_factory_ : public entry_factory {
public:
entry_factory_(bool with_similarity)
@ -379,7 +302,7 @@ class entry_factory_ : public entry_factory {
return std::make_shared<file>(name, std::move(parent), st,
with_similarity_);
} else if (S_ISDIR(st.st_mode)) {
return std::make_shared<dir_<DirEntryType>>(name, std::move(parent), st);
return std::make_shared<dir>(name, std::move(parent), st);
} else if (S_ISLNK(st.st_mode)) {
return std::make_shared<link>(name, std::move(parent), st);
} else {
@ -389,42 +312,11 @@ class entry_factory_ : public entry_factory {
return std::shared_ptr<entry>();
}
dir_entry_type de_type() const override;
private:
const bool with_similarity_;
};
template <>
dir_entry_type entry_factory_<dir_entry>::de_type() const {
return dir_entry_type::DIR_ENTRY;
}
template <>
dir_entry_type entry_factory_<dir_entry_ug>::de_type() const {
return dir_entry_type::DIR_ENTRY_UG;
}
template <>
dir_entry_type entry_factory_<dir_entry_ug_time>::de_type() const {
return dir_entry_type::DIR_ENTRY_UG_TIME;
}
std::shared_ptr<entry_factory>
entry_factory::create(bool no_owner, bool no_time, bool with_similarity) {
if (no_owner) {
if (!no_time) {
throw std::runtime_error("no_owner implies no_time");
}
// no owner/time information
return std::make_shared<entry_factory_<dir_entry>>(with_similarity);
} else if (no_time) {
// no time information
return std::make_shared<entry_factory_<dir_entry_ug>>(with_similarity);
} else {
// the full monty
return std::make_shared<entry_factory_<dir_entry_ug_time>>(with_similarity);
}
std::unique_ptr<entry_factory> entry_factory::create(bool with_similarity) {
return std::make_unique<entry_factory_>(with_similarity);
}
} // namespace dwarfs

View File

@ -228,8 +228,7 @@ void filesystem_writer_<LoggerPolicy>::writer_thread() {
fsb->wait_until_compressed();
log_.debug() << get_section_name(fsb->type())
<< " compressed from "
log_.debug() << get_section_name(fsb->type()) << " compressed from "
<< size_with_unit(fsb->uncompressed_size()) << " to "
<< size_with_unit(fsb->size());

View File

@ -39,14 +39,6 @@ const std::map<section_type, std::string> sections{
#undef SECTION_TYPE_
};
// TODO: remove
const std::map<dir_entry_type, std::string> dir_entries{
#define DIR_ENTRY_TYPE_(x) {dir_entry_type::x, #x}
DIR_ENTRY_TYPE_(DIR_ENTRY), DIR_ENTRY_TYPE_(DIR_ENTRY_UG),
DIR_ENTRY_TYPE_(DIR_ENTRY_UG_TIME)
#undef DIR_ENTRY_TYPE_
};
const std::map<compression_type, std::string> compressions{
#define COMPRESSION_TYPE_(x) {compression_type::x, #x}
COMPRESSION_TYPE_(NONE), COMPRESSION_TYPE_(LZMA), COMPRESSION_TYPE_(ZSTD),

View File

@ -77,8 +77,7 @@ class inode_manager_ : public inode_manager {
return file_;
}
void
append_chunks_to(std::vector<chunk_type>& vec) const override {
void append_chunks_to(std::vector<chunk_type>& vec) const override {
vec.insert(vec.end(), chunks_.begin(), chunks_.end());
}

View File

@ -85,7 +85,8 @@ int dwarfsbench(int argc, char** argv) {
bco.num_workers = num_workers;
bco.decompress_ratio = folly::to<double>(decompress_ratio_str);
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem), bco);
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
bco);
worker_group wg("reader", num_readers);

View File

@ -458,16 +458,16 @@ int mkdwarfs(int argc, char** argv) {
if (recompress) {
auto ti = log.timed_info();
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw);
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path),
fsw);
wg_writer.wait();
ti << "filesystem rewritten";
} else {
options.no_time = no_time;
scanner s(lgr, wg_scanner, cfg,
entry_factory::create(no_owner, no_owner || no_time,
options.file_order ==
file_order_mode::SIMILARITY),
entry_factory::create(options.file_order ==
file_order_mode::SIMILARITY),
std::make_shared<os_access_posix>(), script, options);
{

View File

@ -163,8 +163,8 @@ using namespace dwarfs;
namespace {
void basic_end_to_end_test(const std::string& compressor,
unsigned block_size_bits, file_order_mode file_order,
bool no_owner, bool no_time) {
unsigned block_size_bits,
file_order_mode file_order) {
block_manager::config cfg;
scanner_options options;
@ -181,8 +181,7 @@ void basic_end_to_end_test(const std::string& compressor,
lgr.set_policy<prod_logger_policy>();
scanner s(lgr, wg, cfg,
entry_factory::create(no_owner, no_time,
file_order == file_order_mode::SIMILARITY),
entry_factory::create(file_order == file_order_mode::SIMILARITY),
std::make_shared<test::os_access_mock>(),
std::make_shared<test::script_mock>(), options);
@ -233,30 +232,18 @@ std::vector<std::string> const compressions{"null",
} // namespace
class basic : public testing::TestWithParam<
std::tuple<std::string, unsigned, file_order_mode, int>> {};
std::tuple<std::string, unsigned, file_order_mode>> {};
TEST_P(basic, end_to_end) {
bool no_owner = false, no_time = false;
switch (std::get<3>(GetParam())) {
case 1:
no_time = true;
break;
case 2:
no_owner = no_time = true;
break;
default:
break;
}
basic_end_to_end_test(std::get<0>(GetParam()), std::get<1>(GetParam()),
std::get<2>(GetParam()), no_owner, no_time);
std::get<2>(GetParam()));
}
INSTANTIATE_TEST_SUITE_P(
dwarfs, basic,
::testing::Combine(
::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28),
::testing::Values(file_order_mode::NONE, file_order_mode::PATH,
file_order_mode::SCRIPT, file_order_mode::SIMILARITY),
::testing::Values(0, 1, 2)));
::testing::Combine(::testing::ValuesIn(compressions),
::testing::Values(12, 15, 20, 28),
::testing::Values(file_order_mode::NONE,
file_order_mode::PATH,
file_order_mode::SCRIPT,
file_order_mode::SIMILARITY)));