diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h index d3435882..7576c6a4 100644 --- a/include/dwarfs/entry.h +++ b/include/dwarfs/entry.h @@ -82,7 +82,8 @@ class entry : public entry_interface { global_entry_data const& data) const; void update(global_entry_data& data) const; virtual void accept(entry_visitor& v, bool preorder = false) = 0; - virtual uint32_t inode_num() const = 0; + void set_inode_num(uint32_t inode_num) { inode_num_ = inode_num; } + uint32_t inode_num() const { return inode_num_; } virtual void scan(os_access& os, progress& prog) = 0; const struct ::stat& status() const { return stat_; } @@ -104,6 +105,7 @@ class entry : public entry_interface { std::string name_; std::weak_ptr parent_; struct ::stat stat_; + uint32_t inode_num_{0}; }; class file : public entry { @@ -117,12 +119,12 @@ class file : public entry { void set_inode(std::shared_ptr ino); std::shared_ptr get_inode() const; void accept(entry_visitor& v, bool preorder) override; - uint32_t inode_num() const override; void scan(os_access& os, progress& prog) override; void create_data(); void hardlink(file* other, progress& prog); uint64_t raw_inode_num() const; unsigned num_hard_links() const; + uint32_t content_index() const; private: struct data { @@ -144,12 +146,10 @@ class dir : public entry { void walk(std::function const& f) const override; void accept(entry_visitor& v, bool preorder) override; void sort(); - void set_inode(uint32_t inode); void pack(thrift::metadata::metadata& mv2, global_entry_data const& data) const; void pack_entry(thrift::metadata::metadata& mv2, global_entry_data const& data) const; - uint32_t inode_num() const override { return inode_; } void scan(os_access& os, progress& prog) override; bool empty() const { return entries_.empty(); } void remove_empty_dirs(progress& prog); @@ -158,7 +158,6 @@ class dir : public entry { using entry_ptr = std::shared_ptr; std::vector> entries_; - uint32_t inode_{0}; }; class link : public entry { @@ -167,14 +166,11 @@ class link : public entry { type_t type() const override; const std::string& linkname() const; - void set_inode(uint32_t inode); void accept(entry_visitor& v, bool preorder) override; - uint32_t inode_num() const override { return inode_; } void scan(os_access& os, progress& prog) override; private: std::string link_; - uint32_t inode_{0}; }; /** @@ -186,14 +182,9 @@ class device : public entry { using entry::entry; type_t type() const override; - void set_inode(uint32_t inode); void accept(entry_visitor& v, bool preorder) override; - uint32_t inode_num() const override { return inode_; } void scan(os_access& os, progress& prog) override; uint64_t device_id() const; - - private: - uint32_t inode_{0}; }; class entry_factory { diff --git a/include/dwarfs/filesystem_v2.h b/include/dwarfs/filesystem_v2.h index 18067171..d614d158 100644 --- a/include/dwarfs/filesystem_v2.h +++ b/include/dwarfs/filesystem_v2.h @@ -81,63 +81,54 @@ class filesystem_v2 { return impl_->serialize_metadata_as_json(simple); } - void walk(std::function const& func) const { + void walk(std::function const& func) const { impl_->walk(func); } - void walk(std::function const& func) const { - impl_->walk(func); - } - - void walk_inode_order(std::function const& func) const { + void walk_inode_order(std::function const& func) const { impl_->walk_inode_order(func); } - void walk_inode_order( - std::function const& func) const { - impl_->walk_inode_order(func); - } - - std::optional find(const char* path) const { + std::optional find(const char* path) const { return impl_->find(path); } - std::optional find(int inode) const { return impl_->find(inode); } + std::optional find(int inode) const { return impl_->find(inode); } - std::optional find(int inode, const char* name) const { + std::optional find(int inode, const char* name) const { return impl_->find(inode, name); } - int getattr(entry_view entry, struct ::stat* stbuf) const { + int getattr(inode_view entry, struct ::stat* stbuf) const { return impl_->getattr(entry, stbuf); } - int access(entry_view entry, int mode, uid_t uid, gid_t gid) const { + int access(inode_view entry, int mode, uid_t uid, gid_t gid) const { return impl_->access(entry, mode, uid, gid); } - std::optional opendir(entry_view entry) const { + std::optional opendir(inode_view entry) const { return impl_->opendir(entry); } - std::optional> + std::optional> readdir(directory_view dir, size_t offset) const { return impl_->readdir(dir, offset); } size_t dirsize(directory_view dir) const { return impl_->dirsize(dir); } - int readlink(entry_view entry, std::string* buf) const { + int readlink(inode_view entry, std::string* buf) const { return impl_->readlink(entry, buf); } - folly::Expected readlink(entry_view entry) const { + folly::Expected readlink(inode_view entry) const { return impl_->readlink(entry); } int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); } - int open(entry_view entry) const { return impl_->open(entry); } + int open(inode_view entry) const { return impl_->open(entry); } ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset = 0) const { return impl_->read(inode, buf, size, offset); @@ -160,29 +151,26 @@ class filesystem_v2 { virtual void dump(std::ostream& os, int detail_level) const = 0; virtual folly::dynamic metadata_as_dynamic() const = 0; virtual std::string serialize_metadata_as_json(bool simple) const = 0; - virtual void walk(std::function const& func) const = 0; virtual void - walk(std::function const& func) const = 0; + walk(std::function const& func) const = 0; virtual void - walk_inode_order(std::function const& func) const = 0; - virtual void walk_inode_order( - std::function const& func) const = 0; - virtual std::optional find(const char* path) const = 0; - virtual std::optional find(int inode) const = 0; - virtual std::optional + walk_inode_order(std::function const& func) const = 0; + virtual std::optional find(const char* path) const = 0; + virtual std::optional find(int inode) const = 0; + virtual std::optional find(int inode, const char* name) const = 0; - virtual int getattr(entry_view entry, struct ::stat* stbuf) const = 0; + virtual int getattr(inode_view entry, struct ::stat* stbuf) const = 0; virtual int - access(entry_view entry, int mode, uid_t uid, gid_t gid) const = 0; - virtual std::optional opendir(entry_view entry) const = 0; - virtual std::optional> + access(inode_view entry, int mode, uid_t uid, gid_t gid) const = 0; + virtual std::optional opendir(inode_view entry) const = 0; + virtual std::optional> readdir(directory_view dir, size_t offset) const = 0; virtual size_t dirsize(directory_view dir) const = 0; - virtual int readlink(entry_view entry, std::string* buf) const = 0; + virtual int readlink(inode_view entry, std::string* buf) const = 0; virtual folly::Expected - readlink(entry_view entry) const = 0; + readlink(inode_view entry) const = 0; virtual int statvfs(struct ::statvfs* stbuf) const = 0; - virtual int open(entry_view entry) const = 0; + virtual int open(inode_view entry) const = 0; virtual ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset) const = 0; virtual ssize_t readv(uint32_t inode, iovec_read_buf& buf, size_t size, diff --git a/include/dwarfs/fstypes.h b/include/dwarfs/fstypes.h index 79a6f7de..68093445 100644 --- a/include/dwarfs/fstypes.h +++ b/include/dwarfs/fstypes.h @@ -65,7 +65,7 @@ struct iovec_read_buf { }; constexpr uint8_t MAJOR_VERSION = 2; -constexpr uint8_t MINOR_VERSION = 2; +constexpr uint8_t MINOR_VERSION = 3; enum class section_type : uint16_t { BLOCK = 0, diff --git a/include/dwarfs/metadata_types.h b/include/dwarfs/metadata_types.h index 7c8765b8..44c4b4a9 100644 --- a/include/dwarfs/metadata_types.h +++ b/include/dwarfs/metadata_types.h @@ -23,7 +23,9 @@ #include #include +#include #include +#include #include #include @@ -37,9 +39,11 @@ namespace dwarfs { template class metadata_; -class entry_view +class dir_entry_view; + +class inode_view : public ::apache::thrift::frozen::View { - using EntryView = + using InodeView = ::apache::thrift::frozen::View; using Meta = ::apache::thrift::frozen::MappedFrozen; @@ -47,53 +51,116 @@ class entry_view template friend class metadata_; + friend class dir_entry_view; + + public: + uint16_t mode() const; + uint16_t getuid() const; + uint16_t getgid() const; + uint32_t inode_num() const { return inode_num_; } + + private: + inode_view(InodeView iv, uint32_t inode_num_, Meta const* meta) + : InodeView{iv} + , inode_num_{inode_num_} + , meta_{meta} {} + + uint32_t inode_num_; + Meta const* meta_; +}; + +/** + * THIS *MUST* BE CONSTRUCTIBLE FROM ONLY AN INODE NUMBER (NOT EVEN AN + * INODE_VIEW) + */ +class directory_view + : public ::apache::thrift::frozen::View { + using DirView = ::apache::thrift::frozen::View; + using Meta = + ::apache::thrift::frozen::MappedFrozen; + + template + friend class metadata_; + + friend class dir_entry_view; + + public: + // TODO: not sure if these are needed + uint32_t inode() const { return inode_; } + bool is_root() const { return inode_ == 0; } + + uint32_t entry_count() const; + + boost::integer_range entry_range() const; + + std::optional parent() const; + + uint32_t parent_inode() const; + + private: + directory_view(uint32_t inode, Meta const* meta); + + DirView getdir(uint32_t ino) const; + static DirView getdir(uint32_t ino, Meta const* meta); + + uint32_t inode_; + Meta const* meta_; +}; + +class dir_entry_view { + using InodeView = + ::apache::thrift::frozen::View; + using DirEntryView = + ::apache::thrift::frozen::View; + using Meta = + ::apache::thrift::frozen::MappedFrozen; + + template + friend class metadata_; + public: std::string_view name() const; - uint16_t mode() const; - uint16_t getuid() const; - uint16_t getgid() const; + inode_view inode() const; - private: - entry_view(EntryView ev, Meta const* meta) - : EntryView(ev) - , meta_(meta) {} + bool is_root() const; - Meta const* meta_; -}; - -class directory_view { - using EntryView = - ::apache::thrift::frozen::View; - using DirView = ::apache::thrift::frozen::View; - using Meta = - ::apache::thrift::frozen::MappedFrozen; - - template - friend class metadata_; - - public: - uint32_t inode() const { return entry_.inode(); } - uint32_t parent_inode() const; - uint32_t first_entry() const; - uint32_t entry_count() const; - - boost::integer_range entry_range() const; + // TODO: remove? + // std::optional directory() const; + std::optional parent() const; std::string path() const; void append_path_to(std::string& s) const; + uint32_t self_index() const { return self_index_; } + private: - directory_view(EntryView ev, Meta const* meta) - : entry_(ev) - , meta_(meta) {} + dir_entry_view(DirEntryView v, uint32_t self_index, uint32_t parent_index, + Meta const* meta) + : v_{v} + , self_index_{self_index} // TODO: check if we really need this + , parent_index_{parent_index} + , meta_{meta} {} - directory_view(uint32_t inode, Meta const* meta); + dir_entry_view(InodeView v, uint32_t self_index, uint32_t parent_index, + Meta const* meta) + : v_{v} + , self_index_{self_index} + , parent_index_{parent_index} + , meta_{meta} {} - DirView getdir() const; - DirView getdir(uint32_t ino) const; - uint32_t entry_count(DirView self) const; + static dir_entry_view + from_dir_entry_index(uint32_t self_index, uint32_t parent_index, + Meta const* meta); + static dir_entry_view + from_dir_entry_index(uint32_t self_index, Meta const* meta); - EntryView entry_; + // TODO: this works, but it's strange; a limited version of dir_entry_view + // should work without a parent for these use cases + static std::string_view name(uint32_t index, Meta const* meta); + static inode_view inode(uint32_t index, Meta const* meta); + + std::variant v_; + uint32_t self_index_, parent_index_; Meta const* meta_; }; diff --git a/include/dwarfs/metadata_v2.h b/include/dwarfs/metadata_v2.h index c5d206f1..81a13d57 100644 --- a/include/dwarfs/metadata_v2.h +++ b/include/dwarfs/metadata_v2.h @@ -83,59 +83,50 @@ class metadata_v2 { // TODO: check if this is needed bool empty() const { return !impl_ || impl_->empty(); } - void walk(std::function const& func) const { + void walk(std::function const& func) const { impl_->walk(func); } - void walk(std::function const& func) const { - impl_->walk(func); - } - - void walk_inode_order(std::function const& func) const { + void walk_inode_order(std::function const& func) const { impl_->walk_inode_order(func); } - void walk_inode_order( - std::function const& func) const { - impl_->walk_inode_order(func); - } - - std::optional find(const char* path) const { + std::optional find(const char* path) const { return impl_->find(path); } - std::optional find(int inode) const { return impl_->find(inode); } + std::optional find(int inode) const { return impl_->find(inode); } - std::optional find(int inode, const char* name) const { + std::optional find(int inode, const char* name) const { return impl_->find(inode, name); } - int getattr(entry_view entry, struct ::stat* stbuf) const { + int getattr(inode_view entry, struct ::stat* stbuf) const { return impl_->getattr(entry, stbuf); } - std::optional opendir(entry_view entry) const { + std::optional opendir(inode_view entry) const { return impl_->opendir(entry); } - std::optional> + std::optional> readdir(directory_view dir, size_t offset) const { return impl_->readdir(dir, offset); } size_t dirsize(directory_view dir) const { return impl_->dirsize(dir); } - int access(entry_view entry, int mode, uid_t uid, gid_t gid) const { + int access(inode_view entry, int mode, uid_t uid, gid_t gid) const { return impl_->access(entry, mode, uid, gid); } - int open(entry_view entry) const { return impl_->open(entry); } + int open(inode_view entry) const { return impl_->open(entry); } - int readlink(entry_view entry, std::string* buf) const { + int readlink(inode_view entry, std::string* buf) const { return impl_->readlink(entry, buf); } - folly::Expected readlink(entry_view entry) const { + folly::Expected readlink(inode_view entry) const { return impl_->readlink(entry); } @@ -164,38 +155,35 @@ class metadata_v2 { virtual size_t size() const = 0; virtual bool empty() const = 0; - virtual void walk(std::function const& func) const = 0; virtual void - walk(std::function const& func) const = 0; + walk(std::function const& func) const = 0; virtual void - walk_inode_order(std::function const& func) const = 0; - virtual void walk_inode_order( - std::function const& func) const = 0; + walk_inode_order(std::function const& func) const = 0; - virtual std::optional find(const char* path) const = 0; - virtual std::optional find(int inode) const = 0; - virtual std::optional + virtual std::optional find(const char* path) const = 0; + virtual std::optional find(int inode) const = 0; + virtual std::optional find(int inode, const char* name) const = 0; - virtual int getattr(entry_view entry, struct ::stat* stbuf) const = 0; + virtual int getattr(inode_view entry, struct ::stat* stbuf) const = 0; - virtual std::optional opendir(entry_view entry) const = 0; + virtual std::optional opendir(inode_view entry) const = 0; - virtual std::optional> + virtual std::optional> readdir(directory_view dir, size_t offset) const = 0; virtual size_t dirsize(directory_view dir) const = 0; virtual int - access(entry_view entry, int mode, uid_t uid, gid_t gid) const = 0; + access(inode_view entry, int mode, uid_t uid, gid_t gid) const = 0; - virtual int open(entry_view entry) const = 0; + virtual int open(inode_view entry) const = 0; - virtual int readlink(entry_view entry, std::string* buf) const = 0; + virtual int readlink(inode_view entry, std::string* buf) const = 0; virtual folly::Expected - readlink(entry_view entry) const = 0; + readlink(inode_view entry) const = 0; virtual int statvfs(struct ::statvfs* stbuf) const = 0; diff --git a/include/dwarfs/overloaded.h b/include/dwarfs/overloaded.h new file mode 100644 index 00000000..c29ad354 --- /dev/null +++ b/include/dwarfs/overloaded.h @@ -0,0 +1,34 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +namespace dwarfs { + +template +struct overloaded : Ts... { + using Ts::operator()...; +}; + +template +overloaded(Ts...) -> overloaded; + +} // namespace dwarfs diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp index b61671ba..0b01d30f 100644 --- a/src/dwarfs.cpp +++ b/src/dwarfs.cpp @@ -137,7 +137,7 @@ template void op_lookup(fuse_req_t req, fuse_ino_t parent, const char* name) { LOG_PROXY(LoggerPolicy, s_lgr); - LOG_DEBUG << __func__; + LOG_DEBUG << __func__ << "(" << parent << ", " << name << ")"; int err = ENOENT; @@ -175,7 +175,7 @@ template void op_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info*) { LOG_PROXY(LoggerPolicy, s_lgr); - LOG_DEBUG << __func__; + LOG_DEBUG << __func__ << "(" << ino << ")"; int err = ENOENT; @@ -282,7 +282,8 @@ void op_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { } else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) { err = EACCES; } else { - fi->fh = FUSE_ROOT_ID + entry->inode(); + fi->fh = FUSE_ROOT_ID + + entry->content_index(); // <<---- THIS IS NOT THE INODE!!!! fi->direct_io = !s_opts.cache_files; fi->keep_cache = s_opts.cache_files; fuse_reply_open(req, fi); diff --git a/src/dwarfs/entry.cpp b/src/dwarfs/entry.cpp index 65fc5efd..c720bf0f 100644 --- a/src/dwarfs/entry.cpp +++ b/src/dwarfs/entry.cpp @@ -103,14 +103,17 @@ void entry::update(global_entry_data& data) const { void entry::pack(thrift::metadata::inode_data& entry_v2, global_entry_data const& data) const { - entry_v2.name_index_v2_2 = has_parent() ? data.get_name_index(name_) : 0; entry_v2.mode_index = data.get_mode_index(stat_.st_mode & 0xFFFF); entry_v2.owner_index = data.get_uid_index(stat_.st_uid); entry_v2.group_index = data.get_gid_index(stat_.st_gid); entry_v2.atime_offset = data.get_atime_offset(stat_.st_atime); entry_v2.mtime_offset = data.get_mtime_offset(stat_.st_mtime); entry_v2.ctime_offset = data.get_ctime_offset(stat_.st_ctime); - entry_v2.inode = inode_num(); + if (auto fp = dynamic_cast(this)) { + entry_v2.content_index = fp->content_index(); + } else { + entry_v2.content_index = inode_num(); + } } entry::type_t file::type() const { return E_FILE; } @@ -157,8 +160,6 @@ void file::set_inode(std::shared_ptr ino) { std::shared_ptr file::get_inode() const { return inode_; } -uint32_t file::inode_num() const { return inode_->num(); } - void file::accept(entry_visitor& v, bool) { v.visit(this); } void file::scan(os_access& os, progress& prog) { @@ -188,6 +189,8 @@ void file::scan(os_access& os, progress& prog) { } } +uint32_t file::content_index() const { return inode_->num(); } + uint64_t file::raw_inode_num() const { return status().st_ino; } unsigned file::num_hard_links() const { return status().st_nlink; } @@ -204,6 +207,7 @@ void file::hardlink(file* other, progress& prog) { prog.original_size += s; prog.hardlink_size += s; } + ++prog.hardlinks; data_ = other->data_; } @@ -248,30 +252,28 @@ void dir::sort() { }); } -void dir::set_inode(uint32_t inode) { inode_ = inode; } - void dir::scan(os_access&, progress&) {} void dir::pack_entry(thrift::metadata::metadata& mv2, global_entry_data const& data) const { - DWARFS_NOTHROW(mv2.entry_table_v2_2.at(inode_num())) = mv2.entries.size(); - mv2.entries.emplace_back(); - entry::pack(mv2.entries.back(), data); + auto& de = mv2.dir_entries_ref()->emplace_back(); + de.name_index = has_parent() ? data.get_name_index(name()) : 0; + de.inode_num = inode_num(); + entry::pack(DWARFS_NOTHROW(mv2.entries.at(inode_num())), data); } void dir::pack(thrift::metadata::metadata& mv2, global_entry_data const& data) const { thrift::metadata::directory d; - d.parent_inode = + d.parent_entry = has_parent() ? std::dynamic_pointer_cast(parent())->inode_num() : 0; - d.first_entry = mv2.entries.size(); - // d.entry_count = entries_.size(); + d.first_entry = mv2.dir_entries_ref()->size(); mv2.directories.push_back(d); for (entry_ptr const& e : entries_) { - DWARFS_NOTHROW(mv2.entry_table_v2_2.at(e->inode_num())) = - mv2.entries.size(); - mv2.entries.emplace_back(); - e->pack(mv2.entries.back(), data); + auto& de = mv2.dir_entries_ref()->emplace_back(); + de.name_index = data.get_name_index(e->name()); + de.inode_num = e->inode_num(); + e->pack(DWARFS_NOTHROW(mv2.entries.at(e->inode_num())), data); } } @@ -297,8 +299,6 @@ entry::type_t link::type() const { return E_LINK; } const std::string& link::linkname() const { return link_; } -void link::set_inode(uint32_t inode) { inode_ = inode; } - void link::accept(entry_visitor& v, bool) { v.visit(this); } void link::scan(os_access& os, progress& prog) { @@ -311,8 +311,6 @@ entry::type_t device::type() const { return S_ISCHR(mode) || S_ISBLK(mode) ? E_DEVICE : E_OTHER; } -void device::set_inode(uint32_t inode) { inode_ = inode; } - void device::accept(entry_visitor& v, bool) { v.visit(this); } void device::scan(os_access&, progress&) {} diff --git a/src/dwarfs/filesystem_extractor.cpp b/src/dwarfs/filesystem_extractor.cpp index 0e0dc196..8c007374 100644 --- a/src/dwarfs/filesystem_extractor.cpp +++ b/src/dwarfs/filesystem_extractor.cpp @@ -150,7 +150,8 @@ void filesystem_extractor_::extract(filesystem_v2& fs, sem.post(max_queued_bytes); - auto do_archive = [&](::archive_entry* ae, entry_view entry) { + auto do_archive = [&](::archive_entry* ae, + inode_view entry) { // TODO: inode vs. entry if (auto size = ::archive_entry_size(ae); S_ISREG(entry.mode()) && size > 0) { auto fd = fs.open(entry); @@ -180,21 +181,23 @@ void filesystem_extractor_::extract(filesystem_v2& fs, } }; - fs.walk_inode_order([&](auto entry, auto parent) { - if (entry.inode() == 0) { + fs.walk_inode_order([&](auto entry) { + if (entry.is_root()) { return; } + auto inode = entry.inode(); + auto ae = ::archive_entry_new(); struct ::stat stbuf; - if (fs.getattr(entry, &stbuf) != 0) { + if (fs.getattr(inode, &stbuf) != 0) { DWARFS_THROW(runtime_error, "getattr() failed"); } std::string path; path.reserve(256); - parent.append_path_to(path); + entry.append_path_to(path); if (!path.empty()) { path += '/'; } @@ -203,9 +206,9 @@ void filesystem_extractor_::extract(filesystem_v2& fs, ::archive_entry_set_pathname(ae, path.c_str()); ::archive_entry_copy_stat(ae, &stbuf); - if (S_ISLNK(entry.mode())) { + if (S_ISLNK(inode.mode())) { std::string link; - if (fs.readlink(entry, &link) != 0) { + if (fs.readlink(inode, &link) != 0) { LOG_ERROR << "readlink() failed"; } ::archive_entry_set_symlink(ae, link.c_str()); @@ -214,7 +217,7 @@ void filesystem_extractor_::extract(filesystem_v2& fs, ::archive_entry_linkify(lr, &ae, &spare); if (ae) { - do_archive(ae, entry); + do_archive(ae, inode); } if (spare) { diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 0d5cc6f6..12d267a8 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -182,28 +182,23 @@ class filesystem_ final : public filesystem_v2::impl { void dump(std::ostream& os, int detail_level) const override; folly::dynamic metadata_as_dynamic() const override; std::string serialize_metadata_as_json(bool simple) const override; - void walk(std::function const& func) const override; - void walk(std::function const& func) - const override; - void - walk_inode_order(std::function const& func) const override; - void - walk_inode_order(std::function const& func) - const override; - std::optional find(const char* path) const override; - std::optional find(int inode) const override; - std::optional find(int inode, const char* name) const override; - int getattr(entry_view entry, struct ::stat* stbuf) const override; - int access(entry_view entry, int mode, uid_t uid, gid_t gid) const override; - std::optional opendir(entry_view entry) const override; - std::optional> + void walk(std::function const& func) const override; + void walk_inode_order( + std::function const& func) const override; + std::optional find(const char* path) const override; + std::optional find(int inode) const override; + std::optional find(int inode, const char* name) const override; + int getattr(inode_view entry, struct ::stat* stbuf) const override; + int access(inode_view entry, int mode, uid_t uid, gid_t gid) const override; + std::optional opendir(inode_view entry) const override; + std::optional> readdir(directory_view dir, size_t offset) const override; size_t dirsize(directory_view dir) const override; - int readlink(entry_view entry, std::string* buf) const override; + int readlink(inode_view entry, std::string* buf) const override; folly::Expected - readlink(entry_view entry) const override; + readlink(inode_view entry) const override; int statvfs(struct ::statvfs* stbuf) const override; - int open(entry_view entry) const override; + int open(inode_view entry) const override; ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset) const override; ssize_t readv(uint32_t inode, iovec_read_buf& buf, size_t size, @@ -286,65 +281,53 @@ filesystem_::serialize_metadata_as_json(bool simple) const { template void filesystem_::walk( - std::function const& func) const { - meta_.walk(func); -} - -template -void filesystem_::walk( - std::function const& func) const { + std::function const& func) const { meta_.walk(func); } template void filesystem_::walk_inode_order( - std::function const& func) const { + std::function const& func) const { meta_.walk_inode_order(func); } template -void filesystem_::walk_inode_order( - std::function const& func) const { - meta_.walk_inode_order(func); -} - -template -std::optional +std::optional filesystem_::find(const char* path) const { return meta_.find(path); } template -std::optional filesystem_::find(int inode) const { +std::optional filesystem_::find(int inode) const { return meta_.find(inode); } template -std::optional +std::optional filesystem_::find(int inode, const char* name) const { return meta_.find(inode, name); } template -int filesystem_::getattr(entry_view entry, +int filesystem_::getattr(inode_view entry, struct ::stat* stbuf) const { return meta_.getattr(entry, stbuf); } template -int filesystem_::access(entry_view entry, int mode, uid_t uid, +int filesystem_::access(inode_view entry, int mode, uid_t uid, gid_t gid) const { return meta_.access(entry, mode, uid, gid); } template std::optional -filesystem_::opendir(entry_view entry) const { +filesystem_::opendir(inode_view entry) const { return meta_.opendir(entry); } template -std::optional> +std::optional> filesystem_::readdir(directory_view dir, size_t offset) const { return meta_.readdir(dir, offset); } @@ -355,14 +338,14 @@ size_t filesystem_::dirsize(directory_view dir) const { } template -int filesystem_::readlink(entry_view entry, +int filesystem_::readlink(inode_view entry, std::string* buf) const { return meta_.readlink(entry, buf); } template folly::Expected -filesystem_::readlink(entry_view entry) const { +filesystem_::readlink(inode_view entry) const { return meta_.readlink(entry); } @@ -373,7 +356,7 @@ int filesystem_::statvfs(struct ::statvfs* stbuf) const { } template -int filesystem_::open(entry_view entry) const { +int filesystem_::open(inode_view entry) const { return meta_.open(entry); } diff --git a/src/dwarfs/metadata_types.cpp b/src/dwarfs/metadata_types.cpp index acd111df..67d49921 100644 --- a/src/dwarfs/metadata_types.cpp +++ b/src/dwarfs/metadata_types.cpp @@ -20,69 +20,211 @@ */ #include "dwarfs/metadata_types.h" +#include "dwarfs/error.h" +#include "dwarfs/overloaded.h" #include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h" namespace dwarfs { -std::string_view entry_view::name() const { - return meta_->names()[name_index_v2_2()]; +uint16_t inode_view::mode() const { return meta_->modes()[mode_index()]; } + +uint16_t inode_view::getuid() const { return meta_->uids()[owner_index()]; } + +uint16_t inode_view::getgid() const { return meta_->gids()[group_index()]; } + +// TODO: pretty certain some of this stuff can be simplified + +std::string_view dir_entry_view::name() const { + return std::visit(overloaded{ + [this](DirEntryView const& dev) { + return meta_->names()[dev.name_index()]; + }, + [this](InodeView const& iv) { + return meta_->names()[iv.name_index_v2_2()]; + }, + }, + v_); } -uint16_t entry_view::mode() const { return meta_->modes()[mode_index()]; } - -uint16_t entry_view::getuid() const { return meta_->uids()[owner_index()]; } - -uint16_t entry_view::getgid() const { return meta_->gids()[group_index()]; } - -::apache::thrift::frozen::View -directory_view::getdir() const { - return getdir(entry_.inode()); +inode_view dir_entry_view::inode() const { + return std::visit(overloaded{ + [this](DirEntryView const& dev) { + return inode_view(meta_->entries()[dev.inode_num()], + dev.inode_num(), meta_); + }, + [this](InodeView const& iv) { + return inode_view(iv, iv.content_index(), meta_); + }, + }, + v_); } -::apache::thrift::frozen::View -directory_view::getdir(uint32_t ino) const { - return meta_->directories()[ino]; +// TODO: remove? +// std::optional dir_entry_view::directory() const { +// if (is_root()) { +// return std::nullopt; +// } +// +// auto dir_inode = parent_index_; +// +// if (auto de = meta_->dir_entries()) { +// dir_inode = (*de)[dir_inode].entry_index(); +// } +// +// return directory_view(dir_inode, meta_); +// } + +bool dir_entry_view::is_root() const { + return std::visit( + overloaded{ + [](DirEntryView const& dev) { return dev.inode_num() == 0; }, + [](InodeView const& iv) { return iv.content_index() == 0; }, + }, + v_); } -uint32_t directory_view::entry_count() const { return entry_count(getdir()); } +/** + * We need a parent index if the dir_entry_view is for a file. For + * directories, the parent can be determined via the directory's + * inode, but for files, this isn't possible. + */ -uint32_t directory_view::entry_count( - ::apache::thrift::frozen::View self) const { - auto next = getdir(entry_.inode() + 1); - return next.first_entry() - self.first_entry(); +dir_entry_view +dir_entry_view::from_dir_entry_index(uint32_t self_index, uint32_t parent_index, + Meta const* meta) { + if (auto de = meta->dir_entries()) { + DWARFS_CHECK(self_index < de->size(), "self_index out of range"); + DWARFS_CHECK(parent_index < de->size(), "parent_index out of range"); + + auto dev = (*de)[self_index]; + + return dir_entry_view(dev, self_index, parent_index, meta); + } + + DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range"); + DWARFS_CHECK(parent_index < meta->entries().size(), + "self_index out of range"); + + auto iv = meta->entries()[self_index]; + + return dir_entry_view(iv, self_index, parent_index, meta); } -boost::integer_range directory_view::entry_range() const { - auto d = getdir(); - auto first = d.first_entry(); - return boost::irange(first, first + entry_count(d)); +dir_entry_view +dir_entry_view::from_dir_entry_index(uint32_t self_index, Meta const* meta) { + if (auto de = meta->dir_entries()) { + DWARFS_CHECK(self_index < de->size(), "self_index out of range"); + auto dev = (*de)[self_index]; + DWARFS_CHECK(dev.inode_num() < meta->directories().size(), + "self_index inode out of range"); + return dir_entry_view(dev, self_index, + meta->directories()[dev.inode_num()].parent_entry(), + meta); + } + + DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range"); + auto iv = meta->entries()[self_index]; + + DWARFS_CHECK(iv.content_index() < meta->directories().size(), + "parent_index out of range"); + return dir_entry_view( + iv, self_index, + meta->entry_table_v2_2()[meta->directories()[iv.content_index()] + .parent_entry()], + meta); } -uint32_t directory_view::first_entry() const { return getdir().first_entry(); } +std::optional dir_entry_view::parent() const { + if (is_root()) { + return std::nullopt; + } -uint32_t directory_view::parent_inode() const { - return getdir().parent_inode(); + return from_dir_entry_index(parent_index_, meta_); } -directory_view::directory_view(uint32_t inode, Meta const* meta) - : entry_(meta->entries()[meta->entry_table_v2_2()[inode]]) - , meta_(meta) {} +std::string_view dir_entry_view::name(uint32_t index, Meta const* meta) { + if (auto de = meta->dir_entries()) { + DWARFS_CHECK(index < de->size(), "index out of range"); + auto dev = (*de)[index]; + return meta->names()[dev.name_index()]; + } -std::string directory_view::path() const { + DWARFS_CHECK(index < meta->entries().size(), "index out of range"); + auto iv = meta->entries()[index]; + return meta->names()[iv.name_index_v2_2()]; +} + +inode_view dir_entry_view::inode(uint32_t index, Meta const* meta) { + if (auto de = meta->dir_entries()) { + DWARFS_CHECK(index < de->size(), "index out of range"); + auto dev = (*de)[index]; + return inode_view(meta->entries()[dev.inode_num()], dev.inode_num(), meta); + } + + DWARFS_CHECK(index < meta->entries().size(), "index out of range"); + auto iv = meta->entries()[index]; + return inode_view(iv, iv.content_index(), meta); +} + +std::string dir_entry_view::path() const { std::string p; append_path_to(p); return p; } -void directory_view::append_path_to(std::string& s) const { - if (auto ino = parent_inode(); ino != 0) { - directory_view(parent_inode(), meta_).append_path_to(s); +void dir_entry_view::append_path_to(std::string& s) const { + if (auto p = parent()) { + p->append_path_to(s); s += '/'; } - if (inode() != 0) { - s += meta_->names()[entry_.name_index_v2_2()]; + if (!is_root()) { + s += name(); } } +directory_view::directory_view(uint32_t inode, Meta const* meta) + : DirView{getdir(inode, meta)} + , inode_{inode} + , meta_{meta} {} + +auto directory_view::getdir(uint32_t ino) const -> DirView { + return getdir(ino, meta_); +} + +auto directory_view::getdir(uint32_t ino, Meta const* meta) -> DirView { + return meta->directories()[ino]; +} + +uint32_t directory_view::entry_count() const { + return getdir(inode_ + 1).first_entry() - first_entry(); +} + +boost::integer_range directory_view::entry_range() const { + auto first = first_entry(); + return boost::irange(first, first + entry_count()); +} + +uint32_t directory_view::parent_inode() const { + if (inode_ == 0) { + return 0; + } + + auto ent = parent_entry(); + + if (auto e = meta_->dir_entries()) { + ent = (*e)[ent].inode_num(); + } + + return ent; +} + +std::optional directory_view::parent() const { + if (inode_ == 0) { + return std::nullopt; + } + + return directory_view(parent_inode(), meta_); +} + } // namespace dwarfs diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index c28da136..769e2e25 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -113,14 +113,17 @@ class metadata_ final : public metadata_v2::impl { int inode_offset) : data_(data) , meta_(map_frozen(schema, data_)) - , root_(meta_.entries()[meta_.entry_table_v2_2()[0]], &meta_) + , root_(dir_entry_view::from_dir_entry_index(0, &meta_)) , log_(lgr) , inode_offset_(inode_offset) , symlink_table_offset_(find_index_offset(inode_rank::INO_LNK)) , file_index_offset_(find_index_offset(inode_rank::INO_REG)) , dev_index_offset_(find_index_offset(inode_rank::INO_DEV)) + , inode_count_(meta_.dir_entries() ? meta_.entries().size() + : meta_.entry_table_v2_2().size()) , nlinks_(build_nlinks(options)) , options_(options) { + LOG_DEBUG << "inode count: " << inode_count_; LOG_DEBUG << "symlink table offset: " << symlink_table_offset_; LOG_DEBUG << "chunk index offset: " << file_index_offset_; LOG_DEBUG << "device index offset: " << dev_index_offset_; @@ -145,16 +148,18 @@ class metadata_ final : public metadata_v2::impl { file_index_offset_ - symlink_table_offset_)); } - if (int(meta_.chunk_table().size() - 1) != - (dev_index_offset_ - file_index_offset_)) { - DWARFS_THROW( - runtime_error, - fmt::format( - "metadata inconsistency: number of files ({}) does not match " - "device/chunk index delta ({} - {} = {})", - meta_.chunk_table().size() - 1, dev_index_offset_, - file_index_offset_, dev_index_offset_ - file_index_offset_)); - } + // TODO: this might be a silly check for v2.3 + // + // if (int(meta_.chunk_table().size() - 1) != + // (dev_index_offset_ - file_index_offset_)) { + // DWARFS_THROW( + // runtime_error, + // fmt::format( + // "metadata inconsistency: number of files ({}) does not match " + // "device/chunk index delta ({} - {} = {})", + // meta_.chunk_table().size() - 1, dev_index_offset_, + // file_index_offset_, dev_index_offset_ - file_index_offset_)); + // } if (auto devs = meta_.devices()) { auto other_offset = find_index_offset(inode_rank::INO_OTH); @@ -181,49 +186,40 @@ class metadata_ final : public metadata_v2::impl { bool empty() const override { return data_.empty(); } - void walk(std::function const& func) const override { - walk_impl(func); + void walk(std::function const& func) const override { + walk_tree([&](uint32_t self_index, uint32_t parent_index) { + walk_call(func, self_index, parent_index); + }); } - void walk(std::function const& func) - const override { - walk_impl(func); - } - - void - walk_inode_order(std::function const& func) const override { + void walk_inode_order( + std::function const& func) const override { walk_inode_order_impl(func); } - void - walk_inode_order(std::function const& func) - const override { - walk_inode_order_impl(func); - } + std::optional find(const char* path) const override; + std::optional find(int inode) const override; + std::optional find(int inode, const char* name) const override; - std::optional find(const char* path) const override; - std::optional find(int inode) const override; - std::optional find(int inode, const char* name) const override; + int getattr(inode_view entry, struct ::stat* stbuf) const override; - int getattr(entry_view entry, struct ::stat* stbuf) const override; + std::optional opendir(inode_view entry) const override; - std::optional opendir(entry_view entry) const override; - - std::optional> + std::optional> readdir(directory_view dir, size_t offset) const override; size_t dirsize(directory_view dir) const override { return 2 + dir.entry_count(); // adds '.' and '..', which we fake in ;-) } - int access(entry_view entry, int mode, uid_t uid, gid_t gid) const override; + int access(inode_view entry, int mode, uid_t uid, gid_t gid) const override; - int open(entry_view entry) const override; + int open(inode_view entry) const override; - int readlink(entry_view entry, std::string* buf) const override; + int readlink(inode_view entry, std::string* buf) const override; folly::Expected - readlink(entry_view entry) const override; + readlink(inode_view entry) const override; int statvfs(struct ::statvfs* stbuf) const override; @@ -235,12 +231,21 @@ class metadata_ final : public metadata_v2::impl { template using set_type = folly::F14ValueSet; - entry_view make_entry_view(size_t index) const { - return entry_view(meta_.entries()[index], &meta_); + inode_view make_inode_view(uint32_t inode) const { + // TODO: move compatibility details to metadata_types + uint32_t index = + meta_.dir_entries() ? inode : meta_.entry_table_v2_2()[inode]; + return inode_view(meta_.entries()[index], inode, &meta_); } - entry_view make_entry_view_from_inode(uint32_t inode) const { - return make_entry_view(meta_.entry_table_v2_2()[inode]); + dir_entry_view make_dir_entry_view(uint32_t self_index) const { + return dir_entry_view::from_dir_entry_index(self_index, &meta_); + } + + dir_entry_view + make_dir_entry_view(uint32_t self_index, uint32_t parent_index) const { + return dir_entry_view::from_dir_entry_index(self_index, parent_index, + &meta_); } // This represents the order in which inodes are stored in entry_table_v2_2 @@ -295,38 +300,51 @@ class metadata_ final : public metadata_v2::impl { } size_t find_index_offset(inode_rank rank) const { - auto range = boost::irange(size_t(0), meta_.entry_table_v2_2().size()); + if (meta_.dir_entries()) { + auto range = boost::irange(size_t(0), meta_.entries().size()); - auto it = std::lower_bound(range.begin(), range.end(), rank, - [&](auto inode, inode_rank r) { - auto e = make_entry_view_from_inode(inode); - return get_inode_rank(e.mode()) < r; - }); + auto it = std::lower_bound( + range.begin(), range.end(), rank, [&](auto inode, inode_rank r) { + auto mode = meta_.modes()[meta_.entries()[inode].mode_index()]; + return get_inode_rank(mode) < r; + }); - return *it; + return *it; + } else { + auto range = boost::irange(size_t(0), meta_.entry_table_v2_2().size()); + + auto it = std::lower_bound(range.begin(), range.end(), rank, + [&](auto inode, inode_rank r) { + auto e = make_inode_view(inode); + return get_inode_rank(e.mode()) < r; + }); + + return *it; + } } - directory_view make_directory_view(entry_view entry) const { - return directory_view(entry, &meta_); + directory_view make_directory_view(inode_view inode) const { + // TODO: revisit: is this the way to do it? + return directory_view(inode.inode_num(), &meta_); } - void dump(std::ostream& os, const std::string& indent, entry_view entry, + void dump(std::ostream& os, const std::string& indent, dir_entry_view entry, int detail_level, std::function const& icb) const; void dump(std::ostream& os, const std::string& indent, directory_view dir, - int detail_level, + dir_entry_view entry, int detail_level, std::function const& icb) const; - folly::dynamic as_dynamic(entry_view entry) const; - folly::dynamic as_dynamic(directory_view dir) const; + folly::dynamic as_dynamic(dir_entry_view entry) const; + folly::dynamic as_dynamic(directory_view dir, dir_entry_view entry) const; - std::optional + std::optional find(directory_view dir, std::string_view name) const; std::string modestring(uint16_t mode) const; - size_t reg_file_size(entry_view entry) const { - auto inode = entry.inode() - file_index_offset_; + size_t reg_file_size(inode_view entry) const { + auto inode = entry.content_index() - file_index_offset_; uint32_t cur = meta_.chunk_table()[inode]; uint32_t end = meta_.chunk_table()[inode + 1]; if (cur > end) { @@ -345,7 +363,7 @@ class metadata_ final : public metadata_v2::impl { return size; } - size_t file_size(entry_view entry, uint16_t mode) const { + size_t file_size(inode_view entry, uint16_t mode) const { if (S_ISREG(mode)) { return reg_file_size(entry); } else if (S_ISLNK(mode)) { @@ -355,49 +373,35 @@ class metadata_ final : public metadata_v2::impl { } } - void walk_call(std::function const& func, uint32_t entry, - uint32_t) const { - func(make_entry_view(entry)); - } - - void walk_call(std::function const& func, - uint32_t entry, uint32_t dir) const { - func(make_entry_view(entry), make_directory_view(make_entry_view(dir))); + void walk_call(std::function const& func, + uint32_t self_index, uint32_t parent_index) const { + func(make_dir_entry_view(self_index, parent_index)); } template - void walk(uint32_t parent_ix, uint32_t entry_ix, set_type& seen, + void walk(uint32_t self_index, uint32_t parent_index, set_type& seen, T&& func) const; template void walk_tree(T&& func) const { set_type seen; - auto root = meta_.entry_table_v2_2()[0]; - walk(root, root, seen, std::forward(func)); + walk(0, 0, seen, std::forward(func)); } - template - void walk_impl(std::function const& func) const { - walk_tree([&](uint32_t entry, uint32_t parent) { - walk_call(func, entry, parent); - }); - } + void + walk_inode_order_impl(std::function const& func) const; - template - void walk_inode_order_impl(std::function const& func) const; - - std::optional get_entry(int inode) const { + std::optional get_entry(int inode) const { inode -= inode_offset_; - std::optional rv; - if (inode >= 0 && - inode < static_cast(meta_.entry_table_v2_2().size())) { - rv = make_entry_view_from_inode(inode); + std::optional rv; + if (inode >= 0 && inode < inode_count_) { + rv = make_inode_view(inode); } return rv; } - std::string_view link_value(entry_view entry) const { - return meta_.symlinks()[meta_.symlink_table()[entry.inode() - + std::string_view link_value(inode_view entry) const { + return meta_.symlinks()[meta_.symlink_table()[entry.content_index() - symlink_table_offset_]]; } @@ -417,10 +421,19 @@ class metadata_ final : public metadata_v2::impl { nlinks.resize(dev_index_offset_ - file_index_offset_); - for (auto e : meta_.entries()) { - auto index = int(e.inode()) - file_index_offset_; - if (index >= 0 && index < int(nlinks.size())) { - ++DWARFS_NOTHROW(nlinks.at(index)); + if (auto de = meta_.dir_entries()) { + for (auto e : *de) { + auto index = int(e.inode_num()) - file_index_offset_; + if (index >= 0 && index < int(nlinks.size())) { + ++DWARFS_NOTHROW(nlinks.at(index)); + } + } + } else { + for (auto e : meta_.entries()) { + auto index = int(e.content_index()) - file_index_offset_; + if (index >= 0 && index < int(nlinks.size())) { + ++DWARFS_NOTHROW(nlinks.at(index)); + } } } @@ -433,23 +446,25 @@ class metadata_ final : public metadata_v2::impl { folly::ByteRange data_; MappedFrozen meta_; - entry_view root_; + dir_entry_view root_; log_proxy log_; const int inode_offset_; const int symlink_table_offset_; const int file_index_offset_; const int dev_index_offset_; + const int inode_count_; const std::vector nlinks_; const metadata_options options_; }; template void metadata_::dump( - std::ostream& os, const std::string& indent, entry_view entry, + std::ostream& os, const std::string& indent, dir_entry_view entry, int detail_level, std::function const& icb) const { - auto mode = entry.mode(); - auto inode = entry.inode(); + auto inode_data = entry.inode(); + auto mode = inode_data.mode(); + auto inode = inode_data.content_index(); // TODO: rename inode appropriately os << indent << " " << modestring(mode); @@ -461,15 +476,15 @@ void metadata_::dump( uint32_t beg = meta_.chunk_table()[inode - file_index_offset_]; uint32_t end = meta_.chunk_table()[inode - file_index_offset_ + 1]; os << " [" << beg << ", " << end << "]"; - os << " " << file_size(entry, mode) << "\n"; + os << " " << file_size(inode_data, mode) << "\n"; if (detail_level > 3) { icb(indent + " ", inode); } } else if (S_ISDIR(mode)) { - dump(os, indent + " ", make_directory_view(entry), detail_level, - std::move(icb)); + dump(os, indent + " ", make_directory_view(inode_data), entry, + detail_level, std::move(icb)); } else if (S_ISLNK(mode)) { - os << " -> " << link_value(entry) << "\n"; + os << " -> " << link_value(inode_data) << "\n"; } else if (S_ISBLK(mode)) { os << " (block device: " << get_device_id(inode) << ")\n"; } else if (S_ISCHR(mode)) { @@ -481,18 +496,20 @@ void metadata_::dump( } } +// TODO: can we move this to dir_entry_view? template void metadata_::dump( std::ostream& os, const std::string& indent, directory_view dir, - int detail_level, + dir_entry_view entry, int detail_level, std::function const& icb) const { auto count = dir.entry_count(); auto first = dir.first_entry(); - os << " (" << count << " entries, parent=" << dir.parent_inode() << ")\n"; + os << " (" << count << " entries, parent=" << dir.parent_entry() << ")\n"; for (size_t i = 0; i < count; ++i) { - dump(os, indent, make_entry_view(first + i), detail_level, icb); + dump(os, indent, make_dir_entry_view(first + i, entry.self_index()), + detail_level, icb); } } @@ -533,35 +550,38 @@ void metadata_::dump( analyze_frozen(os, meta_); } - if (detail_level > 2) { - dump(os, "", root_, detail_level, icb); - } - if (detail_level > 4) { os << ::apache::thrift::debugString(meta_.thaw()) << '\n'; } + + if (detail_level > 2) { + dump(os, "", root_, detail_level, icb); + } } template -folly::dynamic metadata_::as_dynamic(directory_view dir) const { +folly::dynamic metadata_::as_dynamic(directory_view dir, + dir_entry_view entry) const { folly::dynamic obj = folly::dynamic::array; auto count = dir.entry_count(); auto first = dir.first_entry(); for (size_t i = 0; i < count; ++i) { - obj.push_back(as_dynamic(make_entry_view(first + i))); + obj.push_back( + as_dynamic(make_dir_entry_view(first + i, entry.self_index()))); } return obj; } template -folly::dynamic metadata_::as_dynamic(entry_view entry) const { +folly::dynamic metadata_::as_dynamic(dir_entry_view entry) const { folly::dynamic obj = folly::dynamic::object; - auto mode = entry.mode(); - auto inode = entry.inode(); + auto inode_data = entry.inode(); + auto mode = inode_data.mode(); + auto inode = inode_data.content_index(); // TODO: rename all the things obj["mode"] = mode; obj["modestring"] = modestring(mode); @@ -573,13 +593,13 @@ folly::dynamic metadata_::as_dynamic(entry_view entry) const { if (S_ISREG(mode)) { obj["type"] = "file"; - obj["size"] = file_size(entry, mode); + obj["size"] = file_size(inode_data, mode); } else if (S_ISDIR(mode)) { obj["type"] = "directory"; - obj["entries"] = as_dynamic(make_directory_view(entry)); + obj["entries"] = as_dynamic(make_directory_view(inode_data), entry); } else if (S_ISLNK(mode)) { obj["type"] = "link"; - obj["target"] = std::string(link_value(entry)); + obj["target"] = std::string(link_value(inode_data)); } else if (S_ISBLK(mode)) { obj["type"] = "blockdev"; obj["device_id"] = get_device_id(inode); @@ -646,23 +666,24 @@ std::string metadata_::modestring(uint16_t mode) const { template template -void metadata_::walk(uint32_t parent_ix, uint32_t entry_ix, +void metadata_::walk(uint32_t self_index, uint32_t parent_index, set_type& seen, T&& func) const { - func(entry_ix, parent_ix); + func(self_index, parent_index); - auto entry = make_entry_view(entry_ix); + auto entry = make_dir_entry_view(self_index, parent_index); + auto inode_data = entry.inode(); - if (S_ISDIR(entry.mode())) { - auto inode = entry.inode(); + if (S_ISDIR(inode_data.mode())) { + auto inode = inode_data.content_index(); if (!seen.emplace(inode).second) { DWARFS_THROW(runtime_error, "cycle detected during directory walk"); } - auto dir = make_directory_view(entry); + auto dir = make_directory_view(inode_data); - for (auto cur : dir.entry_range()) { - walk(entry_ix, cur, seen, func); + for (auto cur_index : dir.entry_range()) { + walk(cur_index, self_index, seen, func); } seen.erase(inode); @@ -670,49 +691,53 @@ void metadata_::walk(uint32_t parent_ix, uint32_t entry_ix, } template -template void metadata_::walk_inode_order_impl( - std::function const& func) const { + std::function const& func) const { std::vector> entries; { auto td = LOG_TIMED_DEBUG; - walk_tree([&](uint32_t entry_ix, uint32_t parent_ix) { - entries.emplace_back(entry_ix, parent_ix); + walk_tree([&](uint32_t self_index, uint32_t parent_index) { + entries.emplace_back(self_index, parent_index); }); - std::sort(entries.begin(), entries.end(), - [this](auto const& a, auto const& b) { - return meta_.entries()[a.first].inode() < - meta_.entries()[b.first].inode(); - }); + if (auto dep = meta_.dir_entries()) { + std::sort(entries.begin(), entries.end(), + [de = *dep](auto const& a, auto const& b) { + return de[a.first].inode_num() < de[b.first].inode_num(); + }); + } else { + std::sort(entries.begin(), entries.end(), + [this](auto const& a, auto const& b) { + return meta_.entries()[a.first].content_index() < + meta_.entries()[b.first].content_index(); + }); + } td << "ordered " << entries.size() << " entries by inode"; } - for (auto [entry, parent] : entries) { - walk_call(func, entry, parent); + for (auto [self_index, parent_index] : entries) { + walk_call(func, self_index, parent_index); } } template -std::optional +std::optional metadata_::find(directory_view dir, std::string_view name) const { auto range = dir.entry_range(); auto it = std::lower_bound(range.begin(), range.end(), name, [&](auto ix, std::string_view name) { - return make_entry_view(ix).name() < name; + return dir_entry_view::name(ix, &meta_) < name; }); - std::optional rv; + std::optional rv; if (it != range.end()) { - auto cand = make_entry_view(*it); - - if (cand.name() == name) { - rv = cand; + if (dir_entry_view::name(*it, &meta_) == name) { + rv = dir_entry_view::inode(*it, &meta_); } } @@ -720,13 +745,13 @@ metadata_::find(directory_view dir, std::string_view name) const { } template -std::optional +std::optional metadata_::find(const char* path) const { while (*path and *path == '/') { ++path; } - std::optional entry = root_; + std::optional entry = root_.inode(); while (*path) { const char* next = ::strchr(path, '/'); @@ -745,12 +770,12 @@ metadata_::find(const char* path) const { } template -std::optional metadata_::find(int inode) const { +std::optional metadata_::find(int inode) const { return get_entry(inode); } template -std::optional +std::optional metadata_::find(int inode, const char* name) const { auto entry = get_entry(inode); @@ -762,13 +787,13 @@ metadata_::find(int inode, const char* name) const { } template -int metadata_::getattr(entry_view entry, +int metadata_::getattr(inode_view entry, struct ::stat* stbuf) const { ::memset(stbuf, 0, sizeof(*stbuf)); auto mode = entry.mode(); auto timebase = meta_.timestamp_base(); - auto inode = entry.inode(); + auto inode = entry.inode_num(); bool mtime_only = meta_.options() && meta_.options()->mtime_only(); uint32_t resolution = 1; if (meta_.options()) { @@ -808,7 +833,7 @@ int metadata_::getattr(entry_view entry, template std::optional -metadata_::opendir(entry_view entry) const { +metadata_::opendir(inode_view entry) const { std::optional rv; if (S_ISDIR(entry.mode())) { @@ -819,14 +844,14 @@ metadata_::opendir(entry_view entry) const { } template -std::optional> +std::optional> metadata_::readdir(directory_view dir, size_t offset) const { switch (offset) { case 0: - return std::pair(make_entry_view_from_inode(dir.inode()), "."); + return std::pair(make_inode_view(dir.inode()), "."); case 1: - return std::pair(make_entry_view_from_inode(dir.parent_inode()), ".."); + return std::pair(make_inode_view(dir.parent_inode()), ".."); default: offset -= 2; @@ -835,16 +860,16 @@ metadata_::readdir(directory_view dir, size_t offset) const { break; } - auto entry = make_entry_view(dir.first_entry() + offset); - - return std::pair(entry, entry.name()); + auto index = dir.first_entry() + offset; + auto inode = dir_entry_view::inode(index, &meta_); + return std::pair(inode, dir_entry_view::name(index, &meta_)); } return std::nullopt; } template -int metadata_::access(entry_view entry, int mode, uid_t uid, +int metadata_::access(inode_view entry, int mode, uid_t uid, gid_t gid) const { if (mode == F_OK) { // easy; we're only interested in the file's existance @@ -878,16 +903,16 @@ int metadata_::access(entry_view entry, int mode, uid_t uid, } template -int metadata_::open(entry_view entry) const { +int metadata_::open(inode_view entry) const { if (S_ISREG(entry.mode())) { - return entry.inode(); + return entry.content_index(); } return -1; } template -int metadata_::readlink(entry_view entry, +int metadata_::readlink(inode_view entry, std::string* buf) const { if (S_ISLNK(entry.mode())) { buf->assign(link_value(entry)); @@ -899,7 +924,7 @@ int metadata_::readlink(entry_view entry, template folly::Expected -metadata_::readlink(entry_view entry) const { +metadata_::readlink(inode_view entry) const { if (S_ISLNK(entry.mode())) { return link_value(entry); } diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 6c1afa20..77705e1e 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -72,10 +72,12 @@ class visitor_base : public entry_visitor { class scan_files_visitor : public visitor_base { public: - scan_files_visitor(worker_group& wg, os_access& os, progress& prog) + scan_files_visitor(worker_group& wg, os_access& os, progress& prog, + uint32_t& inode_num) : wg_(wg) , os_(os) - , prog_(prog) {} + , prog_(prog) + , inode_num_(inode_num) {} void visit(file* p) override { if (p->num_hard_links() > 1) { @@ -84,18 +86,19 @@ class scan_files_visitor : public visitor_base { if (!is_new) { p->hardlink(it->second, prog_); - prog_.files_scanned++; - prog_.hardlinks++; + p->set_inode_num(it->second->inode_num()); + ++prog_.files_scanned; return; } } p->create_data(); + p->set_inode_num(inode_num_++); wg_.add_job([=] { prog_.current.store(p); p->scan(os_, prog_); - prog_.files_scanned++; + ++prog_.files_scanned; }); } @@ -104,6 +107,7 @@ class scan_files_visitor : public visitor_base { os_access& os_; progress& prog_; folly::F14FastMap cache_; + uint32_t& inode_num_; }; class file_deduplication_visitor : public visitor_base { @@ -150,39 +154,39 @@ class file_deduplication_visitor : public visitor_base { class dir_set_inode_visitor : public visitor_base { public: - explicit dir_set_inode_visitor(uint32_t& inode_no) - : inode_no_(inode_no) {} + explicit dir_set_inode_visitor(uint32_t& inode_num) + : inode_num_(inode_num) {} void visit(dir* p) override { p->sort(); - p->set_inode(inode_no_++); + p->set_inode_num(inode_num_++); } - uint32_t inode_no() const { return inode_no_; } + uint32_t inode_num() const { return inode_num_; } private: - uint32_t& inode_no_; + uint32_t& inode_num_; }; class link_set_inode_visitor : public visitor_base { public: - explicit link_set_inode_visitor(uint32_t& inode_no) - : inode_no_(inode_no) {} + explicit link_set_inode_visitor(uint32_t& inode_num) + : inode_num_(inode_num) {} - void visit(link* p) override { p->set_inode(inode_no_++); } + void visit(link* p) override { p->set_inode_num(inode_num_++); } private: - uint32_t& inode_no_; + uint32_t& inode_num_; }; class device_set_inode_visitor : public visitor_base { public: - explicit device_set_inode_visitor(uint32_t& inode_no) - : inode_no_(inode_no) {} + explicit device_set_inode_visitor(uint32_t& inode_num) + : inode_num_(inode_num) {} void visit(device* p) override { if (p->type() == entry::E_DEVICE) { - p->set_inode(inode_no_++); + p->set_inode_num(inode_num_++); dev_ids_.push_back(p->device_id()); } } @@ -191,22 +195,22 @@ class device_set_inode_visitor : public visitor_base { private: std::vector dev_ids_; - uint32_t& inode_no_; + uint32_t& inode_num_; }; class pipe_set_inode_visitor : public visitor_base { public: - explicit pipe_set_inode_visitor(uint32_t& inode_no) - : inode_no_(inode_no) {} + explicit pipe_set_inode_visitor(uint32_t& inode_num) + : inode_num_(inode_num) {} void visit(device* p) override { if (p->type() != entry::E_DEVICE) { - p->set_inode(inode_no_++); + p->set_inode_num(inode_num_++); } } private: - uint32_t& inode_no_; + uint32_t& inode_num_; }; class names_and_symlinks_visitor : public entry_visitor { @@ -251,8 +255,8 @@ class save_directories_visitor : public visitor_base { } thrift::metadata::directory dummy; - dummy.parent_inode = 0; - dummy.first_entry = mv2.entries.size(); + dummy.parent_entry = 0; + dummy.first_entry = mv2.dir_entries_ref()->size(); mv2.directories.push_back(dummy); } @@ -452,6 +456,8 @@ scanner_::scan_tree(const std::string& path, progress& prog) { return root; } +// TODO: all _inode stuff should be named _index or something + template void scanner_::scan(filesystem_writer& fsw, const std::string& path, progress& prog) { @@ -467,13 +473,6 @@ void scanner_::scan(filesystem_writer& fsw, d->remove_empty_dirs(prog); } - // now scan all files - scan_files_visitor sfv(wg_, *os_, prog); - root->accept(sfv); - - LOG_INFO << "waiting for background scanners..."; - wg_.wait(); - LOG_INFO << "assigning directory and link inodes..."; uint32_t first_link_inode = 0; @@ -484,6 +483,14 @@ void scanner_::scan(filesystem_writer& fsw, link_set_inode_visitor lsiv(first_file_inode); root->accept(lsiv, true); + // now scan all files + uint32_t first_device_inode = first_file_inode; + scan_files_visitor sfv(wg_, *os_, prog, first_device_inode); + root->accept(sfv); + + LOG_INFO << "waiting for background scanners..."; + wg_.wait(); + LOG_INFO << "finding duplicate files..."; inode_manager im(lgr_, prog); @@ -509,14 +516,14 @@ void scanner_::scan(filesystem_writer& fsw, mv2.symlink_table.resize(first_file_inode - first_link_inode); LOG_INFO << "assigning device inodes..."; - uint32_t first_device_inode = first_file_inode + im.count(); - device_set_inode_visitor devsiv(first_device_inode); + uint32_t first_pipe_inode = first_device_inode; + device_set_inode_visitor devsiv(first_pipe_inode); root->accept(devsiv); mv2.devices_ref() = std::move(devsiv.device_ids()); LOG_INFO << "assigning pipe/socket inodes..."; - uint32_t first_pipe_inode = first_device_inode; - pipe_set_inode_visitor pipsiv(first_pipe_inode); + uint32_t last_inode = first_pipe_inode; + pipe_set_inode_visitor pipsiv(last_inode); root->accept(pipsiv); LOG_INFO << "building metadata..."; @@ -592,7 +599,8 @@ void scanner_::scan(filesystem_writer& fsw, LOG_DEBUG << "total number of chunks: " << mv2.chunks.size(); LOG_INFO << "saving directories..."; - mv2.entry_table_v2_2.resize(first_pipe_inode); + mv2.set_dir_entries(std::vector()); + mv2.entries.resize(last_inode); mv2.directories.reserve(first_link_inode + 1); save_directories_visitor sdv(first_link_inode); root->accept(sdv); diff --git a/src/dwarfsbench.cpp b/src/dwarfsbench.cpp index 42d23c60..80bd8d94 100644 --- a/src/dwarfsbench.cpp +++ b/src/dwarfsbench.cpp @@ -106,13 +106,14 @@ int dwarfsbench(int argc, char** argv) { worker_group wg("reader", num_readers); fs.walk([&](auto entry) { - if (S_ISREG(entry.mode())) { - wg.add_job([&fs, entry] { + auto inode_data = entry.inode(); + if (S_ISREG(inode_data.mode())) { + wg.add_job([&fs, inode_data] { try { struct ::stat stbuf; - if (fs.getattr(entry, &stbuf) == 0) { + if (fs.getattr(inode_data, &stbuf) == 0) { std::vector buf(stbuf.st_size); - int fh = fs.open(entry); + int fh = fs.open(inode_data); fs.read(fh, buf.data(), buf.size()); } } catch (runtime_error const& e) { diff --git a/test/dwarfs.cpp b/test/dwarfs.cpp index a7c0378d..43eaf71e 100644 --- a/test/dwarfs.cpp +++ b/test/dwarfs.cpp @@ -245,6 +245,14 @@ void basic_end_to_end_test(std::string const& compressor, filesystem_v2 fs(lgr, mm, opts); + // fs.dump(std::cerr, 9); + + std::ostringstream dumpss; + + fs.dump(dumpss, 9); + + EXPECT_GT(dumpss.str().size(), 1000) << dumpss.str(); + auto entry = fs.find("/foo.pl"); struct ::stat st; @@ -395,7 +403,7 @@ void basic_end_to_end_test(std::string const& compressor, auto e2 = fs.find("/bar.pl"); ASSERT_TRUE(e2); - EXPECT_EQ(entry->inode(), e2->inode()); + EXPECT_EQ(entry->content_index(), e2->content_index()); struct ::stat st1, st2; ASSERT_EQ(0, fs.getattr(*entry, &st1)); @@ -403,21 +411,19 @@ void basic_end_to_end_test(std::string const& compressor, EXPECT_EQ(st1.st_ino, st2.st_ino); if (enable_nlink) { - EXPECT_EQ(3, st1.st_nlink); // TODO: this should be 2 - EXPECT_EQ(3, st2.st_nlink); // TODO: this should be 2 + EXPECT_EQ(2, st1.st_nlink); + EXPECT_EQ(2, st2.st_nlink); } entry = fs.find("/"); ASSERT_TRUE(entry); - EXPECT_EQ(0, entry->inode()); + EXPECT_EQ(0, entry->content_index()); e2 = fs.find(0); ASSERT_TRUE(e2); - EXPECT_EQ(e2->inode(), 0); - e2 = fs.find(0, "baz.pl"); - ASSERT_TRUE(e2); - EXPECT_GT(e2->inode(), 0); - entry = fs.find(e2->inode()); + EXPECT_EQ(e2->content_index(), 0); + entry = fs.find(0, "baz.pl"); ASSERT_TRUE(entry); + EXPECT_GT(entry->content_index(), 0); ASSERT_EQ(0, fs.getattr(*entry, &st1)); EXPECT_EQ(23456, st1.st_size); e2 = fs.find(0, "somedir"); @@ -432,52 +438,38 @@ void basic_end_to_end_test(std::string const& compressor, ASSERT_TRUE(entry); EXPECT_EQ(set_uid ? EACCES : 0, fs.access(*entry, R_OK, 1337, 0)); - using mptype = void (filesystem_v2::*)( - std::function const&) const; - - for (auto mp : {static_cast(&filesystem_v2::walk), - static_cast(&filesystem_v2::walk_inode_order)}) { + for (auto mp : {&filesystem_v2::walk, &filesystem_v2::walk_inode_order}) { std::map entries; std::vector inodes; - (fs.*mp)([&](entry_view e, directory_view d) { + (fs.*mp)([&](dir_entry_view e) { struct ::stat stbuf; - ASSERT_EQ(0, fs.getattr(e, &stbuf)); + ASSERT_EQ(0, fs.getattr(e.inode(), &stbuf)); inodes.push_back(stbuf.st_ino); - std::string path; - if (e.inode() > 0) { - if (auto dp = d.path(); !dp.empty()) { - path += "/" + dp; - } - path += "/" + std::string(e.name()); - } - EXPECT_TRUE(entries.emplace(path, stbuf).second); + EXPECT_TRUE(entries.emplace(e.path(), stbuf).second); }); EXPECT_EQ(entries.size(), dwarfs::test::statmap.size() + 2 * with_devices + with_specials - 3); for (auto const& [p, st] : entries) { - auto const& stref = dwarfs::test::statmap.at(p); - EXPECT_EQ(stref.st_mode, st.st_mode) << p; - EXPECT_EQ(set_uid ? 0 : stref.st_uid, st.st_uid) << p; - EXPECT_EQ(set_gid ? 0 : stref.st_gid, st.st_gid) << p; - if (!S_ISDIR(st.st_mode)) { - EXPECT_EQ(stref.st_size, st.st_size) << p; + auto it = dwarfs::test::statmap.find(p); + EXPECT_TRUE(it != dwarfs::test::statmap.end()) << p; + if (it != dwarfs::test::statmap.end()) { + EXPECT_EQ(it->second.st_mode, st.st_mode) << p; + EXPECT_EQ(set_uid ? 0 : it->second.st_uid, st.st_uid) << p; + EXPECT_EQ(set_gid ? 0 : it->second.st_gid, st.st_gid) << p; + if (!S_ISDIR(st.st_mode)) { + EXPECT_EQ(it->second.st_size, st.st_size) << p; + } } } - if (mp == static_cast(&filesystem_v2::walk_inode_order)) { + if (mp == &filesystem_v2::walk_inode_order) { EXPECT_TRUE(std::is_sorted(inodes.begin(), inodes.end())); } } - std::ostringstream dumpss; - - fs.dump(dumpss, 9); - - EXPECT_GT(dumpss.str().size(), 1000) << dumpss.str(); - auto dyn = fs.metadata_as_dynamic(); EXPECT_TRUE(dyn.isObject()); diff --git a/test/dwarfs_compat.cpp b/test/dwarfs_compat.cpp index 707275db..51e33a9e 100644 --- a/test/dwarfs_compat.cpp +++ b/test/dwarfs_compat.cpp @@ -288,39 +288,31 @@ TEST_P(compat_filesystem, backwards_compat) { {"/test.py", make_stat(S_IFREG | 0644, 1012)}, }; - using mptype = void (filesystem_v2::*)( - std::function const&) const; - - for (auto mp : {static_cast(&filesystem_v2::walk), - static_cast(&filesystem_v2::walk_inode_order)}) { + for (auto mp : {&filesystem_v2::walk, &filesystem_v2::walk_inode_order}) { std::map entries; std::vector inodes; - (fs.*mp)([&](entry_view e, directory_view d) { + (fs.*mp)([&](dir_entry_view e) { struct ::stat stbuf; - ASSERT_EQ(0, fs.getattr(e, &stbuf)); + ASSERT_EQ(0, fs.getattr(e.inode(), &stbuf)); inodes.push_back(stbuf.st_ino); - std::string path; - if (e.inode() > 0) { - if (auto dp = d.path(); !dp.empty()) { - path += "/" + dp; - } - path += "/" + std::string(e.name()); - } - EXPECT_TRUE(entries.emplace(path, stbuf).second); + EXPECT_TRUE(entries.emplace(e.path(), stbuf).second); }); EXPECT_EQ(entries.size(), ref_entries.size()); for (auto const& [p, st] : entries) { - auto const& stref = ref_entries.at(p); - EXPECT_EQ(stref.st_mode, st.st_mode) << p; - EXPECT_EQ(1000, st.st_uid) << p; - EXPECT_EQ(100, st.st_gid) << p; - EXPECT_EQ(stref.st_size, st.st_size) << p; + auto it = ref_entries.find(p); + EXPECT_TRUE(it != ref_entries.end()) << p; + if (it != ref_entries.end()) { + EXPECT_EQ(it->second.st_mode, st.st_mode) << p; + EXPECT_EQ(1000, st.st_uid) << p; + EXPECT_EQ(100, st.st_gid) << p; + EXPECT_EQ(it->second.st_size, st.st_size) << p; + } } - if (mp == static_cast(&filesystem_v2::walk_inode_order)) { + if (mp == &filesystem_v2::walk_inode_order) { EXPECT_TRUE(std::is_sorted(inodes.begin(), inodes.end())); } } diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index 4d9254dd..62c59c14 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -42,7 +42,7 @@ struct chunk { * `metadata.entries`. */ struct directory { - 1: required UInt32 parent_inode, // indexes into entries + 1: required UInt32 parent_entry, // indexes into dir_entries 2: required UInt32 first_entry, // indexes into dir_entries } @@ -74,7 +74,14 @@ struct inode_data { * - For files, (inode - chunk_index_offset) can be * used as in index into metadata.chunk_table. */ - 3: required UInt32 inode, ///// <---------- rename to content_index + 3: required UInt32 content_index, + + //-------------------------------------------------------------------------- + // TODO: actually, the inode field is redundant as of v2.3, as entries are + // ordered by inode already; maybe we can drop this? + // + // we definitely need it for files to point into chunk_table + //-------------------------------------------------------------------------- // index into metadata.uids 4: required UInt16 owner_index, @@ -101,7 +108,7 @@ struct dir_entry { ///// <--------- or entry? 1: required UInt32 name_index, // index into metadata.entries - 2: required UInt32 entry_index, ///// <--------- entries (inodes) are shared for hardlinks + 2: required UInt32 inode_num, ///// <--------- entries (inodes) are shared for hardlinks } struct fs_options { @@ -216,11 +223,10 @@ struct metadata { //=========================================================// /** - * - * - * + * TODO TODO TODO describe this */ 19: optional list dir_entries, - 20: optional UInt64 timestamp, + // TODO: add timestamp + // 20: optional UInt64 timestamp, }