From 594026143952ff21863f67b1e72b0c72d3eda6fb Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Fri, 27 Nov 2020 00:12:02 +0100 Subject: [PATCH] metadata_v2: implement find --- include/dwarfs/entry.h | 3 +- include/dwarfs/metadata_v2.h | 71 +++++---- src/dwarfs/filesystem.cpp | 2 +- src/dwarfs/metadata_v2.cpp | 283 ++++++++++++++++++++--------------- src/dwarfs/scanner.cpp | 9 +- 5 files changed, 207 insertions(+), 161 deletions(-) diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h index 551dd595..0496162b 100644 --- a/include/dwarfs/entry.h +++ b/include/dwarfs/entry.h @@ -39,7 +39,8 @@ namespace dwarfs { struct global_entry_data { - global_entry_data(bool no_time) : no_time_(no_time) {} + global_entry_data(bool no_time) + : no_time_(no_time) {} void add_uid(uint16_t uid) { add(uid, uids, next_uid_index); } diff --git a/include/dwarfs/metadata_v2.h b/include/dwarfs/metadata_v2.h index 4b3d1d8e..579e7c9a 100644 --- a/include/dwarfs/metadata_v2.h +++ b/include/dwarfs/metadata_v2.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -38,14 +39,15 @@ namespace dwarfs { using entry_view = ::apache::thrift::frozen::View; -using directory_view = ::apache::thrift::frozen::View; +using directory_view = + ::apache::thrift::frozen::View; class metadata_v2 { public: metadata_v2() = default; metadata_v2(logger& lgr, std::vector&& data, - const struct ::stat* defaults); + const struct ::stat* defaults, int inode_offset); metadata_v2& operator=(metadata_v2&&) = default; @@ -65,49 +67,51 @@ class metadata_v2 { impl_->walk(func); } + std::optional find(const char* path) const { + return impl_->find(path); + } + + std::optional find(int inode) const { return impl_->find(inode); } + + std::optional find(int inode, const char* name) const { + return impl_->find(inode, name); + } + #if 0 size_t block_size() const { return impl_->block_size(); } unsigned block_size_bits() const { return impl_->block_size_bits(); } - const dir_entry* find(const 
char* path) const { return impl_->find(path); } - - const dir_entry* find(int inode) const { return impl_->find(inode); } - - const dir_entry* find(int inode, const char* name) const { - return impl_->find(inode, name); - } - - int getattr(const dir_entry* de, struct ::stat* stbuf) const { + int getattr(entry_view de, struct ::stat* stbuf) const { return impl_->getattr(de, stbuf); } - int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const { + int access(entry_view de, int mode, uid_t uid, gid_t gid) const { return impl_->access(de, mode, uid, gid); } - const directory* opendir(const dir_entry* de) const { + directory_view opendir(entry_view de) const { return impl_->opendir(de); } - const dir_entry* - readdir(const directory* d, size_t offset, std::string* name) const { + entry_view + readdir(directory_view d, size_t offset, std::string* name) const { return impl_->readdir(d, offset, name); } - size_t dirsize(const directory* d) const { return impl_->dirsize(d); } + size_t dirsize(directory_view d) const { return impl_->dirsize(d); } - int readlink(const dir_entry* de, char* buf, size_t size) const { + int readlink(entry_view de, char* buf, size_t size) const { return impl_->readlink(de, buf, size); } - int readlink(const dir_entry* de, std::string* buf) const { + int readlink(entry_view de, std::string* buf) const { return impl_->readlink(de, buf); } int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); } - int open(const dir_entry* de) const { return impl_->open(de); } + int open(entry_view de) const { return impl_->open(de); } const chunk_type* get_chunks(int inode, size_t& num) const { return impl_->get_chunks(inode, num); @@ -125,25 +129,26 @@ class metadata_v2 { virtual size_t size() const = 0; virtual bool empty() const = 0; - virtual void - walk(std::function const& func) const = 0; + virtual void walk(std::function const& func) const = 0; + + virtual std::optional find(const char* path) const = 0; + virtual std::optional 
find(int inode) const = 0; + virtual std::optional + find(int inode, const char* name) const = 0; #if 0 virtual size_t block_size() const = 0; virtual unsigned block_size_bits() const = 0; - virtual const dir_entry* find(const char* path) const = 0; - virtual const dir_entry* find(int inode) const = 0; - virtual const dir_entry* find(int inode, const char* name) const = 0; - virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0; + virtual int getattr(entry_view de, struct ::stat* stbuf) const = 0; virtual int - access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0; - virtual const directory* opendir(const dir_entry* de) const = 0; - virtual const dir_entry* - readdir(const directory* d, size_t offset, std::string* name) const = 0; - virtual size_t dirsize(const directory* d) const = 0; - virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0; - virtual int readlink(const dir_entry* de, std::string* buf) const = 0; + access(entry_view de, int mode, uid_t uid, gid_t gid) const = 0; + virtual directory_view opendir(entry_view de) const = 0; + virtual entry_view + readdir(directory_view d, size_t offset, std::string* name) const = 0; + virtual size_t dirsize(directory_view d) const = 0; + virtual int readlink(entry_view de, char* buf, size_t size) const = 0; + virtual int readlink(entry_view de, std::string* buf) const = 0; virtual int statvfs(struct ::statvfs* stbuf) const = 0; - virtual int open(const dir_entry* de) const = 0; + virtual int open(entry_view de) const = 0; virtual const chunk_type* get_chunks(int inode, size_t& num) const = 0; #endif }; diff --git a/src/dwarfs/filesystem.cpp b/src/dwarfs/filesystem.cpp index 811bb35e..a3516729 100644 --- a/src/dwarfs/filesystem.cpp +++ b/src/dwarfs/filesystem.cpp @@ -165,7 +165,7 @@ filesystem_::filesystem_(logger& lgr, std::shared_ptr mm, metadata_v2(lgr, block_decompressor::decompress( sh.compression, mm_->as(start), sh.length), - stat_defaults); + 
stat_defaults, inode_offset); break; default: diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index b99a2b1a..60d2fc38 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -25,6 +25,8 @@ #include +#include + #include "dwarfs/metadata_v2.h" #include "dwarfs/gen-cpp2/metadata_layouts.h" @@ -39,14 +41,16 @@ namespace dwarfs { template class metadata_v2_ : public metadata_v2::impl { public: - // TODO: pass folly::ByteRange instead of vector (so we can support memory mapping) + // TODO: pass folly::ByteRange instead of vector (so we can support memory + // mapping) metadata_v2_(logger& lgr, std::vector&& meta, - const struct ::stat* /*defaults*/) + const struct ::stat* /*defaults*/, int inode_offset) : data_(std::move(meta)) , meta_(::apache::thrift::frozen::mapFrozen( data_)) , root_(meta_.entries()[meta_.entry_index()[0]]) - , inode_offset_(meta_.chunk_index_offset()) + , inode_offset_(inode_offset) + , chunk_index_offset_(meta_.chunk_index_offset()) , log_(lgr) { // TODO: defaults? 
log_.debug() << ::apache::thrift::debugString(meta_.thaw()); @@ -68,6 +72,10 @@ class metadata_v2_ : public metadata_v2::impl { void walk(std::function const& func) const override; + std::optional find(const char* path) const override; + std::optional find(int inode) const override; + std::optional find(int inode, const char* name) const override; + #if 0 size_t block_size() const override { return static_cast(1) << cfg_->block_size_bits; @@ -75,22 +83,19 @@ class metadata_v2_ : public metadata_v2::impl { unsigned block_size_bits() const override { return cfg_->block_size_bits; } - const dir_entry* find(const char* path) const override; - const dir_entry* find(int inode) const override; - const dir_entry* find(int inode, const char* name) const override; - int getattr(const dir_entry* de, struct ::stat* stbuf) const override; - int access(const dir_entry* de, int mode, uid_t uid, + int getattr(entry_view entry, struct ::stat* stbuf) const override; + int access(entry_view entry, int mode, uid_t uid, gid_t gid) const override; - const directory* opendir(const dir_entry* de) const override; - const dir_entry* - readdir(const directory* d, size_t offset, std::string* name) const override; - size_t dirsize(const directory* d) const override { + directory_view opendir(entry_view entry) const override; + entry_view + readdir(directory_view d, size_t offset, std::string* name) const override; + size_t dirsize(directory_view d) const override { return d->count + 2; // adds '.' 
and '..', which we fake in ;-) } - int readlink(const dir_entry* de, char* buf, size_t size) const override; - int readlink(const dir_entry* de, std::string* buf) const override; + int readlink(entry_view entry, char* buf, size_t size) const override; + int readlink(entry_view entry, std::string* buf) const override; int statvfs(struct ::statvfs* stbuf) const override; - int open(const dir_entry* de) const override; + int open(entry_view entry) const override; const chunk_type* get_chunks(int inode, size_t& num) const override; #endif @@ -101,9 +106,12 @@ class metadata_v2_ : public metadata_v2::impl { void dump(std::ostream& os, const std::string& indent, directory_view dir, std::function const& icb) const; + std::optional find(directory_view d, std::string_view name) const; + std::string modestring(uint16_t mode) const; size_t reg_filesize(uint32_t inode) const { + inode -= chunk_index_offset_; uint32_t cur = meta_.chunk_index()[inode]; uint32_t end = meta_.chunk_index()[inode + 1]; size_t size = 0; @@ -117,7 +125,10 @@ class metadata_v2_ : public metadata_v2::impl { if (S_ISREG(mode)) { return reg_filesize(entry.inode()); } else if (S_ISLNK(mode)) { - return meta_.links()[meta_.link_index()[entry.inode() - meta_.link_index_offset()]].size(); + return meta_ + .links()[meta_ + .link_index()[entry.inode() - meta_.link_index_offset()]] + .size(); } else { return 0; } @@ -127,25 +138,34 @@ class metadata_v2_ : public metadata_v2::impl { return meta_.directories()[entry.inode()]; } - void walk(entry_view entry, - std::function const& func) const; + void + walk(entry_view entry, std::function const& func) const; + + std::optional get_entry(int inode) const { + inode -= inode_offset_; + std::optional rv; + if (inode >= 0 && inode < int(meta_.entry_index().size())) { + rv = meta_.entries()[meta_.entry_index()[inode]]; + } + return rv; + } #if 0 - std::string name(const dir_entry* de) const { - return std::string(as(de->name_offset), de->name_size); + std::string 
name(entry_view entry) const { + return std::string(as(entry->name_offset), entry->name_size); } - size_t linksize(const dir_entry* de) const { - return *as(de->u.offset); + size_t linksize(entry_view entry) const { + return *as(entry->u.offset); } - std::string linkname(const dir_entry* de) const { - size_t offs = de->u.offset; + std::string linkname(entry_view entry) const { + size_t offs = entry->u.offset; return std::string(as(offs + sizeof(uint16_t)), *as(offs)); } - const char* linkptr(const dir_entry* de) const { - return as(de->u.offset + sizeof(uint16_t)); + const char* linkptr(entry_view entry) const { + return as(entry->u.offset + sizeof(uint16_t)); } template @@ -154,18 +174,11 @@ class metadata_v2_ : public metadata_v2::impl { reinterpret_cast(data_.data()) + offset); } - const dir_entry* get_entry(int inode) const { - inode -= inode_offset_; - return inode >= 0 && inode < static_cast(cfg_->inode_count) - ? as(inode_index_[inode]) - : nullptr; - } - void parse(const struct ::stat* defaults); const uint32_t* chunk_index_ = nullptr; const uint32_t* inode_index_ = nullptr; - const dir_entry* root_ = nullptr; + entry_view root_ = nullptr; const meta_config* cfg_ = nullptr; std::shared_ptr dir_reader_; #endif @@ -173,6 +186,7 @@ class metadata_v2_ : public metadata_v2::impl { ::apache::thrift::frozen::MappedFrozen meta_; entry_view root_; const int inode_offset_; + const int chunk_index_offset_; log_proxy log_; }; @@ -190,20 +204,17 @@ void metadata_v2_::dump( } if (S_ISREG(mode)) { - uint32_t cur = meta_.chunk_index()[inode - inode_offset_]; - uint32_t end = meta_.chunk_index()[inode - inode_offset_ + 1]; - os << " [" << cur << ", " << end << "]"; - size_t size = 0; - while (cur < end) { - size += meta_.chunks()[cur++].size(); - } - os << " " << size << "\n"; - // os << " " << filesize(entry, mode) << "\n"; - // icb(indent + " ", de->inode); + uint32_t beg = meta_.chunk_index()[inode - chunk_index_offset_]; + uint32_t end = meta_.chunk_index()[inode - 
chunk_index_offset_ + 1]; + os << " [" << beg << ", " << end << "]"; + os << " " << filesize(entry, mode) << "\n"; + icb(indent + " ", inode); } else if (S_ISDIR(mode)) { dump(os, indent + " ", meta_.directories()[inode], std::move(icb)); } else if (S_ISLNK(mode)) { - os << " -> " << meta_.links()[meta_.link_index()[inode] - meta_.link_index_offset()] << "\n"; + os << " -> " // link_index is indexed by (inode - link_index_offset), same as in filesize() + << meta_.links()[meta_.link_index()[inode - meta_.link_index_offset()]] + << "\n"; } else { os << " (unknown type)\n"; } @@ -252,15 +263,14 @@ std::string metadata_v2_::modestring(uint16_t mode) const { template void metadata_v2_::walk( - entry_view entry, - std::function const& func) const { + entry_view entry, std::function const& func) const { func(entry); if (S_ISDIR(entry.mode())) { auto dir = getdir(entry); - auto curr = dir.first_entry(); - auto last = curr + dir.entry_count(); - while (curr < last) { - walk(meta_.entries()[curr++], func); + auto cur = dir.first_entry(); + auto end = cur + dir.entry_count(); + while (cur < end) { + walk(meta_.entries()[cur++], func); } } } @@ -271,9 +281,77 @@ void metadata_v2_::walk( walk(root_, func); } +template +std::optional +metadata_v2_::find(directory_view dir, + std::string_view name) const { + auto first = dir.first_entry(); + auto range = boost::irange(first, first + dir.entry_count()); + + auto it = std::lower_bound( + range.begin(), range.end(), name, [&](auto it, std::string_view name) { + return meta_.names()[meta_.entries()[it].name_index()].compare(name) < 0; // lower_bound needs a strict "less than" predicate, not compare()'s tri-state int + }); + + std::optional rv; + + if (it != range.end()) { + auto cand = meta_.entries()[*it]; + + if (meta_.names()[cand.name_index()] == name) { + rv = cand; + } + } + + return rv; +} + +template +std::optional +metadata_v2_::find(const char* path) const { + while (*path and *path == '/') { + ++path; + } + + std::optional entry = root_; + + while (*path) { + const char* next = ::strchr(path, '/'); + size_t clen = next ?
next - path : ::strlen(path); + + entry = S_ISDIR(entry->mode()) ? find(getdir(*entry), std::string_view(path, clen)) : std::nullopt; // only a directory can be descended into; anything else means "not found" + + if (!entry) { + break; + } + + path = next ? next + 1 : path + clen; + } + + return entry; +} + +template +std::optional metadata_v2_::find(int inode) const { + return get_entry(inode); +} + +template +std::optional +metadata_v2_::find(int inode, + const char* name) const { // TODO: string_view? + auto entry = get_entry(inode); + + if (entry) { + entry = S_ISDIR(entry->mode()) ? find(getdir(*entry), std::string_view(name)) : std::nullopt; // TODO; a non-directory inode has no children to look up + } + + return entry; +} + #if 0 template -void metadata_::parse(const struct ::stat* defaults) { +void metadata_v2_::parse(const struct ::stat* defaults) { size_t offset = 0; while (offset + sizeof(section_header) <= size()) { @@ -342,87 +420,46 @@ void metadata_::parse(const struct ::stat* defaults) { } template -const dir_entry* metadata_::find(const char* path) const { - while (*path and *path == '/') { - ++path; - } - - const dir_entry* de = root_; - - while (*path) { - const char* next = ::strchr(path, '/'); - size_t clen = next ? next - path : ::strlen(path); - - de = dir_reader_->find(getdir(de), path, clen); - - if (!de) { - break; - } - - path = next ? 
next + 1 : path + clen; - } - - return de; -} - -template -const dir_entry* metadata_::find(int inode) const { - return get_entry(inode); -} - -template -const dir_entry* -metadata_::find(int inode, const char* name) const { - auto de = get_entry(inode); - - if (de) { - de = dir_reader_->find(getdir(de), name, ::strlen(name)); - } - - return de; -} - -template -int metadata_::getattr(const dir_entry* de, +int metadata_v2_::getattr(entry_view entry, struct ::stat* stbuf) const { ::memset(stbuf, 0, sizeof(*stbuf)); - dir_reader_->getattr(de, stbuf, filesize(de)); + dir_reader_->getattr(entry, stbuf, filesize(entry)); return 0; } template -int metadata_::access(const dir_entry* de, int mode, uid_t uid, +int metadata_v2_::access(entry_view entry, int mode, uid_t uid, gid_t gid) const { - return dir_reader_->access(de, mode, uid, gid); + return dir_reader_->access(entry, mode, uid, gid); } template -const directory* metadata_::opendir(const dir_entry* de) const { - if (S_ISDIR(de->mode)) { - return getdir(de); +directory_view metadata_v2_::opendir(entry_view entry) const { + if (S_ISDIR(entry->mode)) { + return getdir(entry); } return nullptr; } template -int metadata_::open(const dir_entry* de) const { - if (S_ISREG(de->mode)) { - return de->inode; +int metadata_v2_::open(entry_view entry) const { + if (S_ISREG(entry->mode)) { + return entry->inode; } return -1; } template -const dir_entry* -metadata_::readdir(const directory* d, size_t offset, +entry_view +metadata_v2_::readdir(directory_view d, size_t offset, std::string* name) const { - const dir_entry* de; + entry_view entry; switch (offset) { case 0: - de = as(d->self); + entry = as(d->self); if (name) { name->assign("."); @@ -430,7 +467,7 @@ metadata_::readdir(const directory* d, size_t offset, break; case 1: - de = as(d->parent); + entry = as(d->parent); if (name) { name->assign(".."); @@ -441,7 +478,7 @@ metadata_::readdir(const directory* d, size_t offset, offset -= 2; if (offset < d->count) { - de = 
dir_reader_->readdir(d, offset, name); + entry = dir_reader_->readdir(d, offset, name); } else { return nullptr; } @@ -449,16 +486,16 @@ metadata_::readdir(const directory* d, size_t offset, break; } - return de; + return entry; } template -int metadata_::readlink(const dir_entry* de, char* buf, +int metadata_v2_::readlink(entry_view entry, char* buf, size_t size) const { - if (S_ISLNK(de->mode)) { - size_t lsize = linksize(de); + if (S_ISLNK(entry->mode)) { + size_t lsize = linksize(entry); - ::memcpy(buf, linkptr(de), std::min(lsize, size)); + ::memcpy(buf, linkptr(entry), std::min(lsize, size)); if (size > lsize) { buf[lsize] = '\0'; @@ -471,12 +508,12 @@ int metadata_::readlink(const dir_entry* de, char* buf, } template -int metadata_::readlink(const dir_entry* de, +int metadata_v2_::readlink(entry_view entry, std::string* buf) const { - if (S_ISLNK(de->mode)) { - size_t lsize = linksize(de); + if (S_ISLNK(entry->mode)) { + size_t lsize = linksize(entry); - buf->assign(linkptr(de), lsize); + buf->assign(linkptr(entry), lsize); return 0; } @@ -485,7 +522,7 @@ int metadata_::readlink(const dir_entry* de, } template -int metadata_::statvfs(struct ::statvfs* stbuf) const { +int metadata_v2_::statvfs(struct ::statvfs* stbuf) const { ::memset(stbuf, 0, sizeof(*stbuf)); stbuf->f_bsize = 1UL << cfg_->block_size_bits; @@ -500,7 +537,7 @@ int metadata_::statvfs(struct ::statvfs* stbuf) const { template const chunk_type* -metadata_::get_chunks(int inode, size_t& num) const { +metadata_v2_::get_chunks(int inode, size_t& num) const { inode -= inode_offset_; if (inode < static_cast(cfg_->chunk_index_offset) || inode >= static_cast(cfg_->inode_count)) { @@ -524,8 +561,8 @@ void metadata_v2::get_stat_defaults(struct ::stat* defaults) { } metadata_v2::metadata_v2(logger& lgr, std::vector&& data, - const struct ::stat* defaults) + const struct ::stat* defaults, int inode_offset) : impl_(make_unique_logging_object(lgr, std::move(data), - defaults)) {} + logger_policies>( + lgr, 
std::move(data), defaults, inode_offset)) {} } // namespace dwarfs diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 6fb86a19..a8924673 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -244,7 +244,8 @@ scanner_::compress_names_table( class dir_set_inode_visitor : public entry_visitor { public: - dir_set_inode_visitor(uint32_t& inode_no) : inode_no_(inode_no) {}; + dir_set_inode_visitor(uint32_t& inode_no) + : inode_no_(inode_no){}; void visit(file*) override {} @@ -263,7 +264,8 @@ class dir_set_inode_visitor : public entry_visitor { class link_set_inode_visitor : public entry_visitor { public: - link_set_inode_visitor(uint32_t& inode_no) : inode_no_(inode_no) {}; + link_set_inode_visitor(uint32_t& inode_no) + : inode_no_(inode_no){}; void visit(file*) override {} @@ -551,7 +553,8 @@ void scanner_::scan(filesystem_writer& fsw, log_.info() << "building metadata..."; std::vector metadata_vec; metadata_writer mw(lgr_, metadata_vec); - global_entry_data ge_data(options_.no_time); // TODO: just pass options directly + global_entry_data ge_data( + options_.no_time); // TODO: just pass options directly thrift::metadata::metadata mv2; std::vector dir_index; dir_index.resize(first_link_inode);