Unique files table, deprecate inode_v2_2, some cleanup

This commit is contained in:
Marcus Holland-Moritz 2021-03-16 20:09:07 +01:00
parent d507ade9d1
commit ad48cae7b1
9 changed files with 87 additions and 83 deletions

View File

@ -124,7 +124,7 @@ class file : public entry {
void hardlink(file* other, progress& prog); void hardlink(file* other, progress& prog);
uint64_t raw_inode_num() const; uint64_t raw_inode_num() const;
unsigned num_hard_links() const; unsigned num_hard_links() const;
uint32_t content_index() const; uint32_t unique_file_id() const;
private: private:
struct data { struct data {

View File

@ -69,10 +69,6 @@ class inode_view
Meta const* meta_; Meta const* meta_;
}; };
/**
* THIS *MUST* BE CONSTRUCTIBLE FROM ONLY AN INODE NUMBER (NOT EVEN AN
* INODE_VIEW)
*/
class directory_view class directory_view
: public ::apache::thrift::frozen::View<thrift::metadata::directory> { : public ::apache::thrift::frozen::View<thrift::metadata::directory> {
using DirView = ::apache::thrift::frozen::View<thrift::metadata::directory>; using DirView = ::apache::thrift::frozen::View<thrift::metadata::directory>;
@ -124,8 +120,6 @@ class dir_entry_view {
bool is_root() const; bool is_root() const;
// TODO: remove?
// std::optional<directory_view> directory() const;
std::optional<dir_entry_view> parent() const; std::optional<dir_entry_view> parent() const;
std::string path() const; std::string path() const;

View File

@ -282,8 +282,7 @@ void op_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
} else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) { } else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) {
err = EACCES; err = EACCES;
} else { } else {
fi->fh = FUSE_ROOT_ID + fi->fh = FUSE_ROOT_ID + entry->inode_num();
entry->content_index(); // <<---- THIS IS NOT THE INODE!!!!
fi->direct_io = !s_opts.cache_files; fi->direct_io = !s_opts.cache_files;
fi->keep_cache = s_opts.cache_files; fi->keep_cache = s_opts.cache_files;
fuse_reply_open(req, fi); fuse_reply_open(req, fi);

View File

@ -109,11 +109,6 @@ void entry::pack(thrift::metadata::inode_data& entry_v2,
entry_v2.atime_offset = data.get_atime_offset(stat_.st_atime); entry_v2.atime_offset = data.get_atime_offset(stat_.st_atime);
entry_v2.mtime_offset = data.get_mtime_offset(stat_.st_mtime); entry_v2.mtime_offset = data.get_mtime_offset(stat_.st_mtime);
entry_v2.ctime_offset = data.get_ctime_offset(stat_.st_ctime); entry_v2.ctime_offset = data.get_ctime_offset(stat_.st_ctime);
if (auto fp = dynamic_cast<file const*>(this)) {
entry_v2.content_index = fp->content_index();
} else {
entry_v2.content_index = inode_num();
}
} }
entry::type_t file::type() const { return E_FILE; } entry::type_t file::type() const { return E_FILE; }
@ -189,7 +184,7 @@ void file::scan(os_access& os, progress& prog) {
} }
} }
uint32_t file::content_index() const { return inode_->num(); } uint32_t file::unique_file_id() const { return inode_->num(); }
uint64_t file::raw_inode_num() const { return status().st_ino; } uint64_t file::raw_inode_num() const { return status().st_ino; }

View File

@ -54,32 +54,17 @@ inode_view dir_entry_view::inode() const {
dev.inode_num(), meta_); dev.inode_num(), meta_);
}, },
[this](InodeView const& iv) { [this](InodeView const& iv) {
return inode_view(iv, iv.content_index(), meta_); return inode_view(iv, iv.inode_v2_2(), meta_);
}, },
}, },
v_); v_);
} }
// TODO: remove?
// std::optional<directory_view> dir_entry_view::directory() const {
// if (is_root()) {
// return std::nullopt;
// }
//
// auto dir_inode = parent_index_;
//
// if (auto de = meta_->dir_entries()) {
// dir_inode = (*de)[dir_inode].entry_index();
// }
//
// return directory_view(dir_inode, meta_);
// }
bool dir_entry_view::is_root() const { bool dir_entry_view::is_root() const {
return std::visit( return std::visit(
overloaded{ overloaded{
[](DirEntryView const& dev) { return dev.inode_num() == 0; }, [](DirEntryView const& dev) { return dev.inode_num() == 0; },
[](InodeView const& iv) { return iv.content_index() == 0; }, [](InodeView const& iv) { return iv.inode_v2_2() == 0; },
}, },
v_); v_);
} }
@ -126,11 +111,11 @@ dir_entry_view::from_dir_entry_index(uint32_t self_index, Meta const* meta) {
DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range"); DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range");
auto iv = meta->entries()[self_index]; auto iv = meta->entries()[self_index];
DWARFS_CHECK(iv.content_index() < meta->directories().size(), DWARFS_CHECK(iv.inode_v2_2() < meta->directories().size(),
"parent_index out of range"); "parent_index out of range");
return dir_entry_view( return dir_entry_view(
iv, self_index, iv, self_index,
meta->entry_table_v2_2()[meta->directories()[iv.content_index()] meta->entry_table_v2_2()[meta->directories()[iv.inode_v2_2()]
.parent_entry()], .parent_entry()],
meta); meta);
} }
@ -164,7 +149,7 @@ inode_view dir_entry_view::inode(uint32_t index, Meta const* meta) {
DWARFS_CHECK(index < meta->entries().size(), "index out of range"); DWARFS_CHECK(index < meta->entries().size(), "index out of range");
auto iv = meta->entries()[index]; auto iv = meta->entries()[index];
return inode_view(iv, iv.content_index(), meta); return inode_view(iv, iv.inode_v2_2(), meta);
} }
std::string dir_entry_view::path() const { std::string dir_entry_view::path() const {

View File

@ -343,24 +343,35 @@ class metadata_ final : public metadata_v2::impl {
std::string modestring(uint16_t mode) const; std::string modestring(uint16_t mode) const;
std::optional<chunk_range> get_chunk_range(int inode) const {
std::optional<chunk_range> rv;
inode -= file_index_offset_;
if (auto uf = meta_.unique_files_table()) {
if (inode < 0 or inode >= static_cast<int>(uf->size())) {
return rv;
}
inode = (*uf)[inode];
}
if (inode >= 0 &&
inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
uint32_t begin = meta_.chunk_table()[inode];
uint32_t end = meta_.chunk_table()[inode + 1];
rv = chunk_range(&meta_, begin, end);
}
return rv;
}
size_t reg_file_size(inode_view entry) const { size_t reg_file_size(inode_view entry) const {
auto inode = entry.content_index() - file_index_offset_; auto cr = get_chunk_range(entry.inode_num());
uint32_t cur = meta_.chunk_table()[inode]; DWARFS_CHECK(cr, "invalid chunk range");
uint32_t end = meta_.chunk_table()[inode + 1]; return std::accumulate(
if (cur > end) { cr->begin(), cr->end(), static_cast<size_t>(0),
DWARFS_THROW(runtime_error, [](size_t s, chunk_view cv) { return s + cv.size(); });
fmt::format("invalid chunk range: [{0}..{1}]", cur, end));
}
if (end > meta_.chunks().size()) {
DWARFS_THROW(runtime_error,
fmt::format("chunk index out of range: {0} > {1}", end,
meta_.chunks().size()));
}
size_t size = 0;
while (cur < end) {
size += meta_.chunks()[cur++].size();
}
return size;
} }
size_t file_size(inode_view entry, uint16_t mode) const { size_t file_size(inode_view entry, uint16_t mode) const {
@ -401,7 +412,7 @@ class metadata_ final : public metadata_v2::impl {
} }
std::string_view link_value(inode_view entry) const { std::string_view link_value(inode_view entry) const {
return meta_.symlinks()[meta_.symlink_table()[entry.content_index() - return meta_.symlinks()[meta_.symlink_table()[entry.inode_num() -
symlink_table_offset_]]; symlink_table_offset_]];
} }
@ -430,7 +441,7 @@ class metadata_ final : public metadata_v2::impl {
} }
} else { } else {
for (auto e : meta_.entries()) { for (auto e : meta_.entries()) {
auto index = int(e.content_index()) - file_index_offset_; auto index = int(e.inode_v2_2()) - file_index_offset_;
if (index >= 0 && index < int(nlinks.size())) { if (index >= 0 && index < int(nlinks.size())) {
++DWARFS_NOTHROW(nlinks.at(index)); ++DWARFS_NOTHROW(nlinks.at(index));
} }
@ -464,7 +475,7 @@ void metadata_<LoggerPolicy>::dump(
std::function<void(const std::string&, uint32_t)> const& icb) const { std::function<void(const std::string&, uint32_t)> const& icb) const {
auto inode_data = entry.inode(); auto inode_data = entry.inode();
auto mode = inode_data.mode(); auto mode = inode_data.mode();
auto inode = inode_data.content_index(); // TODO: rename inode appropriately auto inode = inode_data.inode_num(); // TODO: rename inode appropriately
os << indent << "<inode:" << inode << "> " << modestring(mode); os << indent << "<inode:" << inode << "> " << modestring(mode);
@ -473,9 +484,9 @@ void metadata_<LoggerPolicy>::dump(
} }
if (S_ISREG(mode)) { if (S_ISREG(mode)) {
uint32_t beg = meta_.chunk_table()[inode - file_index_offset_]; auto cr = get_chunk_range(inode);
uint32_t end = meta_.chunk_table()[inode - file_index_offset_ + 1]; DWARFS_CHECK(cr, "invalid chunk range");
os << " [" << beg << ", " << end << "]"; os << " [" << cr->begin_ << ", " << cr->end_ << "]";
os << " " << file_size(inode_data, mode) << "\n"; os << " " << file_size(inode_data, mode) << "\n";
if (detail_level > 3) { if (detail_level > 3) {
icb(indent + " ", inode); icb(indent + " ", inode);
@ -581,7 +592,7 @@ folly::dynamic metadata_<LoggerPolicy>::as_dynamic(dir_entry_view entry) const {
auto inode_data = entry.inode(); auto inode_data = entry.inode();
auto mode = inode_data.mode(); auto mode = inode_data.mode();
auto inode = inode_data.content_index(); // TODO: rename all the things auto inode = inode_data.inode_num(); // TODO: rename all the things
obj["mode"] = mode; obj["mode"] = mode;
obj["modestring"] = modestring(mode); obj["modestring"] = modestring(mode);
@ -674,7 +685,7 @@ void metadata_<LoggerPolicy>::walk(uint32_t self_index, uint32_t parent_index,
auto inode_data = entry.inode(); auto inode_data = entry.inode();
if (S_ISDIR(inode_data.mode())) { if (S_ISDIR(inode_data.mode())) {
auto inode = inode_data.content_index(); auto inode = inode_data.inode_num();
if (!seen.emplace(inode).second) { if (!seen.emplace(inode).second) {
DWARFS_THROW(runtime_error, "cycle detected during directory walk"); DWARFS_THROW(runtime_error, "cycle detected during directory walk");
@ -710,8 +721,8 @@ void metadata_<LoggerPolicy>::walk_inode_order_impl(
} else { } else {
std::sort(entries.begin(), entries.end(), std::sort(entries.begin(), entries.end(),
[this](auto const& a, auto const& b) { [this](auto const& a, auto const& b) {
return meta_.entries()[a.first].content_index() < return meta_.entries()[a.first].inode_v2_2() <
meta_.entries()[b.first].content_index(); meta_.entries()[b.first].inode_v2_2();
}); });
} }
@ -905,7 +916,7 @@ int metadata_<LoggerPolicy>::access(inode_view entry, int mode, uid_t uid,
template <typename LoggerPolicy> template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::open(inode_view entry) const { int metadata_<LoggerPolicy>::open(inode_view entry) const {
if (S_ISREG(entry.mode())) { if (S_ISREG(entry.mode())) {
return entry.content_index(); return entry.inode_num();
} }
return -1; return -1;
@ -949,15 +960,7 @@ int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
template <typename LoggerPolicy> template <typename LoggerPolicy>
std::optional<chunk_range> std::optional<chunk_range>
metadata_<LoggerPolicy>::get_chunks(int inode) const { metadata_<LoggerPolicy>::get_chunks(int inode) const {
std::optional<chunk_range> rv; return get_chunk_range(inode - inode_offset_);
inode -= inode_offset_ + file_index_offset_;
if (inode >= 0 &&
inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
uint32_t begin = meta_.chunk_table()[inode];
uint32_t end = meta_.chunk_table()[inode + 1];
rv = chunk_range(&meta_, begin, end);
}
return rv;
} }
void metadata_v2::get_stat_defaults(struct ::stat* defaults) { void metadata_v2::get_stat_defaults(struct ::stat* defaults) {

View File

@ -213,7 +213,7 @@ class pipe_set_inode_visitor : public visitor_base {
uint32_t& inode_num_; uint32_t& inode_num_;
}; };
class names_and_symlinks_visitor : public entry_visitor { class names_and_symlinks_visitor : public visitor_base {
public: public:
explicit names_and_symlinks_visitor(global_entry_data& data) explicit names_and_symlinks_visitor(global_entry_data& data)
: data_(data) {} : data_(data) {}
@ -264,6 +264,24 @@ class save_directories_visitor : public visitor_base {
std::vector<dir*> directories_; std::vector<dir*> directories_;
}; };
/**
 * Visitor that records the unique file id of every visited file.
 *
 * Ids are stored in a table indexed by `inode_num() - inode_begin`. The
 * table holds `inode_end - inode_begin` slots, all zero until a file is
 * visited for that slot.
 */
class save_unique_files_visitor : public visitor_base {
 public:
  explicit save_unique_files_visitor(uint32_t inode_begin, uint32_t inode_end)
      : inode_begin_{inode_begin}
      , unique_files_(inode_end - inode_begin) {}

  /// Store the file's unique id in the slot for its inode number; `at()`
  /// throws if that slot lies outside the table.
  void visit(file* p) override {
    auto const id = p->unique_file_id();
    auto const slot = p->inode_num() - inode_begin_;
    unique_files_.at(slot) = id;
  }

  /// Mutable access to the collected table, so the caller can move it out.
  std::vector<uint32_t>& get_unique_files() { return unique_files_; }

 private:
  uint32_t const inode_begin_;
  std::vector<uint32_t> unique_files_;
};
std::string status_string(progress const& p, size_t width) { std::string status_string(progress const& p, size_t width) {
auto cp = p.current.load(); auto cp = p.current.load();
std::string label, path; std::string label, path;
@ -551,7 +569,7 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
worker_group blockify("blockify", 1, 1 << 20); worker_group blockify("blockify", 1, 1 << 20);
im.order_inodes(script_, options_.file_order, first_file_inode, im.order_inodes(script_, options_.file_order, 0,
[&](std::shared_ptr<inode> const& ino) { [&](std::shared_ptr<inode> const& ino) {
blockify.add_job([&] { blockify.add_job([&] {
prog.current.store(ino.get()); prog.current.store(ino.get());
@ -587,15 +605,15 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
// TODO: we should be able to start this once all blocks have been // TODO: we should be able to start this once all blocks have been
// submitted for compression // submitted for compression
im.for_each_inode([&](std::shared_ptr<inode> const& ino) { im.for_each_inode([&](std::shared_ptr<inode> const& ino) {
DWARFS_NOTHROW(mv2.chunk_table.at(ino->num() - first_file_inode)) = // TODO: no need for this offset stuff here...
mv2.chunks.size(); DWARFS_NOTHROW(mv2.chunk_table.at(ino->num())) = mv2.chunks.size();
ino->append_chunks_to(mv2.chunks); ino->append_chunks_to(mv2.chunks);
}); });
// insert dummy inode to help determine number of chunks per inode // insert dummy inode to help determine number of chunks per inode
DWARFS_NOTHROW(mv2.chunk_table.at(im.count())) = mv2.chunks.size(); DWARFS_NOTHROW(mv2.chunk_table.at(im.count())) = mv2.chunks.size();
LOG_DEBUG << "total number of file inodes: " << im.count(); LOG_DEBUG << "total number of unique files: " << im.count();
LOG_DEBUG << "total number of chunks: " << mv2.chunks.size(); LOG_DEBUG << "total number of chunks: " << mv2.chunks.size();
LOG_INFO << "saving directories..."; LOG_INFO << "saving directories...";
@ -606,6 +624,11 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
root->accept(sdv); root->accept(sdv);
sdv.pack(mv2, ge_data); sdv.pack(mv2, ge_data);
LOG_INFO << "saving unique files table...";
save_unique_files_visitor sufv(first_file_inode, first_device_inode);
root->accept(sufv);
mv2.unique_files_table_ref() = std::move(sufv.get_unique_files());
thrift::metadata::fs_options fsopts; thrift::metadata::fs_options fsopts;
fsopts.mtime_only = !options_.keep_all_times; fsopts.mtime_only = !options_.keep_all_times;
if (options_.time_resolution_sec > 1) { if (options_.time_resolution_sec > 1) {

View File

@ -403,7 +403,7 @@ void basic_end_to_end_test(std::string const& compressor,
auto e2 = fs.find("/bar.pl"); auto e2 = fs.find("/bar.pl");
ASSERT_TRUE(e2); ASSERT_TRUE(e2);
EXPECT_EQ(entry->content_index(), e2->content_index()); EXPECT_EQ(entry->inode_num(), e2->inode_num());
struct ::stat st1, st2; struct ::stat st1, st2;
ASSERT_EQ(0, fs.getattr(*entry, &st1)); ASSERT_EQ(0, fs.getattr(*entry, &st1));
@ -417,13 +417,13 @@ void basic_end_to_end_test(std::string const& compressor,
entry = fs.find("/"); entry = fs.find("/");
ASSERT_TRUE(entry); ASSERT_TRUE(entry);
EXPECT_EQ(0, entry->content_index()); EXPECT_EQ(0, entry->inode_num());
e2 = fs.find(0); e2 = fs.find(0);
ASSERT_TRUE(e2); ASSERT_TRUE(e2);
EXPECT_EQ(e2->content_index(), 0); EXPECT_EQ(e2->inode_num(), 0);
entry = fs.find(0, "baz.pl"); entry = fs.find(0, "baz.pl");
ASSERT_TRUE(entry); ASSERT_TRUE(entry);
EXPECT_GT(entry->content_index(), 0); EXPECT_GT(entry->inode_num(), 0);
ASSERT_EQ(0, fs.getattr(*entry, &st1)); ASSERT_EQ(0, fs.getattr(*entry, &st1));
EXPECT_EQ(23456, st1.st_size); EXPECT_EQ(23456, st1.st_size);
e2 = fs.find(0, "somedir"); e2 = fs.find(0, "somedir");

View File

@ -74,7 +74,7 @@ struct inode_data {
* - For files, (inode - chunk_index_offset) can be * - For files, (inode - chunk_index_offset) can be
* used as an index into metadata.chunk_table. * used as an index into metadata.chunk_table.
*/ */
3: required UInt32 content_index, 3: required UInt32 inode_v2_2,
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// TODO: actually, the inode field is redundant as of v2.3, as entries are // TODO: actually, the inode field is redundant as of v2.3, as entries are
@ -227,6 +227,11 @@ struct metadata {
*/ */
19: optional list<dir_entry> dir_entries, 19: optional list<dir_entry> dir_entries,
/**
* Maps from file inode to chunk_table index
*/
20: optional list<UInt32> unique_files_table,
// TODO: add timestamp // TODO: add timestamp
// 20: optional UInt64 timestamp, // 21: optional UInt64 timestamp,
} }