Unique files table, deprecate inode_v2_2, some cleanup

Marcus Holland-Moritz 2021-03-16 20:09:07 +01:00
parent d507ade9d1
commit ad48cae7b1
9 changed files with 87 additions and 83 deletions

View File

@@ -124,7 +124,7 @@ class file : public entry {
 void hardlink(file* other, progress& prog);
 uint64_t raw_inode_num() const;
 unsigned num_hard_links() const;
-uint32_t content_index() const;
+uint32_t unique_file_id() const;
 private:
 struct data {

View File

@@ -69,10 +69,6 @@ class inode_view
 Meta const* meta_;
 };
-/**
- * THIS *MUST* BE CONSTRUCTIBLE FROM ONLY AN INODE NUMBER (NOT EVEN AN
- * INODE_VIEW)
- */
 class directory_view
 : public ::apache::thrift::frozen::View<thrift::metadata::directory> {
 using DirView = ::apache::thrift::frozen::View<thrift::metadata::directory>;
@@ -124,8 +120,6 @@ class dir_entry_view {
 bool is_root() const;
-// TODO: remove?
-// std::optional<directory_view> directory() const;
 std::optional<dir_entry_view> parent() const;
 std::string path() const;

View File

@@ -282,8 +282,7 @@ void op_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
 } else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) {
 err = EACCES;
 } else {
-fi->fh = FUSE_ROOT_ID +
-entry->content_index(); // <<---- THIS IS NOT THE INODE!!!!
+fi->fh = FUSE_ROOT_ID + entry->inode_num();
 fi->direct_io = !s_opts.cache_files;
 fi->keep_cache = s_opts.cache_files;
 fuse_reply_open(req, fi);

View File

@@ -109,11 +109,6 @@ void entry::pack(thrift::metadata::inode_data& entry_v2,
 entry_v2.atime_offset = data.get_atime_offset(stat_.st_atime);
 entry_v2.mtime_offset = data.get_mtime_offset(stat_.st_mtime);
 entry_v2.ctime_offset = data.get_ctime_offset(stat_.st_ctime);
-if (auto fp = dynamic_cast<file const*>(this)) {
-entry_v2.content_index = fp->content_index();
-} else {
-entry_v2.content_index = inode_num();
-}
 }
 entry::type_t file::type() const { return E_FILE; }
@@ -189,7 +184,7 @@ void file::scan(os_access& os, progress& prog) {
 }
 }
-uint32_t file::content_index() const { return inode_->num(); }
+uint32_t file::unique_file_id() const { return inode_->num(); }
 uint64_t file::raw_inode_num() const { return status().st_ino; }

View File

@@ -54,32 +54,17 @@ inode_view dir_entry_view::inode() const {
 dev.inode_num(), meta_);
 },
 [this](InodeView const& iv) {
-return inode_view(iv, iv.content_index(), meta_);
+return inode_view(iv, iv.inode_v2_2(), meta_);
 },
 },
 v_);
 }
-// TODO: remove?
-// std::optional<directory_view> dir_entry_view::directory() const {
-// if (is_root()) {
-// return std::nullopt;
-// }
-//
-// auto dir_inode = parent_index_;
-//
-// if (auto de = meta_->dir_entries()) {
-// dir_inode = (*de)[dir_inode].entry_index();
-// }
-//
-// return directory_view(dir_inode, meta_);
-// }
 bool dir_entry_view::is_root() const {
 return std::visit(
 overloaded{
 [](DirEntryView const& dev) { return dev.inode_num() == 0; },
-[](InodeView const& iv) { return iv.content_index() == 0; },
+[](InodeView const& iv) { return iv.inode_v2_2() == 0; },
 },
 v_);
 }
@@ -126,11 +111,11 @@ dir_entry_view::from_dir_entry_index(uint32_t self_index, Meta const* meta) {
 DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range");
 auto iv = meta->entries()[self_index];
-DWARFS_CHECK(iv.content_index() < meta->directories().size(),
+DWARFS_CHECK(iv.inode_v2_2() < meta->directories().size(),
 "parent_index out of range");
 return dir_entry_view(
 iv, self_index,
-meta->entry_table_v2_2()[meta->directories()[iv.content_index()]
+meta->entry_table_v2_2()[meta->directories()[iv.inode_v2_2()]
 .parent_entry()],
 meta);
 }
@@ -164,7 +149,7 @@ inode_view dir_entry_view::inode(uint32_t index, Meta const* meta) {
 DWARFS_CHECK(index < meta->entries().size(), "index out of range");
 auto iv = meta->entries()[index];
-return inode_view(iv, iv.content_index(), meta);
+return inode_view(iv, iv.inode_v2_2(), meta);
 }
 std::string dir_entry_view::path() const {

View File

@@ -343,24 +343,35 @@ class metadata_ final : public metadata_v2::impl {
 std::string modestring(uint16_t mode) const;
+std::optional<chunk_range> get_chunk_range(int inode) const {
+std::optional<chunk_range> rv;
+inode -= file_index_offset_;
+if (auto uf = meta_.unique_files_table()) {
+if (inode < 0 or inode >= static_cast<int>(uf->size())) {
+return rv;
+}
+inode = (*uf)[inode];
+}
+if (inode >= 0 &&
+inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
+uint32_t begin = meta_.chunk_table()[inode];
+uint32_t end = meta_.chunk_table()[inode + 1];
+rv = chunk_range(&meta_, begin, end);
+}
+return rv;
+}
 size_t reg_file_size(inode_view entry) const {
-auto inode = entry.content_index() - file_index_offset_;
-uint32_t cur = meta_.chunk_table()[inode];
-uint32_t end = meta_.chunk_table()[inode + 1];
-if (cur > end) {
-DWARFS_THROW(runtime_error,
-fmt::format("invalid chunk range: [{0}..{1}]", cur, end));
-}
-if (end > meta_.chunks().size()) {
-DWARFS_THROW(runtime_error,
-fmt::format("chunk index out of range: {0} > {1}", end,
-meta_.chunks().size()));
-}
-size_t size = 0;
-while (cur < end) {
-size += meta_.chunks()[cur++].size();
-}
-return size;
+auto cr = get_chunk_range(entry.inode_num());
+DWARFS_CHECK(cr, "invalid chunk range");
+return std::accumulate(
+cr->begin(), cr->end(), static_cast<size_t>(0),
+[](size_t s, chunk_view cv) { return s + cv.size(); });
 }
 size_t file_size(inode_view entry, uint16_t mode) const {
@@ -401,7 +412,7 @@ class metadata_ final : public metadata_v2::impl {
 }
 std::string_view link_value(inode_view entry) const {
-return meta_.symlinks()[meta_.symlink_table()[entry.content_index() -
+return meta_.symlinks()[meta_.symlink_table()[entry.inode_num() -
 symlink_table_offset_]];
 }
@@ -430,7 +441,7 @@ class metadata_ final : public metadata_v2::impl {
 }
 } else {
 for (auto e : meta_.entries()) {
-auto index = int(e.content_index()) - file_index_offset_;
+auto index = int(e.inode_v2_2()) - file_index_offset_;
 if (index >= 0 && index < int(nlinks.size())) {
 ++DWARFS_NOTHROW(nlinks.at(index));
 }
@@ -464,7 +475,7 @@ void metadata_<LoggerPolicy>::dump(
 std::function<void(const std::string&, uint32_t)> const& icb) const {
 auto inode_data = entry.inode();
 auto mode = inode_data.mode();
-auto inode = inode_data.content_index(); // TODO: rename inode appropriately
+auto inode = inode_data.inode_num(); // TODO: rename inode appropriately
 os << indent << "<inode:" << inode << "> " << modestring(mode);
@@ -473,9 +484,9 @@
 }
 if (S_ISREG(mode)) {
-uint32_t beg = meta_.chunk_table()[inode - file_index_offset_];
-uint32_t end = meta_.chunk_table()[inode - file_index_offset_ + 1];
-os << " [" << beg << ", " << end << "]";
+auto cr = get_chunk_range(inode);
+DWARFS_CHECK(cr, "invalid chunk range");
+os << " [" << cr->begin_ << ", " << cr->end_ << "]";
 os << " " << file_size(inode_data, mode) << "\n";
 if (detail_level > 3) {
 icb(indent + " ", inode);
@@ -581,7 +592,7 @@ folly::dynamic metadata_<LoggerPolicy>::as_dynamic(dir_entry_view entry) const {
 auto inode_data = entry.inode();
 auto mode = inode_data.mode();
-auto inode = inode_data.content_index(); // TODO: rename all the things
+auto inode = inode_data.inode_num(); // TODO: rename all the things
 obj["mode"] = mode;
 obj["modestring"] = modestring(mode);
@@ -674,7 +685,7 @@ void metadata_<LoggerPolicy>::walk(uint32_t self_index, uint32_t parent_index,
 auto inode_data = entry.inode();
 if (S_ISDIR(inode_data.mode())) {
-auto inode = inode_data.content_index();
+auto inode = inode_data.inode_num();
 if (!seen.emplace(inode).second) {
 DWARFS_THROW(runtime_error, "cycle detected during directory walk");
@@ -710,8 +721,8 @@ void metadata_<LoggerPolicy>::walk_inode_order_impl(
 } else {
 std::sort(entries.begin(), entries.end(),
 [this](auto const& a, auto const& b) {
-return meta_.entries()[a.first].content_index() <
-meta_.entries()[b.first].content_index();
+return meta_.entries()[a.first].inode_v2_2() <
+meta_.entries()[b.first].inode_v2_2();
 });
 }
@@ -905,7 +916,7 @@ int metadata_<LoggerPolicy>::access(inode_view entry, int mode, uid_t uid,
 template <typename LoggerPolicy>
 int metadata_<LoggerPolicy>::open(inode_view entry) const {
 if (S_ISREG(entry.mode())) {
-return entry.content_index();
+return entry.inode_num();
 }
 return -1;
@@ -949,15 +960,7 @@ int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
 template <typename LoggerPolicy>
 std::optional<chunk_range>
 metadata_<LoggerPolicy>::get_chunks(int inode) const {
-std::optional<chunk_range> rv;
-inode -= inode_offset_ + file_index_offset_;
-if (inode >= 0 &&
-inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
-uint32_t begin = meta_.chunk_table()[inode];
-uint32_t end = meta_.chunk_table()[inode + 1];
-rv = chunk_range(&meta_, begin, end);
-}
-return rv;
+return get_chunk_range(inode - inode_offset_);
 }
 void metadata_v2::get_stat_defaults(struct ::stat* defaults) {
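The new get_chunk_range() helper above is the core of the read-path change: a file inode is first translated through the optional unique_files_table and only then used to index chunk_table. What follows is a minimal sketch of that lookup chain using plain std::vector stand-ins for the frozen thrift views; toy_metadata and toy_chunk_range are hypothetical names, and the file_index_offset_ subtraction is assumed to have happened already.

#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

// Illustrative stand-ins for meta_.unique_files_table() / meta_.chunk_table().
struct toy_metadata {
  std::vector<uint32_t> unique_files_table; // file inode -> chunk_table index
  std::vector<uint32_t> chunk_table;        // unique file -> first chunk, plus sentinel
};

// Resolve a file inode to its half-open [begin, end) range of chunk indices,
// or nullopt if it is out of range.
std::optional<std::pair<uint32_t, uint32_t>>
toy_chunk_range(toy_metadata const& m, int inode) {
  if (!m.unique_files_table.empty()) {
    if (inode < 0 || inode >= static_cast<int>(m.unique_files_table.size())) {
      return std::nullopt;
    }
    inode = m.unique_files_table[inode]; // duplicate files collapse to one slot
  }
  if (inode >= 0 && inode < static_cast<int>(m.chunk_table.size()) - 1) {
    return std::make_pair(m.chunk_table[inode], m.chunk_table[inode + 1]);
  }
  return std::nullopt;
}

With this indirection in place, reg_file_size() only has to sum chunk sizes over the returned range, and get_chunks() reuses the same helper after removing inode_offset_.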

View File

@@ -213,7 +213,7 @@ class pipe_set_inode_visitor : public visitor_base {
 uint32_t& inode_num_;
 };
-class names_and_symlinks_visitor : public entry_visitor {
+class names_and_symlinks_visitor : public visitor_base {
 public:
 explicit names_and_symlinks_visitor(global_entry_data& data)
 : data_(data) {}
@@ -264,6 +264,24 @@ class save_directories_visitor : public visitor_base {
 std::vector<dir*> directories_;
 };
+class save_unique_files_visitor : public visitor_base {
+public:
+explicit save_unique_files_visitor(uint32_t inode_begin, uint32_t inode_end)
+: inode_begin_{inode_begin} {
+unique_files_.resize(inode_end - inode_begin);
+}
+void visit(file* p) override {
+unique_files_.at(p->inode_num() - inode_begin_) = p->unique_file_id();
+}
+std::vector<uint32_t>& get_unique_files() { return unique_files_; }
+private:
+uint32_t const inode_begin_;
+std::vector<uint32_t> unique_files_;
+};
 std::string status_string(progress const& p, size_t width) {
 auto cp = p.current.load();
 std::string label, path;
@@ -551,7 +569,7 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
 worker_group blockify("blockify", 1, 1 << 20);
-im.order_inodes(script_, options_.file_order, first_file_inode,
+im.order_inodes(script_, options_.file_order, 0,
 [&](std::shared_ptr<inode> const& ino) {
 blockify.add_job([&] {
 prog.current.store(ino.get());
@@ -587,15 +605,15 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
 // TODO: we should be able to start this once all blocks have been
 // submitted for compression
 im.for_each_inode([&](std::shared_ptr<inode> const& ino) {
-DWARFS_NOTHROW(mv2.chunk_table.at(ino->num() - first_file_inode)) =
-mv2.chunks.size();
+// TODO: no need for this offset stuff here...
+DWARFS_NOTHROW(mv2.chunk_table.at(ino->num())) = mv2.chunks.size();
 ino->append_chunks_to(mv2.chunks);
 });
 // insert dummy inode to help determine number of chunks per inode
 DWARFS_NOTHROW(mv2.chunk_table.at(im.count())) = mv2.chunks.size();
-LOG_DEBUG << "total number of file inodes: " << im.count();
+LOG_DEBUG << "total number of unique files: " << im.count();
 LOG_DEBUG << "total number of chunks: " << mv2.chunks.size();
 LOG_INFO << "saving directories...";
@@ -606,6 +624,11 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
 root->accept(sdv);
 sdv.pack(mv2, ge_data);
+LOG_INFO << "saving unique files table...";
+save_unique_files_visitor sufv(first_file_inode, first_device_inode);
+root->accept(sufv);
+mv2.unique_files_table_ref() = std::move(sufv.get_unique_files());
 thrift::metadata::fs_options fsopts;
 fsopts.mtime_only = !options_.keep_all_times;
 if (options_.time_resolution_sec > 1) {
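On the write side, save_unique_files_visitor records, for every regular file entry, which deduplicated inode (and therefore which chunk_table slot) it refers to. Below is a small self-contained sketch of that indexing scheme; toy_file and build_unique_files_table are hypothetical, and the assumption is that entry-level file inode numbers are dense in [inode_begin, inode_end) while byte-identical files share one unique_file_id.

#include <cstdint>
#include <vector>

// Hypothetical flattened view of what the visitor sees.
struct toy_file {
  uint32_t inode_num;      // per-entry file inode, dense in [begin, end)
  uint32_t unique_file_id; // shared by all files with identical content
};

std::vector<uint32_t>
build_unique_files_table(std::vector<toy_file> const& files,
                         uint32_t inode_begin, uint32_t inode_end) {
  // One slot per file inode, filled with the id of its unique content.
  std::vector<uint32_t> table(inode_end - inode_begin);
  for (auto const& f : files) {
    table.at(f.inode_num - inode_begin) = f.unique_file_id;
  }
  return table;
}

Because duplicates map to the same unique_file_id, chunk_table only needs one entry per distinct file content, plus the dummy sentinel entry written in the loop above.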

View File

@@ -403,7 +403,7 @@ void basic_end_to_end_test(std::string const& compressor,
 auto e2 = fs.find("/bar.pl");
 ASSERT_TRUE(e2);
-EXPECT_EQ(entry->content_index(), e2->content_index());
+EXPECT_EQ(entry->inode_num(), e2->inode_num());
 struct ::stat st1, st2;
 ASSERT_EQ(0, fs.getattr(*entry, &st1));
@@ -417,13 +417,13 @@
 entry = fs.find("/");
 ASSERT_TRUE(entry);
-EXPECT_EQ(0, entry->content_index());
+EXPECT_EQ(0, entry->inode_num());
 e2 = fs.find(0);
 ASSERT_TRUE(e2);
-EXPECT_EQ(e2->content_index(), 0);
+EXPECT_EQ(e2->inode_num(), 0);
 entry = fs.find(0, "baz.pl");
 ASSERT_TRUE(entry);
-EXPECT_GT(entry->content_index(), 0);
+EXPECT_GT(entry->inode_num(), 0);
 ASSERT_EQ(0, fs.getattr(*entry, &st1));
 EXPECT_EQ(23456, st1.st_size);
 e2 = fs.find(0, "somedir");

View File

@@ -74,7 +74,7 @@ struct inode_data {
 * - For files, (inode - chunk_index_offset) can be
 * used as in index into metadata.chunk_table.
 */
-3: required UInt32 content_index,
+3: required UInt32 inode_v2_2,
 //--------------------------------------------------------------------------
 // TODO: actually, the inode field is redundant as of v2.3, as entries are
@@ -227,6 +227,11 @@ struct metadata {
 */
 19: optional list<dir_entry> dir_entries,
+/**
+* Maps from file inode to chunk_table index
+*/
+20: optional list<UInt32> unique_files_table,
 // TODO: add timestamp
-// 20: optional UInt64 timestamp,
+// 21: optional UInt64 timestamp,
 }
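To make the schema comment concrete, here is a hedged example of how a consumer of the format might derive a file's chunk count from the two tables; the sample values are made up and plain vectors stand in for the frozen accessors.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Sample data only: three file inodes, two of them byte-identical.
  std::vector<uint32_t> unique_files_table{0, 1, 0};
  std::vector<uint32_t> chunk_table{0, 3, 4}; // one entry per unique file + sentinel

  for (uint32_t inode = 0; inode < unique_files_table.size(); ++inode) {
    uint32_t slot = unique_files_table[inode];
    uint32_t num_chunks = chunk_table[slot + 1] - chunk_table[slot];
    // prints: inode 0 -> 3 chunks, inode 1 -> 1 chunk, inode 2 -> 3 chunks (shared)
    std::printf("inode %u -> %u chunks\n", inode, num_chunks);
  }
}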