mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 12:28:13 -04:00
Unique files table, deprecate inode_v2_2, some cleanup
This commit is contained in:
parent
d507ade9d1
commit
ad48cae7b1
@ -124,7 +124,7 @@ class file : public entry {
|
||||
void hardlink(file* other, progress& prog);
|
||||
uint64_t raw_inode_num() const;
|
||||
unsigned num_hard_links() const;
|
||||
uint32_t content_index() const;
|
||||
uint32_t unique_file_id() const;
|
||||
|
||||
private:
|
||||
struct data {
|
||||
|
@ -69,10 +69,6 @@ class inode_view
|
||||
Meta const* meta_;
|
||||
};
|
||||
|
||||
/**
|
||||
* THIS *MUST* BE CONSTRUCTIBLE FROM ONLY AN INODE NUMBER (NOT EVEN AN
|
||||
* INODE_VIEW)
|
||||
*/
|
||||
class directory_view
|
||||
: public ::apache::thrift::frozen::View<thrift::metadata::directory> {
|
||||
using DirView = ::apache::thrift::frozen::View<thrift::metadata::directory>;
|
||||
@ -124,8 +120,6 @@ class dir_entry_view {
|
||||
|
||||
bool is_root() const;
|
||||
|
||||
// TODO: remove?
|
||||
// std::optional<directory_view> directory() const;
|
||||
std::optional<dir_entry_view> parent() const;
|
||||
|
||||
std::string path() const;
|
||||
|
@ -282,8 +282,7 @@ void op_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
|
||||
} else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) {
|
||||
err = EACCES;
|
||||
} else {
|
||||
fi->fh = FUSE_ROOT_ID +
|
||||
entry->content_index(); // <<---- THIS IS NOT THE INODE!!!!
|
||||
fi->fh = FUSE_ROOT_ID + entry->inode_num();
|
||||
fi->direct_io = !s_opts.cache_files;
|
||||
fi->keep_cache = s_opts.cache_files;
|
||||
fuse_reply_open(req, fi);
|
||||
|
@ -109,11 +109,6 @@ void entry::pack(thrift::metadata::inode_data& entry_v2,
|
||||
entry_v2.atime_offset = data.get_atime_offset(stat_.st_atime);
|
||||
entry_v2.mtime_offset = data.get_mtime_offset(stat_.st_mtime);
|
||||
entry_v2.ctime_offset = data.get_ctime_offset(stat_.st_ctime);
|
||||
if (auto fp = dynamic_cast<file const*>(this)) {
|
||||
entry_v2.content_index = fp->content_index();
|
||||
} else {
|
||||
entry_v2.content_index = inode_num();
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports the entry kind for a regular file entry; always `E_FILE`.
entry::type_t file::type() const {
  return E_FILE;
}
|
||||
@ -189,7 +184,7 @@ void file::scan(os_access& os, progress& prog) {
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t file::content_index() const { return inode_->num(); }
|
||||
/// Returns the number of the inode object (`inode_`) attached to this file.
uint32_t file::unique_file_id() const {
  auto const id = inode_->num();
  return id;
}
|
||||
|
||||
/// Returns the `st_ino` field of this file's stat data as reported by
/// `status()`.
uint64_t file::raw_inode_num() const {
  return status().st_ino;
}
|
||||
|
||||
|
@ -54,32 +54,17 @@ inode_view dir_entry_view::inode() const {
|
||||
dev.inode_num(), meta_);
|
||||
},
|
||||
[this](InodeView const& iv) {
|
||||
return inode_view(iv, iv.content_index(), meta_);
|
||||
return inode_view(iv, iv.inode_v2_2(), meta_);
|
||||
},
|
||||
},
|
||||
v_);
|
||||
}
|
||||
|
||||
// TODO: remove?
|
||||
// std::optional<directory_view> dir_entry_view::directory() const {
|
||||
// if (is_root()) {
|
||||
// return std::nullopt;
|
||||
// }
|
||||
//
|
||||
// auto dir_inode = parent_index_;
|
||||
//
|
||||
// if (auto de = meta_->dir_entries()) {
|
||||
// dir_inode = (*de)[dir_inode].entry_index();
|
||||
// }
|
||||
//
|
||||
// return directory_view(dir_inode, meta_);
|
||||
// }
|
||||
|
||||
bool dir_entry_view::is_root() const {
|
||||
return std::visit(
|
||||
overloaded{
|
||||
[](DirEntryView const& dev) { return dev.inode_num() == 0; },
|
||||
[](InodeView const& iv) { return iv.content_index() == 0; },
|
||||
[](InodeView const& iv) { return iv.inode_v2_2() == 0; },
|
||||
},
|
||||
v_);
|
||||
}
|
||||
@ -126,11 +111,11 @@ dir_entry_view::from_dir_entry_index(uint32_t self_index, Meta const* meta) {
|
||||
DWARFS_CHECK(self_index < meta->entries().size(), "self_index out of range");
|
||||
auto iv = meta->entries()[self_index];
|
||||
|
||||
DWARFS_CHECK(iv.content_index() < meta->directories().size(),
|
||||
DWARFS_CHECK(iv.inode_v2_2() < meta->directories().size(),
|
||||
"parent_index out of range");
|
||||
return dir_entry_view(
|
||||
iv, self_index,
|
||||
meta->entry_table_v2_2()[meta->directories()[iv.content_index()]
|
||||
meta->entry_table_v2_2()[meta->directories()[iv.inode_v2_2()]
|
||||
.parent_entry()],
|
||||
meta);
|
||||
}
|
||||
@ -164,7 +149,7 @@ inode_view dir_entry_view::inode(uint32_t index, Meta const* meta) {
|
||||
|
||||
DWARFS_CHECK(index < meta->entries().size(), "index out of range");
|
||||
auto iv = meta->entries()[index];
|
||||
return inode_view(iv, iv.content_index(), meta);
|
||||
return inode_view(iv, iv.inode_v2_2(), meta);
|
||||
}
|
||||
|
||||
std::string dir_entry_view::path() const {
|
||||
|
@ -343,24 +343,35 @@ class metadata_ final : public metadata_v2::impl {
|
||||
|
||||
std::string modestring(uint16_t mode) const;
|
||||
|
||||
std::optional<chunk_range> get_chunk_range(int inode) const {
|
||||
std::optional<chunk_range> rv;
|
||||
|
||||
inode -= file_index_offset_;
|
||||
|
||||
if (auto uf = meta_.unique_files_table()) {
|
||||
if (inode < 0 or inode >= static_cast<int>(uf->size())) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
inode = (*uf)[inode];
|
||||
}
|
||||
|
||||
if (inode >= 0 &&
|
||||
inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
|
||||
uint32_t begin = meta_.chunk_table()[inode];
|
||||
uint32_t end = meta_.chunk_table()[inode + 1];
|
||||
rv = chunk_range(&meta_, begin, end);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
size_t reg_file_size(inode_view entry) const {
|
||||
auto inode = entry.content_index() - file_index_offset_;
|
||||
uint32_t cur = meta_.chunk_table()[inode];
|
||||
uint32_t end = meta_.chunk_table()[inode + 1];
|
||||
if (cur > end) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
fmt::format("invalid chunk range: [{0}..{1}]", cur, end));
|
||||
}
|
||||
if (end > meta_.chunks().size()) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
fmt::format("chunk index out of range: {0} > {1}", end,
|
||||
meta_.chunks().size()));
|
||||
}
|
||||
size_t size = 0;
|
||||
while (cur < end) {
|
||||
size += meta_.chunks()[cur++].size();
|
||||
}
|
||||
return size;
|
||||
auto cr = get_chunk_range(entry.inode_num());
|
||||
DWARFS_CHECK(cr, "invalid chunk range");
|
||||
return std::accumulate(
|
||||
cr->begin(), cr->end(), static_cast<size_t>(0),
|
||||
[](size_t s, chunk_view cv) { return s + cv.size(); });
|
||||
}
|
||||
|
||||
size_t file_size(inode_view entry, uint16_t mode) const {
|
||||
@ -401,7 +412,7 @@ class metadata_ final : public metadata_v2::impl {
|
||||
}
|
||||
|
||||
std::string_view link_value(inode_view entry) const {
|
||||
return meta_.symlinks()[meta_.symlink_table()[entry.content_index() -
|
||||
return meta_.symlinks()[meta_.symlink_table()[entry.inode_num() -
|
||||
symlink_table_offset_]];
|
||||
}
|
||||
|
||||
@ -430,7 +441,7 @@ class metadata_ final : public metadata_v2::impl {
|
||||
}
|
||||
} else {
|
||||
for (auto e : meta_.entries()) {
|
||||
auto index = int(e.content_index()) - file_index_offset_;
|
||||
auto index = int(e.inode_v2_2()) - file_index_offset_;
|
||||
if (index >= 0 && index < int(nlinks.size())) {
|
||||
++DWARFS_NOTHROW(nlinks.at(index));
|
||||
}
|
||||
@ -464,7 +475,7 @@ void metadata_<LoggerPolicy>::dump(
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const {
|
||||
auto inode_data = entry.inode();
|
||||
auto mode = inode_data.mode();
|
||||
auto inode = inode_data.content_index(); // TODO: rename inode appropriately
|
||||
auto inode = inode_data.inode_num(); // TODO: rename inode appropriately
|
||||
|
||||
os << indent << "<inode:" << inode << "> " << modestring(mode);
|
||||
|
||||
@ -473,9 +484,9 @@ void metadata_<LoggerPolicy>::dump(
|
||||
}
|
||||
|
||||
if (S_ISREG(mode)) {
|
||||
uint32_t beg = meta_.chunk_table()[inode - file_index_offset_];
|
||||
uint32_t end = meta_.chunk_table()[inode - file_index_offset_ + 1];
|
||||
os << " [" << beg << ", " << end << "]";
|
||||
auto cr = get_chunk_range(inode);
|
||||
DWARFS_CHECK(cr, "invalid chunk range");
|
||||
os << " [" << cr->begin_ << ", " << cr->end_ << "]";
|
||||
os << " " << file_size(inode_data, mode) << "\n";
|
||||
if (detail_level > 3) {
|
||||
icb(indent + " ", inode);
|
||||
@ -581,7 +592,7 @@ folly::dynamic metadata_<LoggerPolicy>::as_dynamic(dir_entry_view entry) const {
|
||||
|
||||
auto inode_data = entry.inode();
|
||||
auto mode = inode_data.mode();
|
||||
auto inode = inode_data.content_index(); // TODO: rename all the things
|
||||
auto inode = inode_data.inode_num(); // TODO: rename all the things
|
||||
|
||||
obj["mode"] = mode;
|
||||
obj["modestring"] = modestring(mode);
|
||||
@ -674,7 +685,7 @@ void metadata_<LoggerPolicy>::walk(uint32_t self_index, uint32_t parent_index,
|
||||
auto inode_data = entry.inode();
|
||||
|
||||
if (S_ISDIR(inode_data.mode())) {
|
||||
auto inode = inode_data.content_index();
|
||||
auto inode = inode_data.inode_num();
|
||||
|
||||
if (!seen.emplace(inode).second) {
|
||||
DWARFS_THROW(runtime_error, "cycle detected during directory walk");
|
||||
@ -710,8 +721,8 @@ void metadata_<LoggerPolicy>::walk_inode_order_impl(
|
||||
} else {
|
||||
std::sort(entries.begin(), entries.end(),
|
||||
[this](auto const& a, auto const& b) {
|
||||
return meta_.entries()[a.first].content_index() <
|
||||
meta_.entries()[b.first].content_index();
|
||||
return meta_.entries()[a.first].inode_v2_2() <
|
||||
meta_.entries()[b.first].inode_v2_2();
|
||||
});
|
||||
}
|
||||
|
||||
@ -905,7 +916,7 @@ int metadata_<LoggerPolicy>::access(inode_view entry, int mode, uid_t uid,
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::open(inode_view entry) const {
|
||||
if (S_ISREG(entry.mode())) {
|
||||
return entry.content_index();
|
||||
return entry.inode_num();
|
||||
}
|
||||
|
||||
return -1;
|
||||
@ -949,15 +960,7 @@ int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
|
||||
template <typename LoggerPolicy>
|
||||
std::optional<chunk_range>
|
||||
metadata_<LoggerPolicy>::get_chunks(int inode) const {
|
||||
std::optional<chunk_range> rv;
|
||||
inode -= inode_offset_ + file_index_offset_;
|
||||
if (inode >= 0 &&
|
||||
inode < (static_cast<int>(meta_.chunk_table().size()) - 1)) {
|
||||
uint32_t begin = meta_.chunk_table()[inode];
|
||||
uint32_t end = meta_.chunk_table()[inode + 1];
|
||||
rv = chunk_range(&meta_, begin, end);
|
||||
}
|
||||
return rv;
|
||||
return get_chunk_range(inode - inode_offset_);
|
||||
}
|
||||
|
||||
void metadata_v2::get_stat_defaults(struct ::stat* defaults) {
|
||||
|
@ -213,7 +213,7 @@ class pipe_set_inode_visitor : public visitor_base {
|
||||
uint32_t& inode_num_;
|
||||
};
|
||||
|
||||
class names_and_symlinks_visitor : public entry_visitor {
|
||||
class names_and_symlinks_visitor : public visitor_base {
|
||||
public:
|
||||
explicit names_and_symlinks_visitor(global_entry_data& data)
|
||||
: data_(data) {}
|
||||
@ -264,6 +264,24 @@ class save_directories_visitor : public visitor_base {
|
||||
std::vector<dir*> directories_;
|
||||
};
|
||||
|
||||
class save_unique_files_visitor : public visitor_base {
|
||||
public:
|
||||
explicit save_unique_files_visitor(uint32_t inode_begin, uint32_t inode_end)
|
||||
: inode_begin_{inode_begin} {
|
||||
unique_files_.resize(inode_end - inode_begin);
|
||||
}
|
||||
|
||||
void visit(file* p) override {
|
||||
unique_files_.at(p->inode_num() - inode_begin_) = p->unique_file_id();
|
||||
}
|
||||
|
||||
std::vector<uint32_t>& get_unique_files() { return unique_files_; }
|
||||
|
||||
private:
|
||||
uint32_t const inode_begin_;
|
||||
std::vector<uint32_t> unique_files_;
|
||||
};
|
||||
|
||||
std::string status_string(progress const& p, size_t width) {
|
||||
auto cp = p.current.load();
|
||||
std::string label, path;
|
||||
@ -551,7 +569,7 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
|
||||
worker_group blockify("blockify", 1, 1 << 20);
|
||||
|
||||
im.order_inodes(script_, options_.file_order, first_file_inode,
|
||||
im.order_inodes(script_, options_.file_order, 0,
|
||||
[&](std::shared_ptr<inode> const& ino) {
|
||||
blockify.add_job([&] {
|
||||
prog.current.store(ino.get());
|
||||
@ -587,15 +605,15 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
// TODO: we should be able to start this once all blocks have been
|
||||
// submitted for compression
|
||||
im.for_each_inode([&](std::shared_ptr<inode> const& ino) {
|
||||
DWARFS_NOTHROW(mv2.chunk_table.at(ino->num() - first_file_inode)) =
|
||||
mv2.chunks.size();
|
||||
// TODO: no need for this offset stuff here...
|
||||
DWARFS_NOTHROW(mv2.chunk_table.at(ino->num())) = mv2.chunks.size();
|
||||
ino->append_chunks_to(mv2.chunks);
|
||||
});
|
||||
|
||||
// insert dummy inode to help determine number of chunks per inode
|
||||
DWARFS_NOTHROW(mv2.chunk_table.at(im.count())) = mv2.chunks.size();
|
||||
|
||||
LOG_DEBUG << "total number of file inodes: " << im.count();
|
||||
LOG_DEBUG << "total number of unique files: " << im.count();
|
||||
LOG_DEBUG << "total number of chunks: " << mv2.chunks.size();
|
||||
|
||||
LOG_INFO << "saving directories...";
|
||||
@ -606,6 +624,11 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
root->accept(sdv);
|
||||
sdv.pack(mv2, ge_data);
|
||||
|
||||
LOG_INFO << "saving unique files table...";
|
||||
save_unique_files_visitor sufv(first_file_inode, first_device_inode);
|
||||
root->accept(sufv);
|
||||
mv2.unique_files_table_ref() = std::move(sufv.get_unique_files());
|
||||
|
||||
thrift::metadata::fs_options fsopts;
|
||||
fsopts.mtime_only = !options_.keep_all_times;
|
||||
if (options_.time_resolution_sec > 1) {
|
||||
|
@ -403,7 +403,7 @@ void basic_end_to_end_test(std::string const& compressor,
|
||||
auto e2 = fs.find("/bar.pl");
|
||||
ASSERT_TRUE(e2);
|
||||
|
||||
EXPECT_EQ(entry->content_index(), e2->content_index());
|
||||
EXPECT_EQ(entry->inode_num(), e2->inode_num());
|
||||
|
||||
struct ::stat st1, st2;
|
||||
ASSERT_EQ(0, fs.getattr(*entry, &st1));
|
||||
@ -417,13 +417,13 @@ void basic_end_to_end_test(std::string const& compressor,
|
||||
|
||||
entry = fs.find("/");
|
||||
ASSERT_TRUE(entry);
|
||||
EXPECT_EQ(0, entry->content_index());
|
||||
EXPECT_EQ(0, entry->inode_num());
|
||||
e2 = fs.find(0);
|
||||
ASSERT_TRUE(e2);
|
||||
EXPECT_EQ(e2->content_index(), 0);
|
||||
EXPECT_EQ(e2->inode_num(), 0);
|
||||
entry = fs.find(0, "baz.pl");
|
||||
ASSERT_TRUE(entry);
|
||||
EXPECT_GT(entry->content_index(), 0);
|
||||
EXPECT_GT(entry->inode_num(), 0);
|
||||
ASSERT_EQ(0, fs.getattr(*entry, &st1));
|
||||
EXPECT_EQ(23456, st1.st_size);
|
||||
e2 = fs.find(0, "somedir");
|
||||
|
@ -74,7 +74,7 @@ struct inode_data {
|
||||
* - For files, (inode - chunk_index_offset) can be
|
||||
* used as an index into metadata.chunk_table.
|
||||
*/
|
||||
3: required UInt32 content_index,
|
||||
3: required UInt32 inode_v2_2,
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// TODO: actually, the inode field is redundant as of v2.3, as entries are
|
||||
@ -227,6 +227,11 @@ struct metadata {
|
||||
*/
|
||||
19: optional list<dir_entry> dir_entries,
|
||||
|
||||
/**
|
||||
* Maps from file inode to chunk_table index
|
||||
*/
|
||||
20: optional list<UInt32> unique_files_table,
|
||||
|
||||
// TODO: add timestamp
|
||||
// 20: optional UInt64 timestamp,
|
||||
// 21: optional UInt64 timestamp,
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user