mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-13 14:27:30 -04:00
refactor(metadata): include self index in directories table
This commit is contained in:
parent
682cf883a2
commit
01f20aa1f1
@ -132,18 +132,18 @@ to each other:
|
|||||||
╔════╗ ┌────────────────┐ │ S_IFDIR ──►┌───────────────────┐ │ ┌────────────────┴─┐
|
╔════╗ ┌────────────────┐ │ S_IFDIR ──►┌───────────────────┐ │ ┌────────────────┴─┐
|
||||||
║root╟──►│ name_index: 0 │ │ │ mode_index: 0 ├──────┐ └─►│ parent_entry: 0 │
|
║root╟──►│ name_index: 0 │ │ │ mode_index: 0 ├──────┐ └─►│ parent_entry: 0 │
|
||||||
╚════╝ │ inode_num: 0 ├───────┴────────────►│ owner_index: 0 │ │ │ first_entry: 1 │
|
╚════╝ │ inode_num: 0 ├───────┴────────────►│ owner_index: 0 │ │ │ first_entry: 1 │
|
||||||
├────────────────┤ │ group_index: 0 │ │ ├──────────────────┤
|
├────────────────┤ │ group_index: 0 │ │ │ self_entry: 0 │
|
||||||
┌───┤ name_index: 2 │ │ atime_offset: 0 │ │ │ parent_entry: 0 │
|
┌───┤ name_index: 2 │ │ atime_offset: 0 │ │ ├──────────────────┤
|
||||||
┌────┼───┤ inode_num: 5 ├───────┐ │ mtime_offset: 417 │ │ │ first_entry: 11 │
|
┌────┼───┤ inode_num: 5 ├───────┐ │ mtime_offset: 417 │ │ │ parent_entry: 0 │
|
||||||
│ │ ├────────────────┤ │ │ ctime_offset: 0 │ │ ├──────────────────┤
|
│ │ ├────────────────┤ │ │ ctime_offset: 0 │ │ │ first_entry: 11 │
|
||||||
│ ┌──┼───┤ name_index: 3 │ │ ├───────────────────┤ │ │ parent_entry: 5 │
|
│ ┌──┼───┤ name_index: 3 │ │ ├───────────────────┤ │ │ self_entry: 1 │
|
||||||
│ │ │ │ inode_num: 9 ├────┐ │ │ ... │ │ │ first_entry: 12 │
|
│ │ │ │ inode_num: 9 ├────┐ │ │ ... │ │ ├──────────────────┤
|
||||||
│ │ │ ├────────────────┤ │ │ S_IFLNK ──►├───────────────────┤ │ ├──────────────────┤
|
│ │ │ ├────────────────┤ │ │ S_IFLNK ──►├───────────────────┤ │ │ parent_entry: 5 │
|
||||||
│ │ │ │ │ │ │ │ mode_index: 2 │ │ │ │
|
│ │ │ │ │ │ │ │ mode_index: 2 │ │ │ first_entry: 12 │
|
||||||
│ │ │ │ ... │ │ └────────────►│ owner_index: 2 │ │ │ ... │
|
│ │ │ │ ... │ │ └────────────►│ owner_index: 2 │ │ │ self_entry: 7 │
|
||||||
│ │ │ │ │ │ │ group_index: 0 │ │ │ │
|
│ │ │ │ │ │ │ group_index: 0 │ │ ├──────────────────┤
|
||||||
│ │ │ └────────────────┘ │ │ atime_offset: 0 │ │ └──────────────────┘
|
│ │ │ └────────────────┘ │ │ atime_offset: 0 │ │ │ ... │
|
||||||
│ │ │ │ │ mtime_offset: 298 │ │
|
│ │ │ │ │ mtime_offset: 298 │ │ └──────────────────┘
|
||||||
│ │ │ │ │ ctime_offset: 0 │ │
|
│ │ │ │ │ ctime_offset: 0 │ │
|
||||||
│ │ │ names[] │ ├───────────────────┤ │ modes[]
|
│ │ │ names[] │ ├───────────────────┤ │ modes[]
|
||||||
│ │ │ ┌────────────┐ │ │ ... │ │ ┌─────────────┐
|
│ │ │ ┌────────────┐ │ │ ... │ │ ┌─────────────┐
|
||||||
|
@ -52,6 +52,11 @@ class global_metadata {
|
|||||||
using Meta =
|
using Meta =
|
||||||
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
|
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
|
||||||
|
|
||||||
|
using directories_view = ::apache::thrift::frozen::Layout<
|
||||||
|
std::vector<thrift::metadata::directory>>::View;
|
||||||
|
using bundled_directories_view =
|
||||||
|
::apache::thrift::frozen::Bundled<directories_view>;
|
||||||
|
|
||||||
global_metadata(logger& lgr, Meta const& meta);
|
global_metadata(logger& lgr, Meta const& meta);
|
||||||
|
|
||||||
static void check_consistency(logger& lgr, Meta const& meta);
|
static void check_consistency(logger& lgr, Meta const& meta);
|
||||||
@ -65,14 +70,12 @@ class global_metadata {
|
|||||||
|
|
||||||
dwarfs::internal::string_table const& names() const { return names_; }
|
dwarfs::internal::string_table const& names() const { return names_; }
|
||||||
|
|
||||||
std::vector<thrift::metadata::directory> const& directories() const {
|
std::optional<directories_view> bundled_directories() const;
|
||||||
return directories_;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Meta const& meta_;
|
Meta const& meta_;
|
||||||
std::vector<thrift::metadata::directory> const directories_;
|
std::optional<bundled_directories_view> const bundled_directories_;
|
||||||
std::vector<uint32_t> const dir_self_index_;
|
directories_view const directories_;
|
||||||
dwarfs::internal::string_table const names_;
|
dwarfs::internal::string_table const names_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <bit>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
@ -38,6 +39,7 @@
|
|||||||
namespace dwarfs::reader::internal {
|
namespace dwarfs::reader::internal {
|
||||||
|
|
||||||
using namespace dwarfs::internal;
|
using namespace dwarfs::internal;
|
||||||
|
using namespace ::apache::thrift;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -59,90 +61,127 @@ class stack_ctor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<thrift::metadata::directory>
|
std::optional<global_metadata::bundled_directories_view>
|
||||||
unpack_directories(logger& lgr, global_metadata::Meta const& meta) {
|
unpack_directories(logger& lgr, global_metadata::Meta const& meta) {
|
||||||
|
auto has_self_entry = [&] {
|
||||||
|
auto layout = meta.findFirstOfType<
|
||||||
|
std::unique_ptr<frozen::Layout<thrift::metadata::metadata>>>();
|
||||||
|
return (*layout)
|
||||||
|
->directoriesField.layout.itemField.layout.self_entryField.layout
|
||||||
|
.bits > 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto opts = meta.options();
|
||||||
|
auto dep = meta.dir_entries();
|
||||||
|
|
||||||
|
if ((!opts or !opts->packed_directories()) and (!dep or has_self_entry())) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_PROXY(debug_logger_policy, lgr);
|
||||||
|
|
||||||
|
auto td = LOG_TIMED_DEBUG;
|
||||||
|
|
||||||
|
auto dirent = *dep;
|
||||||
|
auto metadir = meta.directories();
|
||||||
|
|
||||||
std::vector<thrift::metadata::directory> directories;
|
std::vector<thrift::metadata::directory> directories;
|
||||||
|
|
||||||
if (auto opts = meta.options(); opts and opts->packed_directories()) {
|
if (opts->packed_directories()) {
|
||||||
LOG_PROXY(debug_logger_policy, lgr);
|
|
||||||
|
|
||||||
auto ti = LOG_TIMED_DEBUG;
|
|
||||||
|
|
||||||
auto dirent = *meta.dir_entries();
|
|
||||||
auto metadir = meta.directories();
|
|
||||||
|
|
||||||
directories.resize(metadir.size());
|
directories.resize(metadir.size());
|
||||||
|
|
||||||
// delta-decode first entries first
|
// delta-decode first entries first
|
||||||
directories[0].first_entry() = metadir[0].first_entry();
|
{
|
||||||
|
auto tt = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
for (size_t i = 1; i < directories.size(); ++i) {
|
directories[0].first_entry() = metadir[0].first_entry();
|
||||||
directories[i].first_entry() =
|
|
||||||
directories[i - 1].first_entry().value() + metadir[i].first_entry();
|
for (size_t i = 1; i < directories.size(); ++i) {
|
||||||
|
directories[i].first_entry() =
|
||||||
|
directories[i - 1].first_entry().value() + metadir[i].first_entry();
|
||||||
|
}
|
||||||
|
|
||||||
|
tt << "delta-decoded " << directories.size() << " first entries";
|
||||||
}
|
}
|
||||||
|
|
||||||
// then traverse to recover parent entries
|
// then traverse to recover parent entries
|
||||||
std::queue<uint32_t> queue;
|
{
|
||||||
queue.push(0);
|
auto tt = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
while (!queue.empty()) {
|
std::queue<uint32_t> queue;
|
||||||
auto parent = queue.front();
|
queue.push(0);
|
||||||
queue.pop();
|
|
||||||
|
|
||||||
auto p_ino = dirent[parent].inode_num();
|
while (!queue.empty()) {
|
||||||
|
auto parent = queue.front();
|
||||||
|
queue.pop();
|
||||||
|
|
||||||
auto beg = directories[p_ino].first_entry().value();
|
auto p_ino = dirent[parent].inode_num();
|
||||||
auto end = directories[p_ino + 1].first_entry().value();
|
|
||||||
|
|
||||||
for (auto e = beg; e < end; ++e) {
|
auto beg = directories[p_ino].first_entry().value();
|
||||||
if (auto e_ino = dirent[e].inode_num();
|
auto end = directories[p_ino + 1].first_entry().value();
|
||||||
e_ino < (directories.size() - 1)) {
|
|
||||||
directories[e_ino].parent_entry() = parent;
|
for (auto e = beg; e < end; ++e) {
|
||||||
queue.push(e);
|
if (auto e_ino = dirent[e].inode_num();
|
||||||
|
e_ino < (directories.size() - 1)) {
|
||||||
|
directories[e_ino].parent_entry() = parent;
|
||||||
|
queue.push(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
ti << "unpacked directories table";
|
tt << "recovered " << directories.size() << " parent entries";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto tt = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
|
directories = metadir.thaw();
|
||||||
|
|
||||||
|
tt << "thawed " << directories.size() << " directories";
|
||||||
}
|
}
|
||||||
|
|
||||||
return directories;
|
// finally, set self entries
|
||||||
}
|
{
|
||||||
|
auto tt = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
std::vector<uint32_t>
|
for (size_t i = 0; i < dirent.size(); ++i) {
|
||||||
build_dir_self_index(logger& lgr, global_metadata::Meta const& meta) {
|
auto ino = dirent[i].inode_num();
|
||||||
std::vector<uint32_t> index;
|
if (ino < directories.size()) {
|
||||||
|
directories[ino].self_entry() = i;
|
||||||
if (auto dep = meta.dir_entries()) {
|
|
||||||
LOG_PROXY(debug_logger_policy, lgr);
|
|
||||||
|
|
||||||
auto ti = LOG_TIMED_DEBUG;
|
|
||||||
|
|
||||||
auto const dir_count = meta.directories().size() - 1;
|
|
||||||
|
|
||||||
index.resize(dir_count);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < dep->size(); ++i) {
|
|
||||||
auto ino = (*dep)[i].inode_num();
|
|
||||||
if (ino < dir_count) {
|
|
||||||
index[ino] = i;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto check_index [[maybe_unused]] = [&] {
|
tt << "recoverd " << directories.size() << " self entries from "
|
||||||
auto tmp = index;
|
<< dirent.size() << " dir entries";
|
||||||
std::sort(tmp.begin(), tmp.end());
|
|
||||||
std::adjacent_difference(tmp.begin(), tmp.end(), tmp.begin());
|
|
||||||
return std::all_of(tmp.begin() + 1, tmp.end(),
|
|
||||||
[](auto i) { return i != 0; });
|
|
||||||
};
|
|
||||||
|
|
||||||
assert(check_index());
|
|
||||||
|
|
||||||
ti << "built directory self index table (size: " << dir_count << ")";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return index;
|
// freeze to save memory
|
||||||
|
auto view = [&] {
|
||||||
|
auto tt = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
|
auto v = frozen::freeze(directories);
|
||||||
|
|
||||||
|
tt << "froze " << directories.size() << " directories ("
|
||||||
|
<< size_with_unit(sizeof(thrift::metadata::directory) *
|
||||||
|
directories.size())
|
||||||
|
<< ")";
|
||||||
|
|
||||||
|
return v;
|
||||||
|
}();
|
||||||
|
|
||||||
|
auto l_old = meta.findFirstOfType<
|
||||||
|
std::unique_ptr<frozen::Layout<thrift::metadata::metadata>>>();
|
||||||
|
auto bits_per_dir_old =
|
||||||
|
(*l_old)->directoriesField.layout.itemField.layout.bits;
|
||||||
|
auto l_new = view.findFirstOfType<std::unique_ptr<
|
||||||
|
frozen::Layout<std::vector<thrift::metadata::directory>>>>();
|
||||||
|
auto bits_per_dir_new = (*l_new)->itemField.layout.bits;
|
||||||
|
|
||||||
|
td << "unpacked directories table with " << directories.size() << " entries ("
|
||||||
|
<< size_with_unit((bits_per_dir_old * directories.size() + 7) / 8)
|
||||||
|
<< " -> "
|
||||||
|
<< size_with_unit((bits_per_dir_new * directories.size() + 7) / 8) << ")";
|
||||||
|
|
||||||
|
return view;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: merge with inode_rank in metadata_v2
|
// TODO: merge with inode_rank in metadata_v2
|
||||||
@ -682,12 +721,18 @@ check_metadata(logger& lgr, global_metadata::Meta const& meta, bool check) {
|
|||||||
return meta;
|
return meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T unbundled(frozen::Bundled<T> const& bundle) {
|
||||||
|
return bundle;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
global_metadata::global_metadata(logger& lgr, Meta const& meta)
|
global_metadata::global_metadata(logger& lgr, Meta const& meta)
|
||||||
: meta_{meta}
|
: meta_{meta}
|
||||||
, directories_{unpack_directories(lgr, meta_)}
|
, bundled_directories_{unpack_directories(lgr, meta_)}
|
||||||
, dir_self_index_{build_dir_self_index(lgr, meta_)}
|
, directories_{bundled_directories_ ? unbundled(*bundled_directories_)
|
||||||
|
: meta_.directories()}
|
||||||
, names_{meta_.compact_names()
|
, names_{meta_.compact_names()
|
||||||
? string_table(lgr, "names", *meta_.compact_names())
|
? string_table(lgr, "names", *meta_.compact_names())
|
||||||
: string_table(meta_.names())} {}
|
: string_table(meta_.names())} {}
|
||||||
@ -701,18 +746,27 @@ void global_metadata::check_consistency(logger& lgr) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t global_metadata::first_dir_entry(uint32_t ino) const {
|
uint32_t global_metadata::first_dir_entry(uint32_t ino) const {
|
||||||
return !directories_.empty() ? directories_[ino].first_entry().value()
|
return directories_[ino].first_entry();
|
||||||
: meta_.directories()[ino].first_entry();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t global_metadata::parent_dir_entry(uint32_t ino) const {
|
uint32_t global_metadata::parent_dir_entry(uint32_t ino) const {
|
||||||
return !directories_.empty() ? directories_[ino].parent_entry().value()
|
return directories_[ino].parent_entry();
|
||||||
: meta_.directories()[ino].parent_entry();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t global_metadata::self_dir_entry(uint32_t ino) const {
|
uint32_t global_metadata::self_dir_entry(uint32_t ino) const {
|
||||||
return !dir_self_index_.empty() ? dir_self_index_[ino]
|
if (!meta_.entry_table_v2_2().empty()) {
|
||||||
: meta_.entry_table_v2_2()[ino];
|
return meta_.entry_table_v2_2()[ino];
|
||||||
|
}
|
||||||
|
|
||||||
|
return directories_[ino].self_entry();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto global_metadata::bundled_directories() const
|
||||||
|
-> std::optional<directories_view> {
|
||||||
|
if (bundled_directories_) {
|
||||||
|
return directories_;
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inode_view_impl::mode() const -> mode_type {
|
auto inode_view_impl::mode() const -> mode_type {
|
||||||
|
@ -1453,8 +1453,8 @@ thrift::metadata::metadata metadata_<LoggerPolicy>::unpack_metadata() const {
|
|||||||
if (opts->packed_chunk_table().value()) {
|
if (opts->packed_chunk_table().value()) {
|
||||||
meta.chunk_table() = chunk_table_;
|
meta.chunk_table() = chunk_table_;
|
||||||
}
|
}
|
||||||
if (opts->packed_directories().value()) {
|
if (auto const& dirs = global_.bundled_directories()) {
|
||||||
meta.directories() = global_.directories();
|
meta.directories() = dirs->thaw();
|
||||||
}
|
}
|
||||||
if (opts->packed_shared_files_table().value()) {
|
if (opts->packed_shared_files_table().value()) {
|
||||||
meta.shared_files_table() = shared_files_;
|
meta.shared_files_table() = shared_files_;
|
||||||
|
@ -378,6 +378,9 @@ void dir::pack(thrift::metadata::metadata& mv2,
|
|||||||
d.parent_entry() = 0;
|
d.parent_entry() = 0;
|
||||||
}
|
}
|
||||||
d.first_entry() = mv2.dir_entries()->size();
|
d.first_entry() = mv2.dir_entries()->size();
|
||||||
|
auto se = entry_index();
|
||||||
|
DWARFS_CHECK(se, "self entry index not set");
|
||||||
|
d.self_entry() = *se;
|
||||||
mv2.directories()->push_back(d);
|
mv2.directories()->push_back(d);
|
||||||
for (entry_ptr const& e : entries_) {
|
for (entry_ptr const& e : entries_) {
|
||||||
e->set_entry_index(mv2.dir_entries()->size());
|
e->set_entry_index(mv2.dir_entries()->size());
|
||||||
|
@ -201,6 +201,7 @@ class save_directories_visitor : public visitor_base {
|
|||||||
thrift::metadata::directory dummy;
|
thrift::metadata::directory dummy;
|
||||||
dummy.parent_entry() = 0;
|
dummy.parent_entry() = 0;
|
||||||
dummy.first_entry() = mv2.dir_entries()->size();
|
dummy.first_entry() = mv2.dir_entries()->size();
|
||||||
|
dummy.self_entry() = 0;
|
||||||
mv2.directories()->push_back(dummy);
|
mv2.directories()->push_back(dummy);
|
||||||
|
|
||||||
directories_.clear();
|
directories_.clear();
|
||||||
@ -939,6 +940,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
for (auto& d : mv2.directories().value()) {
|
for (auto& d : mv2.directories().value()) {
|
||||||
d.parent_entry() = 0; // this will be recovered
|
d.parent_entry() = 0; // this will be recovered
|
||||||
|
d.self_entry() = 0; // this will be recovered
|
||||||
auto delta = d.first_entry().value() - last_first_entry;
|
auto delta = d.first_entry().value() - last_first_entry;
|
||||||
last_first_entry = d.first_entry().value();
|
last_first_entry = d.first_entry().value();
|
||||||
d.first_entry() = delta;
|
d.first_entry() = delta;
|
||||||
|
@ -62,17 +62,23 @@ struct chunk {
|
|||||||
* ..
|
* ..
|
||||||
* dir_entries[directory[inode + 1].first_entry - 1]
|
* dir_entries[directory[inode + 1].first_entry - 1]
|
||||||
*
|
*
|
||||||
* Note that the `first_entry` fields are stored delta-compressed
|
* Note that as of v2.3, directory entries can be stored "packed", in
|
||||||
* as of v2.3 and must be unpacked before using. Also note that
|
* which case only the `first_entry` fields are populated and stored
|
||||||
* the `parent_entry` fields are all set to zero as of v2.3. The
|
* delta-compressed. The `first_entry` fields must be unpacked before
|
||||||
* `parent_entry` information can easily and quickly be built by
|
* use, and the `parent_entry` and `self_entry` fields must be built
|
||||||
* traversing the `dir_entries` using the unpacked `first_entry`
|
* by traversing the `dir_entries` using the unpacked `first_entry`
|
||||||
* fields.
|
* fields.
|
||||||
*/
|
*/
|
||||||
struct directory {
|
struct directory {
|
||||||
1: UInt32 parent_entry // indexes into `dir_entries`
|
1: UInt32 parent_entry // indexes into `dir_entries`
|
||||||
|
|
||||||
2: UInt32 first_entry // indexes into `dir_entries`
|
2: UInt32 first_entry // indexes into `dir_entries`
|
||||||
|
|
||||||
|
//==========================================================//
|
||||||
|
// fields added with dwarfs-0.11.0, file system version 2.5 //
|
||||||
|
//==========================================================//
|
||||||
|
|
||||||
|
3: UInt32 self_entry // indexes into `dir_entries`
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user