refactor: split out metadata_types internals that depend on thrift

This commit is contained in:
Marcus Holland-Moritz 2024-07-27 17:14:16 +02:00
parent bf4ae3794e
commit 160afe38dd
8 changed files with 1089 additions and 890 deletions

View File

@ -635,6 +635,7 @@ list(APPEND LIBDWARFS_READER_SRC
src/dwarfs/fs_section.cpp
src/dwarfs/internal/inode_reader_v2.cpp
src/dwarfs/internal/metadata_v2.cpp
src/dwarfs/internal/metadata_types.cpp
src/dwarfs/metadata_types.cpp
)

View File

@ -29,7 +29,7 @@
#include <folly/Expected.h>
#include <dwarfs/block_range.h>
#include <dwarfs/metadata_types.h>
#include <dwarfs/internal/metadata_types.h>
#include <dwarfs/types.h>
namespace dwarfs {

View File

@ -0,0 +1,242 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <string>
#include <variant>
#include <boost/iterator/iterator_facade.hpp>
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <dwarfs/file_stat.h>
#include <dwarfs/file_type.h>
#include <dwarfs/string_table.h>
#include <dwarfs/gen-cpp2/metadata_layouts.h>
namespace dwarfs {
class logger;
namespace internal {
template <typename T>
class metadata_;
/**
 * Shared, read-only state derived from the frozen metadata.
 *
 * Bundles a reference to the frozen metadata with the (optionally unpacked)
 * directory table and the names string table.
 *
 * NOTE(review): `meta_` is held by reference, so the `Meta` object passed to
 * the constructor has to outlive this object. `directories_` points into
 * `directories_storage_`; an implicitly generated copy would keep pointing at
 * the source object's storage -- consider deleting the copy operations.
 */
class global_metadata {
public:
using Meta =
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
// Stores a reference to `meta`, unpacks the directory table if the metadata
// uses packed directories and sets up the names table.
global_metadata(logger& lgr, Meta const& meta);
// Run all metadata consistency checks on `meta`.
static void check_consistency(logger& lgr, Meta const& meta);
// Run all metadata consistency checks on the metadata held by this object.
void check_consistency(logger& lgr) const;
Meta const& meta() const { return meta_; }
// Index of the first directory entry of directory inode `ino`.
uint32_t first_dir_entry(uint32_t ino) const;
// Index of the directory entry of the parent of directory inode `ino`.
uint32_t parent_dir_entry(uint32_t ino) const;
string_table const& names() const { return names_; }
// Unpacked directory table; empty unless the metadata uses packed
// directories.
std::vector<thrift::metadata::directory> const& directories() const {
return directories_storage_;
}
private:
// Declaration order matters: the constructor initialises the members below
// from the ones above.
Meta const& meta_;
std::vector<thrift::metadata::directory> const directories_storage_;
// `directories_storage_.data()`, or nullptr if the storage is empty.
thrift::metadata::directory const* const directories_;
string_table const names_;
};
/**
 * Frozen `inode_data` view combined with the inode number and a pointer to
 * the frozen metadata that is needed to resolve the mode/uid/gid indices
 * stored in the inode.
 *
 * The metadata is held by pointer, so it has to outlive the view.
 */
class inode_view_impl
    : public ::apache::thrift::frozen::View<thrift::metadata::inode_data> {
  using InodeView =
      ::apache::thrift::frozen::View<thrift::metadata::inode_data>;
  using Meta =
      ::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;

 public:
  using uid_type = file_stat::uid_type;
  using gid_type = file_stat::gid_type;
  using mode_type = file_stat::mode_type;

  /**
   * \param iv    frozen inode data
   * \param ino   inode number reported by inode_num()
   * \param meta  frozen metadata used to look up modes, uids and gids
   */
  inode_view_impl(InodeView iv, uint32_t ino, Meta const& meta)
      : InodeView{iv}
      , inode_num_{ino}
      , meta_{&meta} {}

  // Mode looked up in the metadata's modes table.
  mode_type mode() const;
  std::string mode_string() const;
  std::string perm_string() const;

  // File type derived from mode().
  posix_file_type::value type() const {
    return posix_file_type::from_mode(mode());
  }

  uid_type getuid() const;
  gid_type getgid() const;

  uint32_t inode_num() const { return inode_num_; }

 private:
  uint32_t inode_num_;
  Meta const* meta_;
};
/**
 * View of a single directory entry.
 *
 * Wraps either a frozen `dir_entry` or, for metadata without a `dir_entries`
 * table, a frozen `inode_data`, together with the entry's own index, the
 * index of its parent entry and a pointer to the global metadata.
 *
 * The global metadata is held by pointer, so it has to outlive the view.
 */
class dir_entry_view_impl {
public:
using InodeView =
::apache::thrift::frozen::View<thrift::metadata::inode_data>;
using DirEntryView =
::apache::thrift::frozen::View<thrift::metadata::dir_entry>;
// Construct from a frozen `dir_entry`.
dir_entry_view_impl(DirEntryView v, uint32_t self_index,
uint32_t parent_index, global_metadata const& g)
: v_{v}
, self_index_{self_index}
, parent_index_{parent_index}
, g_{&g} {}
// Construct from a frozen `inode_data` (no `dir_entries` table).
dir_entry_view_impl(InodeView v, uint32_t self_index, uint32_t parent_index,
global_metadata const& g)
: v_{v}
, self_index_{self_index}
, parent_index_{parent_index}
, g_{&g} {}
// Create a view for the entry at `self_index` with an explicitly given
// parent entry index.
static std::shared_ptr<dir_entry_view_impl>
from_dir_entry_index(uint32_t self_index, uint32_t parent_index,
global_metadata const& g);
// Create a view for the entry at `self_index`, taking the parent entry from
// the directory table; the entry's inode must be a valid index into that
// table.
static std::shared_ptr<dir_entry_view_impl>
from_dir_entry_index(uint32_t self_index, global_metadata const& g);
// TODO: this works, but it's strange; a limited version of
//       dir_entry_view_impl should work without a parent for these
//       use cases
static std::string name(uint32_t index, global_metadata const& g);
static std::shared_ptr<inode_view_impl>
inode(uint32_t index, global_metadata const& g);
std::string name() const;
std::shared_ptr<inode_view_impl> inode() const;
// True if the entry refers to inode 0.
bool is_root() const;
// Parent entry, or nullptr for the root entry.
std::shared_ptr<dir_entry_view_impl> parent() const;
std::string path() const;
std::string unix_path() const;
std::filesystem::path fs_path() const;
std::wstring wpath() const;
void append_to(std::filesystem::path& p) const;
uint32_t self_index() const { return self_index_; }
private:
std::variant<DirEntryView, InodeView> v_;
uint32_t self_index_, parent_index_;
global_metadata const* g_;
};
using chunk_view = ::apache::thrift::frozen::View<thrift::metadata::chunk>;
/**
 * Half-open range `[begin, end)` of indices into the chunks table of the
 * frozen metadata.
 *
 * Instances are created by `internal::metadata_` (the constructor is
 * private). The metadata is held by pointer and has to outlive the range and
 * all iterators obtained from it.
 */
class chunk_range {
  using Meta =
      ::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;

  template <typename T>
  friend class internal::metadata_;

 public:
  /**
   * Random access iterator over the chunks of a range.
   *
   * A default-constructed iterator holds a null metadata pointer; it can be
   * compared and assigned to, but must not be dereferenced.
   */
  class iterator
      : public boost::iterator_facade<iterator, chunk_view const,
                                      boost::random_access_traversal_tag> {
   public:
    iterator() = default;

    // `view_` is only scratch space that is overwritten by every
    // dereference(), so the copy constructor does not copy it.
    iterator(iterator const& other)
        : meta_(other.meta_)
        , it_(other.it_) {}

    // Spelled out because relying on the implicit copy assignment is
    // deprecated once a copy constructor is user-provided.
    iterator& operator=(iterator const& other) = default;

   private:
    friend class boost::iterator_core_access;
    friend class chunk_range;

    iterator(Meta const* meta, uint32_t it)
        : meta_{meta}
        , it_{it} {}

    bool equal(iterator const& other) const {
      return meta_ == other.meta_ && it_ == other.it_;
    }

    void increment() { ++it_; }

    void decrement() { --it_; }

    void advance(difference_type n) { it_ += n; }

    difference_type distance_to(iterator const& other) const {
      return static_cast<difference_type>(other.it_) -
             static_cast<difference_type>(it_);
    }

    // TODO: this is nasty; can we do this without boost::iterator_facade?
    // The facade is instantiated with a reference type, so the chunk view is
    // cached in a mutable member and a reference to that member is returned.
    chunk_view const& dereference() const {
      view_ = meta_->chunks()[it_];
      return view_;
    }

    // Initialised so that default-constructed iterators compare without
    // reading an indeterminate pointer.
    Meta const* meta_{nullptr};
    uint32_t it_{0};
    mutable chunk_view view_;
  };

  iterator begin() const { return iterator(meta_, begin_); }

  iterator end() const { return iterator(meta_, end_); }

  size_t size() const { return end_ - begin_; }

  bool empty() const { return end_ == begin_; }

  // Note: `index` is used as an index into the chunks table as is.
  chunk_view operator[](uint32_t index) const { return meta_->chunks()[index]; }

 private:
  chunk_range(Meta const& meta, uint32_t begin, uint32_t end)
      : meta_(&meta)
      , begin_(begin)
      , end_(end) {}

  Meta const* meta_;
  uint32_t begin_{0};
  uint32_t end_{0};
};
} // namespace internal
} // namespace dwarfs

View File

@ -34,6 +34,7 @@
#include <nlohmann/json.hpp>
#include <dwarfs/internal/metadata_types.h>
#include <dwarfs/metadata_types.h>
namespace dwarfs {

View File

@ -26,19 +26,13 @@
#include <filesystem>
#include <optional>
#include <string>
#include <variant>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/range/irange.hpp>
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <dwarfs/file_stat.h>
#include <dwarfs/file_type.h>
#include <dwarfs/string_table.h>
#include <dwarfs/gen-cpp2/metadata_layouts.h>
namespace dwarfs {
namespace internal {
@ -46,83 +40,71 @@ namespace internal {
template <typename T>
class metadata_;
}
class inode_view_impl;
class dir_entry_view_impl;
class global_metadata;
class dir_entry_view;
class logger;
} // namespace internal
// TODO: move this elsewhere
// Selects one of three modes for reading symbolic links.
// NOTE(review): presumably controls how the path separators of the link
// target are presented (as stored / platform preferred / '/') -- confirm
// against the readlink implementation.
enum class readlink_mode {
raw,
preferred,
unix,
};
class global_metadata {
public:
using Meta =
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
global_metadata(logger& lgr, Meta const& meta);
static void check_consistency(logger& lgr, Meta const& meta);
void check_consistency(logger& lgr) const;
Meta const& meta() const { return meta_; }
uint32_t first_dir_entry(uint32_t ino) const;
uint32_t parent_dir_entry(uint32_t ino) const;
string_table const& names() const { return names_; }
std::vector<thrift::metadata::directory> const& directories() const {
return directories_storage_;
}
private:
Meta const& meta_;
std::vector<thrift::metadata::directory> const directories_storage_;
thrift::metadata::directory const* const directories_;
string_table const names_;
};
class inode_view
: public ::apache::thrift::frozen::View<thrift::metadata::inode_data> {
using InodeView =
::apache::thrift::frozen::View<thrift::metadata::inode_data>;
using Meta =
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
template <typename T>
friend class internal::metadata_;
friend class dir_entry_view;
class inode_view {
public:
using uid_type = file_stat::uid_type;
using gid_type = file_stat::gid_type;
using mode_type = file_stat::mode_type;
inode_view() = default;
explicit inode_view(std::shared_ptr<internal::inode_view_impl const> iv)
: iv_{std::move(iv)} {}
mode_type mode() const;
std::string mode_string() const;
std::string perm_string() const;
posix_file_type::value type() const {
return posix_file_type::from_mode(mode());
}
bool is_regular_file() const { return type() == posix_file_type::regular; }
bool is_directory() const { return type() == posix_file_type::directory; }
bool is_symlink() const { return type() == posix_file_type::symlink; }
posix_file_type::value type() const;
bool is_regular_file() const;
bool is_directory() const;
bool is_symlink() const;
uid_type getuid() const;
gid_type getgid() const;
uint32_t inode_num() const { return inode_num_; }
uint32_t inode_num() const;
internal::inode_view_impl const& raw() const { return *iv_; }
private:
inode_view(InodeView iv, uint32_t inode_num_, Meta const& meta)
: InodeView{iv}
, inode_num_{inode_num_}
, meta_{&meta} {}
std::shared_ptr<internal::inode_view_impl const> iv_;
};
uint32_t inode_num_;
Meta const* meta_;
/**
 * Public handle for a directory entry.
 *
 * Holds a shared pointer to the internal implementation, which keeps the
 * thrift-dependent types out of this header.
 *
 * A default-constructed view holds a null pointer; raw() dereferences the
 * pointer without checking it.
 */
class dir_entry_view {
public:
dir_entry_view() = default;
// NOTE(review): not `explicit`, unlike the corresponding inode_view
// constructor -- confirm the implicit conversion is intended.
dir_entry_view(std::shared_ptr<internal::dir_entry_view_impl const> impl)
: impl_{std::move(impl)} {}
std::string name() const;
inode_view inode() const;
bool is_root() const;
std::optional<dir_entry_view> parent() const;
std::string path() const;
std::string unix_path() const;
std::filesystem::path fs_path() const;
std::wstring wpath() const;
void append_to(std::filesystem::path& p) const;
uint32_t self_index() const;
// Access to the internal implementation object.
internal::dir_entry_view_impl const& raw() const { return *impl_; }
private:
std::shared_ptr<internal::dir_entry_view_impl const> impl_;
};
class directory_view {
@ -141,7 +123,7 @@ class directory_view {
boost::integer_range<uint32_t> entry_range() const;
private:
directory_view(uint32_t inode, global_metadata const& g)
directory_view(uint32_t inode, internal::global_metadata const& g)
: inode_{inode}
, g_{&g} {}
@ -149,138 +131,7 @@ class directory_view {
uint32_t parent_entry(uint32_t ino) const;
uint32_t inode_;
global_metadata const* g_;
};
class dir_entry_view {
using InodeView =
::apache::thrift::frozen::View<thrift::metadata::inode_data>;
using DirEntryView =
::apache::thrift::frozen::View<thrift::metadata::dir_entry>;
template <typename T>
friend class internal::metadata_;
public:
std::string name() const;
inode_view inode() const;
bool is_root() const;
std::optional<dir_entry_view> parent() const;
std::string path() const;
std::string unix_path() const;
std::filesystem::path fs_path() const;
std::wstring wpath() const;
void append_to(std::filesystem::path& p) const;
uint32_t self_index() const { return self_index_; }
private:
dir_entry_view(DirEntryView v, uint32_t self_index, uint32_t parent_index,
global_metadata const& g)
: v_{v}
, self_index_{self_index}
, parent_index_{parent_index}
, g_{&g} {}
dir_entry_view(InodeView v, uint32_t self_index, uint32_t parent_index,
global_metadata const& g)
: v_{v}
, self_index_{self_index}
, parent_index_{parent_index}
, g_{&g} {}
static dir_entry_view
from_dir_entry_index(uint32_t self_index, uint32_t parent_index,
global_metadata const& g);
static dir_entry_view
from_dir_entry_index(uint32_t self_index, global_metadata const& g);
// TODO: this works, but it's strange; a limited version of dir_entry_view
// should work without a parent for these use cases
static std::string name(uint32_t index, global_metadata const& g);
static inode_view inode(uint32_t index, global_metadata const& g);
std::variant<DirEntryView, InodeView> v_;
uint32_t self_index_, parent_index_;
global_metadata const* g_;
};
using chunk_view = ::apache::thrift::frozen::View<thrift::metadata::chunk>;
class chunk_range {
using Meta =
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
template <typename T>
friend class internal::metadata_;
public:
class iterator
: public boost::iterator_facade<iterator, chunk_view const,
boost::random_access_traversal_tag> {
public:
iterator() = default;
iterator(iterator const& other)
: meta_(other.meta_)
, it_(other.it_) {}
private:
friend class boost::iterator_core_access;
friend class chunk_range;
iterator(Meta const* meta, uint32_t it)
: meta_{meta}
, it_{it} {}
bool equal(iterator const& other) const {
return meta_ == other.meta_ && it_ == other.it_;
}
void increment() { ++it_; }
void decrement() { --it_; }
void advance(difference_type n) { it_ += n; }
difference_type distance_to(iterator const& other) const {
return static_cast<difference_type>(other.it_) -
static_cast<difference_type>(it_);
}
chunk_view const& dereference() const {
view_ = meta_->chunks()[it_];
return view_;
}
Meta const* meta_;
uint32_t it_{0};
mutable chunk_view view_;
};
iterator begin() const { return iterator(meta_, begin_); }
iterator end() const { return iterator(meta_, end_); }
size_t size() const { return end_ - begin_; }
bool empty() const { return end_ == begin_; }
chunk_view operator[](uint32_t index) const { return meta_->chunks()[index]; }
private:
chunk_range(Meta const& meta, uint32_t begin, uint32_t end)
: meta_(&meta)
, begin_(begin)
, end_(end) {}
Meta const* meta_;
uint32_t begin_{0};
uint32_t end_{0};
internal::global_metadata const* g_;
};
} // namespace dwarfs

View File

@ -0,0 +1,752 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <array>
#include <cassert>
#include <limits>
#include <memory>
#include <numeric>
#include <queue>
#include <string>
#include <vector>
#include <fmt/format.h>
#include <dwarfs/error.h>
#include <dwarfs/internal/metadata_types.h>
#include <dwarfs/logger.h>
#include <dwarfs/match.h>
#include <dwarfs/util.h>
#include <dwarfs/gen-cpp2/metadata_types_custom_protocol.h>
namespace dwarfs::internal {
namespace {
/**
 * Reconstruct the full directory table from its packed representation.
 *
 * If the metadata does not use packed directories, an empty vector is
 * returned. Otherwise the delta-encoded `first_entry` values are decoded and
 * the `parent_entry` values are recovered by a breadth-first traversal of the
 * directory entries, starting at entry 0.
 *
 * \throws runtime_error if packed directories are flagged but the
 *         `dir_entries` table is missing or empty, or the directories table
 *         is empty; this function also runs when the consistency checks are
 *         skipped, so it must not index into missing tables.
 */
std::vector<thrift::metadata::directory>
unpack_directories(logger& lgr, global_metadata::Meta const& meta) {
  std::vector<thrift::metadata::directory> directories;

  if (auto opts = meta.options(); opts and opts->packed_directories()) {
    auto dep = meta.dir_entries();
    auto metadir = meta.directories();

    if (!dep) {
      DWARFS_THROW(runtime_error,
                   "missing dir_entries table for packed directories");
    }

    if (dep->empty()) {
      DWARFS_THROW(runtime_error, "empty dir_entries table");
    }

    if (metadir.empty()) {
      DWARFS_THROW(runtime_error, "empty directories table");
    }

    LOG_PROXY(debug_logger_policy, lgr);

    auto ti = LOG_TIMED_DEBUG;

    auto dirent = *dep;

    directories.resize(metadir.size());

    // delta-decode first entries first
    directories[0].first_entry() = metadir[0].first_entry();

    for (size_t i = 1; i < directories.size(); ++i) {
      directories[i].first_entry() =
          directories[i - 1].first_entry().value() + metadir[i].first_entry();
    }

    // then traverse to recover parent entries
    std::queue<uint32_t> queue;
    queue.push(0);

    while (!queue.empty()) {
      auto parent = queue.front();
      queue.pop();

      auto p_ino = dirent[parent].inode_num();

      auto beg = directories[p_ino].first_entry().value();
      auto end = directories[p_ino + 1].first_entry().value();

      for (auto e = beg; e < end; ++e) {
        // only entries whose inode number lies within the directory table
        // (excluding its last element) are directories to descend into
        if (auto e_ino = dirent[e].inode_num();
            e_ino < (directories.size() - 1)) {
          directories[e_ino].parent_entry() = parent;
          queue.push(e);
        }
      }
    }

    ti << "unpacked directories table";
  }

  return directories;
}
// TODO: merge with inode_rank in metadata_v2
/**
 * Rank of a file mode by file type:
 * directories (0), symlinks (1), regular files (2), block and character
 * devices (3), everything else (4).
 */
int mode_rank(uint16_t mode) {
  auto const type = posix_file_type::from_mode(mode);

  if (type == posix_file_type::directory) {
    return 0;
  }

  if (type == posix_file_type::symlink) {
    return 1;
  }

  if (type == posix_file_type::regular) {
    return 2;
  }

  if (type == posix_file_type::block || type == posix_file_type::character) {
    return 3;
  }

  return 4;
}
/**
 * Verify that none of the mandatory metadata tables is empty.
 *
 * Depending on whether the `dir_entries` table is present, either that table
 * or `entry_table_v2_2` is required to be non-empty.
 *
 * \throws runtime_error naming the first empty table found
 */
void check_empty_tables(global_metadata::Meta const& meta) {
  if (meta.inodes().empty()) {
    DWARFS_THROW(runtime_error, "empty inodes table");
  }

  if (meta.directories().empty()) {
    DWARFS_THROW(runtime_error, "empty directories table");
  }

  if (meta.chunk_table().empty()) {
    DWARFS_THROW(runtime_error, "empty chunk_table table");
  }

  auto entries = meta.dir_entries();

  if (entries) {
    if (entries->empty()) {
      DWARFS_THROW(runtime_error, "empty dir_entries table");
    }
  } else if (meta.entry_table_v2_2().empty()) {
    DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table");
  }

  if (meta.modes().empty()) {
    DWARFS_THROW(runtime_error, "empty modes table");
  }
}
/**
 * Verify table sizes and that all indices stored in inodes and directory
 * entries stay within the tables they refer to.
 *
 * \throws runtime_error describing the first violation found
 */
void check_index_range(global_metadata::Meta const& meta) {
  constexpr auto max_u16 = std::numeric_limits<uint16_t>::max();
  constexpr auto max_u32 = std::numeric_limits<uint32_t>::max();

  auto mode_count = meta.modes().size();
  auto uid_count = meta.uids().size();
  auto gid_count = meta.gids().size();
  auto name_count = meta.names().size();
  auto inode_count = meta.inodes().size();
  bool legacy_layout = !static_cast<bool>(meta.dir_entries());

  if (mode_count >= max_u16) {
    DWARFS_THROW(runtime_error, "invalid number of modes");
  }

  if (uid_count >= max_u16) {
    DWARFS_THROW(runtime_error, "invalid number of uids");
  }

  if (gid_count >= max_u16) {
    DWARFS_THROW(runtime_error, "invalid number of gids");
  }

  if (name_count >= max_u32) {
    DWARFS_THROW(runtime_error, "invalid number of names");
  }

  if (inode_count >= max_u32) {
    DWARFS_THROW(runtime_error, "invalid number of inodes");
  }

  for (auto ino : meta.inodes()) {
    if (ino.mode_index() >= mode_count) {
      DWARFS_THROW(runtime_error, "mode_index out of range");
    }

    // Legacy filesystems built with --set-owner have an empty uids table
    // and store the uid directly in owner_index, so the index can only be
    // validated if the table is non-empty.
    if (uid_count > 0 && ino.owner_index() >= uid_count) {
      DWARFS_THROW(runtime_error, "owner_index out of range");
    }

    // Same for legacy filesystems built with --set-group, where the gid is
    // stored directly in group_index.
    if (gid_count > 0 && ino.group_index() >= gid_count) {
      DWARFS_THROW(runtime_error, "group_index out of range");
    }

    if (legacy_layout) {
      auto name_ix = ino.name_index_v2_2();

      // index 0 is accepted even if the names table is empty
      if (name_ix > 0 && name_ix >= name_count) {
        DWARFS_THROW(runtime_error, "name_index_v2_2 out of range");
      }
    }
  }

  if (auto dep = meta.dir_entries()) {
    if (dep->size() >= max_u32) {
      DWARFS_THROW(runtime_error, "invalid number of dir_entries");
    }

    if (auto cn = meta.compact_names()) {
      // with compact names, the number of names follows from the index
      name_count = cn->index().size();

      if (!cn->packed_index()) {
        if (name_count == 0) {
          DWARFS_THROW(runtime_error, "empty compact_names index");
        }

        // an unpacked index has one more element than there are names
        --name_count;
      }
    }

    for (auto de : *dep) {
      auto name_ix = de.name_index();

      if (name_ix > 0 && name_ix >= name_count) {
        DWARFS_THROW(runtime_error, "name_index out of range");
      }

      if (de.inode_num() >= inode_count) {
        DWARFS_THROW(runtime_error, "inode_num out of range");
      }
    }
  } else {
    auto entry_table = meta.entry_table_v2_2();

    if (entry_table.size() >= max_u32) {
      DWARFS_THROW(runtime_error, "invalid number of entries");
    }

    for (auto ent : entry_table) {
      if (ent >= inode_count) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range");
      }
    }
  }
}
/**
 * Verify the directories table and the chunk table, in either their packed
 * or their unpacked representation.
 *
 * Expects the directories table and the chunk table to be non-empty.
 *
 * \throws runtime_error describing the first violation found
 */
void check_packed_tables(global_metadata::Meta const& meta) {
  if (meta.directories().size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of directories");
  }

  if (meta.chunk_table().size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of chunk_table entries");
  }

  if (auto opt = meta.options(); opt and opt->packed_directories()) {
    // The packed representation is validated against the dir_entries table,
    // so that table must be present before it is dereferenced below.
    auto dep = meta.dir_entries();

    if (!dep) {
      DWARFS_THROW(runtime_error,
                   "missing dir_entries table for packed directories");
    }

    if (std::any_of(meta.directories().begin(), meta.directories().end(),
                    [](auto i) { return i.parent_entry() != 0; })) {
      DWARFS_THROW(runtime_error, "parent_entry set in packed directory");
    }

    // first_entry values are deltas in packed form, so they must add up to
    // the total number of directory entries
    if (std::accumulate(meta.directories().begin(), meta.directories().end(),
                        static_cast<size_t>(0), [](auto n, auto d) {
                          return n + d.first_entry();
                        }) != dep->size()) {
      DWARFS_THROW(runtime_error,
                   "first_entry inconsistency in packed directories");
    }
  } else {
    size_t num_entries =
        meta.dir_entries() ? meta.dir_entries()->size() : meta.inodes().size();

    if (!std::is_sorted(
            meta.directories().begin(), meta.directories().end(),
            [](auto a, auto b) { return a.first_entry() < b.first_entry(); })) {
      DWARFS_THROW(runtime_error, "first_entry inconsistency");
    }

    for (auto d : meta.directories()) {
      // first_entry may equal num_entries (one past the last entry)
      if (auto i = d.first_entry(); i > num_entries) {
        DWARFS_THROW(runtime_error, "first_entry out of range");
      }

      if (auto i = d.parent_entry(); i >= num_entries) {
        DWARFS_THROW(runtime_error, "parent_entry out of range");
      }
    }
  }

  if (auto opt = meta.options(); opt and opt->packed_chunk_table()) {
    // packed chunk table values must add up to the number of chunks
    if (std::accumulate(meta.chunk_table().begin(), meta.chunk_table().end(),
                        static_cast<size_t>(0)) != meta.chunks().size()) {
      DWARFS_THROW(runtime_error, "packed chunk_table inconsistency");
    }
  } else {
    // unpacked chunk table values are sorted and end at the number of chunks
    if (!std::is_sorted(meta.chunk_table().begin(), meta.chunk_table().end()) or
        meta.chunk_table().back() != meta.chunks().size()) {
      DWARFS_THROW(runtime_error, "chunk_table inconsistency");
    }
  }
}
/**
 * Verify a compact string table.
 *
 * \param v             frozen string table
 * \param expected_num  number of strings the table is expected to hold
 * \param max_item_len  maximum accepted length of a single string
 * \param what          table description used in error messages
 *
 * \throws runtime_error if the number of strings, the index or the buffer
 *         size is inconsistent, or if an item is too long
 */
void check_compact_strings(
    ::apache::thrift::frozen::View<thrift::metadata::string_table> v,
    size_t expected_num, size_t max_item_len, std::string const& what) {
  size_t index_size = v.index().size();

  // an unpacked index has one more element than there are strings
  if (!v.packed_index() && index_size > 0) {
    --index_size;
  }

  if (index_size != expected_num) {
    DWARFS_THROW(runtime_error, "unexpected number of compact " + what);
  }

  size_t expected_data_size = 0;
  size_t longest_item_len = 0;

  if (!v.index().empty()) {
    if (v.packed_index()) {
      // Sum up in size_t: with a plain `0` as the initial value,
      // std::accumulate would add up the values in an int.
      expected_data_size = std::accumulate(
          v.index().begin(), v.index().end(), static_cast<size_t>(0));
      longest_item_len = *std::max_element(v.index().begin(), v.index().end());
    } else {
      expected_data_size = v.index().back();
      if (!std::is_sorted(v.index().begin(), v.index().end())) {
        DWARFS_THROW(runtime_error, "inconsistent index for compact " + what);
      }
      // NOTE(review): longest_item_len stays 0 in this branch, so the item
      // length check below is only effective for packed indices.
    }
  }

  if (v.buffer().size() != expected_data_size) {
    DWARFS_THROW(runtime_error, "data size mismatch for compact " + what);
  }

  if (longest_item_len > max_item_len) {
    DWARFS_THROW(runtime_error,
                 fmt::format("invalid item length in compact {0}: {1} > {2}",
                             what, longest_item_len, max_item_len));
  }
}
/**
 * Verify a plain (non-compact) string table.
 *
 * \param v             frozen vector of strings
 * \param expected_num  number of strings the table is expected to hold
 * \param max_item_len  maximum accepted length of a single string
 * \param what          table description used in error messages
 *
 * \throws runtime_error if the number of strings is unexpected, an item is
 *         too long, or the summed string lengths do not match the distance
 *         between the start of the first and the end of the last string
 */
void check_plain_strings(
    ::apache::thrift::frozen::View<std::vector<std::string>> v,
    size_t expected_num, size_t max_item_len, std::string const& what) {
  if (v.size() != expected_num) {
    DWARFS_THROW(runtime_error, "unexpected number of " + what);
  }

  size_t total_size = 0;

  for (auto s : v) {
    if (s.size() > max_item_len) {
      DWARFS_THROW(runtime_error, "unexpectedly long item in " + what);
    }
    total_size += s.size();
  }

  if (!v.empty()) {
    if (total_size != static_cast<size_t>(v.back().end() - v.front().begin())) {
      DWARFS_THROW(runtime_error, "unexpected data size in " + what);
    }
  }
}
/**
 * Verify the names and symlink string tables.
 *
 * The expected number of strings in each table is derived from the largest
 * index referring to it, then the table is checked in its compact or plain
 * representation, whichever is present.
 */
void check_string_tables(global_metadata::Meta const& meta) {
  // max name length is usually 255, but fsst compression, in the worst
  // case, will use 2 bytes per input byte...
  constexpr size_t max_name_len = 512;
  constexpr size_t max_symlink_len = 4096;

  size_t num_names = 0;

  if (auto dep = meta.dir_entries()) {
    if (dep->size() > 1) {
      auto largest = std::max_element(dep->begin(), dep->end(),
                                      [](auto const& a, auto const& b) {
                                        return a.name_index() < b.name_index();
                                      });
      num_names = largest->name_index() + 1;
    }
  } else if (meta.inodes().size() > 1) {
    auto inodes = meta.inodes();
    auto largest =
        std::max_element(inodes.begin(), inodes.end(),
                         [](auto const& a, auto const& b) {
                           return a.name_index_v2_2() < b.name_index_v2_2();
                         });
    num_names = largest->name_index_v2_2() + 1;
  }

  if (auto cn = meta.compact_names()) {
    check_compact_strings(*cn, num_names, max_name_len, "names");
  } else {
    check_plain_strings(meta.names(), num_names, max_name_len, "names");
  }

  size_t num_symlink_strings = 0;

  if (auto symlink_table = meta.symlink_table(); !symlink_table.empty()) {
    num_symlink_strings =
        *std::max_element(symlink_table.begin(), symlink_table.end()) + 1;
  }

  if (auto cs = meta.compact_symlinks()) {
    check_compact_strings(*cs, num_symlink_strings, max_symlink_len,
                          "symlink strings");
  } else {
    check_plain_strings(meta.symlinks(), num_symlink_strings, max_symlink_len,
                        "symlink strings");
  }
}
/**
 * Verify the block size and that every chunk lies within a single block.
 *
 * \throws runtime_error if the block size is zero or not a power of two, if
 *         there are too many chunks, or if a chunk exceeds the block
 */
void check_chunks(global_metadata::Meta const& meta) {
  auto block_size = meta.block_size();

  // a power of two has exactly one bit set
  bool const is_power_of_two =
      block_size != 0 && (block_size & (block_size - 1)) == 0;

  if (!is_power_of_two) {
    DWARFS_THROW(runtime_error, "invalid block size");
  }

  auto chunks = meta.chunks();

  if (chunks.size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of chunks");
  }

  for (auto chunk : chunks) {
    bool const offset_ok = chunk.offset() < block_size;
    bool const size_ok = chunk.size() <= block_size;

    if (!(offset_ok && size_ok)) {
      DWARFS_THROW(runtime_error, "chunk offset/size out of range");
    }

    if (chunk.offset() + chunk.size() > block_size) {
      DWARFS_THROW(runtime_error, "chunk end outside of block");
    }
  }
}
/**
 * Verify that the inode table (or, without a `dir_entries` table,
 * `entry_table_v2_2`) is partitioned by mode rank.
 *
 * \return for each rank r in [0, 6), the number of leading elements whose
 *         mode rank is less than r
 *
 * \throws runtime_error if the table is not partitioned for some rank
 */
std::array<size_t, 6> check_partitioning(global_metadata::Meta const& meta) {
  std::array<size_t, 6> offsets;

  bool const has_dir_entries = static_cast<bool>(meta.dir_entries());

  for (int rank = 0; rank < static_cast<int>(offsets.size()); ++rank) {
    if (has_dir_entries) {
      auto below_rank = [rank, modes = meta.modes()](auto ino) {
        return mode_rank(modes[ino.mode_index()]) < rank;
      };

      auto inodes = meta.inodes();
      auto first = inodes.begin();
      auto last = inodes.end();

      if (!std::is_partitioned(first, last, below_rank)) {
        DWARFS_THROW(runtime_error, "inode table inconsistency");
      }

      auto split = std::partition_point(first, last, below_rank);
      offsets[rank] = std::distance(first, split);
    } else {
      auto below_rank = [rank, modes = meta.modes(),
                         inodes = meta.inodes()](auto ent) {
        return mode_rank(modes[inodes[ent].mode_index()]) < rank;
      };

      auto entries = meta.entry_table_v2_2();
      auto first = entries.begin();
      auto last = entries.end();

      if (!std::is_partitioned(first, last, below_rank)) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency");
      }

      auto split = std::partition_point(first, last, below_rank);
      offsets[rank] = std::distance(first, split);
    }
  }

  return offsets;
}
/**
 * Optionally run all metadata consistency checks.
 *
 * \param lgr    logger used for timing output
 * \param meta   frozen metadata to check
 * \param check  if false, no checks are performed
 *
 * \return `meta`, so the call can be used inside an initialiser
 *
 * \throws runtime_error describing the first inconsistency found
 */
global_metadata::Meta const&
check_metadata(logger& lgr, global_metadata::Meta const& meta, bool check) {
  if (check) {
    LOG_PROXY(debug_logger_policy, lgr);

    auto ti = LOG_TIMED_DEBUG;

    ti << "check metadata consistency";

    // The order matters: later checks rely on what earlier ones established
    // (e.g. non-empty tables and valid indices).
    check_empty_tables(meta);
    check_index_range(meta);
    check_packed_tables(meta);
    check_string_tables(meta);
    check_chunks(meta);
    auto offsets = check_partitioning(meta);

    // directories and chunk_table are non-empty (check_empty_tables);
    // their size minus one is used as the number of directories and of
    // regular file chunk lists, respectively
    auto num_dir = meta.directories().size() - 1;
    auto num_lnk = meta.symlink_table().size();
    auto num_reg_unique = meta.chunk_table().size() - 1;
    size_t num_reg_shared = 0;

    if (auto sfp = meta.shared_files_table()) {
      // options are optional; without them the table cannot be packed
      auto opts = meta.options();

      if (opts and opts->packed_shared_files_table()) {
        num_reg_shared =
            std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size());
        num_reg_unique -= sfp->size();
      } else {
        if (!std::is_sorted(sfp->begin(), sfp->end())) {
          DWARFS_THROW(runtime_error,
                       "unpacked shared_files_table is not sorted");
        }
        num_reg_shared = sfp->size();
        if (!sfp->empty()) {
          num_reg_unique -= sfp->back() + 1;
        }
      }
    }

    size_t num_dev = meta.devices() ? meta.devices()->size() : 0;

    // offsets[r] is the number of inodes with a mode rank below r, so
    // differences of consecutive offsets are per-type inode counts
    if (num_dir != offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of directories");
    }

    if (num_lnk != offsets[2] - offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of links");
    }

    if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) {
      DWARFS_THROW(runtime_error, "wrong number of files");
    }

    if (num_dev != offsets[4] - offsets[3]) {
      DWARFS_THROW(runtime_error, "wrong number of devices");
    }

    if (!meta.dir_entries()) {
      // without dir_entries, each inode stores its own number, which has to
      // lie within the partition for its mode rank
      for (auto ino : meta.inodes()) {
        auto mode = meta.modes()[ino.mode_index()];
        auto i = ino.inode_v2_2();
        int base = mode_rank(mode);

        if (i < offsets[base] ||
            (i >= offsets[base + 1] && i > offsets[base])) {
          DWARFS_THROW(runtime_error, "inode_v2_2 out of range");
        }
      }
    }
  }

  return meta;
}
} // namespace
// Stores a reference to `meta` and derives the remaining members from it.
// The initialisers depend on each other (`directories_` reads
// `directories_storage_`, both read `meta_`), so they rely on the members
// being declared in this order.
global_metadata::global_metadata(logger& lgr, Meta const& meta)
: meta_{meta}
// empty unless the metadata uses packed directories
, directories_storage_{unpack_directories(lgr, meta_)}
// nullptr tells the accessors to read the frozen directory table instead
, directories_{directories_storage_.empty() ? nullptr
: directories_storage_.data()}
// use the compact names table if present, the plain one otherwise
, names_{meta_.compact_names()
? string_table(lgr, "names", *meta_.compact_names())
: string_table(meta_.names())} {}
// Unconditionally run all consistency checks on `meta`; violations are
// reported by throwing from the individual checks.
void global_metadata::check_consistency(logger& lgr, Meta const& meta) {
check_metadata(lgr, meta, true);
}
// Run all consistency checks on the metadata referenced by this object.
void global_metadata::check_consistency(logger& lgr) const {
check_consistency(lgr, meta_);
}
// Index of the first directory entry of directory inode `ino`, taken from
// the unpacked directory table if there is one and from the frozen table
// otherwise. `ino` is not range-checked.
uint32_t global_metadata::first_dir_entry(uint32_t ino) const {
  if (directories_) {
    return directories_[ino].first_entry().value();
  }

  return meta_.directories()[ino].first_entry();
}
// Index of the parent's directory entry for directory inode `ino`, taken
// from the unpacked directory table if there is one and from the frozen
// table otherwise. `ino` is not range-checked.
uint32_t global_metadata::parent_dir_entry(uint32_t ino) const {
  if (directories_) {
    return directories_[ino].parent_entry().value();
  }

  return meta_.directories()[ino].parent_entry();
}
auto inode_view_impl::mode() const -> mode_type {
assert(mode_index() < meta_->modes().size());
return meta_->modes()[mode_index()];
}
// String representation of mode() as produced by file_stat::mode_string().
std::string inode_view_impl::mode_string() const {
  auto const m = mode();
  return file_stat::mode_string(m);
}
// String representation of mode() as produced by file_stat::perm_string().
std::string inode_view_impl::perm_string() const {
  auto const m = mode();
  return file_stat::perm_string(m);
}
auto inode_view_impl::getuid() const -> uid_type {
auto uids = meta_->uids();
auto ix = owner_index();
if (!uids.empty()) {
assert(ix < uids.size());
return uids[ix];
}
// Releases up to and including 0.7.x, when using --set-owner, would store
// the uid in the owner_index field and leave the uids table empty.
return ix;
}
auto inode_view_impl::getgid() const -> gid_type {
auto gids = meta_->gids();
auto ix = group_index();
if (!gids.empty()) {
assert(ix < gids.size());
return gids[ix];
}
// Releases up to and including 0.7.x, when using --set-group, would store
// the gid in the group_index field and leave the gids table empty.
return ix;
}
// TODO: pretty certain some of this stuff can be simplified
std::string dir_entry_view_impl::name() const {
return v_ |
match{
[this](DirEntryView const& dev) {
return g_->names()[dev.name_index()];
},
[this](InodeView const& iv) {
return std::string(g_->meta().names()[iv.name_index_v2_2()]);
},
};
}
std::shared_ptr<inode_view_impl> dir_entry_view_impl::inode() const {
return v_ | match{
[this](DirEntryView const& dev) {
return std::make_shared<internal::inode_view_impl>(
g_->meta().inodes()[dev.inode_num()], dev.inode_num(),
g_->meta());
},
[this](InodeView const& iv) {
return std::make_shared<internal::inode_view_impl>(
iv, iv.inode_v2_2(), g_->meta());
},
};
}
bool dir_entry_view_impl::is_root() const {
return v_ | match{
[](DirEntryView const& dev) { return dev.inode_num() == 0; },
[](InodeView const& iv) { return iv.inode_v2_2() == 0; },
};
}
/**
 * Create a view for the entry at `self_index` with an explicit parent.
 *
 * We need a parent index if the dir_entry_view_impl is for a file. For
 * directories, the parent can be determined via the directory's
 * inode, but for files, this isn't possible.
 *
 * Both indices are checked against the `dir_entries` table or, if the
 * metadata has no such table, against the inodes table.
 */
std::shared_ptr<dir_entry_view_impl>
dir_entry_view_impl::from_dir_entry_index(uint32_t self_index,
                                          uint32_t parent_index,
                                          global_metadata const& g) {
  auto& meta = g.meta();

  if (auto de = meta.dir_entries()) {
    DWARFS_CHECK(self_index < de->size(), "self_index out of range");
    DWARFS_CHECK(parent_index < de->size(), "parent_index out of range");

    auto dev = (*de)[self_index];

    return std::make_shared<dir_entry_view_impl>(dev, self_index, parent_index,
                                                 g);
  }

  DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range");
  DWARFS_CHECK(parent_index < meta.inodes().size(),
               "parent_index out of range");

  auto iv = meta.inodes()[self_index];

  return std::make_shared<dir_entry_view_impl>(iv, self_index, parent_index, g);
}
/**
 * Create a view for the entry at `self_index`, looking up the parent entry
 * in the directory table.
 *
 * The inode of the entry is used as an index into the directory table, so
 * it is checked against that table's size.
 */
std::shared_ptr<dir_entry_view_impl>
dir_entry_view_impl::from_dir_entry_index(uint32_t self_index,
                                          global_metadata const& g) {
  auto& meta = g.meta();

  if (auto de = meta.dir_entries()) {
    DWARFS_CHECK(self_index < de->size(), "self_index out of range");
    auto dev = (*de)[self_index];
    DWARFS_CHECK(dev.inode_num() < meta.directories().size(),
                 "self_index inode out of range");
    return std::make_shared<dir_entry_view_impl>(
        dev, self_index, g.parent_dir_entry(dev.inode_num()), g);
  }

  DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range");
  auto iv = meta.inodes()[self_index];

  // this validates the inode of the entry at self_index, same as the check
  // in the dir_entries branch above
  DWARFS_CHECK(iv.inode_v2_2() < meta.directories().size(),
               "self_index inode out of range");

  // without dir_entries, the parent entry is mapped through entry_table_v2_2
  return std::make_shared<dir_entry_view_impl>(
      iv, self_index,
      meta.entry_table_v2_2()[meta.directories()[iv.inode_v2_2()]
                                  .parent_entry()],
      g);
}
/**
 * Returns the parent entry, or a null pointer when this entry is the root.
 */
std::shared_ptr<dir_entry_view_impl> dir_entry_view_impl::parent() const {
  if (!is_root()) {
    return from_dir_entry_index(parent_index_, *g_);
  }

  return nullptr;
}
/**
 * Looks up the name of the entry at `index` without building a view object.
 *
 * `index` is range-checked against `dir_entries()` when that table exists,
 * otherwise against `inodes()`.
 */
std::string
dir_entry_view_impl::name(uint32_t index, global_metadata const& g) {
  auto const& meta = g.meta();
  auto entries = meta.dir_entries();

  if (!entries) {
    // No dir_entries table: the name index is stored in the inode record.
    DWARFS_CHECK(index < meta.inodes().size(), "index out of range");
    auto ino = meta.inodes()[index];
    return std::string(meta.names()[ino.name_index_v2_2()]);
  }

  DWARFS_CHECK(index < entries->size(), "index out of range");
  auto entry = (*entries)[index];
  return g.names()[entry.name_index()];
}
/**
 * Creates the inode view for the entry at `index` without building a
 * directory entry view first.
 *
 * `index` is range-checked against `dir_entries()` when that table exists,
 * otherwise against `inodes()`.
 */
std::shared_ptr<inode_view_impl>
dir_entry_view_impl::inode(uint32_t index, global_metadata const& g) {
  auto const& meta = g.meta();
  auto entries = meta.dir_entries();

  if (!entries) {
    // No dir_entries table: `index` addresses the inode record directly.
    DWARFS_CHECK(index < meta.inodes().size(), "index out of range");
    auto ino = meta.inodes()[index];
    return std::make_shared<inode_view_impl>(ino, ino.inode_v2_2(), meta);
  }

  DWARFS_CHECK(index < entries->size(), "index out of range");
  auto entry = (*entries)[index];
  auto const ino_num = entry.inode_num();
  return std::make_shared<inode_view_impl>(meta.inodes()[ino_num], ino_num,
                                           meta);
}
/**
 * Returns the entry's path (see fs_path()) as a std::string, converted from
 * the path's u8string representation.
 */
std::string dir_entry_view_impl::path() const {
  auto utf8 = fs_path().u8string();
  return u8string_to_string(utf8);
}
/**
 * Returns the entry's path using '/' as the separator.
 *
 * On Windows builds the platform's preferred separator is replaced by '/';
 * on all other builds this is the same as path().
 */
std::string dir_entry_view_impl::unix_path() const {
#ifndef _WIN32
  return path();
#else
  auto native = fs_path().u8string();
  std::replace(native.begin(), native.end(),
               static_cast<char>(std::filesystem::path::preferred_separator),
               '/');
  return u8string_to_string(native);
#endif
}
std::wstring dir_entry_view_impl::wpath() const { return fs_path().wstring(); }
std::filesystem::path dir_entry_view_impl::fs_path() const {
std::filesystem::path p;
append_to(p);
return p;
}
/**
 * Appends this entry's path components to `p`.
 *
 * Components of non-root ancestors are appended first (recursively), then
 * this entry's own name. The root entry contributes no component.
 */
void dir_entry_view_impl::append_to(std::filesystem::path& p) const {
  auto const up = parent();

  if (up && !up->is_root()) {
    up->append_to(p);
  }

  if (is_root()) {
    return;
  }

  p /= string_to_u8string(name());
}
} // namespace dwarfs::internal

View File

@ -374,7 +374,7 @@ class metadata_ final : public metadata_v2::impl {
, global_(lgr, check_metadata_consistency(lgr, meta_,
options.check_consistency ||
force_consistency_check))
, root_(dir_entry_view::from_dir_entry_index(0, global_))
, root_(internal::dir_entry_view_impl::from_dir_entry_index(0, global_))
, LOG_PROXY_INIT(lgr)
, inode_offset_(inode_offset)
, symlink_inode_offset_(find_inode_offset(inode_rank::INO_LNK))
@ -526,13 +526,14 @@ class metadata_ final : public metadata_v2::impl {
// TODO: move compatibility details to metadata_types
uint32_t index =
meta_.dir_entries() ? inode : meta_.entry_table_v2_2()[inode];
return inode_view(meta_.inodes()[index], inode, meta_);
return inode_view{std::make_shared<internal::inode_view_impl>(
meta_.inodes()[index], inode, meta_)};
}
dir_entry_view
make_dir_entry_view(uint32_t self_index, uint32_t parent_index) const {
return dir_entry_view::from_dir_entry_index(self_index, parent_index,
global_);
return dir_entry_view{dir_entry_view_impl::from_dir_entry_index(
self_index, parent_index, global_)};
}
// This represents the order in which inodes are stored in inodes
@ -1454,16 +1455,16 @@ metadata_<LoggerPolicy>::find(directory_view dir, std::string_view name) const {
auto range = dir.entry_range();
auto it = std::lower_bound(range.begin(), range.end(), name,
[&](auto ix, std::string_view name) {
return dir_entry_view::name(ix, global_) < name;
});
auto it = std::lower_bound(
range.begin(), range.end(), name, [&](auto ix, std::string_view name) {
return internal::dir_entry_view_impl::name(ix, global_) < name;
});
std::optional<inode_view> rv;
if (it != range.end()) {
if (dir_entry_view::name(*it, global_) == name) {
rv = dir_entry_view::inode(*it, global_);
if (internal::dir_entry_view_impl::name(*it, global_) == name) {
rv = inode_view{internal::dir_entry_view_impl::inode(*it, global_)};
}
}
@ -1551,12 +1552,12 @@ int metadata_<LoggerPolicy>::getattr(inode_view iv, file_stat* stbuf) const {
stbuf->blocks = (stbuf->size + 511) / 512;
stbuf->uid = iv.getuid();
stbuf->gid = iv.getgid();
stbuf->mtime = resolution * (timebase + iv.mtime_offset());
stbuf->mtime = resolution * (timebase + iv.raw().mtime_offset());
if (mtime_only) {
stbuf->atime = stbuf->ctime = stbuf->mtime;
} else {
stbuf->atime = resolution * (timebase + iv.atime_offset());
stbuf->ctime = resolution * (timebase + iv.ctime_offset());
stbuf->atime = resolution * (timebase + iv.raw().atime_offset());
stbuf->ctime = resolution * (timebase + iv.raw().ctime_offset());
}
stbuf->nlink = options_.enable_nlink && stbuf->is_regular_file()
? DWARFS_NOTHROW(nlinks_.at(inode - file_inode_offset_))
@ -1601,8 +1602,10 @@ metadata_<LoggerPolicy>::readdir(directory_view dir, size_t offset) const {
}
auto index = dir.first_entry() + offset;
auto inode = dir_entry_view::inode(index, global_);
return std::pair(inode, dir_entry_view::name(index, global_));
auto inode =
inode_view{internal::dir_entry_view_impl::inode(index, global_)};
return std::pair(inode,
internal::dir_entry_view_impl::name(index, global_));
}
return std::nullopt;

View File

@ -27,720 +27,69 @@
#include <fmt/format.h>
#include <dwarfs/error.h>
#include <dwarfs/internal/metadata_types.h>
#include <dwarfs/logger.h>
#include <dwarfs/match.h>
#include <dwarfs/metadata_types.h>
#include <dwarfs/util.h>
#include <dwarfs/gen-cpp2/metadata_types_custom_protocol.h>
namespace dwarfs {
namespace {
inode_view::mode_type inode_view::mode() const { return iv_->mode(); }
std::vector<thrift::metadata::directory>
unpack_directories(logger& lgr, global_metadata::Meta const& meta) {
std::vector<thrift::metadata::directory> directories;
std::string inode_view::mode_string() const { return iv_->mode_string(); }
if (auto opts = meta.options(); opts and opts->packed_directories()) {
LOG_PROXY(debug_logger_policy, lgr);
std::string inode_view::perm_string() const { return iv_->perm_string(); }
auto ti = LOG_TIMED_DEBUG;
posix_file_type::value inode_view::type() const { return iv_->type(); }
auto dirent = *meta.dir_entries();
auto metadir = meta.directories();
directories.resize(metadir.size());
// delta-decode first entries first
directories[0].first_entry() = metadir[0].first_entry();
for (size_t i = 1; i < directories.size(); ++i) {
directories[i].first_entry() =
directories[i - 1].first_entry().value() + metadir[i].first_entry();
}
// then traverse to recover parent entries
std::queue<uint32_t> queue;
queue.push(0);
while (!queue.empty()) {
auto parent = queue.front();
queue.pop();
auto p_ino = dirent[parent].inode_num();
auto beg = directories[p_ino].first_entry().value();
auto end = directories[p_ino + 1].first_entry().value();
for (auto e = beg; e < end; ++e) {
if (auto e_ino = dirent[e].inode_num();
e_ino < (directories.size() - 1)) {
directories[e_ino].parent_entry() = parent;
queue.push(e);
}
}
}
ti << "unpacked directories table";
}
return directories;
bool inode_view::is_regular_file() const {
return iv_->type() == posix_file_type::regular;
}
// TODO: merge with inode_rank in metadata_v2
int mode_rank(uint16_t mode) {
switch (posix_file_type::from_mode(mode)) {
case posix_file_type::directory:
return 0;
case posix_file_type::symlink:
return 1;
case posix_file_type::regular:
return 2;
case posix_file_type::block:
case posix_file_type::character:
return 3;
default:
return 4;
}
bool inode_view::is_directory() const {
return iv_->type() == posix_file_type::directory;
}
void check_empty_tables(global_metadata::Meta const& meta) {
if (meta.inodes().empty()) {
DWARFS_THROW(runtime_error, "empty inodes table");
}
if (meta.directories().empty()) {
DWARFS_THROW(runtime_error, "empty directories table");
}
if (meta.chunk_table().empty()) {
DWARFS_THROW(runtime_error, "empty chunk_table table");
}
if (auto de = meta.dir_entries()) {
if (de->empty()) {
DWARFS_THROW(runtime_error, "empty dir_entries table");
}
} else {
if (meta.entry_table_v2_2().empty()) {
DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table");
}
}
if (meta.modes().empty()) {
DWARFS_THROW(runtime_error, "empty modes table");
}
bool inode_view::is_symlink() const {
return iv_->type() == posix_file_type::symlink;
}
void check_index_range(global_metadata::Meta const& meta) {
auto num_modes = meta.modes().size();
auto num_uids = meta.uids().size();
auto num_gids = meta.gids().size();
auto num_names = meta.names().size();
auto num_inodes = meta.inodes().size();
bool v2_2 = !static_cast<bool>(meta.dir_entries());
inode_view::uid_type inode_view::getuid() const { return iv_->getuid(); }
if (num_modes >= std::numeric_limits<uint16_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of modes");
}
inode_view::gid_type inode_view::getgid() const { return iv_->getgid(); }
if (num_uids >= std::numeric_limits<uint16_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of uids");
}
uint32_t inode_view::inode_num() const { return iv_->inode_num(); }
if (num_gids >= std::numeric_limits<uint16_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of gids");
}
std::string dir_entry_view::name() const { return impl_->name(); }
if (num_names >= std::numeric_limits<uint32_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of names");
}
inode_view dir_entry_view::inode() const { return inode_view{impl_->inode()}; }
if (num_inodes >= std::numeric_limits<uint32_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of inodes");
}
for (auto ino : meta.inodes()) {
if (ino.mode_index() >= num_modes) {
DWARFS_THROW(runtime_error, "mode_index out of range");
}
// Special handling for legacy filesystems built with --set-owner
// where num_uids == 0 is valid and owner_index is used to store
// the uid.
if (num_uids > 0) {
if (auto i = ino.owner_index(); i >= num_uids) {
DWARFS_THROW(runtime_error, "owner_index out of range");
}
}
// Special handling for legacy filesystems built with --set-group
// where num_gids == 0 is valid and group_index is used to store
// the gid.
if (num_gids > 0) {
if (auto i = ino.group_index(); i >= num_gids) {
DWARFS_THROW(runtime_error, "group_index out of range");
}
}
if (v2_2) {
if (auto i = ino.name_index_v2_2(); i >= num_names && i > 0) {
DWARFS_THROW(runtime_error, "name_index_v2_2 out of range");
}
}
}
if (auto dep = meta.dir_entries()) {
if (dep->size() >= std::numeric_limits<uint32_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of dir_entries");
}
if (auto cn = meta.compact_names()) {
num_names = cn->index().size();
if (!cn->packed_index()) {
if (num_names == 0) {
DWARFS_THROW(runtime_error, "empty compact_names index");
}
--num_names;
}
}
for (auto de : *dep) {
if (auto i = de.name_index(); i >= num_names && i > 0) {
DWARFS_THROW(runtime_error, "name_index out of range");
}
if (auto i = de.inode_num(); i >= num_inodes) {
DWARFS_THROW(runtime_error, "inode_num out of range");
}
}
} else {
if (meta.entry_table_v2_2().size() >=
std::numeric_limits<uint32_t>::max()) {
DWARFS_THROW(runtime_error, "invalid number of entries");
}
for (auto ent : meta.entry_table_v2_2()) {
if (ent >= num_inodes) {
DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range");
}
}
}
}
/**
 * Validates the `directories` and `chunk_table` tables, in both their packed
 * and unpacked encodings.
 *
 * Throws `runtime_error` (via DWARFS_THROW) on the first inconsistency:
 *  - a table has too many entries to be indexed by a uint32_t,
 *  - packed directories: a `parent_entry` is set, `dir_entries` is missing,
 *    or the `first_entry` values don't add up to the number of dir entries,
 *  - unpacked directories: `first_entry` isn't sorted or an index is out of
 *    range,
 *  - packed chunk table: the values don't add up to the number of chunks,
 *  - unpacked chunk table: the table isn't sorted or its last value isn't
 *    the number of chunks.
 *
 * NOTE(review): the unpacked chunk table branch calls `back()`, so it relies
 * on the table being non-empty; presumably the caller checks that first.
 */
void check_packed_tables(global_metadata::Meta const& meta) {
  if (meta.directories().size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of directories");
  }

  if (meta.chunk_table().size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of chunk_table entries");
  }

  if (auto opt = meta.options(); opt and opt->packed_directories()) {
    if (std::any_of(meta.directories().begin(), meta.directories().end(),
                    [](auto i) { return i.parent_entry() != 0; })) {
      DWARFS_THROW(runtime_error, "parent_entry set in packed directory");
    }

    // The sum check below needs the dir_entries table; don't dereference the
    // optional field unless it is actually present.
    auto dep = meta.dir_entries();

    if (!dep) {
      DWARFS_THROW(runtime_error, "packed directories without dir_entries");
    }

    // Packed first_entry values are deltas, so they must sum up to the total
    // number of directory entries.
    if (std::accumulate(meta.directories().begin(), meta.directories().end(),
                        static_cast<size_t>(0), [](auto n, auto d) {
                          return n + d.first_entry();
                        }) != dep->size()) {
      DWARFS_THROW(runtime_error,
                   "first_entry inconsistency in packed directories");
    }
  } else {
    size_t num_entries =
        meta.dir_entries() ? meta.dir_entries()->size() : meta.inodes().size();

    if (!std::is_sorted(
            meta.directories().begin(), meta.directories().end(),
            [](auto a, auto b) { return a.first_entry() < b.first_entry(); })) {
      DWARFS_THROW(runtime_error, "first_entry inconsistency");
    }

    for (auto d : meta.directories()) {
      // first_entry may equal num_entries, parent_entry may not.
      if (auto i = d.first_entry(); i > num_entries) {
        DWARFS_THROW(runtime_error, "first_entry out of range");
      }

      if (auto i = d.parent_entry(); i >= num_entries) {
        DWARFS_THROW(runtime_error, "parent_entry out of range");
      }
    }
  }

  if (auto opt = meta.options(); opt and opt->packed_chunk_table()) {
    if (std::accumulate(meta.chunk_table().begin(), meta.chunk_table().end(),
                        static_cast<size_t>(0)) != meta.chunks().size()) {
      DWARFS_THROW(runtime_error, "packed chunk_table inconsistency");
    }
  } else {
    if (!std::is_sorted(meta.chunk_table().begin(), meta.chunk_table().end()) or
        meta.chunk_table().back() != meta.chunks().size()) {
      DWARFS_THROW(runtime_error, "chunk_table inconsistency");
    }
  }
}
/**
 * Validates a compact (single buffer + index) string table.
 *
 * \param v             frozen view of the string table
 * \param expected_num  number of strings the table is expected to hold
 * \param max_item_len  maximum accepted length of a single string
 * \param what          table description used in error messages
 *
 * Throws `runtime_error` (via DWARFS_THROW) if the number of strings, the
 * index, the buffer size or an item length is inconsistent.
 */
void check_compact_strings(
    ::apache::thrift::frozen::View<thrift::metadata::string_table> v,
    size_t expected_num, size_t max_item_len, std::string const& what) {
  size_t index_size = v.index().size();

  // An unpacked index stores one more element than there are strings.
  if (!v.packed_index() && index_size > 0) {
    --index_size;
  }

  if (index_size != expected_num) {
    DWARFS_THROW(runtime_error, "unexpected number of compact " + what);
  }

  size_t expected_data_size = 0;
  size_t longest_item_len = 0;

  if (!v.index().empty()) {
    if (v.packed_index()) {
      // A packed index holds item lengths. Sum them up as size_t: the type
      // of the seed value is the type std::accumulate computes in, and an
      // `int` seed cannot represent totals of 2 GiB and more.
      expected_data_size = std::accumulate(v.index().begin(), v.index().end(),
                                           static_cast<size_t>(0));
      longest_item_len = *std::max_element(v.index().begin(), v.index().end());
    } else {
      // An unpacked index holds offsets; the last one is the buffer size.
      // NOTE(review): longest_item_len stays 0 in this branch, so
      // max_item_len is only enforced for packed indices.
      expected_data_size = v.index().back();
      if (!std::is_sorted(v.index().begin(), v.index().end())) {
        DWARFS_THROW(runtime_error, "inconsistent index for compact " + what);
      }
    }
  }

  if (v.buffer().size() != expected_data_size) {
    DWARFS_THROW(runtime_error, "data size mismatch for compact " + what);
  }

  if (longest_item_len > max_item_len) {
    DWARFS_THROW(runtime_error,
                 fmt::format("invalid item length in compact {0}: {1} > {2}",
                             what, longest_item_len, max_item_len));
  }
}
/**
 * Validates a plain (list of strings) string table.
 *
 * \param v             frozen view of the string list
 * \param expected_num  number of strings the list is expected to hold
 * \param max_item_len  maximum accepted length of a single string
 * \param what          table description used in error messages
 *
 * Throws `runtime_error` (via DWARFS_THROW) if the number of strings is
 * wrong, a string is too long, or the summed string sizes differ from the
 * distance between the start of the first and the end of the last string.
 */
void check_plain_strings(
    ::apache::thrift::frozen::View<std::vector<std::string>> v,
    size_t expected_num, size_t max_item_len, std::string const& what) {
  if (v.size() != expected_num) {
    DWARFS_THROW(runtime_error, "unexpected number of " + what);
  }

  size_t total_size = 0;

  for (auto s : v) {
    if (s.size() > max_item_len) {
      DWARFS_THROW(runtime_error, "unexpectedly long item in " + what);
    }
    total_size += s.size();
  }

  if (!v.empty()) {
    if (total_size != static_cast<size_t>(v.back().end() - v.front().begin())) {
      DWARFS_THROW(runtime_error, "unexpected data size in " + what);
    }
  }
}
/**
 * Validates the names and symlink string tables.
 *
 * The expected number of strings in each table is derived from the highest
 * index that refers into it (plus one); the table is then checked in its
 * compact or plain form, whichever is present.
 */
void check_string_tables(global_metadata::Meta const& meta) {
  // max name length is usually 255, but fsst compression, in the worst
  // case, will use 2 bytes per input byte...
  constexpr size_t max_name_len = 512;
  constexpr size_t max_symlink_len = 4096;

  size_t num_names = 0;

  if (auto dep = meta.dir_entries()) {
    if (dep->size() > 1) {
      auto by_name_index = [](auto const& a, auto const& b) {
        return a.name_index() < b.name_index();
      };
      auto highest = std::max_element(dep->begin(), dep->end(), by_name_index);
      num_names = highest->name_index() + 1;
    }
  } else if (meta.inodes().size() > 1) {
    auto by_name_index = [](auto const& a, auto const& b) {
      return a.name_index_v2_2() < b.name_index_v2_2();
    };
    auto highest = std::max_element(meta.inodes().begin(), meta.inodes().end(),
                                    by_name_index);
    num_names = highest->name_index_v2_2() + 1;
  }

  if (auto cn = meta.compact_names()) {
    check_compact_strings(*cn, num_names, max_name_len, "names");
  } else {
    check_plain_strings(meta.names(), num_names, max_name_len, "names");
  }

  size_t num_symlink_strings = 0;

  if (!meta.symlink_table().empty()) {
    auto highest = std::max_element(meta.symlink_table().begin(),
                                    meta.symlink_table().end());
    num_symlink_strings = *highest + 1;
  }

  if (auto cs = meta.compact_symlinks()) {
    check_compact_strings(*cs, num_symlink_strings, max_symlink_len,
                          "symlink strings");
  } else {
    check_plain_strings(meta.symlinks(), num_symlink_strings, max_symlink_len,
                        "symlink strings");
  }
}
/**
 * Validates the block size and the chunks table.
 *
 * The block size must be a non-zero power of two, the number of chunks must
 * be below the uint32_t maximum, and every chunk must lie entirely within a
 * single block.
 */
void check_chunks(global_metadata::Meta const& meta) {
  auto const block_size = meta.block_size();

  bool const is_power_of_two =
      block_size != 0 && (block_size & (block_size - 1)) == 0;

  if (!is_power_of_two) {
    DWARFS_THROW(runtime_error, "invalid block size");
  }

  if (meta.chunks().size() >= std::numeric_limits<uint32_t>::max()) {
    DWARFS_THROW(runtime_error, "invalid number of chunks");
  }

  for (auto chunk : meta.chunks()) {
    bool const offset_ok = chunk.offset() < block_size;
    bool const size_ok = chunk.size() <= block_size;

    if (!(offset_ok && size_ok)) {
      DWARFS_THROW(runtime_error, "chunk offset/size out of range");
    }

    if (chunk.offset() + chunk.size() > block_size) {
      DWARFS_THROW(runtime_error, "chunk end outside of block");
    }
  }
}
/**
 * Checks that the inode table (or, without `dir_entries`, the
 * `entry_table_v2_2`) is partitioned by mode rank and returns the partition
 * offsets.
 *
 * Element `r` of the result is the number of leading elements whose
 * `mode_rank()` is less than `r`. Throws `runtime_error` (via DWARFS_THROW)
 * if the table is not partitioned for some rank.
 */
std::array<size_t, 6> check_partitioning(global_metadata::Meta const& meta) {
  std::array<size_t, 6> offsets;
  auto const num_ranks = static_cast<int>(offsets.size());

  for (int rank = 0; rank < num_ranks; ++rank) {
    if (meta.dir_entries()) {
      auto inodes = meta.inodes();
      auto below_rank = [rank, modes = meta.modes()](auto ino) {
        return mode_rank(modes[ino.mode_index()]) < rank;
      };

      if (!std::is_partitioned(inodes.begin(), inodes.end(), below_rank)) {
        DWARFS_THROW(runtime_error, "inode table inconsistency");
      }

      auto split =
          std::partition_point(inodes.begin(), inodes.end(), below_rank);
      offsets[rank] = std::distance(inodes.begin(), split);
    } else {
      auto entries = meta.entry_table_v2_2();
      auto below_rank = [rank, modes = meta.modes(),
                         inodes = meta.inodes()](auto ent) {
        return mode_rank(modes[inodes[ent].mode_index()]) < rank;
      };

      if (!std::is_partitioned(entries.begin(), entries.end(), below_rank)) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency");
      }

      auto split =
          std::partition_point(entries.begin(), entries.end(), below_rank);
      offsets[rank] = std::distance(entries.begin(), split);
    }
  }

  return offsets;
}
/**
 * Optionally runs all metadata consistency checks and returns `meta`.
 *
 * \param lgr    logger used for the timed debug message
 * \param meta   frozen metadata to check
 * \param check  when false, no checks are run at all
 * \return       `meta`, unchanged
 *
 * When `check` is set, the individual table checks are run first; then the
 * number of directories, links, files and devices derived from the tables is
 * compared to the partition offsets returned by check_partitioning(). Any
 * inconsistency throws `runtime_error` (via DWARFS_THROW).
 */
global_metadata::Meta const&
check_metadata(logger& lgr, global_metadata::Meta const& meta, bool check) {
  if (check) {
    LOG_PROXY(debug_logger_policy, lgr);

    auto ti = LOG_TIMED_DEBUG;

    ti << "check metadata consistency";

    check_empty_tables(meta);
    check_index_range(meta);
    check_packed_tables(meta);
    check_string_tables(meta);
    check_chunks(meta);
    auto offsets = check_partitioning(meta);

    auto num_dir = meta.directories().size() - 1;
    auto num_lnk = meta.symlink_table().size();
    auto num_reg_unique = meta.chunk_table().size() - 1;
    size_t num_reg_shared = 0;

    if (auto sfp = meta.shared_files_table()) {
      // `options` is an optional field; only look at the packing flag when
      // it is present and treat the table as unpacked otherwise.
      if (auto opts = meta.options();
          opts and opts->packed_shared_files_table()) {
        num_reg_shared =
            std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size());
        num_reg_unique -= sfp->size();
      } else {
        if (!std::is_sorted(sfp->begin(), sfp->end())) {
          DWARFS_THROW(runtime_error,
                       "unpacked shared_files_table is not sorted");
        }
        num_reg_shared = sfp->size();
        if (!sfp->empty()) {
          num_reg_unique -= sfp->back() + 1;
        }
      }
    }

    size_t num_dev = meta.devices() ? meta.devices()->size() : 0;

    if (num_dir != offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of directories");
    }

    if (num_lnk != offsets[2] - offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of links");
    }

    if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) {
      DWARFS_THROW(runtime_error, "wrong number of files");
    }

    if (num_dev != offsets[4] - offsets[3]) {
      DWARFS_THROW(runtime_error, "wrong number of devices");
    }

    if (!meta.dir_entries()) {
      // Without dir_entries, every inode number must fall into the offset
      // range that belongs to the rank of the inode's mode.
      for (auto ino : meta.inodes()) {
        auto mode = meta.modes()[ino.mode_index()];
        auto i = ino.inode_v2_2();
        int base = mode_rank(mode);

        if (i < offsets[base] ||
            (i >= offsets[base + 1] && i > offsets[base])) {
          DWARFS_THROW(runtime_error, "inode_v2_2 out of range");
        }
      }
    }
  }

  return meta;
}
} // namespace
// Sets up the global metadata state from the frozen metadata `meta`.
// `lgr` is passed on to unpack_directories() and to the string_table that is
// built from compact names.
global_metadata::global_metadata(logger& lgr, Meta const& meta)
: meta_{meta}
, directories_storage_{unpack_directories(lgr, meta_)}
// directories_ is null when unpack_directories() returned an empty vector;
// first_dir_entry()/parent_dir_entry() then read meta_.directories() instead.
, directories_{directories_storage_.empty() ? nullptr
: directories_storage_.data()}
// Use the compact names table when the metadata has one, the plain names
// list otherwise.
, names_{meta_.compact_names()
? string_table(lgr, "names", *meta_.compact_names())
: string_table(meta_.names())} {}
// Runs the metadata consistency checks on `meta` unconditionally.
void global_metadata::check_consistency(logger& lgr, Meta const& meta) {
  constexpr bool always_check = true;
  check_metadata(lgr, meta, always_check);
}
// Runs the metadata consistency checks on this object's own metadata.
void global_metadata::check_consistency(logger& lgr) const {
  global_metadata::check_consistency(lgr, meta_);
}
// Returns the first entry index of directory `ino`, read from the unpacked
// directories array when there is one and from the frozen table otherwise.
uint32_t global_metadata::first_dir_entry(uint32_t ino) const {
  if (directories_) {
    return directories_[ino].first_entry().value();
  }

  return meta_.directories()[ino].first_entry();
}
// Returns the parent entry index of directory `ino`, read from the unpacked
// directories array when there is one and from the frozen table otherwise.
uint32_t global_metadata::parent_dir_entry(uint32_t ino) const {
  if (directories_) {
    return directories_[ino].parent_entry().value();
  }

  return meta_.directories()[ino].parent_entry();
}
auto inode_view::mode() const -> mode_type {
assert(mode_index() < meta_->modes().size());
return meta_->modes()[mode_index()];
}
// Returns the inode's mode formatted by file_stat::mode_string().
std::string inode_view::mode_string() const {
  auto const m = mode();
  return file_stat::mode_string(m);
}
// Returns the inode's mode formatted by file_stat::perm_string().
std::string inode_view::perm_string() const {
  auto const m = mode();
  return file_stat::perm_string(m);
}
auto inode_view::getuid() const -> uid_type {
auto uids = meta_->uids();
auto ix = owner_index();
if (!uids.empty()) {
assert(ix < uids.size());
return uids[ix];
}
// Releases up to and including 0.7.x, when using --set-owner, would store
// the uid in the owner_index field and leave the uids table empty.
return ix;
}
auto inode_view::getgid() const -> gid_type {
auto gids = meta_->gids();
auto ix = group_index();
if (!gids.empty()) {
assert(ix < gids.size());
return gids[ix];
}
// Releases up to and including 0.7.x, when using --set-group, would store
// the gid in the group_index field and leave the gids table empty.
return ix;
}
// TODO: pretty certain some of this stuff can be simplified
std::string dir_entry_view::name() const {
return v_ |
match{
[this](DirEntryView const& dev) {
return g_->names()[dev.name_index()];
},
[this](InodeView const& iv) {
return std::string(g_->meta().names()[iv.name_index_v2_2()]);
},
};
}
// Returns the inode view that belongs to this directory entry. A
// DirEntryView is resolved through the inodes table; an InodeView already is
// the inode record.
inode_view dir_entry_view::inode() const {
  auto const& meta = g_->meta();

  auto from_dir_entry = [&meta](DirEntryView const& entry) {
    auto const ino = entry.inode_num();
    return inode_view(meta.inodes()[ino], ino, meta);
  };

  auto from_inode = [&meta](InodeView const& ino) {
    return inode_view(ino, ino.inode_v2_2(), meta);
  };

  return v_ | match{from_dir_entry, from_inode};
}
bool dir_entry_view::is_root() const {
return v_ | match{
[](DirEntryView const& dev) { return dev.inode_num() == 0; },
[](InodeView const& iv) { return iv.inode_v2_2() == 0; },
};
}
/**
 * Creates a dir_entry_view from explicit self and parent indices.
 *
 * We need a parent index if the dir_entry_view is for a file. For
 * directories, the parent can be determined via the directory's
 * inode, but for files, this isn't possible.
 *
 * Both indices are range-checked (via DWARFS_CHECK) against the table that
 * backs the entry: `dir_entries()` when the metadata has it, otherwise
 * `inodes()`.
 */
dir_entry_view
dir_entry_view::from_dir_entry_index(uint32_t self_index, uint32_t parent_index,
                                     global_metadata const& g) {
  auto& meta = g.meta();

  if (auto de = meta.dir_entries()) {
    DWARFS_CHECK(self_index < de->size(), "self_index out of range");
    DWARFS_CHECK(parent_index < de->size(), "parent_index out of range");

    auto dev = (*de)[self_index];

    return dir_entry_view(dev, self_index, parent_index, g);
  }

  // No dir_entries table: entries are addressed through the inodes table.
  DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range");
  // This check is about parent_index, so the message must say so.
  DWARFS_CHECK(parent_index < meta.inodes().size(),
               "parent_index out of range");

  auto iv = meta.inodes()[self_index];

  return dir_entry_view(iv, self_index, parent_index, g);
}
/**
 * Creates a dir_entry_view from its own index only; the parent index is
 * derived from the directories table.
 *
 * The inode number of the entry at `self_index` must be a valid index into
 * `directories()`, because that is where the parent entry is looked up. With
 * a `dir_entries` table the parent comes from `g.parent_dir_entry()`;
 * otherwise `parent_entry()` of the directory record is translated through
 * `entry_table_v2_2()`.
 */
dir_entry_view dir_entry_view::from_dir_entry_index(uint32_t self_index,
                                                    global_metadata const& g) {
  auto& meta = g.meta();

  if (auto de = meta.dir_entries()) {
    DWARFS_CHECK(self_index < de->size(), "self_index out of range");

    auto dev = (*de)[self_index];

    DWARFS_CHECK(dev.inode_num() < meta.directories().size(),
                 "self_index inode out of range");

    return dir_entry_view(dev, self_index, g.parent_dir_entry(dev.inode_num()),
                          g);
  }

  DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range");

  auto iv = meta.inodes()[self_index];

  // This validates the inode of the entry at self_index (same check as in the
  // dir_entries branch above), not a parent index.
  DWARFS_CHECK(iv.inode_v2_2() < meta.directories().size(),
               "self_index inode out of range");

  auto parent_entry = meta.directories()[iv.inode_v2_2()].parent_entry();

  // parent_entry is used as an index into entry_table_v2_2 below; make sure
  // it cannot read past the end of that table.
  DWARFS_CHECK(parent_entry < meta.entry_table_v2_2().size(),
               "parent_entry out of range");

  return dir_entry_view(iv, self_index, meta.entry_table_v2_2()[parent_entry],
                        g);
}
bool dir_entry_view::is_root() const { return impl_->is_root(); }
std::optional<dir_entry_view> dir_entry_view::parent() const {
if (is_root()) {
return std::nullopt;
if (auto p = impl_->parent()) {
return dir_entry_view{std::move(p)};
}
return from_dir_entry_index(parent_index_, *g_);
return std::nullopt;
}
std::string dir_entry_view::name(uint32_t index, global_metadata const& g) {
if (auto de = g.meta().dir_entries()) {
DWARFS_CHECK(index < de->size(), "index out of range");
auto dev = (*de)[index];
return g.names()[dev.name_index()];
}
std::string dir_entry_view::path() const { return impl_->path(); }
DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range");
auto iv = g.meta().inodes()[index];
return std::string(g.meta().names()[iv.name_index_v2_2()]);
}
inode_view dir_entry_view::inode(uint32_t index, global_metadata const& g) {
if (auto de = g.meta().dir_entries()) {
DWARFS_CHECK(index < de->size(), "index out of range");
auto dev = (*de)[index];
return inode_view(g.meta().inodes()[dev.inode_num()], dev.inode_num(),
g.meta());
}
DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range");
auto iv = g.meta().inodes()[index];
return inode_view(iv, iv.inode_v2_2(), g.meta());
}
std::string dir_entry_view::path() const {
return u8string_to_string(fs_path().u8string());
}
std::string dir_entry_view::unix_path() const {
#ifdef _WIN32
auto p = fs_path().u8string();
std::replace(p.begin(), p.end(),
static_cast<char>(std::filesystem::path::preferred_separator),
'/');
return u8string_to_string(p);
#else
return path();
#endif
}
std::wstring dir_entry_view::wpath() const { return fs_path().wstring(); }
std::string dir_entry_view::unix_path() const { return impl_->unix_path(); }
std::filesystem::path dir_entry_view::fs_path() const {
std::filesystem::path p;
append_to(p);
return p;
return impl_->fs_path();
}
std::wstring dir_entry_view::wpath() const { return impl_->wpath(); }
void dir_entry_view::append_to(std::filesystem::path& p) const {
if (auto ev = parent()) {
if (!ev->is_root()) {
ev->append_to(p);
}
}
if (!is_root()) {
p /= string_to_u8string(name());
}
impl_->append_to(p);
}
uint32_t dir_entry_view::self_index() const { return impl_->self_index(); }
uint32_t directory_view::first_entry(uint32_t ino) const {
return g_->first_dir_entry(ino);
}