From 160afe38dd1f79c5bb1917be02636c2bfa91d091 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 27 Jul 2024 17:14:16 +0200 Subject: [PATCH] refactor: split out metadata_types internals that depend on thrift --- CMakeLists.txt | 1 + include/dwarfs/internal/inode_reader_v2.h | 2 +- include/dwarfs/internal/metadata_types.h | 242 +++++++ include/dwarfs/internal/metadata_v2.h | 1 + include/dwarfs/metadata_types.h | 241 ++----- src/dwarfs/internal/metadata_types.cpp | 752 ++++++++++++++++++++++ src/dwarfs/internal/metadata_v2.cpp | 33 +- src/dwarfs/metadata_types.cpp | 707 +------------------- 8 files changed, 1089 insertions(+), 890 deletions(-) create mode 100644 include/dwarfs/internal/metadata_types.h create mode 100644 src/dwarfs/internal/metadata_types.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c9e025b3..f3523668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -635,6 +635,7 @@ list(APPEND LIBDWARFS_READER_SRC src/dwarfs/fs_section.cpp src/dwarfs/internal/inode_reader_v2.cpp src/dwarfs/internal/metadata_v2.cpp + src/dwarfs/internal/metadata_types.cpp src/dwarfs/metadata_types.cpp ) diff --git a/include/dwarfs/internal/inode_reader_v2.h b/include/dwarfs/internal/inode_reader_v2.h index bbfbc9a6..2a3af18c 100644 --- a/include/dwarfs/internal/inode_reader_v2.h +++ b/include/dwarfs/internal/inode_reader_v2.h @@ -29,7 +29,7 @@ #include #include -#include +#include #include namespace dwarfs { diff --git a/include/dwarfs/internal/metadata_types.h b/include/dwarfs/internal/metadata_types.h new file mode 100644 index 00000000..bc67e01f --- /dev/null +++ b/include/dwarfs/internal/metadata_types.h @@ -0,0 +1,242 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +namespace dwarfs { + +class logger; + +namespace internal { + +template +class metadata_; + +class global_metadata { + public: + using Meta = + ::apache::thrift::frozen::MappedFrozen; + + global_metadata(logger& lgr, Meta const& meta); + + static void check_consistency(logger& lgr, Meta const& meta); + void check_consistency(logger& lgr) const; + + Meta const& meta() const { return meta_; } + + uint32_t first_dir_entry(uint32_t ino) const; + uint32_t parent_dir_entry(uint32_t ino) const; + + string_table const& names() const { return names_; } + + std::vector const& directories() const { + return directories_storage_; + } + + private: + Meta const& meta_; + std::vector const directories_storage_; + thrift::metadata::directory const* const directories_; + string_table const names_; +}; + +class inode_view_impl + : public ::apache::thrift::frozen::View { + using InodeView = + ::apache::thrift::frozen::View; + using Meta = + ::apache::thrift::frozen::MappedFrozen; + + public: + using uid_type = file_stat::uid_type; + using gid_type = file_stat::gid_type; + using mode_type = file_stat::mode_type; + + inode_view_impl(InodeView iv, uint32_t inode_num_, Meta const& meta) + : InodeView{iv} + , inode_num_{inode_num_} + , meta_{&meta} {} + + mode_type mode() const; + std::string mode_string() const; + std::string perm_string() const; + posix_file_type::value type() const { + return posix_file_type::from_mode(mode()); + } + uid_type getuid() const; + gid_type getgid() const; + uint32_t inode_num() const { return inode_num_; } + + private: + uint32_t inode_num_; + Meta const* meta_; +}; + +class dir_entry_view_impl { + public: + using InodeView = + ::apache::thrift::frozen::View; + using DirEntryView = + ::apache::thrift::frozen::View; + + dir_entry_view_impl(DirEntryView v, uint32_t self_index, + uint32_t parent_index, global_metadata const& g) + : v_{v} + , self_index_{self_index} + , parent_index_{parent_index} + , g_{&g} {} + + dir_entry_view_impl(InodeView v, uint32_t self_index, uint32_t parent_index, + global_metadata const& g) + : v_{v} + , self_index_{self_index} + , parent_index_{parent_index} + , g_{&g} {} + + static std::shared_ptr + from_dir_entry_index(uint32_t self_index, uint32_t parent_index, + global_metadata const& g); + static std::shared_ptr + from_dir_entry_index(uint32_t self_index, global_metadata const& g); + + // TODO: this works, but it's strange; a limited version of + // dir_entry_view_impl + // should work without a parent for these use cases + static std::string name(uint32_t index, global_metadata const& g); + static std::shared_ptr + inode(uint32_t index, global_metadata const& g); + + std::string name() const; + std::shared_ptr inode() const; + + bool is_root() const; + + std::shared_ptr parent() const; + + std::string path() const; + std::string unix_path() const; + std::filesystem::path fs_path() const; + std::wstring wpath() const; + + void append_to(std::filesystem::path& p) const; + + uint32_t self_index() const { return self_index_; } + + private: + std::variant v_; + uint32_t self_index_, parent_index_; + global_metadata const* g_; +}; + +using chunk_view = ::apache::thrift::frozen::View; + +class chunk_range { + using Meta = + ::apache::thrift::frozen::MappedFrozen; + + template + friend class internal::metadata_; + + public: + class iterator + : public boost::iterator_facade { + public: + iterator() = default; + + iterator(iterator const& other) + : meta_(other.meta_) + , it_(other.it_) {} + + private: + friend class boost::iterator_core_access; + friend class chunk_range; + + iterator(Meta const* meta, uint32_t it) + : meta_{meta} + , it_{it} {} + + bool equal(iterator const& other) const { + return meta_ == other.meta_ && it_ == other.it_; + } + + void increment() { ++it_; } + + void decrement() { --it_; } + + void advance(difference_type n) { it_ += n; } + + difference_type distance_to(iterator const& other) const { + return static_cast(other.it_) - + static_cast(it_); + } + + // TODO: this is nasty; can we do this without boost::iterator_facade? + chunk_view const& dereference() const { + view_ = meta_->chunks()[it_]; + return view_; + } + + Meta const* meta_; + uint32_t it_{0}; + mutable chunk_view view_; + }; + + iterator begin() const { return iterator(meta_, begin_); } + + iterator end() const { return iterator(meta_, end_); } + + size_t size() const { return end_ - begin_; } + + bool empty() const { return end_ == begin_; } + + chunk_view operator[](uint32_t index) const { return meta_->chunks()[index]; } + + private: + chunk_range(Meta const& meta, uint32_t begin, uint32_t end) + : meta_(&meta) + , begin_(begin) + , end_(end) {} + + Meta const* meta_; + uint32_t begin_{0}; + uint32_t end_{0}; +}; + +} // namespace internal + +} // namespace dwarfs diff --git a/include/dwarfs/internal/metadata_v2.h b/include/dwarfs/internal/metadata_v2.h index fce3d934..d4ac5202 100644 --- a/include/dwarfs/internal/metadata_v2.h +++ b/include/dwarfs/internal/metadata_v2.h @@ -34,6 +34,7 @@ #include +#include #include namespace dwarfs { diff --git a/include/dwarfs/metadata_types.h b/include/dwarfs/metadata_types.h index ebc3ee66..199e578f 100644 --- a/include/dwarfs/metadata_types.h +++ b/include/dwarfs/metadata_types.h @@ -26,19 +26,13 @@ #include #include #include -#include -#include #include -#include - #include #include #include -#include - namespace dwarfs { namespace internal { @@ -46,83 +40,71 @@ namespace internal { template class metadata_; -} +class inode_view_impl; +class dir_entry_view_impl; +class global_metadata; -class dir_entry_view; -class logger; +} // namespace internal +// TODO: move this elsewhere enum class readlink_mode { raw, preferred, unix, }; -class global_metadata { - public: - using Meta = - ::apache::thrift::frozen::MappedFrozen; - - global_metadata(logger& lgr, Meta const& meta); - - static void check_consistency(logger& lgr, Meta const& meta); - void check_consistency(logger& lgr) const; - - Meta const& meta() const { return meta_; } - - uint32_t first_dir_entry(uint32_t ino) const; - uint32_t parent_dir_entry(uint32_t ino) const; - - string_table const& names() const { return names_; } - - std::vector const& directories() const { - return directories_storage_; - } - - private: - Meta const& meta_; - std::vector const directories_storage_; - thrift::metadata::directory const* const directories_; - string_table const names_; -}; - -class inode_view - : public ::apache::thrift::frozen::View { - using InodeView = - ::apache::thrift::frozen::View; - using Meta = - ::apache::thrift::frozen::MappedFrozen; - - template - friend class internal::metadata_; - - friend class dir_entry_view; - +class inode_view { public: using uid_type = file_stat::uid_type; using gid_type = file_stat::gid_type; using mode_type = file_stat::mode_type; + inode_view() = default; + explicit inode_view(std::shared_ptr iv) + : iv_{std::move(iv)} {} + mode_type mode() const; std::string mode_string() const; std::string perm_string() const; - posix_file_type::value type() const { - return posix_file_type::from_mode(mode()); - } - bool is_regular_file() const { return type() == posix_file_type::regular; } - bool is_directory() const { return type() == posix_file_type::directory; } - bool is_symlink() const { return type() == posix_file_type::symlink; } + posix_file_type::value type() const; + bool is_regular_file() const; + bool is_directory() const; + bool is_symlink() const; uid_type getuid() const; gid_type getgid() const; - uint32_t inode_num() const { return inode_num_; } + uint32_t inode_num() const; + + internal::inode_view_impl const& raw() const { return *iv_; } private: - inode_view(InodeView iv, uint32_t inode_num_, Meta const& meta) - : InodeView{iv} - , inode_num_{inode_num_} - , meta_{&meta} {} + std::shared_ptr iv_; +}; - uint32_t inode_num_; - Meta const* meta_; +class dir_entry_view { + public: + dir_entry_view() = default; + dir_entry_view(std::shared_ptr impl) + : impl_{std::move(impl)} {} + + std::string name() const; + inode_view inode() const; + + bool is_root() const; + std::optional parent() const; + + std::string path() const; + std::string unix_path() const; + std::filesystem::path fs_path() const; + std::wstring wpath() const; + + void append_to(std::filesystem::path& p) const; + + uint32_t self_index() const; + + internal::dir_entry_view_impl const& raw() const { return *impl_; } + + private: + std::shared_ptr impl_; }; class directory_view { @@ -141,7 +123,7 @@ class directory_view { boost::integer_range entry_range() const; private: - directory_view(uint32_t inode, global_metadata const& g) + directory_view(uint32_t inode, internal::global_metadata const& g) : inode_{inode} , g_{&g} {} @@ -149,138 +131,7 @@ class directory_view { uint32_t parent_entry(uint32_t ino) const; uint32_t inode_; - global_metadata const* g_; -}; - -class dir_entry_view { - using InodeView = - ::apache::thrift::frozen::View; - using DirEntryView = - ::apache::thrift::frozen::View; - - template - friend class internal::metadata_; - - public: - std::string name() const; - inode_view inode() const; - - bool is_root() const; - - std::optional parent() const; - - std::string path() const; - std::string unix_path() const; - std::filesystem::path fs_path() const; - std::wstring wpath() const; - - void append_to(std::filesystem::path& p) const; - - uint32_t self_index() const { return self_index_; } - - private: - dir_entry_view(DirEntryView v, uint32_t self_index, uint32_t parent_index, - global_metadata const& g) - : v_{v} - , self_index_{self_index} - , parent_index_{parent_index} - , g_{&g} {} - - dir_entry_view(InodeView v, uint32_t self_index, uint32_t parent_index, - global_metadata const& g) - : v_{v} - , self_index_{self_index} - , parent_index_{parent_index} - , g_{&g} {} - - static dir_entry_view - from_dir_entry_index(uint32_t self_index, uint32_t parent_index, - global_metadata const& g); - static dir_entry_view - from_dir_entry_index(uint32_t self_index, global_metadata const& g); - - // TODO: this works, but it's strange; a limited version of dir_entry_view - // should work without a parent for these use cases - static std::string name(uint32_t index, global_metadata const& g); - static inode_view inode(uint32_t index, global_metadata const& g); - - std::variant v_; - uint32_t self_index_, parent_index_; - global_metadata const* g_; -}; - -using chunk_view = ::apache::thrift::frozen::View; - -class chunk_range { - using Meta = - ::apache::thrift::frozen::MappedFrozen; - - template - friend class internal::metadata_; - - public: - class iterator - : public boost::iterator_facade { - public: - iterator() = default; - - iterator(iterator const& other) - : meta_(other.meta_) - , it_(other.it_) {} - - private: - friend class boost::iterator_core_access; - friend class chunk_range; - - iterator(Meta const* meta, uint32_t it) - : meta_{meta} - , it_{it} {} - - bool equal(iterator const& other) const { - return meta_ == other.meta_ && it_ == other.it_; - } - - void increment() { ++it_; } - - void decrement() { --it_; } - - void advance(difference_type n) { it_ += n; } - - difference_type distance_to(iterator const& other) const { - return static_cast(other.it_) - - static_cast(it_); - } - - chunk_view const& dereference() const { - view_ = meta_->chunks()[it_]; - return view_; - } - - Meta const* meta_; - uint32_t it_{0}; - mutable chunk_view view_; - }; - - iterator begin() const { return iterator(meta_, begin_); } - - iterator end() const { return iterator(meta_, end_); } - - size_t size() const { return end_ - begin_; } - - bool empty() const { return end_ == begin_; } - - chunk_view operator[](uint32_t index) const { return meta_->chunks()[index]; } - - private: - chunk_range(Meta const& meta, uint32_t begin, uint32_t end) - : meta_(&meta) - , begin_(begin) - , end_(end) {} - - Meta const* meta_; - uint32_t begin_{0}; - uint32_t end_{0}; + internal::global_metadata const* g_; }; } // namespace dwarfs diff --git a/src/dwarfs/internal/metadata_types.cpp b/src/dwarfs/internal/metadata_types.cpp new file mode 100644 index 00000000..c7d93095 --- /dev/null +++ b/src/dwarfs/internal/metadata_types.cpp @@ -0,0 +1,752 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +namespace dwarfs::internal { + +namespace { + +std::vector +unpack_directories(logger& lgr, global_metadata::Meta const& meta) { + std::vector directories; + + if (auto opts = meta.options(); opts and opts->packed_directories()) { + LOG_PROXY(debug_logger_policy, lgr); + + auto ti = LOG_TIMED_DEBUG; + + auto dirent = *meta.dir_entries(); + auto metadir = meta.directories(); + + directories.resize(metadir.size()); + + // delta-decode first entries first + directories[0].first_entry() = metadir[0].first_entry(); + + for (size_t i = 1; i < directories.size(); ++i) { + directories[i].first_entry() = + directories[i - 1].first_entry().value() + metadir[i].first_entry(); + } + + // then traverse to recover parent entries + std::queue queue; + queue.push(0); + + while (!queue.empty()) { + auto parent = queue.front(); + queue.pop(); + + auto p_ino = dirent[parent].inode_num(); + + auto beg = directories[p_ino].first_entry().value(); + auto end = directories[p_ino + 1].first_entry().value(); + + for (auto e = beg; e < end; ++e) { + if (auto e_ino = dirent[e].inode_num(); + e_ino < (directories.size() - 1)) { + directories[e_ino].parent_entry() = parent; + queue.push(e); + } + } + } + + ti << "unpacked directories table"; + } + + return directories; +} + +// TODO: merge with inode_rank in metadata_v2 +int mode_rank(uint16_t mode) { + switch (posix_file_type::from_mode(mode)) { + case posix_file_type::directory: + return 0; + case posix_file_type::symlink: + return 1; + case posix_file_type::regular: + return 2; + case posix_file_type::block: + case posix_file_type::character: + return 3; + default: + return 4; + } +} + +void check_empty_tables(global_metadata::Meta const& meta) { + if (meta.inodes().empty()) { + DWARFS_THROW(runtime_error, "empty inodes table"); + } + + if (meta.directories().empty()) { + DWARFS_THROW(runtime_error, "empty directories table"); + } + + if (meta.chunk_table().empty()) { + DWARFS_THROW(runtime_error, "empty chunk_table table"); + } + + if (auto de = meta.dir_entries()) { + if (de->empty()) { + DWARFS_THROW(runtime_error, "empty dir_entries table"); + } + } else { + if (meta.entry_table_v2_2().empty()) { + DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table"); + } + } + + if (meta.modes().empty()) { + DWARFS_THROW(runtime_error, "empty modes table"); + } +} + +void check_index_range(global_metadata::Meta const& meta) { + auto num_modes = meta.modes().size(); + auto num_uids = meta.uids().size(); + auto num_gids = meta.gids().size(); + auto num_names = meta.names().size(); + auto num_inodes = meta.inodes().size(); + bool v2_2 = !static_cast(meta.dir_entries()); + + if (num_modes >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of modes"); + } + + if (num_uids >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of uids"); + } + + if (num_gids >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of gids"); + } + + if (num_names >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of names"); + } + + if (num_inodes >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of inodes"); + } + + for (auto ino : meta.inodes()) { + if (ino.mode_index() >= num_modes) { + DWARFS_THROW(runtime_error, "mode_index out of range"); + } + // Special handling for legacy filesystems built with --set-owner + // where num_uids == 0 is valid and owner_index is used to store + // the uid. + if (num_uids > 0) { + if (auto i = ino.owner_index(); i >= num_uids) { + DWARFS_THROW(runtime_error, "owner_index out of range"); + } + } + // Special handling for legacy filesystems built with --set-group + // where num_gids == 0 is valid and group_index is used to store + // the gid. + if (num_gids > 0) { + if (auto i = ino.group_index(); i >= num_gids) { + DWARFS_THROW(runtime_error, "group_index out of range"); + } + } + if (v2_2) { + if (auto i = ino.name_index_v2_2(); i >= num_names && i > 0) { + DWARFS_THROW(runtime_error, "name_index_v2_2 out of range"); + } + } + } + + if (auto dep = meta.dir_entries()) { + if (dep->size() >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of dir_entries"); + } + + if (auto cn = meta.compact_names()) { + num_names = cn->index().size(); + if (!cn->packed_index()) { + if (num_names == 0) { + DWARFS_THROW(runtime_error, "empty compact_names index"); + } + --num_names; + } + } + + for (auto de : *dep) { + if (auto i = de.name_index(); i >= num_names && i > 0) { + DWARFS_THROW(runtime_error, "name_index out of range"); + } + if (auto i = de.inode_num(); i >= num_inodes) { + DWARFS_THROW(runtime_error, "inode_num out of range"); + } + } + } else { + if (meta.entry_table_v2_2().size() >= + std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of entries"); + } + + for (auto ent : meta.entry_table_v2_2()) { + if (ent >= num_inodes) { + DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range"); + } + } + } +} + +void check_packed_tables(global_metadata::Meta const& meta) { + if (meta.directories().size() >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of directories"); + } + + if (meta.chunk_table().size() >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of chunk_table entries"); + } + + if (auto opt = meta.options(); opt and opt->packed_directories()) { + if (std::any_of(meta.directories().begin(), meta.directories().end(), + [](auto i) { return i.parent_entry() != 0; })) { + DWARFS_THROW(runtime_error, "parent_entry set in packed directory"); + } + if (std::accumulate(meta.directories().begin(), meta.directories().end(), + static_cast(0), [](auto n, auto d) { + return n + d.first_entry(); + }) != meta.dir_entries()->size()) { + DWARFS_THROW(runtime_error, + "first_entry inconsistency in packed directories"); + } + } else { + size_t num_entries = + meta.dir_entries() ? meta.dir_entries()->size() : meta.inodes().size(); + + if (!std::is_sorted( + meta.directories().begin(), meta.directories().end(), + [](auto a, auto b) { return a.first_entry() < b.first_entry(); })) { + DWARFS_THROW(runtime_error, "first_entry inconsistency"); + } + + for (auto d : meta.directories()) { + if (auto i = d.first_entry(); i > num_entries) { + DWARFS_THROW(runtime_error, "first_entry out of range"); + } + if (auto i = d.parent_entry(); i >= num_entries) { + DWARFS_THROW(runtime_error, "parent_entry out of range"); + } + } + } + + if (auto opt = meta.options(); opt and opt->packed_chunk_table()) { + if (std::accumulate(meta.chunk_table().begin(), meta.chunk_table().end(), + static_cast(0)) != meta.chunks().size()) { + DWARFS_THROW(runtime_error, "packed chunk_table inconsistency"); + } + } else { + if (!std::is_sorted(meta.chunk_table().begin(), meta.chunk_table().end()) or + meta.chunk_table().back() != meta.chunks().size()) { + DWARFS_THROW(runtime_error, "chunk_table inconsistency"); + } + } +} + +void check_compact_strings( + ::apache::thrift::frozen::View v, + size_t expected_num, size_t max_item_len, std::string const& what) { + size_t index_size = v.index().size(); + + if (!v.packed_index() && index_size > 0) { + --index_size; + } + + if (index_size != expected_num) { + DWARFS_THROW(runtime_error, "unexpected number of compact " + what); + } + + size_t expected_data_size = 0; + size_t longest_item_len = 0; + if (!v.index().empty()) { + if (v.packed_index()) { + expected_data_size = + std::accumulate(v.index().begin(), v.index().end(), 0); + longest_item_len = *std::max_element(v.index().begin(), v.index().end()); + } else { + expected_data_size = v.index().back(); + if (!std::is_sorted(v.index().begin(), v.index().end())) { + DWARFS_THROW(runtime_error, "inconsistent index for compact " + what); + } + } + } + + if (v.buffer().size() != expected_data_size) { + DWARFS_THROW(runtime_error, "data size mismatch for compact " + what); + } + + if (longest_item_len > max_item_len) { + DWARFS_THROW(runtime_error, + fmt::format("invalid item length in compact {0}: {1} > {2}", + what, longest_item_len, max_item_len)); + } +} + +void check_plain_strings( + ::apache::thrift::frozen::View> v, + size_t expected_num, size_t max_item_len, std::string const& what) { + if (v.size() != expected_num) { + DWARFS_THROW(runtime_error, "unexpected number of " + what); + } + + size_t total_size = 0; + + for (auto s : v) { + if (s.size() > max_item_len) { + DWARFS_THROW(runtime_error, "unexpectedly long item in " + what); + } + total_size += s.size(); + } + + if (!v.empty()) { + if (total_size != static_cast(v.back().end() - v.front().begin())) { + DWARFS_THROW(runtime_error, "unexpectedly data size in " + what); + } + } +} + +void check_string_tables(global_metadata::Meta const& meta) { + size_t num_names = 0; + if (auto dep = meta.dir_entries()) { + if (dep->size() > 1) { + num_names = std::max_element(dep->begin(), dep->end(), + [](auto const& a, auto const& b) { + return a.name_index() < b.name_index(); + }) + ->name_index() + + 1; + } + } else { + if (meta.inodes().size() > 1) { + num_names = + std::max_element(meta.inodes().begin(), meta.inodes().end(), + [](auto const& a, auto const& b) { + return a.name_index_v2_2() < b.name_index_v2_2(); + }) + ->name_index_v2_2() + + 1; + } + } + + // max name length is usually 255, but fsst compression, in the worst + // case, will use 2 bytes per input byte... + constexpr size_t max_name_len = 512; + constexpr size_t max_symlink_len = 4096; + + if (auto cn = meta.compact_names()) { + check_compact_strings(*cn, num_names, max_name_len, "names"); + } else { + check_plain_strings(meta.names(), num_names, max_name_len, "names"); + } + + size_t num_symlink_strings = 0; + if (!meta.symlink_table().empty()) { + num_symlink_strings = *std::max_element(meta.symlink_table().begin(), + meta.symlink_table().end()) + + 1; + } + + if (auto cs = meta.compact_symlinks()) { + check_compact_strings(*cs, num_symlink_strings, max_symlink_len, + "symlink strings"); + } else { + check_plain_strings(meta.symlinks(), num_symlink_strings, max_symlink_len, + "symlink strings"); + } +} + +void check_chunks(global_metadata::Meta const& meta) { + auto block_size = meta.block_size(); + + if (block_size == 0 || (block_size & (block_size - 1))) { + DWARFS_THROW(runtime_error, "invalid block size"); + } + + if (meta.chunks().size() >= std::numeric_limits::max()) { + DWARFS_THROW(runtime_error, "invalid number of chunks"); + } + + for (auto c : meta.chunks()) { + if (c.offset() >= block_size || c.size() > block_size) { + DWARFS_THROW(runtime_error, "chunk offset/size out of range"); + } + if (c.offset() + c.size() > block_size) { + DWARFS_THROW(runtime_error, "chunk end outside of block"); + } + } +} + +std::array check_partitioning(global_metadata::Meta const& meta) { + std::array offsets; + + for (int r = 0; r < static_cast(offsets.size()); ++r) { + if (auto dep = meta.dir_entries()) { + auto pred = [r, modes = meta.modes()](auto ino) { + return mode_rank(modes[ino.mode_index()]) < r; + }; + auto inodes = meta.inodes(); + + if (!std::is_partitioned(inodes.begin(), inodes.end(), pred)) { + DWARFS_THROW(runtime_error, "inode table inconsistency"); + } + + offsets[r] = std::distance( + inodes.begin(), + std::partition_point(inodes.begin(), inodes.end(), pred)); + } else { + auto pred = [r, modes = meta.modes(), inodes = meta.inodes()](auto ent) { + return mode_rank(modes[inodes[ent].mode_index()]) < r; + }; + auto entries = meta.entry_table_v2_2(); + + if (!std::is_partitioned(entries.begin(), entries.end(), pred)) { + DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency"); + } + + offsets[r] = std::distance( + entries.begin(), + std::partition_point(entries.begin(), entries.end(), pred)); + } + } + + return offsets; +} + +global_metadata::Meta const& +check_metadata(logger& lgr, global_metadata::Meta const& meta, bool check) { + if (check) { + LOG_PROXY(debug_logger_policy, lgr); + + auto ti = LOG_TIMED_DEBUG; + + ti << "check metadata consistency"; + + check_empty_tables(meta); + check_index_range(meta); + check_packed_tables(meta); + check_string_tables(meta); + check_chunks(meta); + auto offsets = check_partitioning(meta); + + auto num_dir = meta.directories().size() - 1; + auto num_lnk = meta.symlink_table().size(); + auto num_reg_unique = meta.chunk_table().size() - 1; + size_t num_reg_shared = 0; + + if (auto sfp = meta.shared_files_table()) { + if (meta.options()->packed_shared_files_table()) { + num_reg_shared = + std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size()); + num_reg_unique -= sfp->size(); + } else { + if (!std::is_sorted(sfp->begin(), sfp->end())) { + DWARFS_THROW(runtime_error, + "unpacked shared_files_table is not sorted"); + } + num_reg_shared = sfp->size(); + if (!sfp->empty()) { + num_reg_unique -= sfp->back() + 1; + } + } + } + + size_t num_dev = meta.devices() ? meta.devices()->size() : 0; + + if (num_dir != offsets[1]) { + DWARFS_THROW(runtime_error, "wrong number of directories"); + } + + if (num_lnk != offsets[2] - offsets[1]) { + DWARFS_THROW(runtime_error, "wrong number of links"); + } + + if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) { + DWARFS_THROW(runtime_error, "wrong number of files"); + } + + if (num_dev != offsets[4] - offsets[3]) { + DWARFS_THROW(runtime_error, "wrong number of devices"); + } + + if (!meta.dir_entries()) { + for (auto ino : meta.inodes()) { + auto mode = meta.modes()[ino.mode_index()]; + auto i = ino.inode_v2_2(); + int base = mode_rank(mode); + + if (i < offsets[base] || + (i >= offsets[base + 1] && i > offsets[base])) { + DWARFS_THROW(runtime_error, "inode_v2_2 out of range"); + } + } + } + } + + return meta; +} + +} // namespace + +global_metadata::global_metadata(logger& lgr, Meta const& meta) + : meta_{meta} + , directories_storage_{unpack_directories(lgr, meta_)} + , directories_{directories_storage_.empty() ? nullptr + : directories_storage_.data()} + , names_{meta_.compact_names() + ? string_table(lgr, "names", *meta_.compact_names()) + : string_table(meta_.names())} {} + +void global_metadata::check_consistency(logger& lgr, Meta const& meta) { + check_metadata(lgr, meta, true); +} + +void global_metadata::check_consistency(logger& lgr) const { + check_consistency(lgr, meta_); +} + +uint32_t global_metadata::first_dir_entry(uint32_t ino) const { + return directories_ ? directories_[ino].first_entry().value() + : meta_.directories()[ino].first_entry(); +} + +uint32_t global_metadata::parent_dir_entry(uint32_t ino) const { + return directories_ ? directories_[ino].parent_entry().value() + : meta_.directories()[ino].parent_entry(); +} + +auto inode_view_impl::mode() const -> mode_type { + assert(mode_index() < meta_->modes().size()); + return meta_->modes()[mode_index()]; +} + +auto inode_view_impl::mode_string() const -> std::string { + return file_stat::mode_string(mode()); +} + +auto inode_view_impl::perm_string() const -> std::string { + return file_stat::perm_string(mode()); +} + +auto inode_view_impl::getuid() const -> uid_type { + auto uids = meta_->uids(); + auto ix = owner_index(); + if (!uids.empty()) { + assert(ix < uids.size()); + return uids[ix]; + } + // Releases up to and including 0.7.x, when using --set-owner, would store + // the uid in the owner_index field and leave the uids table empty. + return ix; +} + +auto inode_view_impl::getgid() const -> gid_type { + auto gids = meta_->gids(); + auto ix = group_index(); + if (!gids.empty()) { + assert(ix < gids.size()); + return gids[ix]; + } + // Releases up to and including 0.7.x, when using --set-group, would store + // the gid in the group_index field and leave the gids table empty. + return ix; +} + +// TODO: pretty certain some of this stuff can be simplified + +std::string dir_entry_view_impl::name() const { + return v_ | + match{ + [this](DirEntryView const& dev) { + return g_->names()[dev.name_index()]; + }, + [this](InodeView const& iv) { + return std::string(g_->meta().names()[iv.name_index_v2_2()]); + }, + }; +} + +std::shared_ptr dir_entry_view_impl::inode() const { + return v_ | match{ + [this](DirEntryView const& dev) { + return std::make_shared( + g_->meta().inodes()[dev.inode_num()], dev.inode_num(), + g_->meta()); + }, + [this](InodeView const& iv) { + return std::make_shared( + iv, iv.inode_v2_2(), g_->meta()); + }, + }; +} + +bool dir_entry_view_impl::is_root() const { + return v_ | match{ + [](DirEntryView const& dev) { return dev.inode_num() == 0; }, + [](InodeView const& iv) { return iv.inode_v2_2() == 0; }, + }; +} + +/** + * We need a parent index if the dir_entry_view_impl is for a file. For + * directories, the parent can be determined via the directory's + * inode, but for files, this isn't possible. + */ + +std::shared_ptr +dir_entry_view_impl::from_dir_entry_index(uint32_t self_index, + uint32_t parent_index, + global_metadata const& g) { + auto& meta = g.meta(); + + if (auto de = meta.dir_entries()) { + DWARFS_CHECK(self_index < de->size(), "self_index out of range"); + DWARFS_CHECK(parent_index < de->size(), "parent_index out of range"); + + auto dev = (*de)[self_index]; + + return std::make_shared(dev, self_index, parent_index, + g); + } + + DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range"); + DWARFS_CHECK(parent_index < meta.inodes().size(), "self_index out of range"); + + auto iv = meta.inodes()[self_index]; + + return std::make_shared(iv, self_index, parent_index, g); +} + +std::shared_ptr +dir_entry_view_impl::from_dir_entry_index(uint32_t self_index, + global_metadata const& g) { + auto& meta = g.meta(); + + if (auto de = meta.dir_entries()) { + DWARFS_CHECK(self_index < de->size(), "self_index out of range"); + auto dev = (*de)[self_index]; + DWARFS_CHECK(dev.inode_num() < meta.directories().size(), + "self_index inode out of range"); + return std::make_shared( + dev, self_index, g.parent_dir_entry(dev.inode_num()), g); + } + + DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range"); + auto iv = meta.inodes()[self_index]; + + DWARFS_CHECK(iv.inode_v2_2() < meta.directories().size(), + "parent_index out of range"); + return std::make_shared( + iv, self_index, + meta.entry_table_v2_2()[meta.directories()[iv.inode_v2_2()] + .parent_entry()], + g); +} + +std::shared_ptr dir_entry_view_impl::parent() const { + if (is_root()) { + return nullptr; + } + + return from_dir_entry_index(parent_index_, *g_); +} + +std::string +dir_entry_view_impl::name(uint32_t index, global_metadata const& g) { + if (auto de = g.meta().dir_entries()) { + DWARFS_CHECK(index < de->size(), "index out of range"); + auto dev = (*de)[index]; + return g.names()[dev.name_index()]; + } + + DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range"); + auto iv = g.meta().inodes()[index]; + return std::string(g.meta().names()[iv.name_index_v2_2()]); +} + +std::shared_ptr +dir_entry_view_impl::inode(uint32_t index, global_metadata const& g) { + if (auto de = g.meta().dir_entries()) { + DWARFS_CHECK(index < de->size(), "index out of range"); + auto dev = (*de)[index]; + return std::make_shared( + g.meta().inodes()[dev.inode_num()], dev.inode_num(), g.meta()); + } + + DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range"); + auto iv = g.meta().inodes()[index]; + return std::make_shared(iv, iv.inode_v2_2(), + g.meta()); +} + +std::string dir_entry_view_impl::path() const { + return u8string_to_string(fs_path().u8string()); +} + +std::string dir_entry_view_impl::unix_path() const { +#ifdef _WIN32 + auto p = fs_path().u8string(); + std::replace(p.begin(), p.end(), + static_cast(std::filesystem::path::preferred_separator), + '/'); + return u8string_to_string(p); +#else + return path(); +#endif +} + +std::wstring dir_entry_view_impl::wpath() const { return fs_path().wstring(); } + +std::filesystem::path dir_entry_view_impl::fs_path() const { + std::filesystem::path p; + append_to(p); + return p; +} + +void dir_entry_view_impl::append_to(std::filesystem::path& p) const { + if (auto ev = parent()) { + if (!ev->is_root()) { + ev->append_to(p); + } + } + if (!is_root()) { + p /= string_to_u8string(name()); + } +} + +} // namespace dwarfs::internal diff --git a/src/dwarfs/internal/metadata_v2.cpp b/src/dwarfs/internal/metadata_v2.cpp index c7d8914f..b24cac82 100644 --- a/src/dwarfs/internal/metadata_v2.cpp +++ b/src/dwarfs/internal/metadata_v2.cpp @@ -374,7 +374,7 @@ class metadata_ final : public metadata_v2::impl { , global_(lgr, check_metadata_consistency(lgr, meta_, options.check_consistency || force_consistency_check)) - , root_(dir_entry_view::from_dir_entry_index(0, global_)) + , root_(internal::dir_entry_view_impl::from_dir_entry_index(0, global_)) , LOG_PROXY_INIT(lgr) , inode_offset_(inode_offset) , symlink_inode_offset_(find_inode_offset(inode_rank::INO_LNK)) @@ -526,13 +526,14 @@ class metadata_ final : public metadata_v2::impl { // TODO: move compatibility details to metadata_types uint32_t index = meta_.dir_entries() ? inode : meta_.entry_table_v2_2()[inode]; - return inode_view(meta_.inodes()[index], inode, meta_); + return inode_view{std::make_shared( + meta_.inodes()[index], inode, meta_)}; } dir_entry_view make_dir_entry_view(uint32_t self_index, uint32_t parent_index) const { - return dir_entry_view::from_dir_entry_index(self_index, parent_index, - global_); + return dir_entry_view{dir_entry_view_impl::from_dir_entry_index( + self_index, parent_index, global_)}; } // This represents the order in which inodes are stored in inodes @@ -1454,16 +1455,16 @@ metadata_::find(directory_view dir, std::string_view name) const { auto range = dir.entry_range(); - auto it = std::lower_bound(range.begin(), range.end(), name, - [&](auto ix, std::string_view name) { - return dir_entry_view::name(ix, global_) < name; - }); + auto it = std::lower_bound( + range.begin(), range.end(), name, [&](auto ix, std::string_view name) { + return internal::dir_entry_view_impl::name(ix, global_) < name; + }); std::optional rv; if (it != range.end()) { - if (dir_entry_view::name(*it, global_) == name) { - rv = dir_entry_view::inode(*it, global_); + if (internal::dir_entry_view_impl::name(*it, global_) == name) { + rv = inode_view{internal::dir_entry_view_impl::inode(*it, global_)}; } } @@ -1551,12 +1552,12 @@ int metadata_::getattr(inode_view iv, file_stat* stbuf) const { stbuf->blocks = (stbuf->size + 511) / 512; stbuf->uid = iv.getuid(); stbuf->gid = iv.getgid(); - stbuf->mtime = resolution * (timebase + iv.mtime_offset()); + stbuf->mtime = resolution * (timebase + iv.raw().mtime_offset()); if (mtime_only) { stbuf->atime = stbuf->ctime = stbuf->mtime; } else { - stbuf->atime = resolution * (timebase + iv.atime_offset()); - stbuf->ctime = resolution * (timebase + iv.ctime_offset()); + stbuf->atime = resolution * (timebase + iv.raw().atime_offset()); + stbuf->ctime = resolution * (timebase + iv.raw().ctime_offset()); } stbuf->nlink = options_.enable_nlink && stbuf->is_regular_file() ? DWARFS_NOTHROW(nlinks_.at(inode - file_inode_offset_)) @@ -1601,8 +1602,10 @@ metadata_::readdir(directory_view dir, size_t offset) const { } auto index = dir.first_entry() + offset; - auto inode = dir_entry_view::inode(index, global_); - return std::pair(inode, dir_entry_view::name(index, global_)); + auto inode = + inode_view{internal::dir_entry_view_impl::inode(index, global_)}; + return std::pair(inode, + internal::dir_entry_view_impl::name(index, global_)); } return std::nullopt; diff --git a/src/dwarfs/metadata_types.cpp b/src/dwarfs/metadata_types.cpp index 393a338e..f6b81fe0 100644 --- a/src/dwarfs/metadata_types.cpp +++ b/src/dwarfs/metadata_types.cpp @@ -27,720 +27,69 @@ #include #include +#include #include #include #include #include -#include - namespace dwarfs { -namespace { +inode_view::mode_type inode_view::mode() const { return iv_->mode(); } -std::vector -unpack_directories(logger& lgr, global_metadata::Meta const& meta) { - std::vector directories; +std::string inode_view::mode_string() const { return iv_->mode_string(); } - if (auto opts = meta.options(); opts and opts->packed_directories()) { - LOG_PROXY(debug_logger_policy, lgr); +std::string inode_view::perm_string() const { return iv_->perm_string(); } - auto ti = LOG_TIMED_DEBUG; +posix_file_type::value inode_view::type() const { return iv_->type(); } - auto dirent = *meta.dir_entries(); - auto metadir = meta.directories(); - - directories.resize(metadir.size()); - - // delta-decode first entries first - directories[0].first_entry() = metadir[0].first_entry(); - - for (size_t i = 1; i < directories.size(); ++i) { - directories[i].first_entry() = - directories[i - 1].first_entry().value() + metadir[i].first_entry(); - } - - // then traverse to recover parent entries - std::queue queue; - queue.push(0); - - while (!queue.empty()) { - auto parent = queue.front(); - queue.pop(); - - auto p_ino = dirent[parent].inode_num(); - - auto beg = directories[p_ino].first_entry().value(); - auto end = directories[p_ino + 1].first_entry().value(); - - for (auto e = beg; e < end; ++e) { - if (auto e_ino = dirent[e].inode_num(); - e_ino < (directories.size() - 1)) { - directories[e_ino].parent_entry() = parent; - queue.push(e); - } - } - } - - ti << "unpacked directories table"; - } - - return directories; +bool inode_view::is_regular_file() const { + return iv_->type() == posix_file_type::regular; } -// TODO: merge with inode_rank in metadata_v2 -int mode_rank(uint16_t mode) { - switch (posix_file_type::from_mode(mode)) { - case posix_file_type::directory: - return 0; - case posix_file_type::symlink: - return 1; - case posix_file_type::regular: - return 2; - case posix_file_type::block: - case posix_file_type::character: - return 3; - default: - return 4; - } +bool inode_view::is_directory() const { + return iv_->type() == posix_file_type::directory; } -void check_empty_tables(global_metadata::Meta const& meta) { - if (meta.inodes().empty()) { - DWARFS_THROW(runtime_error, "empty inodes table"); - } - - if (meta.directories().empty()) { - DWARFS_THROW(runtime_error, "empty directories table"); - } - - if (meta.chunk_table().empty()) { - DWARFS_THROW(runtime_error, "empty chunk_table table"); - } - - if (auto de = meta.dir_entries()) { - if (de->empty()) { - DWARFS_THROW(runtime_error, "empty dir_entries table"); - } - } else { - if (meta.entry_table_v2_2().empty()) { - DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table"); - } - } - - if (meta.modes().empty()) { - DWARFS_THROW(runtime_error, "empty modes table"); - } +bool inode_view::is_symlink() const { + return iv_->type() == posix_file_type::symlink; } -void check_index_range(global_metadata::Meta const& meta) { - auto num_modes = meta.modes().size(); - auto num_uids = meta.uids().size(); - auto num_gids = meta.gids().size(); - auto num_names = meta.names().size(); - auto num_inodes = meta.inodes().size(); - bool v2_2 = !static_cast(meta.dir_entries()); +inode_view::uid_type inode_view::getuid() const { return iv_->getuid(); } - if (num_modes >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of modes"); - } +inode_view::gid_type inode_view::getgid() const { return iv_->getgid(); } - if (num_uids >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of uids"); - } +uint32_t inode_view::inode_num() const { return iv_->inode_num(); } - if (num_gids >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of gids"); - } +std::string dir_entry_view::name() const { return impl_->name(); } - if (num_names >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of names"); - } +inode_view dir_entry_view::inode() const { return inode_view{impl_->inode()}; } - if (num_inodes >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of inodes"); - } - - for (auto ino : meta.inodes()) { - if (ino.mode_index() >= num_modes) { - DWARFS_THROW(runtime_error, "mode_index out of range"); - } - // Special handling for legacy filesystems built with --set-owner - // where num_uids == 0 is valid and owner_index is used to store - // the uid. - if (num_uids > 0) { - if (auto i = ino.owner_index(); i >= num_uids) { - DWARFS_THROW(runtime_error, "owner_index out of range"); - } - } - // Special handling for legacy filesystems built with --set-group - // where num_gids == 0 is valid and group_index is used to store - // the gid. - if (num_gids > 0) { - if (auto i = ino.group_index(); i >= num_gids) { - DWARFS_THROW(runtime_error, "group_index out of range"); - } - } - if (v2_2) { - if (auto i = ino.name_index_v2_2(); i >= num_names && i > 0) { - DWARFS_THROW(runtime_error, "name_index_v2_2 out of range"); - } - } - } - - if (auto dep = meta.dir_entries()) { - if (dep->size() >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of dir_entries"); - } - - if (auto cn = meta.compact_names()) { - num_names = cn->index().size(); - if (!cn->packed_index()) { - if (num_names == 0) { - DWARFS_THROW(runtime_error, "empty compact_names index"); - } - --num_names; - } - } - - for (auto de : *dep) { - if (auto i = de.name_index(); i >= num_names && i > 0) { - DWARFS_THROW(runtime_error, "name_index out of range"); - } - if (auto i = de.inode_num(); i >= num_inodes) { - DWARFS_THROW(runtime_error, "inode_num out of range"); - } - } - } else { - if (meta.entry_table_v2_2().size() >= - std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of entries"); - } - - for (auto ent : meta.entry_table_v2_2()) { - if (ent >= num_inodes) { - DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range"); - } - } - } -} - -void check_packed_tables(global_metadata::Meta const& meta) { - if (meta.directories().size() >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of directories"); - } - - if (meta.chunk_table().size() >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of chunk_table entries"); - } - - if (auto opt = meta.options(); opt and opt->packed_directories()) { - if (std::any_of(meta.directories().begin(), meta.directories().end(), - [](auto i) { return i.parent_entry() != 0; })) { - DWARFS_THROW(runtime_error, "parent_entry set in packed directory"); - } - if (std::accumulate(meta.directories().begin(), meta.directories().end(), - static_cast(0), [](auto n, auto d) { - return n + d.first_entry(); - }) != meta.dir_entries()->size()) { - DWARFS_THROW(runtime_error, - "first_entry inconsistency in packed directories"); - } - } else { - size_t num_entries = - meta.dir_entries() ? meta.dir_entries()->size() : meta.inodes().size(); - - if (!std::is_sorted( - meta.directories().begin(), meta.directories().end(), - [](auto a, auto b) { return a.first_entry() < b.first_entry(); })) { - DWARFS_THROW(runtime_error, "first_entry inconsistency"); - } - - for (auto d : meta.directories()) { - if (auto i = d.first_entry(); i > num_entries) { - DWARFS_THROW(runtime_error, "first_entry out of range"); - } - if (auto i = d.parent_entry(); i >= num_entries) { - DWARFS_THROW(runtime_error, "parent_entry out of range"); - } - } - } - - if (auto opt = meta.options(); opt and opt->packed_chunk_table()) { - if (std::accumulate(meta.chunk_table().begin(), meta.chunk_table().end(), - static_cast(0)) != meta.chunks().size()) { - DWARFS_THROW(runtime_error, "packed chunk_table inconsistency"); - } - } else { - if (!std::is_sorted(meta.chunk_table().begin(), meta.chunk_table().end()) or - meta.chunk_table().back() != meta.chunks().size()) { - DWARFS_THROW(runtime_error, "chunk_table inconsistency"); - } - } -} - -void check_compact_strings( - ::apache::thrift::frozen::View v, - size_t expected_num, size_t max_item_len, std::string const& what) { - size_t index_size = v.index().size(); - - if (!v.packed_index() && index_size > 0) { - --index_size; - } - - if (index_size != expected_num) { - DWARFS_THROW(runtime_error, "unexpected number of compact " + what); - } - - size_t expected_data_size = 0; - size_t longest_item_len = 0; - if (!v.index().empty()) { - if (v.packed_index()) { - expected_data_size = - std::accumulate(v.index().begin(), v.index().end(), 0); - longest_item_len = *std::max_element(v.index().begin(), v.index().end()); - } else { - expected_data_size = v.index().back(); - if (!std::is_sorted(v.index().begin(), v.index().end())) { - DWARFS_THROW(runtime_error, "inconsistent index for compact " + what); - } - } - } - - if (v.buffer().size() != expected_data_size) { - DWARFS_THROW(runtime_error, "data size mismatch for compact " + what); - } - - if (longest_item_len > max_item_len) { - DWARFS_THROW(runtime_error, - fmt::format("invalid item length in compact {0}: {1} > {2}", - what, longest_item_len, max_item_len)); - } -} - -void check_plain_strings( - ::apache::thrift::frozen::View> v, - size_t expected_num, size_t max_item_len, std::string const& what) { - if (v.size() != expected_num) { - DWARFS_THROW(runtime_error, "unexpected number of " + what); - } - - size_t total_size = 0; - - for (auto s : v) { - if (s.size() > max_item_len) { - DWARFS_THROW(runtime_error, "unexpectedly long item in " + what); - } - total_size += s.size(); - } - - if (!v.empty()) { - if (total_size != static_cast(v.back().end() - v.front().begin())) { - DWARFS_THROW(runtime_error, "unexpectedly data size in " + what); - } - } -} - -void check_string_tables(global_metadata::Meta const& meta) { - size_t num_names = 0; - if (auto dep = meta.dir_entries()) { - if (dep->size() > 1) { - num_names = std::max_element(dep->begin(), dep->end(), - [](auto const& a, auto const& b) { - return a.name_index() < b.name_index(); - }) - ->name_index() + - 1; - } - } else { - if (meta.inodes().size() > 1) { - num_names = - std::max_element(meta.inodes().begin(), meta.inodes().end(), - [](auto const& a, auto const& b) { - return a.name_index_v2_2() < b.name_index_v2_2(); - }) - ->name_index_v2_2() + - 1; - } - } - - // max name length is usually 255, but fsst compression, in the worst - // case, will use 2 bytes per input byte... - constexpr size_t max_name_len = 512; - constexpr size_t max_symlink_len = 4096; - - if (auto cn = meta.compact_names()) { - check_compact_strings(*cn, num_names, max_name_len, "names"); - } else { - check_plain_strings(meta.names(), num_names, max_name_len, "names"); - } - - size_t num_symlink_strings = 0; - if (!meta.symlink_table().empty()) { - num_symlink_strings = *std::max_element(meta.symlink_table().begin(), - meta.symlink_table().end()) + - 1; - } - - if (auto cs = meta.compact_symlinks()) { - check_compact_strings(*cs, num_symlink_strings, max_symlink_len, - "symlink strings"); - } else { - check_plain_strings(meta.symlinks(), num_symlink_strings, max_symlink_len, - "symlink strings"); - } -} - -void check_chunks(global_metadata::Meta const& meta) { - auto block_size = meta.block_size(); - - if (block_size == 0 || (block_size & (block_size - 1))) { - DWARFS_THROW(runtime_error, "invalid block size"); - } - - if (meta.chunks().size() >= std::numeric_limits::max()) { - DWARFS_THROW(runtime_error, "invalid number of chunks"); - } - - for (auto c : meta.chunks()) { - if (c.offset() >= block_size || c.size() > block_size) { - DWARFS_THROW(runtime_error, "chunk offset/size out of range"); - } - if (c.offset() + c.size() > block_size) { - DWARFS_THROW(runtime_error, "chunk end outside of block"); - } - } -} - -std::array check_partitioning(global_metadata::Meta const& meta) { - std::array offsets; - - for (int r = 0; r < static_cast(offsets.size()); ++r) { - if (auto dep = meta.dir_entries()) { - auto pred = [r, modes = meta.modes()](auto ino) { - return mode_rank(modes[ino.mode_index()]) < r; - }; - auto inodes = meta.inodes(); - - if (!std::is_partitioned(inodes.begin(), inodes.end(), pred)) { - DWARFS_THROW(runtime_error, "inode table inconsistency"); - } - - offsets[r] = std::distance( - inodes.begin(), - std::partition_point(inodes.begin(), inodes.end(), pred)); - } else { - auto pred = [r, modes = meta.modes(), inodes = meta.inodes()](auto ent) { - return mode_rank(modes[inodes[ent].mode_index()]) < r; - }; - auto entries = meta.entry_table_v2_2(); - - if (!std::is_partitioned(entries.begin(), entries.end(), pred)) { - DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency"); - } - - offsets[r] = std::distance( - entries.begin(), - std::partition_point(entries.begin(), entries.end(), pred)); - } - } - - return offsets; -} - -global_metadata::Meta const& -check_metadata(logger& lgr, global_metadata::Meta const& meta, bool check) { - if (check) { - LOG_PROXY(debug_logger_policy, lgr); - - auto ti = LOG_TIMED_DEBUG; - - ti << "check metadata consistency"; - - check_empty_tables(meta); - check_index_range(meta); - check_packed_tables(meta); - check_string_tables(meta); - check_chunks(meta); - auto offsets = check_partitioning(meta); - - auto num_dir = meta.directories().size() - 1; - auto num_lnk = meta.symlink_table().size(); - auto num_reg_unique = meta.chunk_table().size() - 1; - size_t num_reg_shared = 0; - - if (auto sfp = meta.shared_files_table()) { - if (meta.options()->packed_shared_files_table()) { - num_reg_shared = - std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size()); - num_reg_unique -= sfp->size(); - } else { - if (!std::is_sorted(sfp->begin(), sfp->end())) { - DWARFS_THROW(runtime_error, - "unpacked shared_files_table is not sorted"); - } - num_reg_shared = sfp->size(); - if (!sfp->empty()) { - num_reg_unique -= sfp->back() + 1; - } - } - } - - size_t num_dev = meta.devices() ? meta.devices()->size() : 0; - - if (num_dir != offsets[1]) { - DWARFS_THROW(runtime_error, "wrong number of directories"); - } - - if (num_lnk != offsets[2] - offsets[1]) { - DWARFS_THROW(runtime_error, "wrong number of links"); - } - - if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) { - DWARFS_THROW(runtime_error, "wrong number of files"); - } - - if (num_dev != offsets[4] - offsets[3]) { - DWARFS_THROW(runtime_error, "wrong number of devices"); - } - - if (!meta.dir_entries()) { - for (auto ino : meta.inodes()) { - auto mode = meta.modes()[ino.mode_index()]; - auto i = ino.inode_v2_2(); - int base = mode_rank(mode); - - if (i < offsets[base] || - (i >= offsets[base + 1] && i > offsets[base])) { - DWARFS_THROW(runtime_error, "inode_v2_2 out of range"); - } - } - } - } - - return meta; -} - -} // namespace - -global_metadata::global_metadata(logger& lgr, Meta const& meta) - : meta_{meta} - , directories_storage_{unpack_directories(lgr, meta_)} - , directories_{directories_storage_.empty() ? nullptr - : directories_storage_.data()} - , names_{meta_.compact_names() - ? string_table(lgr, "names", *meta_.compact_names()) - : string_table(meta_.names())} {} - -void global_metadata::check_consistency(logger& lgr, Meta const& meta) { - check_metadata(lgr, meta, true); -} - -void global_metadata::check_consistency(logger& lgr) const { - check_consistency(lgr, meta_); -} - -uint32_t global_metadata::first_dir_entry(uint32_t ino) const { - return directories_ ? directories_[ino].first_entry().value() - : meta_.directories()[ino].first_entry(); -} - -uint32_t global_metadata::parent_dir_entry(uint32_t ino) const { - return directories_ ? directories_[ino].parent_entry().value() - : meta_.directories()[ino].parent_entry(); -} - -auto inode_view::mode() const -> mode_type { - assert(mode_index() < meta_->modes().size()); - return meta_->modes()[mode_index()]; -} - -auto inode_view::mode_string() const -> std::string { - return file_stat::mode_string(mode()); -} - -auto inode_view::perm_string() const -> std::string { - return file_stat::perm_string(mode()); -} - -auto inode_view::getuid() const -> uid_type { - auto uids = meta_->uids(); - auto ix = owner_index(); - if (!uids.empty()) { - assert(ix < uids.size()); - return uids[ix]; - } - // Releases up to and including 0.7.x, when using --set-owner, would store - // the uid in the owner_index field and leave the uids table empty. - return ix; -} - -auto inode_view::getgid() const -> gid_type { - auto gids = meta_->gids(); - auto ix = group_index(); - if (!gids.empty()) { - assert(ix < gids.size()); - return gids[ix]; - } - // Releases up to and including 0.7.x, when using --set-group, would store - // the gid in the group_index field and leave the gids table empty. - return ix; -} - -// TODO: pretty certain some of this stuff can be simplified - -std::string dir_entry_view::name() const { - return v_ | - match{ - [this](DirEntryView const& dev) { - return g_->names()[dev.name_index()]; - }, - [this](InodeView const& iv) { - return std::string(g_->meta().names()[iv.name_index_v2_2()]); - }, - }; -} - -inode_view dir_entry_view::inode() const { - return v_ | match{ - [this](DirEntryView const& dev) { - return inode_view(g_->meta().inodes()[dev.inode_num()], - dev.inode_num(), g_->meta()); - }, - [this](InodeView const& iv) { - return inode_view(iv, iv.inode_v2_2(), g_->meta()); - }, - }; -} - -bool dir_entry_view::is_root() const { - return v_ | match{ - [](DirEntryView const& dev) { return dev.inode_num() == 0; }, - [](InodeView const& iv) { return iv.inode_v2_2() == 0; }, - }; -} - -/** - * We need a parent index if the dir_entry_view is for a file. For - * directories, the parent can be determined via the directory's - * inode, but for files, this isn't possible. - */ - -dir_entry_view -dir_entry_view::from_dir_entry_index(uint32_t self_index, uint32_t parent_index, - global_metadata const& g) { - auto& meta = g.meta(); - - if (auto de = meta.dir_entries()) { - DWARFS_CHECK(self_index < de->size(), "self_index out of range"); - DWARFS_CHECK(parent_index < de->size(), "parent_index out of range"); - - auto dev = (*de)[self_index]; - - return dir_entry_view(dev, self_index, parent_index, g); - } - - DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range"); - DWARFS_CHECK(parent_index < meta.inodes().size(), "self_index out of range"); - - auto iv = meta.inodes()[self_index]; - - return dir_entry_view(iv, self_index, parent_index, g); -} - -dir_entry_view dir_entry_view::from_dir_entry_index(uint32_t self_index, - global_metadata const& g) { - auto& meta = g.meta(); - - if (auto de = meta.dir_entries()) { - DWARFS_CHECK(self_index < de->size(), "self_index out of range"); - auto dev = (*de)[self_index]; - DWARFS_CHECK(dev.inode_num() < meta.directories().size(), - "self_index inode out of range"); - return dir_entry_view(dev, self_index, g.parent_dir_entry(dev.inode_num()), - g); - } - - DWARFS_CHECK(self_index < meta.inodes().size(), "self_index out of range"); - auto iv = meta.inodes()[self_index]; - - DWARFS_CHECK(iv.inode_v2_2() < meta.directories().size(), - "parent_index out of range"); - return dir_entry_view( - iv, self_index, - meta.entry_table_v2_2()[meta.directories()[iv.inode_v2_2()] - .parent_entry()], - g); -} +bool dir_entry_view::is_root() const { return impl_->is_root(); } std::optional dir_entry_view::parent() const { - if (is_root()) { - return std::nullopt; + if (auto p = impl_->parent()) { + return dir_entry_view{std::move(p)}; } - - return from_dir_entry_index(parent_index_, *g_); + return std::nullopt; } -std::string dir_entry_view::name(uint32_t index, global_metadata const& g) { - if (auto de = g.meta().dir_entries()) { - DWARFS_CHECK(index < de->size(), "index out of range"); - auto dev = (*de)[index]; - return g.names()[dev.name_index()]; - } +std::string dir_entry_view::path() const { return impl_->path(); } - DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range"); - auto iv = g.meta().inodes()[index]; - return std::string(g.meta().names()[iv.name_index_v2_2()]); -} - -inode_view dir_entry_view::inode(uint32_t index, global_metadata const& g) { - if (auto de = g.meta().dir_entries()) { - DWARFS_CHECK(index < de->size(), "index out of range"); - auto dev = (*de)[index]; - return inode_view(g.meta().inodes()[dev.inode_num()], dev.inode_num(), - g.meta()); - } - - DWARFS_CHECK(index < g.meta().inodes().size(), "index out of range"); - auto iv = g.meta().inodes()[index]; - return inode_view(iv, iv.inode_v2_2(), g.meta()); -} - -std::string dir_entry_view::path() const { - return u8string_to_string(fs_path().u8string()); -} - -std::string dir_entry_view::unix_path() const { -#ifdef _WIN32 - auto p = fs_path().u8string(); - std::replace(p.begin(), p.end(), - static_cast(std::filesystem::path::preferred_separator), - '/'); - return u8string_to_string(p); -#else - return path(); -#endif -} - -std::wstring dir_entry_view::wpath() const { return fs_path().wstring(); } +std::string dir_entry_view::unix_path() const { return impl_->unix_path(); } std::filesystem::path dir_entry_view::fs_path() const { - std::filesystem::path p; - append_to(p); - return p; + return impl_->fs_path(); } +std::wstring dir_entry_view::wpath() const { return impl_->wpath(); } + void dir_entry_view::append_to(std::filesystem::path& p) const { - if (auto ev = parent()) { - if (!ev->is_root()) { - ev->append_to(p); - } - } - if (!is_root()) { - p /= string_to_u8string(name()); - } + impl_->append_to(p); } +uint32_t dir_entry_view::self_index() const { return impl_->self_index(); } + uint32_t directory_view::first_entry(uint32_t ino) const { return g_->first_dir_entry(ino); }