From 72f56004ec8dcfdb4b61430e4ccc4dc8ec6942d1 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 30 Mar 2021 17:49:14 +0200 Subject: [PATCH] Add metadata consistency checks --- include/dwarfs/metadata_types.h | 2 +- include/dwarfs/metadata_v2.h | 3 +- include/dwarfs/options.h | 1 + src/dwarfs/filesystem_v2.cpp | 32 ++-- src/dwarfs/metadata_types.cpp | 265 +++++++++++++++++++++++++++++++- src/dwarfs/metadata_v2.cpp | 10 +- src/dwarfsck.cpp | 3 + 7 files changed, 290 insertions(+), 26 deletions(-) diff --git a/include/dwarfs/metadata_types.h b/include/dwarfs/metadata_types.h index d84008af..77b57af2 100644 --- a/include/dwarfs/metadata_types.h +++ b/include/dwarfs/metadata_types.h @@ -48,7 +48,7 @@ class global_metadata { using Meta = ::apache::thrift::frozen::MappedFrozen; - global_metadata(logger& lgr, Meta const* meta); + global_metadata(logger& lgr, Meta const* meta, bool check_consistency); Meta const* meta() const { return meta_; } diff --git a/include/dwarfs/metadata_v2.h b/include/dwarfs/metadata_v2.h index d7afee43..5a75332d 100644 --- a/include/dwarfs/metadata_v2.h +++ b/include/dwarfs/metadata_v2.h @@ -57,7 +57,8 @@ class metadata_v2 { metadata_v2() = default; metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data, - metadata_options const& options, int inode_offset = 0); + metadata_options const& options, int inode_offset = 0, + bool force_consistency_check = false); metadata_v2& operator=(metadata_v2&&) = default; diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 171c2a72..24ed6e06 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -41,6 +41,7 @@ struct block_cache_options { struct metadata_options { bool enable_nlink{false}; bool readonly{false}; + bool check_consistency{false}; }; struct filesystem_options { diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 5cb0caec..00511dfb 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp 
@@ -178,6 +178,8 @@ class filesystem_parser { off_t image_offset() const { return image_offset_; } + bool has_checksums() const { return version_ >= 2; } + private: std::shared_ptr mm_; off_t const image_offset_; @@ -212,7 +214,8 @@ make_metadata(logger& lgr, std::shared_ptr mm, std::vector& meta_buffer, const metadata_options& options, int inode_offset = 0, bool force_buffers = false, - mlock_mode lock_mode = mlock_mode::NONE) { + mlock_mode lock_mode = mlock_mode::NONE, + bool force_consistency_check = false) { LOG_PROXY(debug_logger_policy, lgr); auto schema_it = sections.find(section_type::METADATA_V2_SCHEMA); auto meta_it = sections.find(section_type::METADATA_V2); @@ -250,7 +253,7 @@ make_metadata(logger& lgr, std::shared_ptr mm, return metadata_v2( lgr, get_section_data(mm, schema_it->second, schema_buffer, force_buffers), - meta_section_range, options, inode_offset); + meta_section_range, options, inode_offset, force_consistency_check); } template @@ -326,9 +329,9 @@ filesystem_::filesystem_(logger& lgr, std::shared_ptr mm, std::vector schema_buffer; - meta_ = - make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_, - options.metadata, inode_offset, false, options.lock_mode); + meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_, + options.metadata, inode_offset, false, + options.lock_mode, !parser.has_checksums()); LOG_DEBUG << "read " << cache.block_count() << " blocks and " << meta_.size() << " bytes of metadata"; @@ -526,16 +529,10 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog, std::vector schema_raw; std::vector meta_raw; - if (opts.recompress_metadata) { - auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw, - metadata_options(), 0, true); - - struct ::statvfs stbuf; - meta.statvfs(&stbuf); - prog.original_size = stbuf.f_blocks * stbuf.f_frsize; - } else { - prog.original_size = total_block_size; - } + // force metadata check + auto meta = + make_metadata(lgr, mm, sections, schema_raw, meta_raw, 
metadata_options(), + 0, true, mlock_mode::NONE, !parser.has_checksums()); parser.rewind(); @@ -639,9 +636,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr mm, if (errors == 0 and detail_level > 0) { filesystem_options fsopts; - if (detail_level > 0) { - fsopts.metadata.enable_nlink = true; - } + fsopts.metadata.check_consistency = true; + fsopts.metadata.enable_nlink = true; fsopts.image_offset = image_offset; filesystem_v2(lgr, mm, fsopts).dump(os, detail_level); } diff --git a/src/dwarfs/metadata_types.cpp b/src/dwarfs/metadata_types.cpp index 44b27911..1953a881 100644 --- a/src/dwarfs/metadata_types.cpp +++ b/src/dwarfs/metadata_types.cpp @@ -19,6 +19,8 @@ * along with dwarfs. If not, see . */ +#include +#include #include #include "dwarfs/error.h" @@ -82,10 +84,269 @@ unpack_directories(logger& lgr, global_metadata::Meta const* meta) { return directories; } +/* Maps the file-type bits of `mode` (mode & S_IFMT) to a rank: 0 = directory, 1 = symlink, 2 = regular file, 3 = block or character device, 4 = anything else. check_partitioning() uses this rank as its partition key. */ int mode_rank(uint16_t mode) { + switch (mode & S_IFMT) { + case S_IFDIR: + return 0; + case S_IFLNK: + return 1; + case S_IFREG: + return 2; + case S_IFBLK: + case S_IFCHR: + return 3; + default: + return 4; + } +} + +/* Reports via DWARFS_THROW(runtime_error, ...) if inodes, directories, chunk_table or modes is empty, or if dir_entries (when present) or otherwise entry_table_v2_2 is empty. */ void check_empty_tables(global_metadata::Meta const* meta) { + if (meta->inodes().empty()) { + DWARFS_THROW(runtime_error, "empty inodes table"); + } + + if (meta->directories().empty()) { + DWARFS_THROW(runtime_error, "empty directories table"); + } + + if (meta->chunk_table().empty()) { + DWARFS_THROW(runtime_error, "empty chunk_table table"); + } + + if (auto de = meta->dir_entries()) { + if (de->empty()) { + DWARFS_THROW(runtime_error, "empty dir_entries table"); + } + } else { + if (meta->entry_table_v2_2().empty()) { + DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table"); + } + } + + if (meta->modes().empty()) { + DWARFS_THROW(runtime_error, "empty modes table"); + } +} + +/* Reports via DWARFS_THROW(runtime_error, ...) if an index stored in inodes, dir_entries or entry_table_v2_2 lies outside the table it refers to; an owner, group or name index of 0 is tolerated even when that table is empty. */ void check_index_range(global_metadata::Meta const* meta) { + auto num_modes = meta->modes().size(); + auto num_uids = meta->uids().size(); + auto num_gids = meta->gids().size(); + auto num_names = 
meta->names().size(); + auto num_inodes = meta->inodes().size(); + bool v2_2 = !static_cast<bool>(meta->dir_entries()); + + for (auto ino : meta->inodes()) { + if (ino.mode_index() >= num_modes) { + DWARFS_THROW(runtime_error, "mode_index out of range"); + } + if (auto i = ino.owner_index(); i >= num_uids && i > 0) { + DWARFS_THROW(runtime_error, "owner_index out of range"); + } + if (auto i = ino.group_index(); i >= num_gids && i > 0) { + DWARFS_THROW(runtime_error, "group_index out of range"); + } + if (v2_2) { + if (auto i = ino.name_index_v2_2(); i >= num_names && i > 0) { + DWARFS_THROW(runtime_error, "name_index_v2_2 out of range"); + } + } + } + + if (auto dep = meta->dir_entries()) { + if (auto cn = meta->compact_names()) { + num_names = cn->index().size(); + if (!cn->packed_index()) { + if (num_names == 0) { + DWARFS_THROW(runtime_error, "empty compact_names index"); + } + --num_names; + } + } + + for (auto de : *dep) { + if (auto i = de.name_index(); i >= num_names && i > 0) { + DWARFS_THROW(runtime_error, "name_index out of range"); + } + if (auto i = de.inode_num(); i >= num_inodes) { + DWARFS_THROW(runtime_error, "inode_num out of range"); + } + } + } else { + for (auto ent : meta->entry_table_v2_2()) { + if (ent >= num_inodes) { + DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range"); + } + } + } +} + +// Checks the directories and chunk_table tables in their packed or unpacked +// form (chosen by the optional `options` record) and reports the first +// inconsistency via DWARFS_THROW(runtime_error, ...). chunk_table must be +// non-empty because back() is called on it; check_metadata() runs +// check_empty_tables() before this function. +void check_packed_tables(global_metadata::Meta const* meta) { + if (auto opt = meta->options(); opt and opt->packed_directories()) { + // dir_entries is optional and is dereferenced below, so reject metadata + // that claims packed directories without providing that table. + if (!meta->dir_entries()) { + DWARFS_THROW(runtime_error, + "packed directories without dir_entries table"); + } + if (std::any_of(meta->directories().begin(), meta->directories().end(), + [](auto i) { return i.parent_entry() != 0; })) { + DWARFS_THROW(runtime_error, "parent_entry set in packed directory"); + } + if (std::accumulate(meta->directories().begin(), meta->directories().end(), + static_cast<size_t>(0), [](auto n, auto d) { + return n + d.first_entry(); + }) != meta->dir_entries()->size()) { + DWARFS_THROW(runtime_error, + "first_entry inconsistency in packed directories"); + } + } else { + size_t num_entries = 
meta->dir_entries() ? meta->dir_entries()->size() + : meta->inodes().size(); + + for (auto d : meta->directories()) { + if (auto i = d.first_entry(); i > num_entries) { + DWARFS_THROW(runtime_error, "first_entry out of range"); + } + if (auto i = d.parent_entry(); i >= num_entries) { + DWARFS_THROW(runtime_error, "parent_entry out of range"); + } + } + } + + if (auto opt = meta->options(); opt and opt->packed_chunk_table()) { + if (std::accumulate(meta->chunk_table().begin(), meta->chunk_table().end(), + static_cast<size_t>(0)) != meta->chunks().size()) { + DWARFS_THROW(runtime_error, "packed chunk_table inconsistency"); + } + } else { + if (!std::is_sorted(meta->chunk_table().begin(), + meta->chunk_table().end()) or + meta->chunk_table().back() != meta->chunks().size()) { + DWARFS_THROW(runtime_error, "chunk_table inconsistency"); + } + } +} + +void check_chunks(global_metadata::Meta const* meta) { + auto block_size = meta->block_size(); + + for (auto c : meta->chunks()) { + if (c.offset() >= block_size || c.size() > block_size) { + DWARFS_THROW(runtime_error, "chunk offset/size out of range"); + } + // offset < block_size holds here, so this subtraction cannot wrap; + // adding offset and size could wrap if they are narrow unsigned fields. + if (c.size() > block_size - c.offset()) { + DWARFS_THROW(runtime_error, "chunk end outside of block"); + } + } +} + +std::array<size_t, 6> check_partitioning(global_metadata::Meta const* meta) { + std::array<size_t, 6> offsets; + + for (int r = 0; r < static_cast<int>(offsets.size()); ++r) { + if (auto dep = meta->dir_entries()) { + auto pred = [r, modes = meta->modes()](auto ino) { + return mode_rank(modes[ino.mode_index()]) < r; + }; + auto inodes = meta->inodes(); + + if (!std::is_partitioned(inodes.begin(), inodes.end(), pred)) { + DWARFS_THROW(runtime_error, "inode table inconsistency"); + } + + offsets[r] = std::distance( + inodes.begin(), + std::partition_point(inodes.begin(), inodes.end(), pred)); + } else { + auto pred = [r, modes = meta->modes(), + inodes = meta->inodes()](auto ent) { + return mode_rank(modes[inodes[ent].mode_index()]) < r; + }; + auto entries = meta->entry_table_v2_2(); + + 
if (!std::is_partitioned(entries.begin(), entries.end(), pred)) { + DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency"); + } + + offsets[r] = std::distance( + entries.begin(), + std::partition_point(entries.begin(), entries.end(), pred)); + } + } + + return offsets; +} + +// When `check` is true, runs check_empty_tables(), check_index_range(), +// check_packed_tables(), check_chunks() and check_partitioning(), then +// compares the number of directories, links, regular files and devices +// implied by the tables with the partition offsets. The first mismatch is +// reported via DWARFS_THROW(runtime_error, ...). Always returns `meta` +// unchanged, so the call can sit inside a member initializer. +global_metadata::Meta const* +check_metadata(logger& lgr, global_metadata::Meta const* meta, bool check) { + if (check) { + LOG_PROXY(debug_logger_policy, lgr); + + auto ti = LOG_TIMED_DEBUG; + + ti << "check metadata consistency"; + + check_empty_tables(meta); + check_index_range(meta); + check_packed_tables(meta); + check_chunks(meta); + auto offsets = check_partitioning(meta); + + auto num_dir = meta->directories().size() - 1; + auto num_lnk = meta->symlink_table().size(); + auto num_reg_unique = meta->chunk_table().size() - 1; + size_t num_reg_shared = 0; + + if (auto sfp = meta->shared_files_table()) { + // options is optional; test it before dereferencing, as + // check_packed_tables() does. A missing record means "not packed". + if (auto opt = meta->options(); + opt and opt->packed_shared_files_table()) { + // Guard the unsigned subtraction below against wrapping. + if (sfp->size() > num_reg_unique) { + DWARFS_THROW(runtime_error, + "packed shared_files_table inconsistency"); + } + num_reg_shared = + std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size()); + num_reg_unique -= sfp->size(); + } else { + if (!std::is_sorted(sfp->begin(), sfp->end())) { + DWARFS_THROW(runtime_error, + "unpacked shared_files_table is not sorted"); + } + num_reg_shared = sfp->size(); + // back() must not be called on an empty table; an empty table + // simply contributes no shared files. + if (!sfp->empty()) { + // Guard the unsigned subtraction below against wrapping. + if (sfp->back() >= num_reg_unique) { + DWARFS_THROW(runtime_error, + "unpacked shared_files_table inconsistency"); + } + num_reg_unique -= sfp->back() + 1; + } + } + } + + size_t num_dev = meta->devices() ? 
meta->devices()->size() : 0; + + if (num_dir != offsets[1]) { + DWARFS_THROW(runtime_error, "wrong number of directories"); + } + + if (num_lnk != offsets[2] - offsets[1]) { + DWARFS_THROW(runtime_error, "wrong number of links"); + } + + if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) { + DWARFS_THROW(runtime_error, "wrong number of files"); + } + + if (num_dev != offsets[4] - offsets[3]) { + DWARFS_THROW(runtime_error, "wrong number of devices"); + } + + if (!meta->dir_entries()) { + for (auto ino : meta->inodes()) { + auto mode = meta->modes()[ino.mode_index()]; + auto i = ino.inode_v2_2(); + int base = mode_rank(mode); + + if (i < offsets[base] || + (i >= offsets[base + 1] && i > offsets[base])) { + DWARFS_THROW(runtime_error, "inode_v2_2 out of range"); + } + } + } + } + + return meta; +} + +} // namespace -global_metadata::global_metadata(logger& lgr, Meta const* meta) - : meta_{meta} +global_metadata::global_metadata(logger& lgr, Meta const* meta, + bool check_consistency) + : meta_{check_metadata(lgr, meta, check_consistency)} , directories_storage_{unpack_directories(lgr, meta_)} , directories_{directories_storage_.empty() ? 
nullptr : directories_storage_.data()} diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index b6995e4c..f44d763f 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -242,10 +242,12 @@ template class metadata_ final : public metadata_v2::impl { public: metadata_(logger& lgr, folly::ByteRange schema, folly::ByteRange data, - metadata_options const& options, int inode_offset) + metadata_options const& options, int inode_offset, + bool force_consistency_check) : data_(data) , meta_(map_frozen(schema, data_)) - , global_(lgr, &meta_) + , global_(lgr, &meta_, + options.check_consistency || force_consistency_check) , root_(dir_entry_view::from_dir_entry_index(0, &global_)) , log_(lgr) , inode_offset_(inode_offset) @@ -1313,9 +1315,9 @@ metadata_v2::freeze(const thrift::metadata::metadata& data) { metadata_v2::metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data, metadata_options const& options, - int inode_offset) + int inode_offset, bool force_consistency_check) : impl_(make_unique_logging_object( - lgr, schema, data, options, inode_offset)) {} + lgr, schema, data, options, inode_offset, force_consistency_check)) {} } // namespace dwarfs diff --git a/src/dwarfsck.cpp b/src/dwarfsck.cpp index afc6b29b..775251e1 100644 --- a/src/dwarfsck.cpp +++ b/src/dwarfsck.cpp @@ -110,6 +110,9 @@ int dwarfsck(int argc, char** argv) { try { filesystem_options fsopts; + + fsopts.metadata.check_consistency = true; + try { fsopts.image_offset = image_offset == "auto" ? filesystem_options::IMAGE_OFFSET_AUTO