Add metadata consistency checks

This commit is contained in:
Marcus Holland-Moritz 2021-03-30 17:49:14 +02:00
parent e85f782f6c
commit 72f56004ec
7 changed files with 290 additions and 26 deletions

View File

@ -48,7 +48,7 @@ class global_metadata {
using Meta =
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
global_metadata(logger& lgr, Meta const* meta);
global_metadata(logger& lgr, Meta const* meta, bool check_consistency);
// Returns the frozen metadata pointer stored in this object (meta_).
Meta const* meta() const { return meta_; }

View File

@ -57,7 +57,8 @@ class metadata_v2 {
metadata_v2() = default;
metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
metadata_options const& options, int inode_offset = 0);
metadata_options const& options, int inode_offset = 0,
bool force_consistency_check = false);
metadata_v2& operator=(metadata_v2&&) = default;

View File

@ -41,6 +41,7 @@ struct block_cache_options {
// Options controlling how filesystem metadata is loaded; every flag is off
// by default.
struct metadata_options {
  // NOTE(review): presumably enables hard link count reporting -- confirm
  // against the metadata implementation.
  bool enable_nlink = false;
  // NOTE(review): presumably presents the filesystem as read-only -- confirm.
  bool readonly = false;
  // Request a metadata consistency check when the metadata is loaded.
  bool check_consistency = false;
};
struct filesystem_options {

View File

@ -178,6 +178,8 @@ class filesystem_parser {
// Returns the image offset stored in this parser (image_offset_).
off_t image_offset() const { return image_offset_; }
// True when the parsed image's version_ is at least 2.
// NOTE(review): presumably only such images carry section checksums; callers
// in this commit force a metadata consistency check when this returns false.
bool has_checksums() const { return version_ >= 2; }
private:
std::shared_ptr<mmif> mm_;
off_t const image_offset_;
@ -212,7 +214,8 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
std::vector<uint8_t>& meta_buffer,
const metadata_options& options, int inode_offset = 0,
bool force_buffers = false,
mlock_mode lock_mode = mlock_mode::NONE) {
mlock_mode lock_mode = mlock_mode::NONE,
bool force_consistency_check = false) {
LOG_PROXY(debug_logger_policy, lgr);
auto schema_it = sections.find(section_type::METADATA_V2_SCHEMA);
auto meta_it = sections.find(section_type::METADATA_V2);
@ -250,7 +253,7 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
return metadata_v2(
lgr,
get_section_data(mm, schema_it->second, schema_buffer, force_buffers),
meta_section_range, options, inode_offset);
meta_section_range, options, inode_offset, force_consistency_check);
}
template <typename LoggerPolicy>
@ -326,9 +329,9 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
std::vector<uint8_t> schema_buffer;
meta_ =
make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
options.metadata, inode_offset, false, options.lock_mode);
meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
options.metadata, inode_offset, false,
options.lock_mode, !parser.has_checksums());
LOG_DEBUG << "read " << cache.block_count() << " blocks and " << meta_.size()
<< " bytes of metadata";
@ -526,16 +529,10 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
std::vector<uint8_t> schema_raw;
std::vector<uint8_t> meta_raw;
if (opts.recompress_metadata) {
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
metadata_options(), 0, true);
struct ::statvfs stbuf;
meta.statvfs(&stbuf);
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
} else {
prog.original_size = total_block_size;
}
// force metadata check
auto meta =
make_metadata(lgr, mm, sections, schema_raw, meta_raw, metadata_options(),
0, true, mlock_mode::NONE, !parser.has_checksums());
parser.rewind();
@ -639,9 +636,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
if (errors == 0 and detail_level > 0) {
filesystem_options fsopts;
if (detail_level > 0) {
fsopts.metadata.enable_nlink = true;
}
fsopts.metadata.check_consistency = true;
fsopts.metadata.enable_nlink = true;
fsopts.image_offset = image_offset;
filesystem_v2(lgr, mm, fsopts).dump(os, detail_level);
}

View File

@ -19,6 +19,8 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <numeric>
#include <queue>
#include "dwarfs/error.h"
@ -82,10 +84,269 @@ unpack_directories(logger& lgr, global_metadata::Meta const* meta) {
return directories;
}
// Maps the file type bits of `mode` to a rank:
//   0 = directory, 1 = symlink, 2 = regular file,
//   3 = block or character device, 4 = anything else.
int mode_rank(uint16_t mode) {
  auto const type = mode & S_IFMT;

  if (type == S_IFDIR) {
    return 0;
  }
  if (type == S_IFLNK) {
    return 1;
  }
  if (type == S_IFREG) {
    return 2;
  }
  if (type == S_IFBLK || type == S_IFCHR) {
    return 3;
  }
  return 4;
}
// Throws runtime_error if any table that must hold at least one element is
// empty. The tables are tested in a fixed order, so the first empty one
// determines the error message.
void check_empty_tables(global_metadata::Meta const* meta) {
  if (meta->inodes().empty()) {
    DWARFS_THROW(runtime_error, "empty inodes table");
  }

  if (meta->directories().empty()) {
    DWARFS_THROW(runtime_error, "empty directories table");
  }

  if (meta->chunk_table().empty()) {
    DWARFS_THROW(runtime_error, "empty chunk_table table");
  }

  // Exactly one of dir_entries / entry_table_v2_2 is inspected, depending on
  // whether the optional dir_entries field is present.
  auto dir_entries = meta->dir_entries();

  if (!dir_entries) {
    if (meta->entry_table_v2_2().empty()) {
      DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table");
    }
  } else if (dir_entries->empty()) {
    DWARFS_THROW(runtime_error, "empty dir_entries table");
  }

  if (meta->modes().empty()) {
    DWARFS_THROW(runtime_error, "empty modes table");
  }
}
// Verifies that every index stored in the inode and entry tables refers to
// an existing element of the table it indexes; throws runtime_error on the
// first violation.
//
// For owner, group and name indices, an index of 0 is always accepted, even
// when the referenced table is empty.
void check_index_range(global_metadata::Meta const* meta) {
  auto const mode_count = meta->modes().size();
  auto const uid_count = meta->uids().size();
  auto const gid_count = meta->gids().size();
  auto const inode_count = meta->inodes().size();
  auto name_count = meta->names().size();

  // Without a dir_entries table, names are referenced from the inodes
  // themselves (name_index_v2_2).
  bool const has_dir_entries = static_cast<bool>(meta->dir_entries());

  for (auto inode : meta->inodes()) {
    if (inode.mode_index() >= mode_count) {
      DWARFS_THROW(runtime_error, "mode_index out of range");
    }

    if (auto idx = inode.owner_index(); idx > 0 && idx >= uid_count) {
      DWARFS_THROW(runtime_error, "owner_index out of range");
    }

    if (auto idx = inode.group_index(); idx > 0 && idx >= gid_count) {
      DWARFS_THROW(runtime_error, "group_index out of range");
    }

    if (!has_dir_entries) {
      if (auto idx = inode.name_index_v2_2(); idx > 0 && idx >= name_count) {
        DWARFS_THROW(runtime_error, "name_index_v2_2 out of range");
      }
    }
  }

  auto dir_entries = meta->dir_entries();

  if (!dir_entries) {
    // No dir_entries: each entry table value is an index into the inodes.
    for (auto entry : meta->entry_table_v2_2()) {
      if (entry >= inode_count) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range");
      }
    }
    return;
  }

  if (auto compact = meta->compact_names()) {
    // With compact names, the number of names is derived from the index; an
    // index that is not packed has one more element than there are names.
    name_count = compact->index().size();

    if (!compact->packed_index()) {
      if (name_count == 0) {
        DWARFS_THROW(runtime_error, "empty compact_names index");
      }
      --name_count;
    }
  }

  for (auto entry : *dir_entries) {
    if (auto idx = entry.name_index(); idx > 0 && idx >= name_count) {
      DWARFS_THROW(runtime_error, "name_index out of range");
    }

    if (auto idx = entry.inode_num(); idx >= inode_count) {
      DWARFS_THROW(runtime_error, "inode_num out of range");
    }
  }
}
// Checks the directories and chunk_table tables for internal consistency,
// taking into account whether each is stored packed; throws runtime_error on
// the first inconsistency found.
//
// Packed directories: no parent_entry may be set and the first_entry values
// must sum to the number of dir_entries.
// Unpacked directories: first_entry / parent_entry must lie within the entry
// range (first_entry may equal the number of entries).
// Packed chunk_table: the values must sum to the number of chunks.
// Unpacked chunk_table: the values must be sorted and the last one must equal
// the number of chunks.
void check_packed_tables(global_metadata::Meta const* meta) {
  auto opt = meta->options();
  auto dirs = meta->directories();

  if (opt and opt->packed_directories()) {
    // dir_entries is optional; in possibly corrupt metadata it may be absent
    // even though the packed flag is set, so never dereference it unchecked.
    auto dep = meta->dir_entries();

    if (!dep) {
      DWARFS_THROW(runtime_error, "packed directories without dir_entries");
    }

    if (std::any_of(dirs.begin(), dirs.end(),
                    [](auto i) { return i.parent_entry() != 0; })) {
      DWARFS_THROW(runtime_error, "parent_entry set in packed directory");
    }

    if (std::accumulate(dirs.begin(), dirs.end(), static_cast<size_t>(0),
                        [](auto n, auto d) { return n + d.first_entry(); }) !=
        dep->size()) {
      DWARFS_THROW(runtime_error,
                   "first_entry inconsistency in packed directories");
    }
  } else {
    size_t num_entries = meta->dir_entries() ? meta->dir_entries()->size()
                                             : meta->inodes().size();

    for (auto d : dirs) {
      // first_entry may legitimately equal num_entries, hence `>` here but
      // `>=` for parent_entry below.
      if (auto i = d.first_entry(); i > num_entries) {
        DWARFS_THROW(runtime_error, "first_entry out of range");
      }
      if (auto i = d.parent_entry(); i >= num_entries) {
        DWARFS_THROW(runtime_error, "parent_entry out of range");
      }
    }
  }

  auto chunk_table = meta->chunk_table();

  if (opt and opt->packed_chunk_table()) {
    if (std::accumulate(chunk_table.begin(), chunk_table.end(),
                        static_cast<size_t>(0)) != meta->chunks().size()) {
      DWARFS_THROW(runtime_error, "packed chunk_table inconsistency");
    }
  } else {
    // The emptiness test keeps back() from being called on an empty table,
    // so this function is safe even when run on its own.
    if (chunk_table.empty() or
        !std::is_sorted(chunk_table.begin(), chunk_table.end()) or
        chunk_table.back() != meta->chunks().size()) {
      DWARFS_THROW(runtime_error, "chunk_table inconsistency");
    }
  }
}
// Verifies that every chunk lies completely within a single block of
// block_size bytes; throws runtime_error otherwise.
void check_chunks(global_metadata::Meta const* meta) {
  auto const limit = meta->block_size();

  for (auto chunk : meta->chunks()) {
    auto const start = chunk.offset();
    auto const length = chunk.size();

    // Offset and size are validated individually first; only then is their
    // sum compared against the block size.
    if (!(start < limit && length <= limit)) {
      DWARFS_THROW(runtime_error, "chunk offset/size out of range");
    }

    if (start + length > limit) {
      DWARFS_THROW(runtime_error, "chunk end outside of block");
    }
  }
}
// Verifies that the inode table (or, when there is no dir_entries table, the
// entry_table_v2_2 table) is partitioned by mode_rank(), and returns the
// partition boundaries.
//
// offsets[r] is the number of leading elements whose rank is less than r, so
// elements of rank r occupy the range [offsets[r], offsets[r + 1]).
// Throws runtime_error if the table is not partitioned for some rank.
std::array<size_t, 6> check_partitioning(global_metadata::Meta const* meta) {
  std::array<size_t, 6> offsets;
  int const rank_count = static_cast<int>(offsets.size());

  if (meta->dir_entries()) {
    auto inodes = meta->inodes();
    auto const first = inodes.begin();
    auto const last = inodes.end();

    for (int rank = 0; rank < rank_count; ++rank) {
      auto below_rank = [rank, modes = meta->modes()](auto ino) {
        return mode_rank(modes[ino.mode_index()]) < rank;
      };

      if (!std::is_partitioned(first, last, below_rank)) {
        DWARFS_THROW(runtime_error, "inode table inconsistency");
      }

      offsets[rank] =
          std::distance(first, std::partition_point(first, last, below_rank));
    }
  } else {
    auto entries = meta->entry_table_v2_2();
    auto const first = entries.begin();
    auto const last = entries.end();

    for (int rank = 0; rank < rank_count; ++rank) {
      // Entries are indices into the inode table; rank them by the mode of
      // the inode they refer to.
      auto below_rank = [rank, modes = meta->modes(),
                         inodes = meta->inodes()](auto ent) {
        return mode_rank(modes[inodes[ent].mode_index()]) < rank;
      };

      if (!std::is_partitioned(first, last, below_rank)) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency");
      }

      offsets[rank] =
          std::distance(first, std::partition_point(first, last, below_rank));
    }
  }

  return offsets;
}
// Optionally runs all metadata consistency checks and returns `meta`
// unchanged, so the call can be used directly in a member initializer.
//
// lgr:   logger used for the timed debug message.
// meta:  frozen metadata to check.
// check: when false, no checks are performed.
//
// Throws runtime_error on the first inconsistency found. The individual
// check_* helpers run in a fixed order; the later ones index into tables
// that the earlier ones have validated.
global_metadata::Meta const*
check_metadata(logger& lgr, global_metadata::Meta const* meta, bool check) {
  if (check) {
    LOG_PROXY(debug_logger_policy, lgr);

    auto ti = LOG_TIMED_DEBUG;
    ti << "check metadata consistency";

    check_empty_tables(meta);
    check_index_range(meta);
    check_packed_tables(meta);
    check_chunks(meta);
    auto offsets = check_partitioning(meta);

    // Both tables were verified to be non-empty by check_empty_tables().
    auto num_dir = meta->directories().size() - 1;
    auto num_lnk = meta->symlink_table().size();
    auto num_reg_unique = meta->chunk_table().size() - 1;
    size_t num_reg_shared = 0;

    if (auto sfp = meta->shared_files_table()) {
      // options is an optional field; treat its absence as "not packed",
      // like the other packed-table checks do, instead of dereferencing it
      // unconditionally.
      auto opt = meta->options();

      if (opt and opt->packed_shared_files_table()) {
        num_reg_shared =
            std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size());
        num_reg_unique -= sfp->size();
      } else {
        if (!std::is_sorted(sfp->begin(), sfp->end())) {
          DWARFS_THROW(runtime_error,
                       "unpacked shared_files_table is not sorted");
        }
        num_reg_shared = sfp->size();
        // back() must not be called on an empty table; an empty table does
        // not change the number of unique files.
        if (!sfp->empty()) {
          num_reg_unique -= sfp->back() + 1;
        }
      }
    }

    size_t num_dev = meta->devices() ? meta->devices()->size() : 0;

    // offsets[r] is the number of entries with a mode rank below r, so the
    // number of entries of rank r is offsets[r + 1] - offsets[r].
    if (num_dir != offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of directories");
    }

    if (num_lnk != offsets[2] - offsets[1]) {
      DWARFS_THROW(runtime_error, "wrong number of links");
    }

    if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) {
      DWARFS_THROW(runtime_error, "wrong number of files");
    }

    if (num_dev != offsets[4] - offsets[3]) {
      DWARFS_THROW(runtime_error, "wrong number of devices");
    }

    if (!meta->dir_entries()) {
      // Without dir_entries, each inode stores its own number (inode_v2_2),
      // which must fall into the range belonging to its mode rank.
      for (auto ino : meta->inodes()) {
        auto mode = meta->modes()[ino.mode_index()];
        auto i = ino.inode_v2_2();
        int base = mode_rank(mode);

        if (i < offsets[base] ||
            (i >= offsets[base + 1] && i > offsets[base])) {
          DWARFS_THROW(runtime_error, "inode_v2_2 out of range");
        }
      }
    }
  }

  return meta;
}
} // namespace
global_metadata::global_metadata(logger& lgr, Meta const* meta)
: meta_{meta}
global_metadata::global_metadata(logger& lgr, Meta const* meta,
bool check_consistency)
: meta_{check_metadata(lgr, meta, check_consistency)}
, directories_storage_{unpack_directories(lgr, meta_)}
, directories_{directories_storage_.empty() ? nullptr
: directories_storage_.data()}

View File

@ -242,10 +242,12 @@ template <typename LoggerPolicy>
class metadata_ final : public metadata_v2::impl {
public:
metadata_(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
metadata_options const& options, int inode_offset)
metadata_options const& options, int inode_offset,
bool force_consistency_check)
: data_(data)
, meta_(map_frozen<thrift::metadata::metadata>(schema, data_))
, global_(lgr, &meta_)
, global_(lgr, &meta_,
options.check_consistency || force_consistency_check)
, root_(dir_entry_view::from_dir_entry_index(0, &global_))
, log_(lgr)
, inode_offset_(inode_offset)
@ -1313,9 +1315,9 @@ metadata_v2::freeze(const thrift::metadata::metadata& data) {
metadata_v2::metadata_v2(logger& lgr, folly::ByteRange schema,
folly::ByteRange data, metadata_options const& options,
int inode_offset)
int inode_offset, bool force_consistency_check)
: impl_(make_unique_logging_object<metadata_v2::impl, metadata_,
logger_policies>(
lgr, schema, data, options, inode_offset)) {}
lgr, schema, data, options, inode_offset, force_consistency_check)) {}
} // namespace dwarfs

View File

@ -110,6 +110,9 @@ int dwarfsck(int argc, char** argv) {
try {
filesystem_options fsopts;
fsopts.metadata.check_consistency = true;
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO