mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 08:49:29 -04:00
Add metadata consistency checks
This commit is contained in:
parent
e85f782f6c
commit
72f56004ec
@ -48,7 +48,7 @@ class global_metadata {
|
||||
using Meta =
|
||||
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata>;
|
||||
|
||||
global_metadata(logger& lgr, Meta const* meta);
|
||||
global_metadata(logger& lgr, Meta const* meta, bool check_consistency);
|
||||
|
||||
Meta const* meta() const { return meta_; }
|
||||
|
||||
|
@ -57,7 +57,8 @@ class metadata_v2 {
|
||||
metadata_v2() = default;
|
||||
|
||||
metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
|
||||
metadata_options const& options, int inode_offset = 0);
|
||||
metadata_options const& options, int inode_offset = 0,
|
||||
bool force_consistency_check = false);
|
||||
|
||||
metadata_v2& operator=(metadata_v2&&) = default;
|
||||
|
||||
|
@ -41,6 +41,7 @@ struct block_cache_options {
|
||||
// Options that control how the metadata of a filesystem image is handled.
struct metadata_options {
  bool enable_nlink = false;
  bool readonly = false;
  // When set, the metadata tables are run through the consistency checks
  // before use (this flag is OR-ed with the forced-check flag when the
  // metadata object is constructed).
  bool check_consistency = false;
};
|
||||
|
||||
struct filesystem_options {
|
||||
|
@ -178,6 +178,8 @@ class filesystem_parser {
|
||||
|
||||
// Returns the image offset stored in image_offset_.
off_t image_offset() const { return image_offset_; }
|
||||
|
||||
// True when version_ is 2 or greater. Callers use a false result to force a
// metadata consistency check.
// NOTE(review): presumably images older than version 2 carry no section
// checksums -- confirm against the section header format.
bool has_checksums() const { return version_ >= 2; }
|
||||
|
||||
private:
|
||||
std::shared_ptr<mmif> mm_;
|
||||
off_t const image_offset_;
|
||||
@ -212,7 +214,8 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
std::vector<uint8_t>& meta_buffer,
|
||||
const metadata_options& options, int inode_offset = 0,
|
||||
bool force_buffers = false,
|
||||
mlock_mode lock_mode = mlock_mode::NONE) {
|
||||
mlock_mode lock_mode = mlock_mode::NONE,
|
||||
bool force_consistency_check = false) {
|
||||
LOG_PROXY(debug_logger_policy, lgr);
|
||||
auto schema_it = sections.find(section_type::METADATA_V2_SCHEMA);
|
||||
auto meta_it = sections.find(section_type::METADATA_V2);
|
||||
@ -250,7 +253,7 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
return metadata_v2(
|
||||
lgr,
|
||||
get_section_data(mm, schema_it->second, schema_buffer, force_buffers),
|
||||
meta_section_range, options, inode_offset);
|
||||
meta_section_range, options, inode_offset, force_consistency_check);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -326,9 +329,9 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
|
||||
std::vector<uint8_t> schema_buffer;
|
||||
|
||||
meta_ =
|
||||
make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
|
||||
options.metadata, inode_offset, false, options.lock_mode);
|
||||
meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
|
||||
options.metadata, inode_offset, false,
|
||||
options.lock_mode, !parser.has_checksums());
|
||||
|
||||
LOG_DEBUG << "read " << cache.block_count() << " blocks and " << meta_.size()
|
||||
<< " bytes of metadata";
|
||||
@ -526,16 +529,10 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
||||
std::vector<uint8_t> schema_raw;
|
||||
std::vector<uint8_t> meta_raw;
|
||||
|
||||
if (opts.recompress_metadata) {
|
||||
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
|
||||
metadata_options(), 0, true);
|
||||
|
||||
struct ::statvfs stbuf;
|
||||
meta.statvfs(&stbuf);
|
||||
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
|
||||
} else {
|
||||
prog.original_size = total_block_size;
|
||||
}
|
||||
// force metadata check
|
||||
auto meta =
|
||||
make_metadata(lgr, mm, sections, schema_raw, meta_raw, metadata_options(),
|
||||
0, true, mlock_mode::NONE, !parser.has_checksums());
|
||||
|
||||
parser.rewind();
|
||||
|
||||
@ -639,9 +636,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
|
||||
if (errors == 0 and detail_level > 0) {
|
||||
filesystem_options fsopts;
|
||||
if (detail_level > 0) {
|
||||
fsopts.metadata.enable_nlink = true;
|
||||
}
|
||||
fsopts.metadata.check_consistency = true;
|
||||
fsopts.metadata.enable_nlink = true;
|
||||
fsopts.image_offset = image_offset;
|
||||
filesystem_v2(lgr, mm, fsopts).dump(os, detail_level);
|
||||
}
|
||||
|
@ -19,6 +19,8 @@
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <queue>
|
||||
|
||||
#include "dwarfs/error.h"
|
||||
@ -82,10 +84,269 @@ unpack_directories(logger& lgr, global_metadata::Meta const* meta) {
|
||||
return directories;
|
||||
}
|
||||
|
||||
// Maps the file-type bits of `mode` to a small rank:
//   0 = directory, 1 = symlink, 2 = regular file,
//   3 = block or character device, 4 = anything else.
int mode_rank(uint16_t mode) {
  auto const file_type = mode & S_IFMT;

  if (file_type == S_IFDIR) {
    return 0;
  }

  if (file_type == S_IFLNK) {
    return 1;
  }

  if (file_type == S_IFREG) {
    return 2;
  }

  if (file_type == S_IFBLK || file_type == S_IFCHR) {
    return 3;
  }

  return 4;
}
|
||||
|
||||
// Throws runtime_error if any table that later checks index into is empty.
//
// The tables are tested in a fixed order (inodes, directories, chunk_table,
// then dir_entries or -- when dir_entries is absent -- entry_table_v2_2, and
// finally modes); the first empty one determines the error message.
void check_empty_tables(global_metadata::Meta const* meta) {
  if (meta->inodes().empty()) {
    DWARFS_THROW(runtime_error, "empty inodes table");
  }

  if (meta->directories().empty()) {
    DWARFS_THROW(runtime_error, "empty directories table");
  }

  if (meta->chunk_table().empty()) {
    DWARFS_THROW(runtime_error, "empty chunk_table table");
  }

  // Exactly one of the two entry tables is inspected, depending on whether
  // the optional dir_entries table is present.
  if (auto entries = meta->dir_entries(); !entries) {
    if (meta->entry_table_v2_2().empty()) {
      DWARFS_THROW(runtime_error, "empty entry_table_v2_2 table");
    }
  } else if (entries->empty()) {
    DWARFS_THROW(runtime_error, "empty dir_entries table");
  }

  if (meta->modes().empty()) {
    DWARFS_THROW(runtime_error, "empty modes table");
  }
}
|
||||
|
||||
// Verifies that every index stored in the inode and entry tables refers to
// an existing element of the table it points into; throws runtime_error on
// the first violation.
//
// For owner, group and name indices an index of 0 is always accepted, even
// when the referenced table is empty.
void check_index_range(global_metadata::Meta const* meta) {
  auto const mode_count = meta->modes().size();
  auto const uid_count = meta->uids().size();
  auto const gid_count = meta->gids().size();
  auto const inode_count = meta->inodes().size();
  auto name_count = meta->names().size();
  // Without a dir_entries table, names and inode numbers are referenced via
  // the *_v2_2 fields.
  bool const legacy_layout = !static_cast<bool>(meta->dir_entries());

  for (auto inode : meta->inodes()) {
    if (inode.mode_index() >= mode_count) {
      DWARFS_THROW(runtime_error, "mode_index out of range");
    }

    if (auto idx = inode.owner_index(); idx > 0 && idx >= uid_count) {
      DWARFS_THROW(runtime_error, "owner_index out of range");
    }

    if (auto idx = inode.group_index(); idx > 0 && idx >= gid_count) {
      DWARFS_THROW(runtime_error, "group_index out of range");
    }

    if (legacy_layout) {
      if (auto idx = inode.name_index_v2_2(); idx > 0 && idx >= name_count) {
        DWARFS_THROW(runtime_error, "name_index_v2_2 out of range");
      }
    }
  }

  auto entries = meta->dir_entries();

  if (!entries) {
    // Legacy layout: the entry table holds inode numbers directly.
    for (auto inode_num : meta->entry_table_v2_2()) {
      if (inode_num >= inode_count) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 value out of range");
      }
    }
    return;
  }

  if (auto compact = meta->compact_names()) {
    // With compact names, the number of names is derived from the index;
    // an unpacked index has one more element than there are names.
    name_count = compact->index().size();

    if (!compact->packed_index()) {
      if (name_count == 0) {
        DWARFS_THROW(runtime_error, "empty compact_names index");
      }
      --name_count;
    }
  }

  for (auto entry : *entries) {
    if (auto idx = entry.name_index(); idx > 0 && idx >= name_count) {
      DWARFS_THROW(runtime_error, "name_index out of range");
    }

    if (auto idx = entry.inode_num(); idx >= inode_count) {
      DWARFS_THROW(runtime_error, "inode_num out of range");
    }
  }
}
|
||||
|
||||
// Validates the directories table and the chunk table, in either their
// packed or unpacked representation (selected by the metadata options).
// Throws runtime_error on the first inconsistency.
//
// Directories:
//  - packed:   no directory may have parent_entry set, and the sum of all
//              first_entry values must equal the number of dir_entries.
//  - unpacked: first_entry must not exceed, and parent_entry must be below,
//              the number of entries (dir_entries if present, otherwise the
//              number of inodes).
// Chunk table:
//  - packed:   the sum of all values must equal the number of chunks.
//  - unpacked: values must be sorted and the last one must equal the number
//              of chunks.
void check_packed_tables(global_metadata::Meta const* meta) {
  auto directories = meta->directories();

  if (auto opt = meta->options(); opt and opt->packed_directories()) {
    if (std::any_of(directories.begin(), directories.end(),
                    [](auto i) { return i.parent_entry() != 0; })) {
      DWARFS_THROW(runtime_error, "parent_entry set in packed directory");
    }

    // The checked metadata may be corrupt, so dir_entries cannot be assumed
    // to be present just because the packed flag is set.
    auto entries = meta->dir_entries();

    if (!entries) {
      DWARFS_THROW(runtime_error,
                   "packed directories without dir_entries table");
    }

    if (std::accumulate(directories.begin(), directories.end(),
                        static_cast<size_t>(0), [](auto n, auto d) {
                          return n + d.first_entry();
                        }) != entries->size()) {
      DWARFS_THROW(runtime_error,
                   "first_entry inconsistency in packed directories");
    }
  } else {
    size_t num_entries = meta->dir_entries() ? meta->dir_entries()->size()
                                             : meta->inodes().size();

    for (auto d : directories) {
      // first_entry may be equal to num_entries, hence `>` rather than `>=`.
      if (auto i = d.first_entry(); i > num_entries) {
        DWARFS_THROW(runtime_error, "first_entry out of range");
      }
      if (auto i = d.parent_entry(); i >= num_entries) {
        DWARFS_THROW(runtime_error, "parent_entry out of range");
      }
    }
  }

  auto chunk_table = meta->chunk_table();

  if (auto opt = meta->options(); opt and opt->packed_chunk_table()) {
    if (std::accumulate(chunk_table.begin(), chunk_table.end(),
                        static_cast<size_t>(0)) != meta->chunks().size()) {
      DWARFS_THROW(runtime_error, "packed chunk_table inconsistency");
    }
  } else {
    // Guard against an empty table so that back() is never called on it,
    // even if this function is used without a prior emptiness check.
    if (chunk_table.empty() or
        !std::is_sorted(chunk_table.begin(), chunk_table.end()) or
        chunk_table.back() != meta->chunks().size()) {
      DWARFS_THROW(runtime_error, "chunk_table inconsistency");
    }
  }
}
|
||||
|
||||
// Verifies that every chunk lies completely inside a single block of
// block_size bytes; throws runtime_error otherwise.
void check_chunks(global_metadata::Meta const* meta) {
  auto block_size = meta->block_size();

  for (auto c : meta->chunks()) {
    if (c.offset() >= block_size || c.size() > block_size) {
      DWARFS_THROW(runtime_error, "chunk offset/size out of range");
    }

    // offset < block_size holds here, so the subtraction cannot wrap.
    // Comparing against the remaining space instead of computing
    // offset + size avoids an overflow of the sum for large block sizes,
    // which would let a corrupt chunk slip through.
    if (c.size() > block_size - c.offset()) {
      DWARFS_THROW(runtime_error, "chunk end outside of block");
    }
  }
}
|
||||
|
||||
// Checks that the inode table (or, when there is no dir_entries table, the
// entry_table_v2_2) is ordered by mode_rank(), and returns the partition
// boundaries.
//
// offsets[r] is the index of the first element whose rank is >= r, so
// offsets[0] is 0 and [offsets[r], offsets[r + 1]) is the range of elements
// with rank r. Throws runtime_error if the table is not partitioned with
// respect to any of the ranks.
std::array<size_t, 6> check_partitioning(global_metadata::Meta const* meta) {
  std::array<size_t, 6> offsets;

  bool const has_dir_entries = static_cast<bool>(meta->dir_entries());
  auto modes = meta->modes();
  auto inodes = meta->inodes();
  int const rank_count = static_cast<int>(offsets.size());

  for (int rank = 0; rank < rank_count; ++rank) {
    if (!has_dir_entries) {
      // Legacy layout: the entry table refers to inodes by index.
      auto entries = meta->entry_table_v2_2();
      auto below_rank = [&](auto ent) {
        return mode_rank(modes[inodes[ent].mode_index()]) < rank;
      };

      if (!std::is_partitioned(entries.begin(), entries.end(), below_rank)) {
        DWARFS_THROW(runtime_error, "entry_table_v2_2 inconsistency");
      }

      auto boundary =
          std::partition_point(entries.begin(), entries.end(), below_rank);
      offsets[rank] = std::distance(entries.begin(), boundary);
      continue;
    }

    auto below_rank = [&](auto ino) {
      return mode_rank(modes[ino.mode_index()]) < rank;
    };

    if (!std::is_partitioned(inodes.begin(), inodes.end(), below_rank)) {
      DWARFS_THROW(runtime_error, "inode table inconsistency");
    }

    auto boundary =
        std::partition_point(inodes.begin(), inodes.end(), below_rank);
    offsets[rank] = std::distance(inodes.begin(), boundary);
  }

  return offsets;
}
|
||||
|
||||
// Runs all metadata consistency checks when `check` is true and returns
// `meta` unchanged, so the call can be used in a member initializer.
//
// The checks are, in order: non-empty tables, index ranges, packed/unpacked
// table consistency, chunk bounds and inode partitioning by type. Afterwards
// the number of directories, symlinks, regular files and devices derived
// from the respective tables is compared against the partition boundaries.
// Any failure is reported by throwing runtime_error.
//
// When `check` is false, nothing is inspected.
global_metadata::Meta const*
check_metadata(logger& lgr, global_metadata::Meta const* meta, bool check) {
  if (!check) {
    return meta;
  }

  LOG_PROXY(debug_logger_policy, lgr);

  auto ti = LOG_TIMED_DEBUG;

  ti << "check metadata consistency";

  check_empty_tables(meta);
  check_index_range(meta);
  check_packed_tables(meta);
  check_chunks(meta);
  auto offsets = check_partitioning(meta);

  // Both tables are known to be non-empty at this point, so subtracting one
  // cannot wrap.
  auto num_dir = meta->directories().size() - 1;
  auto num_lnk = meta->symlink_table().size();
  auto num_reg_unique = meta->chunk_table().size() - 1;
  size_t num_reg_shared = 0;

  if (auto sfp = meta->shared_files_table()) {
    // The options are optional as well; treat missing options like an
    // unpacked table instead of dereferencing an empty optional.
    auto opt = meta->options();

    if (opt and opt->packed_shared_files_table()) {
      num_reg_shared =
          std::accumulate(sfp->begin(), sfp->end(), 2 * sfp->size());
      num_reg_unique -= sfp->size();
    } else {
      if (!std::is_sorted(sfp->begin(), sfp->end())) {
        DWARFS_THROW(runtime_error,
                     "unpacked shared_files_table is not sorted");
      }
      num_reg_shared = sfp->size();
      // An empty table contributes no shared files; back() must not be
      // called on it.
      if (!sfp->empty()) {
        num_reg_unique -= sfp->back() + 1;
      }
    }
  }

  size_t num_dev = meta->devices() ? meta->devices()->size() : 0;

  // offsets[r] is the first index of rank r (see mode_rank): 0 directories,
  // 1 symlinks, 2 regular files, 3 devices, 4 everything else.
  if (num_dir != offsets[1]) {
    DWARFS_THROW(runtime_error, "wrong number of directories");
  }

  if (num_lnk != offsets[2] - offsets[1]) {
    DWARFS_THROW(runtime_error, "wrong number of links");
  }

  if (num_reg_unique + num_reg_shared != offsets[3] - offsets[2]) {
    DWARFS_THROW(runtime_error, "wrong number of files");
  }

  if (num_dev != offsets[4] - offsets[3]) {
    DWARFS_THROW(runtime_error, "wrong number of devices");
  }

  if (!meta->dir_entries()) {
    // Legacy layout: each inode stores its own number, which must fall into
    // the range reserved for its type.
    for (auto ino : meta->inodes()) {
      auto mode = meta->modes()[ino.mode_index()];
      auto i = ino.inode_v2_2();
      int base = mode_rank(mode);

      if (i < offsets[base] ||
          (i >= offsets[base + 1] && i > offsets[base])) {
        DWARFS_THROW(runtime_error, "inode_v2_2 out of range");
      }
    }
  }

  return meta;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
global_metadata::global_metadata(logger& lgr, Meta const* meta)
|
||||
: meta_{meta}
|
||||
global_metadata::global_metadata(logger& lgr, Meta const* meta,
|
||||
bool check_consistency)
|
||||
: meta_{check_metadata(lgr, meta, check_consistency)}
|
||||
, directories_storage_{unpack_directories(lgr, meta_)}
|
||||
, directories_{directories_storage_.empty() ? nullptr
|
||||
: directories_storage_.data()}
|
||||
|
@ -242,10 +242,12 @@ template <typename LoggerPolicy>
|
||||
class metadata_ final : public metadata_v2::impl {
|
||||
public:
|
||||
metadata_(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
|
||||
metadata_options const& options, int inode_offset)
|
||||
metadata_options const& options, int inode_offset,
|
||||
bool force_consistency_check)
|
||||
: data_(data)
|
||||
, meta_(map_frozen<thrift::metadata::metadata>(schema, data_))
|
||||
, global_(lgr, &meta_)
|
||||
, global_(lgr, &meta_,
|
||||
options.check_consistency || force_consistency_check)
|
||||
, root_(dir_entry_view::from_dir_entry_index(0, &global_))
|
||||
, log_(lgr)
|
||||
, inode_offset_(inode_offset)
|
||||
@ -1313,9 +1315,9 @@ metadata_v2::freeze(const thrift::metadata::metadata& data) {
|
||||
|
||||
metadata_v2::metadata_v2(logger& lgr, folly::ByteRange schema,
|
||||
folly::ByteRange data, metadata_options const& options,
|
||||
int inode_offset)
|
||||
int inode_offset, bool force_consistency_check)
|
||||
: impl_(make_unique_logging_object<metadata_v2::impl, metadata_,
|
||||
logger_policies>(
|
||||
lgr, schema, data, options, inode_offset)) {}
|
||||
lgr, schema, data, options, inode_offset, force_consistency_check)) {}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -110,6 +110,9 @@ int dwarfsck(int argc, char** argv) {
|
||||
|
||||
try {
|
||||
filesystem_options fsopts;
|
||||
|
||||
fsopts.metadata.check_consistency = true;
|
||||
|
||||
try {
|
||||
fsopts.image_offset = image_offset == "auto"
|
||||
? filesystem_options::IMAGE_OFFSET_AUTO
|
||||
|
Loading…
x
Reference in New Issue
Block a user