feat(forward-compat): allow unknown section/compression types (gh #158)

This commit is contained in:
Marcus Holland-Moritz 2023-11-28 21:46:47 +01:00
parent 95eac672f8
commit 8dae679ad4
7 changed files with 132 additions and 38 deletions

View File

@ -36,7 +36,16 @@
namespace dwarfs {
enum class compression_type : uint8_t {
enum class compression_type_v1 : uint8_t {
#define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value
#define DWARFS_COMMA_ ,
DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_,
DWARFS_COMMA_)
#undef DWARFS_COMPRESSION_TYPE_ENUMERATION_
#undef DWARFS_COMMA_
};
enum class compression_type : uint16_t {
#define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value
#define DWARFS_COMMA_ ,
DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_,

View File

@ -39,6 +39,8 @@ class fs_section {
size_t start() const { return impl_->start(); }
size_t length() const { return impl_->length(); }
// Whether this section's compression type is a known one; the answer is
// forwarded unchanged from the implementation object.
bool is_known_compression() const { return impl_->is_known_compression(); }
// Whether this section's type is a known one; the answer is forwarded
// unchanged from the implementation object.
bool is_known_type() const { return impl_->is_known_type(); }
compression_type compression() const { return impl_->compression(); }
section_type type() const { return impl_->type(); }
std::string name() const { return impl_->name(); }
@ -57,6 +59,8 @@ class fs_section {
virtual size_t start() const = 0;
virtual size_t length() const = 0;
virtual bool is_known_compression() const = 0;
virtual bool is_known_type() const = 0;
virtual compression_type compression() const = 0;
virtual section_type type() const = 0;
virtual std::string name() const = 0;

View File

@ -56,7 +56,7 @@ struct file_header {
struct section_header {
section_type type;
compression_type compression;
compression_type_v1 compression;
uint8_t unused;
uint32_t length;
@ -85,11 +85,13 @@ struct filesystem_info {
uint64_t uncompressed_block_size{0};
uint64_t compressed_metadata_size{0};
uint64_t uncompressed_metadata_size{0};
bool uncompressed_block_size_is_estimate{false};
bool uncompressed_metadata_size_is_estimate{false};
};
bool is_valid_compression_type(compression_type type);
bool is_known_compression_type(compression_type type);
bool is_valid_section_type(section_type type);
bool is_known_section_type(section_type type);
std::string get_compression_name(compression_type type);

View File

@ -52,6 +52,24 @@ namespace dwarfs {
namespace {
// Emits one debug line describing `section` (description, start offset and
// length) and, independently of each other, a warning when the section type
// and/or the compression type is not known. Nothing is thrown here; unknown
// values are only reported through `lgr`.
void check_section_logger(logger& lgr, fs_section const& section) {
  LOG_PROXY(debug_logger_policy, lgr);

  LOG_DEBUG << "section " << section.description() << " @ " << section.start()
            << " [" << section.length() << " bytes]";

  if (!section.is_known_type()) {
    // Report the raw numeric value, since an unknown type has no name.
    auto const raw_type = folly::to_underlying(section.type());
    LOG_WARN << "unknown section type " << raw_type << " in section @ "
             << section.start();
  }

  if (!section.is_known_compression()) {
    // Same for the compression: only the numeric value is meaningful.
    auto const raw_compression = folly::to_underlying(section.compression());
    LOG_WARN << "unknown compression type " << raw_compression
             << " in section @ " << section.start();
  }
}
class filesystem_parser {
private:
static uint64_t constexpr section_offset_mask{(UINT64_C(1) << 48) - 1};
@ -361,6 +379,7 @@ class filesystem_ final : public filesystem_v2::impl {
private:
filesystem_info const& get_info() const;
void check_section(fs_section const& section) const;
LOG_PROXY_DECL(LoggerPolicy);
std::shared_ptr<mmif> mm_;
@ -389,6 +408,11 @@ class filesystem_ final : public filesystem_v2::impl {
PERFMON_CLS_TIMER_DECL(readv_future)
};
// Member convenience wrapper: passes `section` to check_section_logger()
// together with LOG_GET_LOGGER (presumably the logger behind this object's
// log proxy), so the section is logged and unknown section/compression
// types are warned about.
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::check_section(fs_section const& section) const {
check_section_logger(LOG_GET_LOGGER, section);
}
template <typename LoggerPolicy>
filesystem_info const& filesystem_<LoggerPolicy>::get_info() const {
std::lock_guard lock(mx_);
@ -399,14 +423,27 @@ filesystem_info const& filesystem_<LoggerPolicy>::get_info() const {
parser_.rewind();
while (auto s = parser_.next_section()) {
check_section(*s);
if (s->type() == section_type::BLOCK) {
++info.block_count;
info.compressed_block_size += s->length();
info.uncompressed_block_size += get_uncompressed_section_size(mm_, *s);
try {
info.uncompressed_block_size +=
get_uncompressed_section_size(mm_, *s);
} catch (std::exception const& e) {
info.uncompressed_block_size += s->length();
info.uncompressed_block_size_is_estimate = true;
}
} else if (s->type() == section_type::METADATA_V2) {
info.compressed_metadata_size += s->length();
info.uncompressed_metadata_size +=
get_uncompressed_section_size(mm_, *s);
try {
info.uncompressed_metadata_size +=
get_uncompressed_section_size(mm_, *s);
} catch (std::exception const& e) {
info.uncompressed_metadata_size += s->length();
info.uncompressed_metadata_size_is_estimate = true;
}
}
}
@ -452,8 +489,8 @@ filesystem_<LoggerPolicy>::filesystem_(
section_map sections;
while (auto s = parser_.next_section()) {
LOG_DEBUG << "section " << s->name() << " @ " << s->start() << " ["
<< s->length() << " bytes]";
check_section(*s);
if (s->type() == section_type::BLOCK) {
cache.insert(*s);
} else {
@ -665,8 +702,8 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
section_map sections;
while (auto s = parser.next_section()) {
LOG_DEBUG << "section " << s->description() << " @ " << s->start() << " ["
<< s->length() << " bytes]";
check_section_logger(lgr, *s);
if (!s->check_fast(*mm)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
}
@ -746,8 +783,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
std::vector<std::future<fs_section>> sections;
while (auto sp = parser.next_section()) {
LOG_DEBUG << "section " << sp->description() << " @ " << sp->start() << " ["
<< sp->length() << " bytes]";
check_section_logger(lgr, *sp);
std::packaged_task<fs_section()> task{[&, s = *sp] {
if (!s.check_fast(*mm)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s.name());

View File

@ -59,28 +59,38 @@ void read_section_header_common(T& header, size_t& start, mmif const& mm,
template <typename T>
void check_section(T const& sec) {
if (!is_valid_section_type(sec.type())) {
DWARFS_THROW(runtime_error, fmt::format("invalid section type ({0})",
if (!is_known_section_type(sec.type())) {
DWARFS_THROW(runtime_error, fmt::format("unknown section type ({0})",
static_cast<int>(sec.type())));
}
if (!is_valid_compression_type(sec.compression())) {
if (!is_known_compression_type(sec.compression())) {
DWARFS_THROW(runtime_error,
fmt::format("invalid compression type ({0})",
fmt::format("unknown compression type ({0})",
static_cast<int>(sec.compression())));
}
}
} // namespace
class fs_section_v1 : public fs_section::impl {
class fs_section_v1 final : public fs_section::impl {
public:
fs_section_v1(mmif const& mm, size_t offset);
size_t start() const override { return start_; }
size_t length() const override { return hdr_.length; }
compression_type compression() const override { return hdr_.compression; }
// True when the compression type reported by compression() passes
// is_known_compression_type().
bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
// True when the section type reported by type() passes
// is_known_section_type().
bool is_known_type() const override {
return is_known_section_type(this->type());
}
// The v1 section header stores the compression as compression_type_v1;
// convert it to the compression_type that the fs_section interface returns.
compression_type compression() const override {
return static_cast<compression_type>(hdr_.compression);
}
section_type type() const override { return hdr_.type; }
std::string name() const override { return get_section_name(hdr_.type); }
@ -100,13 +110,21 @@ class fs_section_v1 : public fs_section::impl {
section_header hdr_;
};
class fs_section_v2 : public fs_section::impl {
class fs_section_v2 final : public fs_section::impl {
public:
fs_section_v2(mmif const& mm, size_t offset);
size_t start() const override { return start_; }
size_t length() const override { return hdr_.length; }
// True when the compression type reported by compression() passes
// is_known_compression_type().
bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
// True when the section type reported by type() passes
// is_known_section_type().
bool is_known_type() const override {
return is_known_section_type(this->type());
}
// Returns the compression field of the v2 header, cast to compression_type.
// The value is not validated here; use is_known_compression() to check it.
compression_type compression() const override {
return static_cast<compression_type>(hdr_.compression);
}
@ -149,7 +167,7 @@ class fs_section_v2 : public fs_section::impl {
section_header_v2 hdr_;
};
class fs_section_v2_lazy : public fs_section::impl {
class fs_section_v2_lazy final : public fs_section::impl {
public:
fs_section_v2_lazy(std::shared_ptr<mmif const> mm, section_type type,
size_t offset, size_t size);
@ -157,6 +175,14 @@ class fs_section_v2_lazy : public fs_section::impl {
size_t start() const override { return offset_ + sizeof(section_header_v2); }
size_t length() const override { return size_ - sizeof(section_header_v2); }
// True when the compression type reported by compression() passes
// is_known_compression_type().
bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
// True when the section type reported by type() passes
// is_known_section_type().
bool is_known_type() const override {
return is_known_section_type(this->type());
}
// Delegates to the object returned by section().
// NOTE(review): section() presumably loads the underlying section on first
// use -- confirm against its definition, which is not visible here.
compression_type compression() const override {
return section().compression();
}
@ -227,7 +253,12 @@ fs_section_v1::fs_section_v1(mmif const& mm, size_t offset) {
fs_section_v2::fs_section_v2(mmif const& mm, size_t offset) {
read_section_header_common(hdr_, start_, mm, offset);
check_section(*this);
// NOTE: These checks are intentionally not enforced here, as we might want
// to add section types and compression types in the future without
// necessarily incrementing the file system version.
// They are only enforced for v1 above, which doesn't have checksums and
// where we know the exact set of section and compression types.
// check_section(*this);
}
fs_section_v2_lazy::fs_section_v2_lazy(std::shared_ptr<mmif const> mm,

View File

@ -62,11 +62,11 @@ std::string get_default(const HT& ht, const typename HT::key_type& key) {
}
} // namespace
bool is_valid_compression_type(compression_type type) {
// Returns true when `type` has an entry in the `compressions` table,
// false otherwise.
bool is_known_compression_type(compression_type type) {
  auto const entry = compressions.find(type);
  return entry != compressions.end();
}
bool is_valid_section_type(section_type type) {
// Returns true when `type` has an entry in the `sections` table,
// false otherwise.
bool is_known_section_type(section_type type) {
  auto const entry = sections.find(type);
  return entry != sections.end();
}
@ -79,8 +79,8 @@ std::string get_section_name(section_type type) {
}
void section_header::dump(std::ostream& os) const {
os << "type=" << get_default(sections, type)
<< ", compression=" << get_default(compressions, compression)
os << "type=" << get_default(sections, type) << ", compression="
<< get_default(compressions, static_cast<compression_type>(compression))
<< ", length=" << length;
}

View File

@ -938,19 +938,30 @@ void metadata_<LoggerPolicy>::dump(
}
os << "original filesystem size: " << size_with_unit(stbuf.blocks) << "\n";
os << "compressed block size: "
<< size_with_unit(fsinfo.compressed_block_size)
<< fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) /
fsinfo.uncompressed_block_size)
<< "\n";
os << "uncompressed block size: "
<< size_with_unit(fsinfo.uncompressed_block_size) << "\n";
<< size_with_unit(fsinfo.compressed_block_size);
if (!fsinfo.uncompressed_block_size_is_estimate) {
os << fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) /
fsinfo.uncompressed_block_size);
}
os << "\n";
os << "uncompressed block size: ";
if (fsinfo.uncompressed_block_size_is_estimate) {
os << "(at least) ";
}
os << size_with_unit(fsinfo.uncompressed_block_size) << "\n";
os << "compressed metadata size: "
<< size_with_unit(fsinfo.compressed_metadata_size)
<< fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_metadata_size) /
fsinfo.uncompressed_metadata_size)
<< "\n";
os << "uncompressed metadata size: "
<< size_with_unit(fsinfo.uncompressed_metadata_size) << "\n";
<< size_with_unit(fsinfo.compressed_metadata_size);
if (!fsinfo.uncompressed_metadata_size_is_estimate) {
os << fmt::format(" ({0:.2f}%)",
(100.0 * fsinfo.compressed_metadata_size) /
fsinfo.uncompressed_metadata_size);
}
os << "\n";
os << "uncompressed metadata size: ";
if (fsinfo.uncompressed_metadata_size_is_estimate) {
os << "(at least) ";
}
os << size_with_unit(fsinfo.uncompressed_metadata_size) << "\n";
if (auto opt = meta_.options()) {
std::vector<std::string> options;
auto boolopt = [&](auto const& name, bool value) {