feat(forward-compat): allow unknown section/compression types (gh #158)

This commit is contained in:
Marcus Holland-Moritz 2023-11-28 21:46:47 +01:00
parent 95eac672f8
commit 8dae679ad4
7 changed files with 132 additions and 38 deletions

View File

@ -36,7 +36,16 @@
namespace dwarfs { namespace dwarfs {
enum class compression_type : uint8_t { enum class compression_type_v1 : uint8_t {
#define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value
#define DWARFS_COMMA_ ,
DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_,
DWARFS_COMMA_)
#undef DWARFS_COMPRESSION_TYPE_ENUMERATION_
#undef DWARFS_COMMA_
};
enum class compression_type : uint16_t {
#define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value #define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value
#define DWARFS_COMMA_ , #define DWARFS_COMMA_ ,
DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_, DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_,

View File

@ -39,6 +39,8 @@ class fs_section {
size_t start() const { return impl_->start(); } size_t start() const { return impl_->start(); }
size_t length() const { return impl_->length(); } size_t length() const { return impl_->length(); }
bool is_known_compression() const { return impl_->is_known_compression(); }
bool is_known_type() const { return impl_->is_known_type(); }
compression_type compression() const { return impl_->compression(); } compression_type compression() const { return impl_->compression(); }
section_type type() const { return impl_->type(); } section_type type() const { return impl_->type(); }
std::string name() const { return impl_->name(); } std::string name() const { return impl_->name(); }
@ -57,6 +59,8 @@ class fs_section {
virtual size_t start() const = 0; virtual size_t start() const = 0;
virtual size_t length() const = 0; virtual size_t length() const = 0;
virtual bool is_known_compression() const = 0;
virtual bool is_known_type() const = 0;
virtual compression_type compression() const = 0; virtual compression_type compression() const = 0;
virtual section_type type() const = 0; virtual section_type type() const = 0;
virtual std::string name() const = 0; virtual std::string name() const = 0;

View File

@ -56,7 +56,7 @@ struct file_header {
struct section_header { struct section_header {
section_type type; section_type type;
compression_type compression; compression_type_v1 compression;
uint8_t unused; uint8_t unused;
uint32_t length; uint32_t length;
@ -85,11 +85,13 @@ struct filesystem_info {
uint64_t uncompressed_block_size{0}; uint64_t uncompressed_block_size{0};
uint64_t compressed_metadata_size{0}; uint64_t compressed_metadata_size{0};
uint64_t uncompressed_metadata_size{0}; uint64_t uncompressed_metadata_size{0};
bool uncompressed_block_size_is_estimate{false};
bool uncompressed_metadata_size_is_estimate{false};
}; };
bool is_valid_compression_type(compression_type type); bool is_known_compression_type(compression_type type);
bool is_valid_section_type(section_type type); bool is_known_section_type(section_type type);
std::string get_compression_name(compression_type type); std::string get_compression_name(compression_type type);

View File

@ -52,6 +52,24 @@ namespace dwarfs {
namespace { namespace {
// Reports on a single parsed section through `lgr`.
//
// Always writes one debug line with the section's description, start offset
// and length. In addition, a warning is written for the section type and/or
// the compression type when the section reports them as not known; the
// warning carries the raw numeric value of the enum so that it can be
// identified. This function only logs -- it does not reject the section.
void check_section_logger(logger& lgr, fs_section const& section) {
  LOG_PROXY(debug_logger_policy, lgr);

  // The start offset appears in every message below, so fetch it once.
  auto const offset = section.start();

  LOG_DEBUG << "section " << section.description() << " @ " << offset << " ["
            << section.length() << " bytes]";

  // Section type first, then compression type -- same order as the checks
  // are reported to the user.
  if (!section.is_known_type()) {
    auto const raw_type = folly::to_underlying(section.type());
    LOG_WARN << "unknown section type " << raw_type << " in section @ "
             << offset;
  }

  if (!section.is_known_compression()) {
    auto const raw_compression = folly::to_underlying(section.compression());
    LOG_WARN << "unknown compression type " << raw_compression
             << " in section @ " << offset;
  }
}
class filesystem_parser { class filesystem_parser {
private: private:
static uint64_t constexpr section_offset_mask{(UINT64_C(1) << 48) - 1}; static uint64_t constexpr section_offset_mask{(UINT64_C(1) << 48) - 1};
@ -361,6 +379,7 @@ class filesystem_ final : public filesystem_v2::impl {
private: private:
filesystem_info const& get_info() const; filesystem_info const& get_info() const;
void check_section(fs_section const& section) const;
LOG_PROXY_DECL(LoggerPolicy); LOG_PROXY_DECL(LoggerPolicy);
std::shared_ptr<mmif> mm_; std::shared_ptr<mmif> mm_;
@ -389,6 +408,11 @@ class filesystem_ final : public filesystem_v2::impl {
PERFMON_CLS_TIMER_DECL(readv_future) PERFMON_CLS_TIMER_DECL(readv_future)
}; };
// Member-function convenience wrapper around check_section_logger(): passes
// `section` on together with the logger obtained via LOG_GET_LOGGER, so that
// callers inside filesystem_ do not have to name the logger themselves.
// All reporting (debug line, unknown-type warnings) is done by
// check_section_logger(); nothing else happens here.
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::check_section(fs_section const& section) const {
check_section_logger(LOG_GET_LOGGER, section);
}
template <typename LoggerPolicy> template <typename LoggerPolicy>
filesystem_info const& filesystem_<LoggerPolicy>::get_info() const { filesystem_info const& filesystem_<LoggerPolicy>::get_info() const {
std::lock_guard lock(mx_); std::lock_guard lock(mx_);
@ -399,14 +423,27 @@ filesystem_info const& filesystem_<LoggerPolicy>::get_info() const {
parser_.rewind(); parser_.rewind();
while (auto s = parser_.next_section()) { while (auto s = parser_.next_section()) {
check_section(*s);
if (s->type() == section_type::BLOCK) { if (s->type() == section_type::BLOCK) {
++info.block_count; ++info.block_count;
info.compressed_block_size += s->length(); info.compressed_block_size += s->length();
info.uncompressed_block_size += get_uncompressed_section_size(mm_, *s); try {
info.uncompressed_block_size +=
get_uncompressed_section_size(mm_, *s);
} catch (std::exception const& e) {
info.uncompressed_block_size += s->length();
info.uncompressed_block_size_is_estimate = true;
}
} else if (s->type() == section_type::METADATA_V2) { } else if (s->type() == section_type::METADATA_V2) {
info.compressed_metadata_size += s->length(); info.compressed_metadata_size += s->length();
info.uncompressed_metadata_size += try {
get_uncompressed_section_size(mm_, *s); info.uncompressed_metadata_size +=
get_uncompressed_section_size(mm_, *s);
} catch (std::exception const& e) {
info.uncompressed_metadata_size += s->length();
info.uncompressed_metadata_size_is_estimate = true;
}
} }
} }
@ -452,8 +489,8 @@ filesystem_<LoggerPolicy>::filesystem_(
section_map sections; section_map sections;
while (auto s = parser_.next_section()) { while (auto s = parser_.next_section()) {
LOG_DEBUG << "section " << s->name() << " @ " << s->start() << " [" check_section(*s);
<< s->length() << " bytes]";
if (s->type() == section_type::BLOCK) { if (s->type() == section_type::BLOCK) {
cache.insert(*s); cache.insert(*s);
} else { } else {
@ -665,8 +702,8 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
section_map sections; section_map sections;
while (auto s = parser.next_section()) { while (auto s = parser.next_section()) {
LOG_DEBUG << "section " << s->description() << " @ " << s->start() << " [" check_section_logger(lgr, *s);
<< s->length() << " bytes]";
if (!s->check_fast(*mm)) { if (!s->check_fast(*mm)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name()); DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
} }
@ -746,8 +783,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
std::vector<std::future<fs_section>> sections; std::vector<std::future<fs_section>> sections;
while (auto sp = parser.next_section()) { while (auto sp = parser.next_section()) {
LOG_DEBUG << "section " << sp->description() << " @ " << sp->start() << " [" check_section_logger(lgr, *sp);
<< sp->length() << " bytes]";
std::packaged_task<fs_section()> task{[&, s = *sp] { std::packaged_task<fs_section()> task{[&, s = *sp] {
if (!s.check_fast(*mm)) { if (!s.check_fast(*mm)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s.name()); DWARFS_THROW(runtime_error, "checksum error in section: " + s.name());

View File

@ -59,28 +59,38 @@ void read_section_header_common(T& header, size_t& start, mmif const& mm,
template <typename T> template <typename T>
void check_section(T const& sec) { void check_section(T const& sec) {
if (!is_valid_section_type(sec.type())) { if (!is_known_section_type(sec.type())) {
DWARFS_THROW(runtime_error, fmt::format("invalid section type ({0})", DWARFS_THROW(runtime_error, fmt::format("unknown section type ({0})",
static_cast<int>(sec.type()))); static_cast<int>(sec.type())));
} }
if (!is_valid_compression_type(sec.compression())) { if (!is_known_compression_type(sec.compression())) {
DWARFS_THROW(runtime_error, DWARFS_THROW(runtime_error,
fmt::format("invalid compression type ({0})", fmt::format("unknown compression type ({0})",
static_cast<int>(sec.compression()))); static_cast<int>(sec.compression())));
} }
} }
} // namespace } // namespace
class fs_section_v1 : public fs_section::impl { class fs_section_v1 final : public fs_section::impl {
public: public:
fs_section_v1(mmif const& mm, size_t offset); fs_section_v1(mmif const& mm, size_t offset);
size_t start() const override { return start_; } size_t start() const override { return start_; }
size_t length() const override { return hdr_.length; } size_t length() const override { return hdr_.length; }
compression_type compression() const override { return hdr_.compression; } bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
bool is_known_type() const override {
return is_known_section_type(this->type());
}
compression_type compression() const override {
return static_cast<compression_type>(hdr_.compression);
}
section_type type() const override { return hdr_.type; } section_type type() const override { return hdr_.type; }
std::string name() const override { return get_section_name(hdr_.type); } std::string name() const override { return get_section_name(hdr_.type); }
@ -100,13 +110,21 @@ class fs_section_v1 : public fs_section::impl {
section_header hdr_; section_header hdr_;
}; };
class fs_section_v2 : public fs_section::impl { class fs_section_v2 final : public fs_section::impl {
public: public:
fs_section_v2(mmif const& mm, size_t offset); fs_section_v2(mmif const& mm, size_t offset);
size_t start() const override { return start_; } size_t start() const override { return start_; }
size_t length() const override { return hdr_.length; } size_t length() const override { return hdr_.length; }
bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
bool is_known_type() const override {
return is_known_section_type(this->type());
}
compression_type compression() const override { compression_type compression() const override {
return static_cast<compression_type>(hdr_.compression); return static_cast<compression_type>(hdr_.compression);
} }
@ -149,7 +167,7 @@ class fs_section_v2 : public fs_section::impl {
section_header_v2 hdr_; section_header_v2 hdr_;
}; };
class fs_section_v2_lazy : public fs_section::impl { class fs_section_v2_lazy final : public fs_section::impl {
public: public:
fs_section_v2_lazy(std::shared_ptr<mmif const> mm, section_type type, fs_section_v2_lazy(std::shared_ptr<mmif const> mm, section_type type,
size_t offset, size_t size); size_t offset, size_t size);
@ -157,6 +175,14 @@ class fs_section_v2_lazy : public fs_section::impl {
size_t start() const override { return offset_ + sizeof(section_header_v2); } size_t start() const override { return offset_ + sizeof(section_header_v2); }
size_t length() const override { return size_ - sizeof(section_header_v2); } size_t length() const override { return size_ - sizeof(section_header_v2); }
bool is_known_compression() const override {
return is_known_compression_type(this->compression());
}
bool is_known_type() const override {
return is_known_section_type(this->type());
}
compression_type compression() const override { compression_type compression() const override {
return section().compression(); return section().compression();
} }
@ -227,7 +253,12 @@ fs_section_v1::fs_section_v1(mmif const& mm, size_t offset) {
fs_section_v2::fs_section_v2(mmif const& mm, size_t offset) { fs_section_v2::fs_section_v2(mmif const& mm, size_t offset) {
read_section_header_common(hdr_, start_, mm, offset); read_section_header_common(hdr_, start_, mm, offset);
check_section(*this); // TODO: Don't enforce these checks as we might want to add section types
// and compression types in the future without necessarily incrementing
// the file system version.
// Only enforce them for v1 above, which doesn't have checksums and
// where we know the exact set of section and compression types.
// check_section(*this);
} }
fs_section_v2_lazy::fs_section_v2_lazy(std::shared_ptr<mmif const> mm, fs_section_v2_lazy::fs_section_v2_lazy(std::shared_ptr<mmif const> mm,

View File

@ -62,11 +62,11 @@ std::string get_default(const HT& ht, const typename HT::key_type& key) {
} }
} // namespace } // namespace
bool is_valid_compression_type(compression_type type) { bool is_known_compression_type(compression_type type) {
return compressions.count(type) > 0; return compressions.count(type) > 0;
} }
bool is_valid_section_type(section_type type) { bool is_known_section_type(section_type type) {
return sections.count(type) > 0; return sections.count(type) > 0;
} }
@ -79,8 +79,8 @@ std::string get_section_name(section_type type) {
} }
void section_header::dump(std::ostream& os) const { void section_header::dump(std::ostream& os) const {
os << "type=" << get_default(sections, type) os << "type=" << get_default(sections, type) << ", compression="
<< ", compression=" << get_default(compressions, compression) << get_default(compressions, static_cast<compression_type>(compression))
<< ", length=" << length; << ", length=" << length;
} }

View File

@ -938,19 +938,30 @@ void metadata_<LoggerPolicy>::dump(
} }
os << "original filesystem size: " << size_with_unit(stbuf.blocks) << "\n"; os << "original filesystem size: " << size_with_unit(stbuf.blocks) << "\n";
os << "compressed block size: " os << "compressed block size: "
<< size_with_unit(fsinfo.compressed_block_size) << size_with_unit(fsinfo.compressed_block_size);
<< fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) / if (!fsinfo.uncompressed_block_size_is_estimate) {
fsinfo.uncompressed_block_size) os << fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) /
<< "\n"; fsinfo.uncompressed_block_size);
os << "uncompressed block size: " }
<< size_with_unit(fsinfo.uncompressed_block_size) << "\n"; os << "\n";
os << "uncompressed block size: ";
if (fsinfo.uncompressed_block_size_is_estimate) {
os << "(at least) ";
}
os << size_with_unit(fsinfo.uncompressed_block_size) << "\n";
os << "compressed metadata size: " os << "compressed metadata size: "
<< size_with_unit(fsinfo.compressed_metadata_size) << size_with_unit(fsinfo.compressed_metadata_size);
<< fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_metadata_size) / if (!fsinfo.uncompressed_metadata_size_is_estimate) {
fsinfo.uncompressed_metadata_size) os << fmt::format(" ({0:.2f}%)",
<< "\n"; (100.0 * fsinfo.compressed_metadata_size) /
os << "uncompressed metadata size: " fsinfo.uncompressed_metadata_size);
<< size_with_unit(fsinfo.uncompressed_metadata_size) << "\n"; }
os << "\n";
os << "uncompressed metadata size: ";
if (fsinfo.uncompressed_metadata_size_is_estimate) {
os << "(at least) ";
}
os << size_with_unit(fsinfo.uncompressed_metadata_size) << "\n";
if (auto opt = meta_.options()) { if (auto opt = meta_.options()) {
std::vector<std::string> options; std::vector<std::string> options;
auto boolopt = [&](auto const& name, bool value) { auto boolopt = [&](auto const& name, bool value) {