diff --git a/include/dwarfs/compression.h b/include/dwarfs/compression.h index 9bb126bd..85338898 100644 --- a/include/dwarfs/compression.h +++ b/include/dwarfs/compression.h @@ -36,7 +36,16 @@ namespace dwarfs { -enum class compression_type : uint8_t { +enum class compression_type_v1 : uint8_t { +#define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value +#define DWARFS_COMMA_ , + DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_, + DWARFS_COMMA_) +#undef DWARFS_COMPRESSION_TYPE_ENUMERATION_ +#undef DWARFS_COMMA_ +}; + +enum class compression_type : uint16_t { #define DWARFS_COMPRESSION_TYPE_ENUMERATION_(name, value) name = value #define DWARFS_COMMA_ , DWARFS_COMPRESSION_TYPE_LIST(DWARFS_COMPRESSION_TYPE_ENUMERATION_, diff --git a/include/dwarfs/fs_section.h b/include/dwarfs/fs_section.h index 6b56b13b..3eac39c4 100644 --- a/include/dwarfs/fs_section.h +++ b/include/dwarfs/fs_section.h @@ -39,6 +39,8 @@ class fs_section { size_t start() const { return impl_->start(); } size_t length() const { return impl_->length(); } + bool is_known_compression() const { return impl_->is_known_compression(); } + bool is_known_type() const { return impl_->is_known_type(); } compression_type compression() const { return impl_->compression(); } section_type type() const { return impl_->type(); } std::string name() const { return impl_->name(); } @@ -57,6 +59,8 @@ class fs_section { virtual size_t start() const = 0; virtual size_t length() const = 0; + virtual bool is_known_compression() const = 0; + virtual bool is_known_type() const = 0; virtual compression_type compression() const = 0; virtual section_type type() const = 0; virtual std::string name() const = 0; diff --git a/include/dwarfs/fstypes.h b/include/dwarfs/fstypes.h index d589b6da..67d0efef 100644 --- a/include/dwarfs/fstypes.h +++ b/include/dwarfs/fstypes.h @@ -56,7 +56,7 @@ struct file_header { struct section_header { section_type type; - compression_type compression; + compression_type_v1 compression; uint8_t unused; uint32_t length; @@ -85,11 +85,13 @@ struct filesystem_info { uint64_t uncompressed_block_size{0}; uint64_t compressed_metadata_size{0}; uint64_t uncompressed_metadata_size{0}; + bool uncompressed_block_size_is_estimate{false}; + bool uncompressed_metadata_size_is_estimate{false}; }; -bool is_valid_compression_type(compression_type type); +bool is_known_compression_type(compression_type type); -bool is_valid_section_type(section_type type); +bool is_known_section_type(section_type type); std::string get_compression_name(compression_type type); diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 1116e551..f5284b3a 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -52,6 +52,24 @@ namespace dwarfs { namespace { +void check_section_logger(logger& lgr, fs_section const& section) { + LOG_PROXY(debug_logger_policy, lgr); + + LOG_DEBUG << "section " << section.description() << " @ " << section.start() + << " [" << section.length() << " bytes]"; + + if (!section.is_known_type()) { + LOG_WARN << "unknown section type " << folly::to_underlying(section.type()) + << " in section @ " << section.start(); + } + + if (!section.is_known_compression()) { + LOG_WARN << "unknown compression type " + << folly::to_underlying(section.compression()) << " in section @ " + << section.start(); + } +} + class filesystem_parser { private: static uint64_t constexpr section_offset_mask{(UINT64_C(1) << 48) - 1}; @@ -361,6 +379,7 @@ class filesystem_ final : public filesystem_v2::impl { private: filesystem_info const& get_info() const; + void check_section(fs_section const& section) const; LOG_PROXY_DECL(LoggerPolicy); std::shared_ptr mm_; @@ -389,6 +408,11 @@ class filesystem_ final : public filesystem_v2::impl { PERFMON_CLS_TIMER_DECL(readv_future) }; +template +void filesystem_::check_section(fs_section const& section) const { + check_section_logger(LOG_GET_LOGGER, section); +} + template filesystem_info const& filesystem_::get_info() const { std::lock_guard lock(mx_); @@ -399,14 +423,27 @@ filesystem_info const& filesystem_::get_info() const { parser_.rewind(); while (auto s = parser_.next_section()) { + check_section(*s); + if (s->type() == section_type::BLOCK) { ++info.block_count; info.compressed_block_size += s->length(); - info.uncompressed_block_size += get_uncompressed_section_size(mm_, *s); + try { + info.uncompressed_block_size += + get_uncompressed_section_size(mm_, *s); + } catch (std::exception const& e) { + info.uncompressed_block_size += s->length(); + info.uncompressed_block_size_is_estimate = true; + } } else if (s->type() == section_type::METADATA_V2) { info.compressed_metadata_size += s->length(); - info.uncompressed_metadata_size += - get_uncompressed_section_size(mm_, *s); + try { + info.uncompressed_metadata_size += + get_uncompressed_section_size(mm_, *s); + } catch (std::exception const& e) { + info.uncompressed_metadata_size += s->length(); + info.uncompressed_metadata_size_is_estimate = true; + } } } @@ -452,8 +489,8 @@ filesystem_::filesystem_( section_map sections; while (auto s = parser_.next_section()) { - LOG_DEBUG << "section " << s->name() << " @ " << s->start() << " [" - << s->length() << " bytes]"; + check_section(*s); + if (s->type() == section_type::BLOCK) { cache.insert(*s); } else { @@ -665,8 +702,8 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog, section_map sections; while (auto s = parser.next_section()) { - LOG_DEBUG << "section " << s->description() << " @ " << s->start() << " [" - << s->length() << " bytes]"; + check_section_logger(lgr, *s); + if (!s->check_fast(*mm)) { DWARFS_THROW(runtime_error, "checksum error in section: " + s->name()); } @@ -746,8 +783,8 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr mm, std::vector> sections; while (auto sp = parser.next_section()) { - LOG_DEBUG << "section " << sp->description() << " @ " << sp->start() << " [" - << sp->length() << " bytes]"; + check_section_logger(lgr, *sp); + std::packaged_task task{[&, s = *sp] { if (!s.check_fast(*mm)) { DWARFS_THROW(runtime_error, "checksum error in section: " + s.name()); diff --git a/src/dwarfs/fs_section.cpp b/src/dwarfs/fs_section.cpp index 5cb64eca..7aaf7558 100644 --- a/src/dwarfs/fs_section.cpp +++ b/src/dwarfs/fs_section.cpp @@ -59,28 +59,38 @@ void read_section_header_common(T& header, size_t& start, mmif const& mm, template void check_section(T const& sec) { - if (!is_valid_section_type(sec.type())) { - DWARFS_THROW(runtime_error, fmt::format("invalid section type ({0})", + if (!is_known_section_type(sec.type())) { + DWARFS_THROW(runtime_error, fmt::format("unknown section type ({0})", static_cast(sec.type()))); } - if (!is_valid_compression_type(sec.compression())) { + if (!is_known_compression_type(sec.compression())) { DWARFS_THROW(runtime_error, - fmt::format("invalid compression type ({0})", + fmt::format("unknown compression type ({0})", static_cast(sec.compression()))); } } } // namespace -class fs_section_v1 : public fs_section::impl { +class fs_section_v1 final : public fs_section::impl { public: fs_section_v1(mmif const& mm, size_t offset); size_t start() const override { return start_; } size_t length() const override { return hdr_.length; } - compression_type compression() const override { return hdr_.compression; } + bool is_known_compression() const override { + return is_known_compression_type(this->compression()); + } + + bool is_known_type() const override { + return is_known_section_type(this->type()); + } + + compression_type compression() const override { + return static_cast(hdr_.compression); + } section_type type() const override { return hdr_.type; } std::string name() const override { return get_section_name(hdr_.type); } @@ -100,13 +110,21 @@ class fs_section_v1 : public fs_section::impl { section_header hdr_; }; -class fs_section_v2 : public fs_section::impl { +class fs_section_v2 final : public fs_section::impl { public: fs_section_v2(mmif const& mm, size_t offset); size_t start() const override { return start_; } size_t length() const override { return hdr_.length; } + bool is_known_compression() const override { + return is_known_compression_type(this->compression()); + } + + bool is_known_type() const override { + return is_known_section_type(this->type()); + } + compression_type compression() const override { return static_cast(hdr_.compression); } @@ -149,7 +167,7 @@ class fs_section_v2 : public fs_section::impl { section_header_v2 hdr_; }; -class fs_section_v2_lazy : public fs_section::impl { +class fs_section_v2_lazy final : public fs_section::impl { public: fs_section_v2_lazy(std::shared_ptr mm, section_type type, size_t offset, size_t size); @@ -157,6 +175,14 @@ class fs_section_v2_lazy : public fs_section::impl { size_t start() const override { return offset_ + sizeof(section_header_v2); } size_t length() const override { return size_ - sizeof(section_header_v2); } + bool is_known_compression() const override { + return is_known_compression_type(this->compression()); + } + + bool is_known_type() const override { + return is_known_section_type(this->type()); + } + compression_type compression() const override { return section().compression(); } @@ -227,7 +253,12 @@ fs_section_v1::fs_section_v1(mmif const& mm, size_t offset) { fs_section_v2::fs_section_v2(mmif const& mm, size_t offset) { read_section_header_common(hdr_, start_, mm, offset); - check_section(*this); + // TODO: Don't enforce these checks as we might want to add section types + // and compression types in the future without necessarily incrementing + // the file system version. + // Only enforce them for v1 above, which doesn't have checksums and + // where we know the exact set of section and compression types. + // check_section(*this); } fs_section_v2_lazy::fs_section_v2_lazy(std::shared_ptr mm, diff --git a/src/dwarfs/fstypes.cpp b/src/dwarfs/fstypes.cpp index 992b7941..953d41af 100644 --- a/src/dwarfs/fstypes.cpp +++ b/src/dwarfs/fstypes.cpp @@ -62,11 +62,11 @@ std::string get_default(const HT& ht, const typename HT::key_type& key) { } } // namespace -bool is_valid_compression_type(compression_type type) { +bool is_known_compression_type(compression_type type) { return compressions.count(type) > 0; } -bool is_valid_section_type(section_type type) { +bool is_known_section_type(section_type type) { return sections.count(type) > 0; } @@ -79,8 +79,8 @@ std::string get_section_name(section_type type) { } void section_header::dump(std::ostream& os) const { - os << "type=" << get_default(sections, type) - << ", compression=" << get_default(compressions, compression) + os << "type=" << get_default(sections, type) << ", compression=" + << get_default(compressions, static_cast(compression)) << ", length=" << length; } diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index 54342aad..6aa23075 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -938,19 +938,30 @@ void metadata_::dump( } os << "original filesystem size: " << size_with_unit(stbuf.blocks) << "\n"; os << "compressed block size: " - << size_with_unit(fsinfo.compressed_block_size) - << fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) / - fsinfo.uncompressed_block_size) - << "\n"; - os << "uncompressed block size: " - << size_with_unit(fsinfo.uncompressed_block_size) << "\n"; + << size_with_unit(fsinfo.compressed_block_size); + if (!fsinfo.uncompressed_block_size_is_estimate) { + os << fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_block_size) / + fsinfo.uncompressed_block_size); + } + os << "\n"; + os << "uncompressed block size: "; + if (fsinfo.uncompressed_block_size_is_estimate) { + os << "(at least) "; + } + os << size_with_unit(fsinfo.uncompressed_block_size) << "\n"; os << "compressed metadata size: " - << size_with_unit(fsinfo.compressed_metadata_size) - << fmt::format(" ({0:.2f}%)", (100.0 * fsinfo.compressed_metadata_size) / - fsinfo.uncompressed_metadata_size) - << "\n"; - os << "uncompressed metadata size: " - << size_with_unit(fsinfo.uncompressed_metadata_size) << "\n"; + << size_with_unit(fsinfo.compressed_metadata_size); + if (!fsinfo.uncompressed_metadata_size_is_estimate) { + os << fmt::format(" ({0:.2f}%)", + (100.0 * fsinfo.compressed_metadata_size) / + fsinfo.uncompressed_metadata_size); + } + os << "\n"; + os << "uncompressed metadata size: "; + if (fsinfo.uncompressed_metadata_size_is_estimate) { + os << "(at least) "; + } + os << size_with_unit(fsinfo.uncompressed_metadata_size) << "\n"; if (auto opt = meta_.options()) { std::vector options; auto boolopt = [&](auto const& name, bool value) {