From 067b47ab0c07b5a6c85bfec5fb9c73a9332002d9 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 20 May 2025 13:01:43 +0200 Subject: [PATCH] refactor(metadata_analyzer): clean up analyzer code --- src/reader/internal/metadata_analyzer.cpp | 474 ++++++++++++---------- 1 file changed, 264 insertions(+), 210 deletions(-) diff --git a/src/reader/internal/metadata_analyzer.cpp b/src/reader/internal/metadata_analyzer.cpp index 83f3eb3f..eef25779 100644 --- a/src/reader/internal/metadata_analyzer.cpp +++ b/src/reader/internal/metadata_analyzer.cpp @@ -47,8 +47,22 @@ using namespace dwarfs::internal; using namespace ::apache::thrift; using namespace ::apache::thrift::frozen; +#if FMT_VERSION >= 70000 +#define DWARFS_FMT_L "L" +#else +#define DWARFS_FMT_L "n" +#endif + namespace { +template +concept list_view_type = requires(T a) { + requires std::derived_from< + T, typename frozen::detail::ArrayLayout< + std::decay_t, + typename std::decay_t::value_type>::View>; +}; + Layout const& get_layout(MappedFrozen const& meta) { auto layout = meta.findFirstOfType< @@ -57,11 +71,17 @@ get_layout(MappedFrozen const& meta) { return **layout; } -void analyze_frozen(std::ostream& os, - MappedFrozen const& meta, - size_t total_size, bool verbose) { - null_logger lgr; +class frozen_analyzer { + public: + frozen_analyzer(MappedFrozen const& meta, + std::span data, bool verbose) + : meta_{meta} + , total_size_{data.size()} + , verbose_{verbose} {} + void print(std::ostream& os) const; + + private: struct usage_info { usage_info(size_t off, size_t sz, std::string text) : offset{off} @@ -73,161 +93,193 @@ void analyze_frozen(std::ostream& os, std::string line; }; - auto& l = get_layout(meta); - std::vector usage; + using detail_bits_t = std::pair; -#if FMT_VERSION >= 70000 -#define DWARFS_FMT_L "L" -#else -#define DWARFS_FMT_L "n" -#endif - - auto get_offset = [&](auto const* ptr) { - return ptr ? reinterpret_cast(ptr) - - reinterpret_cast(meta.getPosition().start) - : 0; - }; - - auto get_list_offset = [&](auto const& v) { - struct view_internal { - void const* layout; - uint8_t const* start; - size_t bitOffset; - uint8_t const* data; - size_t count; - }; - using list_t = std::decay_t; - static_assert(sizeof(v) == sizeof(view_internal)); - static_assert( - std::derived_from, - typename frozen::detail::ArrayLayout< - list_t, typename list_t::value_type>::View>); - auto const* vi = reinterpret_cast(&v); - DWARFS_CHECK(vi->count == v.size(), "internal error: size mismatch"); - return get_offset(vi->data); - }; - - auto fmt_size = [&](std::string_view name, std::optional count_opt, - size_t size) { - auto count = count_opt.value_or(1); - std::string count_str; - if (count_opt.has_value()) { - count_str = fmt::format("{0:" DWARFS_FMT_L "}", count); - } - return fmt::format("{0:>14} {1:.<24}{2:.>16" DWARFS_FMT_L - "} bytes {3:5.1f}% {4:5.1f} bytes/item\n", - count_str, name, size, 100.0 * size / total_size, - count > 0 ? static_cast(size) / count : 0.0); - }; - - auto fmt_detail = [&](std::string_view name, size_t count, size_t size, - std::optional offset, std::string num) { - std::string range; - if (verbose) { - if (offset) { - range = fmt::format(" {:08x}..{:08x} ", *offset, *offset + size); - } else { - range.append(21, ' '); - } - } - range.append(15, ' '); - return fmt::format("{0}{1:<24}{2:>16" DWARFS_FMT_L "} bytes {3:>6} " - "{4:5.1f} bytes/item\n", - range, name, size, num, - count > 0 ? static_cast(size) / count : 0.0); - }; - - auto fmt_detail_pct = [&](std::string_view name, size_t count, size_t size, - std::optional offset = std::nullopt) { - return fmt_detail(name, count, size, offset, - fmt::format("{0:5.1f}%", 100.0 * size / total_size)); - }; - - auto add_size = [&](std::string_view name, size_t count, size_t offset, - size_t size) { - usage.emplace_back(offset, size, fmt_size(name, count, size)); - }; - - auto add_size_unique = [&](std::string_view name, size_t offset, - size_t size) { - usage.emplace_back(offset, size, fmt_size(name, std::nullopt, size)); - }; - - auto list_size = [&](auto const& list, auto const& field) { + static size_t list_size(list_view_type auto const& list, auto const& field) { return (list.size() * field.layout.itemField.layout.bits + 7) / 8; }; - auto add_list_size = [&](std::string_view name, auto const& list, - auto const& field) { - add_size(name, list.size(), get_list_offset(list), list_size(list, field)); - }; + size_t get_offset(auto const* ptr) const; + size_t get_list_offset(list_view_type auto const& v) const; - auto add_string_list_size = [&](std::string_view name, auto const& list, - auto const& field) { - auto count = list.size(); - if (count > 0) { - auto index_size = list_size(list, field); - auto data_size = list.back().end() - list.front().begin(); - auto size = index_size + data_size; - auto fmt = fmt_size(name, count, size) + - fmt_detail_pct("|- data", count, data_size) + - fmt_detail_pct("'- index", count, index_size); - usage.emplace_back(get_list_offset(list), size, fmt); + std::string fmt_size(std::string_view name, std::optional count_opt, + size_t size) const; + std::string fmt_detail(std::string_view name, size_t count, size_t size, + std::optional offset, std::string num) const; + std::string fmt_detail_pct(std::string_view name, size_t count, size_t size, + std::optional offset = std::nullopt) const; + + void add_size(std::vector& usage, std::string_view name, + size_t count, size_t offset, size_t size) const; + void add_size_unique(std::vector& usage, std::string_view name, + size_t offset, size_t size) const; + void add_list_size(std::vector& usage, std::string_view name, + auto const& list, auto const& field) const; + void + add_string_list_size(std::vector& usage, std::string_view name, + auto const& list, auto const& field) const; + void + add_string_table_size(std::vector& usage, std::string_view name, + auto const& table, auto const& field) const; + + void summarize_details(std::vector& usage, std::string_view name, + size_t count, size_t offset, size_t size, + std::span details) const; + + MappedFrozen const& meta_; + size_t total_size_{0}; + bool verbose_{false}; +}; + +size_t frozen_analyzer::get_offset(auto const* ptr) const { + return ptr ? reinterpret_cast(ptr) - + reinterpret_cast(meta_.getPosition().start) + : 0; +} + +size_t frozen_analyzer::get_list_offset(list_view_type auto const& v) const { + struct view_internal { + void const* layout; + uint8_t const* start; + size_t bitOffset; + uint8_t const* data; + size_t count; + }; + static_assert(sizeof(v) == sizeof(view_internal)); + auto const* vi = reinterpret_cast(&v); + DWARFS_CHECK(vi->count == v.size(), "internal error: size mismatch"); + return get_offset(vi->data); +} + +std::string +frozen_analyzer::fmt_size(std::string_view name, + std::optional count_opt, size_t size) const { + auto count = count_opt.value_or(1); + std::string count_str; + if (count_opt.has_value()) { + count_str = fmt::format("{0:" DWARFS_FMT_L "}", count); + } + return fmt::format("{0:>14} {1:.<24}{2:.>16" DWARFS_FMT_L + "} bytes {3:5.1f}% {4:5.1f} bytes/item\n", + count_str, name, size, 100.0 * size / total_size_, + count > 0 ? static_cast(size) / count : 0.0); +} + +std::string +frozen_analyzer::fmt_detail(std::string_view name, size_t count, size_t size, + std::optional offset, + std::string num) const { + std::string range; + if (verbose_) { + if (offset) { + range = fmt::format(" {:08x}..{:08x} ", *offset, *offset + size); + } else { + range.append(21, ' '); } - }; + } + range.append(15, ' '); + return fmt::format("{0}{1:<24}{2:>16" DWARFS_FMT_L "} bytes {3:>6} " + "{4:5.1f} bytes/item\n", + range, name, size, num, + count > 0 ? static_cast(size) / count : 0.0); +} - auto add_string_table_size = [&](std::string_view name, auto const& table, - auto const& field) { - if (auto data_size = table.buffer().size(); data_size > 0) { - auto dict_size = - table.symtab() ? table.symtab()->size() : static_cast(0); - auto index_size = list_size(table.index(), field.layout.indexField); - auto size = index_size + data_size + dict_size; - auto count = table.index().size() - (table.packed_index() ? 0 : 1); - auto fmt = fmt_size(name, count, size) + - fmt_detail_pct("|- data", count, data_size, - get_offset(table.buffer().data())); - if (table.symtab()) { - string_table st(lgr, "tmp", table); - auto unpacked_size = st.unpacked_size(); - fmt += fmt_detail( - "|- unpacked", count, unpacked_size, std::nullopt, - fmt::format("{0:5.2f}x", - static_cast(unpacked_size) / data_size)); - fmt += fmt_detail_pct("|- dict", count, dict_size, - get_offset(table.symtab()->data())); - } - fmt += fmt_detail_pct("'- index", count, index_size, - get_list_offset(table.index())); - usage.emplace_back(get_offset(table.buffer().data()), size, fmt); +std::string +frozen_analyzer::fmt_detail_pct(std::string_view name, size_t count, + size_t size, + std::optional offset) const { + return fmt_detail(name, count, size, offset, + fmt::format("{0:5.1f}%", 100.0 * size / total_size_)); +}; + +void frozen_analyzer::add_size(std::vector& usage, + std::string_view name, size_t count, + size_t offset, size_t size) const { + usage.emplace_back(offset, size, fmt_size(name, count, size)); +} + +void frozen_analyzer::add_size_unique(std::vector& usage, + std::string_view name, size_t offset, + size_t size) const { + usage.emplace_back(offset, size, fmt_size(name, std::nullopt, size)); +} + +void frozen_analyzer::add_list_size(std::vector& usage, + std::string_view name, auto const& list, + auto const& field) const { + add_size(usage, name, list.size(), get_list_offset(list), + list_size(list, field)); +} + +void frozen_analyzer::add_string_list_size(std::vector& usage, + std::string_view name, + auto const& list, + auto const& field) const { + if (auto count = list.size(); count > 0) { + auto index_size = list_size(list, field); + auto data_size = list.back().end() - list.front().begin(); + auto size = index_size + data_size; + auto fmt = fmt_size(name, count, size) + + fmt_detail_pct("|- data", count, data_size) + + fmt_detail_pct("'- index", count, index_size); + usage.emplace_back(get_list_offset(list), size, fmt); + } +} + +void frozen_analyzer::add_string_table_size(std::vector& usage, + std::string_view name, + auto const& table, + auto const& field) const { + if (auto data_size = table.buffer().size(); data_size > 0) { + auto dict_size = + table.symtab() ? table.symtab()->size() : static_cast(0); + auto index_size = list_size(table.index(), field.layout.indexField); + auto size = index_size + data_size + dict_size; + auto count = table.index().size() - (table.packed_index() ? 0 : 1); + auto fmt = fmt_size(name, count, size) + + fmt_detail_pct("|- data", count, data_size, + get_offset(table.buffer().data())); + if (table.symtab()) { + null_logger lgr; + string_table st(lgr, "tmp", table); + auto unpacked_size = st.unpacked_size(); + fmt += fmt_detail( + "|- unpacked", count, unpacked_size, std::nullopt, + fmt::format("{0:5.2f}x", + static_cast(unpacked_size) / data_size)); + fmt += fmt_detail_pct("|- dict", count, dict_size, + get_offset(table.symtab()->data())); } - }; + fmt += fmt_detail_pct("'- index", count, index_size, + get_list_offset(table.index())); + usage.emplace_back(get_offset(table.buffer().data()), size, fmt); + } +} - using detail_bits_t = std::pair; +void frozen_analyzer::summarize_details( + std::vector& usage, std::string_view name, size_t count, + size_t offset, size_t size, std::span details) const { + std::ranges::stable_sort(details, std::ranges::greater{}, + &detail_bits_t::second); + auto fmt = fmt_size(name, count, size); + for (size_t i = 0; i < details.size(); ++i) { + auto [member, bits] = details[i]; + auto tree = i == details.size() - 1 ? "'" : "|"; + fmt += fmt_detail_pct(fmt::format("{}- {} [{}]", tree, member, bits), count, + (count * bits + 7) / 8); + } + usage.emplace_back(offset, size, fmt); +} - auto summarize_details = [&](std::string_view name, size_t count, - size_t offset, size_t size, - std::span details) { - std::ranges::stable_sort(details, std::ranges::greater{}, - &detail_bits_t::second); - auto fmt = fmt_size(name, count, size); - for (size_t i = 0; i < details.size(); ++i) { - auto [member, bits] = details[i]; - auto tree = i == details.size() - 1 ? "'" : "|"; - fmt += fmt_detail_pct(fmt::format("{}- {} [{}]", tree, member, bits), - count, (count * bits + 7) / 8); - } - usage.emplace_back(offset, size, fmt); - }; +#define META_LIST_SIZE(x) add_list_size(usage, #x, meta_.x(), l.x##Field) -#define META_LIST_SIZE(x) add_list_size(#x, meta.x(), l.x##Field) - -#define META_STRING_LIST_SIZE(x) add_string_list_size(#x, meta.x(), l.x##Field) +#define META_STRING_LIST_SIZE(x) \ + add_string_list_size(usage, #x, meta_.x(), l.x##Field) #define META_OPT_LIST_SIZE(x) \ do { \ - if (auto list = meta.x()) { \ - add_list_size(#x, *list, l.x##Field.layout.valueField); \ + if (auto list = meta_.x()) { \ + add_list_size(usage, #x, *list, l.x##Field.layout.valueField); \ } \ } while (0) @@ -235,8 +287,8 @@ void analyze_frozen(std::ostream& os, #define META_OPT_STRING_LIST_SIZE(x) \ do { \ - if (auto list = meta.x()) { \ - add_string_list_size(#x, *list, l.x##Field.layout.valueField); \ + if (auto list = meta_.x()) { \ + add_string_list_size(usage, #x, *list, l.x##Field.layout.valueField); \ } \ } while (0) @@ -244,8 +296,8 @@ void analyze_frozen(std::ostream& os, #define META_OPT_STRING_TABLE_SIZE(x) \ do { \ - if (auto table = meta.x()) { \ - add_string_table_size(#x, *table, l.x##Field.layout.valueField); \ + if (auto table = meta_.x()) { \ + add_string_table_size(usage, #x, *table, l.x##Field.layout.valueField); \ } \ } while (0) @@ -263,14 +315,14 @@ void analyze_frozen(std::ostream& os, } while (0) #define META_LIST_SIZE_DETAIL_END(x) \ - summarize_details(#x, meta.x().size(), get_list_offset(meta.x()), \ - list_size(meta.x(), l.x##Field), detail_bits); \ + summarize_details(usage, #x, meta_.x().size(), get_list_offset(meta_.x()), \ + list_size(meta_.x(), l.x##Field), detail_bits); \ } \ while (0) #define META_OPT_LIST_SIZE_DETAIL_BEGIN(x) \ do { \ - if (auto list = meta.x()) { \ + if (auto list = meta_.x()) { \ std::vector detail_bits; #define META_OPT_ADD_DETAIL_BITS(field, x) \ @@ -283,13 +335,17 @@ void analyze_frozen(std::ostream& os, } while (0) #define META_OPT_LIST_SIZE_DETAIL_END(x) \ - summarize_details(#x, list->size(), get_list_offset(*list), \ + summarize_details(usage, #x, list->size(), get_list_offset(*list), \ list_size(*list, l.x##Field.layout.valueField), \ detail_bits); \ } \ } \ while (0) +void frozen_analyzer::print(std::ostream& os) const { + auto& l = get_layout(meta_); + std::vector usage; + META_LIST_SIZE_DETAIL_BEGIN; META_ADD_DETAIL_BITS(chunks, block); META_ADD_DETAIL_BITS(chunks, offset); @@ -319,7 +375,7 @@ void analyze_frozen(std::ostream& os, META_OPT_LIST_SIZE_DETAIL_END(dir_entries); META_LIST_SIZE(chunk_table); - if (!meta.entry_table_v2_2().empty()) { + if (!meta_.entry_table_v2_2().empty()) { // deprecated, so only list if non-empty META_LIST_SIZE(entry_table_v2_2); } @@ -344,6 +400,59 @@ void analyze_frozen(std::ostream& os, META_OPT_STRING_LIST_SIZE(category_metadata_json); META_OPT_MAP_SIZE(block_category_metadata); + if (auto cache = meta_.reg_file_size_cache()) { + add_list_size( + usage, "inode_size_cache", cache->lookup(), + l.reg_file_size_cacheField.layout.valueField.layout.lookupField); + } + + if (auto list = meta_.metadata_version_history()) { + size_t history_size = + list_size(*list, l.metadata_version_historyField.layout.valueField); + for (auto const& entry : *list) { + if (entry.dwarfs_version()) { + history_size += entry.dwarfs_version()->size(); + } + } + add_size(usage, "metadata_version_history", list->size(), + get_list_offset(*list), history_size); + } + + if (auto version = meta_.dwarfs_version()) { + add_size_unique(usage, "dwarfs_version", get_offset(version->data()), + version->size()); + } + + add_size_unique(usage, "metadata_root", 0, l.size); + add_size_unique(usage, "padding", total_size_ - LayoutRoot::kPaddingBytes, + LayoutRoot::kPaddingBytes); + + std::ranges::sort(usage, [this](auto const& a, auto const& b) { + if (verbose_) { + return a.offset < b.offset || + (a.offset == b.offset && + (a.size < b.size || (a.size == b.size && a.line < b.line))); + } + return a.size > b.size || (a.size == b.size && a.line < b.line); + }); + + os << "metadata memory usage:\n"; + if (verbose_) { + os << fmt::format(" {:08x}..{:08x} ", 0, total_size_); + } + os << fmt::format(" {0:.<24}{1:.>16" DWARFS_FMT_L + "} bytes {2:6.1f} bytes/inode\n", + "total metadata", total_size_, + static_cast(total_size_) / meta_.inodes().size()); + + for (auto const& u : usage) { + if (verbose_) { + os << fmt::format(" {:08x}..{:08x} ", u.offset, u.offset + u.size); + } + os << u.line; + } +} + #undef META_LIST_SIZE #undef META_OPT_STRING_SET_SIZE #undef META_OPT_STRING_LIST_SIZE @@ -357,61 +466,6 @@ void analyze_frozen(std::ostream& os, #undef META_OPT_ADD_DETAIL_BITS #undef META_OPT_LIST_SIZE_DETAIL_END - if (auto cache = meta.reg_file_size_cache()) { - add_list_size( - "inode_size_cache", cache->lookup(), - l.reg_file_size_cacheField.layout.valueField.layout.lookupField); - } - - if (auto list = meta.metadata_version_history()) { - size_t history_size = - list_size(*list, l.metadata_version_historyField.layout.valueField); - for (auto const& entry : *list) { - if (entry.dwarfs_version()) { - history_size += entry.dwarfs_version()->size(); - } - } - add_size("metadata_version_history", list->size(), get_list_offset(*list), - history_size); - } - - if (auto version = meta.dwarfs_version()) { - add_size_unique("dwarfs_version", get_offset(version->data()), - version->size()); - } - - add_size_unique("metadata_root", 0, l.size); - add_size_unique("padding", total_size - LayoutRoot::kPaddingBytes, - LayoutRoot::kPaddingBytes); - - std::ranges::sort(usage, [verbose](auto const& a, auto const& b) { - if (verbose) { - return a.offset < b.offset || - (a.offset == b.offset && - (a.size < b.size || (a.size == b.size && a.line < b.line))); - } - return a.size > b.size || (a.size == b.size && a.line < b.line); - }); - - os << "metadata memory usage:\n"; - if (verbose) { - os << fmt::format(" {:08x}..{:08x} ", 0, total_size); - } - os << fmt::format(" {0:.<24}{1:.>16" DWARFS_FMT_L - "} bytes {2:6.1f} bytes/inode\n", - "total metadata", total_size, - static_cast(total_size) / meta.inodes().size()); - -#undef DWARFS_FMT_L - - for (auto const& u : usage) { - if (verbose) { - os << fmt::format(" {:08x}..{:08x} ", u.offset, u.offset + u.size); - } - os << u.line; - } -} - } // namespace metadata_analyzer::metadata_analyzer( @@ -426,7 +480,7 @@ void metadata_analyzer::print_layout(std::ostream& os) const { } void metadata_analyzer::print_frozen(std::ostream& os, bool verbose) const { - analyze_frozen(os, meta_, data_.size(), verbose); + frozen_analyzer(meta_, data_, verbose).print(os); } } // namespace dwarfs::reader::internal