From e803efebe60d6dde8c3e69953ec65131dab9110c Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 19 Aug 2024 17:01:30 +0200 Subject: [PATCH] feat: add inode size cache --- include/dwarfs/writer/scanner_options.h | 1 + src/reader/internal/metadata_v2.cpp | 56 ++++++++++++++++++++++++- src/writer/scanner.cpp | 12 +++++- thrift/metadata.thrift | 21 ++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/include/dwarfs/writer/scanner_options.h b/include/dwarfs/writer/scanner_options.h index 1dfa0ff5..f0f0e581 100644 --- a/include/dwarfs/writer/scanner_options.h +++ b/include/dwarfs/writer/scanner_options.h @@ -64,6 +64,7 @@ struct scanner_options { bool enable_history{true}; std::optional> command_line_arguments; history_config history; + size_t inode_size_cache_min_chunk_count{128}; }; } // namespace dwarfs::writer diff --git a/src/reader/internal/metadata_v2.cpp b/src/reader/internal/metadata_v2.cpp index f75a6acd..ed05bf09 100644 --- a/src/reader/internal/metadata_v2.cpp +++ b/src/reader/internal/metadata_v2.cpp @@ -292,6 +292,12 @@ void analyze_frozen(std::ostream& os, #undef META_OPT_LIST_SIZE #undef META_OPT_STRING_TABLE_SIZE + if (auto cache = meta.reg_file_size_cache()) { + add_list_size( + "inode_size_cache", cache->lookup(), + l->reg_file_size_cacheField.layout.valueField.layout.lookupField); + } + std::sort(usage.begin(), usage.end(), [](auto const& a, auto const& b) { return a.first > b.first || (a.first == b.first && a.second < b.second); }); @@ -538,6 +544,8 @@ class metadata_ final : public metadata_v2::impl { thrift::metadata::metadata unpack_metadata() const; + void check_inode_size_cache() const; + file_stat getattr_impl(inode_view iv, getattr_options const& opts) const; inode_view make_inode_view(uint32_t inode) const { @@ -671,17 +679,41 @@ class metadata_ final : public metadata_v2::impl { return {}; } - size_t reg_file_size(inode_view iv) const { + size_t reg_file_size_impl(inode_view iv, bool use_cache) const { PERFMON_CLS_SCOPED_SECTION(reg_file_size) + + // Looking up the chunk range is cheap, and we likely have to do it anyway std::error_code ec; auto cr = get_chunk_range(iv.inode_num(), ec); DWARFS_CHECK(!ec, fmt::format("get_chunk_range({}): {}", iv.inode_num(), ec.message())); + + if (use_cache) { + if (auto cache = meta_.reg_file_size_cache()) { + if (cr.size() >= cache->min_chunk_count()) { + LOG_TRACE << "using size cache lookup for inode " << iv.inode_num(); + if (auto size = cache->lookup().getOptional(iv.inode_num() - + file_inode_offset_)) { + return *size; + } + } + } + } + + // This is the expensive part for highly fragmented inodes return std::accumulate( cr.begin(), cr.end(), static_cast(0), [](size_t s, chunk_view cv) { return s + cv.size(); }); } + size_t reg_file_size_nocache(inode_view iv) const { + return reg_file_size_impl(iv, false); + } + + size_t reg_file_size(inode_view iv) const { + return reg_file_size_impl(iv, true); + } + size_t file_size(inode_view iv, uint16_t mode) const { switch (posix_file_type::from_mode(mode)) { case posix_file_type::regular: @@ -915,9 +947,31 @@ void metadata_::analyze_chunks(std::ostream& os) const { << "\n"; } +template +void metadata_::check_inode_size_cache() const { + if (auto cache = meta_.reg_file_size_cache()) { + LOG_DEBUG << "checking inode size cache"; + for (auto entry : cache->lookup()) { + auto inode = entry.first(); + auto size = entry.second(); + auto iv = make_inode_view(file_inode_offset_ + inode); + LOG_TRACE << "checking inode " << inode << " size " << size; + auto expected = reg_file_size_nocache(iv); + if (size != expected) { + DWARFS_THROW( + runtime_error, + fmt::format( + "inode size cache mismatch: inode {} expected {} got {}", inode, + expected, size)); + } + } + } +} + template void metadata_::check_consistency() const { global_.check_consistency(LOG_GET_LOGGER); + check_inode_size_cache(); } template diff --git a/src/writer/scanner.cpp b/src/writer/scanner.cpp index 773b4c64..b4dcd848 100644 --- a/src/writer/scanner.cpp +++ b/src/writer/scanner.cpp @@ -892,11 +892,15 @@ void scanner_::scan( LOG_INFO << "saving chunks..."; mv2.chunk_table()->resize(im.count() + 1); + auto& size_cache = mv2.reg_file_size_cache().emplace(); + size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count; + // TODO: we should be able to start this once all blocks have been // submitted for compression mv2.chunks().value().reserve(prog.chunk_count); im.for_each_inode_in_order([&](std::shared_ptr const& ino) { - DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size(); + auto const total_chunks = mv2.chunks()->size(); + DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks; if (!ino->append_chunks_to(mv2.chunks().value())) { std::ostringstream oss; for (auto fp : ino->all()) { @@ -905,6 +909,12 @@ void scanner_::scan( LOG_ERROR << "inconsistent fragments in inode " << ino->num() << ", the following files will be empty:" << oss.str(); } + auto num_inode_chunks = mv2.chunks()->size() - total_chunks; + if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) { + LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num() + << " with " << num_inode_chunks << " chunks"; + size_cache.lookup()->emplace(ino->num(), ino->size()); + } }); blockmgr->map_logical_blocks(mv2.chunks().value()); diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index 62231454..5f95b50e 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -169,6 +169,20 @@ struct string_table { 4: bool packed_index } +/* + * For highly fragmented inodes, computing the size from the + * individual chunks can be extremely slow. This cache can be + * used to bypass the chunk lookup and size computation. + */ +struct inode_size_cache { + // lookup from inode number to size + 1: map lookup + + // minimum number of chunks for a file to be found in the cache, + // corresponds to scanner_options.inode_size_cache_min_chunk_count + 2: UInt64 min_chunk_count +} + /** * File System Metadata * @@ -388,4 +402,11 @@ struct metadata { // index into this vector is the block number and the value // is an index into `category_names`. 29: optional list block_categories + + //==========================================================// + // fields added with dwarfs-0.11.0, file system version 2.5 // + //==========================================================// + + // Size cache for highly fragmented file inodes + 30: optional inode_size_cache reg_file_size_cache }