From 4e43ed4e4b8e9e100c362120f862a0c69fda62af Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 7 Apr 2025 13:45:35 +0200 Subject: [PATCH] feat: limit preloading/caching blocks by cache size --- doc/dwarfs.md | 6 +++-- .../dwarfs/reader/internal/inode_reader_v2.h | 3 --- src/reader/filesystem_v2.cpp | 27 +++++++++++++++++-- src/reader/internal/inode_reader_v2.cpp | 6 ----- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/doc/dwarfs.md b/doc/dwarfs.md index 1abcb7fb..6614a800 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -118,13 +118,15 @@ options: - `-o preload_category=`*category*: Preload all blocks from this category when mounting the file system. This is typically used together with the `mkdwarfs` - "hotness" categorizer. + "hotness" categorizer. If the cache size is too small, only as + many blocks as will fit in the cache will be preloaded. - `-o preload_all` Preload *all* blocks from the file system. This is only useful for file systems where all uncompressed blocks fit fully into the configured cache size. To see the uncompressed block size, - you can use `dwarfsck`. + you can use `dwarfsck`. If the cache size is too small, only as + many blocks as will fit in the cache will be preloaded. 
- `-o (no_)cache_image`: By default, `dwarfs` tries to ensure that the compressed file diff --git a/include/dwarfs/reader/internal/inode_reader_v2.h b/include/dwarfs/reader/internal/inode_reader_v2.h index 145c95c5..75e47bfa 100644 --- a/include/dwarfs/reader/internal/inode_reader_v2.h +++ b/include/dwarfs/reader/internal/inode_reader_v2.h @@ -105,8 +105,6 @@ class inode_reader_v2 { impl_->cache_blocks(blocks); } - void cache_all_blocks() const { impl_->cache_all_blocks(); } - class impl { public: virtual ~impl() = default; @@ -129,7 +127,6 @@ class inode_reader_v2 { virtual void set_cache_tidy_config(cache_tidy_config const& cfg) = 0; virtual size_t num_blocks() const = 0; virtual void cache_blocks(std::span<size_t const> blocks) const = 0; - virtual void cache_all_blocks() const = 0; }; private: diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index 30d58e2b..6d8787e2 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -306,16 +306,39 @@ class filesystem_ final { } void cache_blocks_by_category(std::string_view category) const { - ir_.cache_blocks(meta_.get_block_numbers_by_category(category)); + auto const max_blocks = get_max_cache_blocks(); + auto block_numbers = meta_.get_block_numbers_by_category(category); + if (block_numbers.size() > max_blocks) { + LOG_WARN << "too many blocks in category " << category + << ", caching only the first " << max_blocks << " out of " + << block_numbers.size() << " blocks"; + block_numbers.resize(max_blocks); + } + ir_.cache_blocks(block_numbers); } - void cache_all_blocks() const { ir_.cache_all_blocks(); } + void cache_all_blocks() const { + auto const max_blocks = get_max_cache_blocks(); + auto num_blocks = ir_.num_blocks(); + if (num_blocks > max_blocks) { + LOG_WARN << "too many blocks in filesystem, caching only the first " + << max_blocks << " out of " << num_blocks << " blocks"; + num_blocks = max_blocks; + } + std::vector<size_t> block_numbers(num_blocks); 
std::iota(block_numbers.begin(), block_numbers.end(), 0); + ir_.cache_blocks(block_numbers); + } private: filesystem_parser make_fs_parser() const { return filesystem_parser(mm_, image_offset_, options_.image_size); } + size_t get_max_cache_blocks() const { + return options_.block_cache.max_bytes / meta_.block_size(); + } + filesystem_info const* get_info(fsinfo_options const& opts) const; void check_section(fs_section const& section) const; std::string read_string_ec(uint32_t inode, size_t size, file_off_t offset, diff --git a/src/reader/internal/inode_reader_v2.cpp b/src/reader/internal/inode_reader_v2.cpp index cef08ddb..6b352a68 100644 --- a/src/reader/internal/inode_reader_v2.cpp +++ b/src/reader/internal/inode_reader_v2.cpp @@ -158,12 +158,6 @@ class inode_reader_ final : public inode_reader_v2::impl { } } - void cache_all_blocks() const override { - for (size_t i = 0; i < cache_.block_count(); ++i) { - cache_.get(i, 0, 1); - } - } - private: using offset_cache_type = basic_offset_cache