feat: limit preloading/caching blocks by cache size

This commit is contained in:
Marcus Holland-Moritz 2025-04-07 13:45:35 +02:00
parent e71cc49353
commit 4e43ed4e4b
4 changed files with 29 additions and 13 deletions

View File

@ -118,13 +118,15 @@ options:
- `-o preload_category=`*category*:
Preload all blocks from this category when mounting the file
system. This is typically used together with the `mkdwarfs`
"hotness" categorizer.
"hotness" categorizer. If the cache size is too small, only as
many blocks as will fit in the cache will be preloaded.
- `-o preload_all`:
Preload *all* blocks from the file system. This is only useful
for file systems where all uncompressed blocks fit fully into
the configured cache size. To see the uncompressed block size,
you can use `dwarfsck`.
you can use `dwarfsck`. If the cache size is too small, only as
many blocks as will fit in the cache will be preloaded.
- `-o (no_)cache_image`:
By default, `dwarfs` tries to ensure that the compressed file

View File

@ -105,8 +105,6 @@ class inode_reader_v2 {
impl_->cache_blocks(blocks);
}
// Preload every filesystem block by forwarding to the pimpl implementation.
void cache_all_blocks() const { impl_->cache_all_blocks(); }
class impl {
public:
virtual ~impl() = default;
@ -129,7 +127,6 @@ class inode_reader_v2 {
virtual void set_cache_tidy_config(cache_tidy_config const& cfg) = 0;
virtual size_t num_blocks() const = 0;
virtual void cache_blocks(std::span<size_t const> blocks) const = 0;
virtual void cache_all_blocks() const = 0;
};
private:

View File

@ -306,16 +306,39 @@ class filesystem_ final {
}
// Preload all blocks belonging to the given category, capped at the number
// of blocks that fit into the configured block cache. Without the cap,
// preloading more blocks than the cache can hold would evict earlier blocks
// before preloading even finishes, wasting the work.
void cache_blocks_by_category(std::string_view category) const {
auto const max_blocks = get_max_cache_blocks();
auto block_numbers = meta_.get_block_numbers_by_category(category);
if (block_numbers.size() > max_blocks) {
// Warn rather than fail: preloading is a best-effort optimization.
LOG_WARN << "too many blocks in category " << category
<< ", caching only the first " << max_blocks << " out of "
<< block_numbers.size() << " blocks";
block_numbers.resize(max_blocks);
}
ir_.cache_blocks(block_numbers);
}
// Preload all blocks of the filesystem, capped at the number of blocks that
// fit into the configured block cache. Builds an explicit index list
// [0, num_blocks) and hands it to the inode reader, replacing the former
// uncapped cache_all_blocks() path.
void cache_all_blocks() const {
auto const max_blocks = get_max_cache_blocks();
auto num_blocks = ir_.num_blocks();
if (num_blocks > max_blocks) {
// Warn rather than fail: preloading is a best-effort optimization.
LOG_WARN << "too many blocks in filesystem, caching only the first "
<< max_blocks << " out of " << num_blocks << " blocks";
num_blocks = max_blocks;
}
// Block indices 0..num_blocks-1.
std::vector<size_t> block_numbers(num_blocks);
std::iota(block_numbers.begin(), block_numbers.end(), 0);
ir_.cache_blocks(block_numbers);
}
private:
// Construct a parser over the memory-mapped filesystem image, using the
// configured image offset and size.
filesystem_parser make_fs_parser() const {
auto parser = filesystem_parser(mm_, image_offset_, options_.image_size);
return parser;
}
// Number of uncompressed blocks that fit into the configured block cache.
// NOTE(review): assumes meta_.block_size() > 0 — confirm that invariant
// holds for all images, otherwise this divides by zero. Also note this
// yields 0 when the cache is smaller than one block, which disables
// preloading entirely.
size_t get_max_cache_blocks() const {
return options_.block_cache.max_bytes / meta_.block_size();
}
filesystem_info const* get_info(fsinfo_options const& opts) const;
void check_section(fs_section const& section) const;
std::string read_string_ec(uint32_t inode, size_t size, file_off_t offset,

View File

@ -158,12 +158,6 @@ class inode_reader_ final : public inode_reader_v2::impl {
}
}
// Preload every block by issuing a tiny read against each one; fetching a
// block through the cache is what loads (and pins) it.
// NOTE(review): the meaning of cache_.get(i, 0, 1) as (block, offset=0,
// size=1) is inferred from the call shape — confirm against the block
// cache API before relying on it.
void cache_all_blocks() const override {
for (size_t i = 0; i < cache_.block_count(); ++i) {
cache_.get(i, 0, 1);
}
}
private:
using offset_cache_type =
basic_offset_cache<uint32_t, file_off_t, size_t,