From 4e43ed4e4b8e9e100c362120f862a0c69fda62af Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz <github@mhxnet.de>
Date: Mon, 7 Apr 2025 13:45:35 +0200
Subject: [PATCH] feat: limit preloading/caching blocks by cache size

---
 doc/dwarfs.md                                 |  6 +++--
 .../dwarfs/reader/internal/inode_reader_v2.h  |  3 ---
 src/reader/filesystem_v2.cpp                  | 27 +++++++++++++++++--
 src/reader/internal/inode_reader_v2.cpp       |  6 -----
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/doc/dwarfs.md b/doc/dwarfs.md
index 1abcb7fb..6614a800 100644
--- a/doc/dwarfs.md
+++ b/doc/dwarfs.md
@@ -118,13 +118,15 @@ options:
 - `-o preload_category=`*category*:
   Preload all blocks from this category when mounting the file
   system. This is typically used together with the `mkdwarfs`
-  "hotness" categorizer.
+  "hotness" categorizer. If the cache is too small to hold all of
+  these blocks, only as many as fit in the cache are preloaded.
 
 - `-o preload_all`
   Preload *all* blocks from the file system. This is only useful
   for file systems where all uncompressed blocks fit fully into
   the configured cache size. To see the uncompressed block size,
-  you can use `dwarfsck`.
+  you can use `dwarfsck`. If the cache is too small to hold them
+  all, only as many blocks as fit in the cache are preloaded.
 
 - `-o (no_)cache_image`:
   By default, `dwarfs` tries to ensure that the compressed file
diff --git a/include/dwarfs/reader/internal/inode_reader_v2.h b/include/dwarfs/reader/internal/inode_reader_v2.h
index 145c95c5..75e47bfa 100644
--- a/include/dwarfs/reader/internal/inode_reader_v2.h
+++ b/include/dwarfs/reader/internal/inode_reader_v2.h
@@ -105,8 +105,6 @@ class inode_reader_v2 {
     impl_->cache_blocks(blocks);
   }
 
-  void cache_all_blocks() const { impl_->cache_all_blocks(); }
-
   class impl {
    public:
     virtual ~impl() = default;
@@ -129,7 +127,6 @@ class inode_reader_v2 {
     virtual void set_cache_tidy_config(cache_tidy_config const& cfg) = 0;
     virtual size_t num_blocks() const = 0;
     virtual void cache_blocks(std::span<size_t const> blocks) const = 0;
-    virtual void cache_all_blocks() const = 0;
   };
 
  private:
diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp
index 30d58e2b..6d8787e2 100644
--- a/src/reader/filesystem_v2.cpp
+++ b/src/reader/filesystem_v2.cpp
@@ -306,16 +306,39 @@ class filesystem_ final {
   }
 
   void cache_blocks_by_category(std::string_view category) const {
-    ir_.cache_blocks(meta_.get_block_numbers_by_category(category));
+    auto const max_blocks = get_max_cache_blocks(); // cache capacity in blocks
+    auto block_numbers = meta_.get_block_numbers_by_category(category);
+    if (block_numbers.size() > max_blocks) { // won't all fit in the cache
+      LOG_WARN << "too many blocks in category " << category
+               << ", caching only the first " << max_blocks << " out of "
+               << block_numbers.size() << " blocks";
+      block_numbers.resize(max_blocks); // truncate to the cache limit
+    }
+    ir_.cache_blocks(block_numbers); // cache the (possibly truncated) list
   }
 
-  void cache_all_blocks() const { ir_.cache_all_blocks(); }
+  void cache_all_blocks() const { // cache blocks 0..n-1 (n capped below)
+    auto const max_blocks = get_max_cache_blocks(); // cache capacity in blocks
+    auto num_blocks = ir_.num_blocks(); // blocks in the filesystem
+    if (num_blocks > max_blocks) { // won't all fit in the cache
+      LOG_WARN << "too many blocks in filesystem, caching only the first "
+               << max_blocks << " out of " << num_blocks << " blocks";
+      num_blocks = max_blocks; // clamp to what fits
+    }
+    std::vector<size_t> block_numbers(num_blocks);
+    std::iota(block_numbers.begin(), block_numbers.end(), 0); // 0, 1, 2, ...
+    ir_.cache_blocks(block_numbers);
+  }
 
  private:
   filesystem_parser make_fs_parser() const {
     return filesystem_parser(mm_, image_offset_, options_.image_size);
   }
 
+  size_t get_max_cache_blocks() const { // whole blocks that fit in max_bytes
+    return options_.block_cache.max_bytes / meta_.block_size(); // truncates
+  } // NOTE(review): assumes block_size() != 0 -- confirm
+
   filesystem_info const* get_info(fsinfo_options const& opts) const;
   void check_section(fs_section const& section) const;
   std::string read_string_ec(uint32_t inode, size_t size, file_off_t offset,
diff --git a/src/reader/internal/inode_reader_v2.cpp b/src/reader/internal/inode_reader_v2.cpp
index cef08ddb..6b352a68 100644
--- a/src/reader/internal/inode_reader_v2.cpp
+++ b/src/reader/internal/inode_reader_v2.cpp
@@ -158,12 +158,6 @@ class inode_reader_ final : public inode_reader_v2::impl {
     }
   }
 
-  void cache_all_blocks() const override {
-    for (size_t i = 0; i < cache_.block_count(); ++i) {
-      cache_.get(i, 0, 1);
-    }
-  }
-
  private:
   using offset_cache_type =
       basic_offset_cache<uint32_t, file_off_t, size_t,