feat: use new minimal LRU cache implementation

Marcus Holland-Moritz 2025-04-27 22:01:00 +02:00
parent b919012ac6
commit 7508b1db38
6 changed files with 326 additions and 19 deletions

View File

@@ -506,6 +506,7 @@ if(WITH_TESTS)
test/global_metadata_test.cpp
test/integral_value_parser_test.cpp
test/lazy_value_test.cpp
test/lru_cache_test.cpp
test/metadata_requirements_test.cpp
test/nilsimsa_test.cpp
test/options_test.cpp
@@ -603,6 +604,10 @@ if(WITH_TESTS)
target_link_libraries(tool_main_test PRIVATE mkdwarfs_main dwarfsck_main dwarfsextract_main PkgConfig::LIBARCHIVE)
endif()
if(TARGET dwarfs_unit_tests)
target_link_libraries(dwarfs_unit_tests PRIVATE phmap)
endif()
if(TARGET manpage_test)
if(WITH_TOOLS)
target_compile_definitions(manpage_test PRIVATE DWARFS_WITH_TOOLS)

View File

@@ -0,0 +1,142 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <functional>
#include <iterator>
#include <list>
#include <parallel_hashmap/phmap.h>
namespace dwarfs::reader::internal {
template <typename KeyT, typename T>
class lru_cache {
public:
using key_type = KeyT;
using mapped_type = T;
using value_type = std::pair<key_type const, mapped_type>;
using iterator = typename std::list<value_type>::iterator;
using const_iterator = typename std::list<value_type>::const_iterator;
using prune_hook_type = std::function<void(key_type, mapped_type&&)>;
lru_cache() = default;
explicit lru_cache(size_t max_size)
: max_size_{max_size} {
index_.reserve(max_size_);
}
// Set the maximum cache size
void set_max_size(size_t max_size) {
max_size_ = max_size;
while (cache_.size() > max_size_) {
evict_lru();
}
index_.reserve(max_size_);
}
// Set a custom prune hook
void set_prune_hook(prune_hook_type hook) { prune_hook_ = std::move(hook); }
// Insert or update an item in the cache, promoting it
void set(key_type const& key, mapped_type value,
prune_hook_type custom_prune_hook = {}) {
auto it = index_.find(key);
if (it != index_.end()) {
it->second->second = std::move(value);
move_to_front(it->second);
} else {
if (index_.size() >= max_size_) {
evict_lru(std::move(custom_prune_hook));
}
cache_.push_front(value_type(key, std::move(value)));
index_[key] = cache_.begin();
}
}
// Find an item, optionally promoting it
iterator find(key_type const& key, bool promote = true) {
auto it = index_.find(key);
if (it == index_.end()) {
return end();
}
if (promote) {
move_to_front(it->second);
}
return it->second;
}
iterator erase(iterator pos, prune_hook_type custom_prune_hook = {}) {
auto& key = pos->first;
auto& value = pos->second;
if (custom_prune_hook) {
custom_prune_hook(key, std::move(value));
} else if (prune_hook_) {
prune_hook_(key, std::move(value));
}
index_.erase(key);
return cache_.erase(pos);
}
void clear() {
index_.clear();
cache_.clear();
}
bool empty() const { return cache_.empty(); }
size_t size() const { return cache_.size(); }
iterator begin() { return cache_.begin(); }
iterator end() { return cache_.end(); }
const_iterator begin() const { return cache_.begin(); }
const_iterator end() const { return cache_.end(); }
private:
// Move the accessed item to the front of the cache (most recently used)
void move_to_front(iterator it) { cache_.splice(cache_.begin(), cache_, it); }
// Evict the least recently used item
void evict_lru(prune_hook_type custom_prune_hook = {}) {
if (auto it = cache_.end(); it != cache_.begin()) {
erase(--it, std::move(custom_prune_hook));
}
}
size_t max_size_{0};
phmap::flat_hash_map<key_type, iterator> index_;
std::list<value_type> cache_;
prune_hook_type prune_hook_;
};
} // namespace dwarfs::reader::internal
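
A minimal usage sketch of the cache defined above, assuming the header and its parallel-hashmap dependency are on the include path; the keys, strings, and the stand-alone main() are illustrative and not part of the commit.

#include <cassert>
#include <iostream>
#include <string>

#include <dwarfs/reader/internal/lru_cache.h>

int main() {
  using cache_t = dwarfs::reader::internal::lru_cache<int, std::string>;

  cache_t cache(2); // hold at most two entries

  // Report evictions through the prune hook.
  cache.set_prune_hook([](int key, std::string&& value) {
    std::cout << "evicted " << key << " -> " << value << '\n';
  });

  cache.set(1, "one");
  cache.set(2, "two");

  // Promote key 1 to most-recently-used, then insert a third entry;
  // key 2 is now the least-recently-used and gets evicted.
  cache.find(1);
  cache.set(3, "three");

  assert(cache.find(2) == cache.end());
  assert(cache.find(1)->second == "one");
  assert(cache.find(3)->second == "three");
  return 0;
}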

View File

@@ -36,10 +36,10 @@
#include <span>
#include <vector>
#include <folly/container/EvictingCacheMap.h>
#include <dwarfs/small_vector.h>
#include <dwarfs/reader/internal/lru_cache.h>
namespace dwarfs::reader::internal {
template <typename InodeT, typename FileOffsetT, typename ChunkIndexT,
@@ -205,7 +205,7 @@ class basic_offset_cache {
}
private:
using cache_type = folly::EvictingCacheMap<inode_type, value_type>;
using cache_type = lru_cache<inode_type, value_type>;
cache_type mutable cache_;
std::mutex mutable mx_;

View File

@@ -41,7 +41,6 @@
#include <fmt/format.h>
#include <folly/container/EvictingCacheMap.h>
#include <folly/stats/Histogram.h>
#include <folly/system/ThreadName.h>
@@ -60,6 +59,7 @@
#include <dwarfs/reader/internal/block_cache.h>
#include <dwarfs/reader/internal/block_cache_byte_buffer_factory.h>
#include <dwarfs/reader/internal/cached_block.h>
#include <dwarfs/reader/internal/lru_cache.h>
#include <dwarfs/reader/internal/periodic_executor.h>
namespace dwarfs::reader::internal {
@@ -100,7 +100,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
void touch(size_t block_no) override {
std::lock_guard lock(mx_);
lru_.set(
block_no, block_no, true,
block_no, block_no,
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
[this](size_t, size_t&&) { is_sequential_.reset(); });
}
@@ -132,7 +132,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
}
private:
using lru_type = folly::EvictingCacheMap<size_t, size_t>;
using lru_type = lru_cache<size_t, size_t>;
std::mutex mutable mx_;
lru_type lru_;
@@ -251,6 +251,11 @@ class block_cache_ final : public block_cache::impl {
: hardware_concurrency(),
static_cast<size_t>(1)));
}
cache_.set_prune_hook(
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
on_block_removed("evicted", block_no, std::move(block));
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
});
}
~block_cache_() noexcept override {
@@ -328,7 +333,6 @@ class block_cache_ final : public block_cache::impl {
}
void set_block_size(size_t size) override {
// XXX: This currently inevitably clears the cache
if (size == 0) {
DWARFS_THROW(runtime_error, "block size is zero");
}
@@ -339,13 +343,7 @@ class block_cache_ final : public block_cache::impl {
}
std::lock_guard lock(mx_);
cache_.~lru_type();
new (&cache_) lru_type(max_blocks);
cache_.setPruneHook(
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
on_block_removed("evicted", block_no, std::move(block));
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
});
cache_.set_max_size(max_blocks);
}
void set_num_workers(size_t num) override {
@@ -390,7 +388,7 @@ class block_cache_ final : public block_cache::impl {
if (auto next = seq_access_detector_->prefetch()) {
std::lock_guard lock(mx_);
if (cache_.findWithoutPromotion(*next) == cache_.end() &&
if (cache_.find(*next, false) == cache_.end() &&
active_.find(*next) == active_.end()) {
sequential_prefetches_.fetch_add(1, std::memory_order_relaxed);
LOG_TRACE << "prefetching block " << *next;
@@ -759,8 +757,7 @@ class block_cache_ final : public block_cache::impl {
}
}
using lru_type =
folly::EvictingCacheMap<size_t, std::shared_ptr<cached_block>>;
using lru_type = lru_cache<size_t, std::shared_ptr<cached_block>>;
template <typename Key, typename Value>
using fast_map_type = phmap::flat_hash_map<Key, Value>;
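
The hunks above show the call-site mapping from folly::EvictingCacheMap to the new lru_cache: setPruneHook becomes set_prune_hook, set(key, value, true, hook) loses the promote flag, findWithoutPromotion(key) becomes find(key, false), and the destroy-and-placement-new resize becomes set_max_size. A hedged, self-contained sketch of that mapping follows; the block numbers and the shared_ptr<int> payload are placeholders, not taken from the commit.

#include <cstddef>
#include <iostream>
#include <memory>

#include <dwarfs/reader/internal/lru_cache.h>

int main() {
  using namespace dwarfs::reader::internal;
  using cache_t = lru_cache<size_t, std::shared_ptr<int>>;

  cache_t cache(4);

  // folly: cache_.setPruneHook(hook)      ->  lru_cache: cache_.set_prune_hook(hook)
  cache.set_prune_hook([](size_t block_no, std::shared_ptr<int>&& /*block*/) {
    std::cout << "evicted block " << block_no << '\n';
  });

  // folly: lru_.set(k, v, true, hook)     ->  lru_cache: lru_.set(k, v [, hook])
  for (size_t block_no = 0; block_no < 6; ++block_no) {
    cache.set(block_no, std::make_shared<int>(static_cast<int>(block_no)));
  }

  // folly: cache_.findWithoutPromotion(k) ->  lru_cache: cache_.find(k, false)
  if (cache.find(1, false) == cache.end()) {
    std::cout << "block 1 is no longer cached\n";
  }

  // folly: cache_.~lru_type(); new (&cache_) lru_type(n); cache_.setPruneHook(...)
  //   ->  lru_cache: cache_.set_max_size(n)  (the prune hook installed once stays set)
  cache.set_max_size(2);
  std::cout << "cached blocks after resize: " << cache.size() << '\n';
  return 0;
}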

View File

@@ -36,7 +36,6 @@
#include <utility>
#include <vector>
#include <folly/container/EvictingCacheMap.h>
#include <folly/stats/Histogram.h>
#include <range/v3/view/enumerate.hpp>
@@ -50,6 +49,7 @@
#include <dwarfs/reader/internal/block_cache.h>
#include <dwarfs/reader/internal/inode_reader_v2.h>
#include <dwarfs/reader/internal/lru_cache.h>
#include <dwarfs/reader/internal/offset_cache.h>
namespace dwarfs::reader::internal {
@@ -164,7 +164,7 @@ class inode_reader_ final : public inode_reader_v2::impl {
offset_cache_chunk_index_interval,
offset_cache_updater_max_inline_offsets>;
using readahead_cache_type = folly::EvictingCacheMap<uint32_t, file_off_t>;
using readahead_cache_type = lru_cache<uint32_t, file_off_t>;
std::vector<std::future<block_range>>
read_internal(uint32_t inode, size_t size, file_off_t offset, size_t maxiov,

test/lru_cache_test.cpp Normal file (163 lines added)
View File

@@ -0,0 +1,163 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <memory>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <dwarfs/reader/internal/lru_cache.h>
namespace {
using dwarfs::reader::internal::lru_cache;
using unique_str_ptr = std::unique_ptr<std::string>;
} // namespace
// Test for integral keys and regular types (e.g., std::string)
TEST(lru_cache_test, insert_and_retrieve_with_integral_key) {
lru_cache<int, std::string> cache(3);
cache.set(1, "one");
cache.set(2, "two");
cache.set(3, "three");
// Retrieve and verify
ASSERT_EQ(cache.find(1)->second, "one");
ASSERT_EQ(cache.find(2)->second, "two");
ASSERT_EQ(cache.find(3)->second, "three");
}
TEST(lru_cache_test, insert_eviction_with_integral_key) {
lru_cache<int, std::string> cache(3);
cache.set(1, "one");
cache.set(2, "two");
cache.set(3, "three");
// Evict least recently used (key 1)
cache.set(4, "four");
// Verify eviction
ASSERT_EQ(cache.find(1), cache.end());
ASSERT_EQ(cache.find(2)->second, "two");
ASSERT_EQ(cache.find(3)->second, "three");
ASSERT_EQ(cache.find(4)->second, "four");
}
TEST(lru_cache_test, find_with_promotion) {
lru_cache<int, std::string> cache(3);
cache.set(1, "one");
cache.set(2, "two");
cache.set(3, "three");
// Access item to promote
cache.find(2);
// Add a new item, evicting the least recently used (key 1)
cache.set(4, "four");
// Verify promotion and eviction
ASSERT_EQ(cache.find(2)->second, "two");
ASSERT_EQ(cache.find(1), cache.end());
ASSERT_EQ(cache.find(3)->second, "three");
ASSERT_EQ(cache.find(4)->second, "four");
}
TEST(lru_cache_test, prune_hook) {
lru_cache<int, std::string> cache(3);
std::vector<std::pair<int, std::string>> evicted_items;
// Set a prune hook to capture evicted keys
cache.set_prune_hook([&evicted_items](int key, std::string&& value) {
evicted_items.emplace_back(key, std::move(value));
});
cache.set(1, "one");
cache.set(2, "two");
cache.set(3, "three");
cache.set(4, "four");
// Verify that the least recently used key is evicted
ASSERT_EQ(evicted_items.size(), 1);
EXPECT_EQ(evicted_items[0].first, 1);
EXPECT_EQ(evicted_items[0].second, "one");
}
TEST(lru_cache_test, unique_ptr_value_type) {
lru_cache<int, unique_str_ptr> cache(3);
cache.set(1, std::make_unique<std::string>("one"));
cache.set(2, std::make_unique<std::string>("two"));
cache.set(3, std::make_unique<std::string>("three"));
// Retrieve and verify unique_ptr values
auto val1 = std::move(cache.find(1)->second);
auto val2 = std::move(cache.find(2)->second);
auto val3 = std::move(cache.find(3)->second);
ASSERT_EQ(*val1, "one");
ASSERT_EQ(*val2, "two");
ASSERT_EQ(*val3, "three");
// Add a new item, evicting the least recently used (key 1)
cache.set(4, std::make_unique<std::string>("four"));
// Verify eviction of key 1
ASSERT_EQ(cache.find(1), cache.end());
}
TEST(lru_cache_test, unique_ptr_eviction) {
lru_cache<int, unique_str_ptr> cache(3);
std::vector<std::pair<int, unique_str_ptr>> evicted_items;
// Set a prune hook to capture evicted values
cache.set_prune_hook([&evicted_items](int key, unique_str_ptr&& value) {
evicted_items.emplace_back(key, std::move(value));
});
cache.set(1, std::make_unique<std::string>("one"));
cache.set(2, std::make_unique<std::string>("two"));
cache.set(3, std::make_unique<std::string>("three"));
cache.set(4, std::make_unique<std::string>("four"));
// Verify that the least recently used key (key 1) was evicted
ASSERT_EQ(evicted_items.size(), 1);
EXPECT_EQ(evicted_items[0].first, 1);
EXPECT_EQ(*evicted_items[0].second, "one");
}
TEST(lru_cache_test, clear_cache) {
lru_cache<int, std::string> cache(3);
cache.set(1, "one");
cache.set(2, "two");
cache.set(3, "three");
// Clear the cache
cache.clear();
// Verify that the cache is empty
ASSERT_TRUE(cache.empty());
ASSERT_EQ(cache.size(), 0);
}
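
A sketch of one additional test that could exercise the shrink path of set_max_size; it is not part of this commit and the test name is made up, but the expected evictions follow directly from the while loop in set_max_size and the LRU order maintained by move_to_front.

TEST(lru_cache_test, shrink_max_size_evicts_lru_entries) {
  lru_cache<int, std::string> cache(3);
  std::vector<int> evicted_keys;

  // Record which keys get pruned when the cache shrinks.
  cache.set_prune_hook([&evicted_keys](int key, std::string&&) {
    evicted_keys.push_back(key);
  });

  cache.set(1, "one");
  cache.set(2, "two");
  cache.set(3, "three");

  // Shrinking the limit evicts least recently used entries until it fits.
  cache.set_max_size(1);

  EXPECT_THAT(evicted_keys, ::testing::ElementsAre(1, 2));
  ASSERT_EQ(cache.size(), 1);
  ASSERT_EQ(cache.find(3)->second, "three");
}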