mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-03 17:56:12 -04:00
feat: use new minimal LRU cache implementation
This commit is contained in:
parent
b919012ac6
commit
7508b1db38
@ -506,6 +506,7 @@ if(WITH_TESTS)
|
||||
test/global_metadata_test.cpp
|
||||
test/integral_value_parser_test.cpp
|
||||
test/lazy_value_test.cpp
|
||||
test/lru_cache_test.cpp
|
||||
test/metadata_requirements_test.cpp
|
||||
test/nilsimsa_test.cpp
|
||||
test/options_test.cpp
|
||||
@ -603,6 +604,10 @@ if(WITH_TESTS)
|
||||
target_link_libraries(tool_main_test PRIVATE mkdwarfs_main dwarfsck_main dwarfsextract_main PkgConfig::LIBARCHIVE)
|
||||
endif()
|
||||
|
||||
if(TARGET dwarfs_unit_tests)
|
||||
target_link_libraries(dwarfs_unit_tests PRIVATE phmap)
|
||||
endif()
|
||||
|
||||
if(TARGET manpage_test)
|
||||
if(WITH_TOOLS)
|
||||
target_compile_definitions(manpage_test PRIVATE DWARFS_WITH_TOOLS)
|
||||
|
142
include/dwarfs/reader/internal/lru_cache.h
Normal file
142
include/dwarfs/reader/internal/lru_cache.h
Normal file
@ -0,0 +1,142 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the “Software”), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <list>
|
||||
|
||||
#include <parallel_hashmap/phmap.h>
|
||||
|
||||
namespace dwarfs::reader::internal {
|
||||
|
||||
template <typename KeyT, typename T>
|
||||
class lru_cache {
|
||||
public:
|
||||
using key_type = KeyT;
|
||||
using mapped_type = T;
|
||||
using value_type = std::pair<key_type const, mapped_type>;
|
||||
|
||||
using iterator = typename std::list<value_type>::iterator;
|
||||
using const_iterator = typename std::list<value_type>::const_iterator;
|
||||
|
||||
using prune_hook_type = std::function<void(key_type, mapped_type&&)>;
|
||||
|
||||
lru_cache() = default;
|
||||
|
||||
explicit lru_cache(size_t max_size)
|
||||
: max_size_{max_size} {
|
||||
index_.reserve(max_size_);
|
||||
}
|
||||
|
||||
// Set the maximum cache size
|
||||
void set_max_size(size_t max_size) {
|
||||
max_size_ = max_size;
|
||||
while (cache_.size() > max_size_) {
|
||||
evict_lru();
|
||||
}
|
||||
index_.reserve(max_size_);
|
||||
}
|
||||
|
||||
// Set a custom prune hook
|
||||
void set_prune_hook(prune_hook_type hook) { prune_hook_ = std::move(hook); }
|
||||
|
||||
// Insert or update an item in the cache, promoting it
|
||||
void set(key_type const& key, mapped_type value,
|
||||
prune_hook_type custom_prune_hook = {}) {
|
||||
auto it = index_.find(key);
|
||||
if (it != index_.end()) {
|
||||
it->second->second = std::move(value);
|
||||
move_to_front(it->second);
|
||||
} else {
|
||||
if (index_.size() >= max_size_) {
|
||||
evict_lru(std::move(custom_prune_hook));
|
||||
}
|
||||
cache_.push_front(value_type(key, std::move(value)));
|
||||
index_[key] = cache_.begin();
|
||||
}
|
||||
}
|
||||
|
||||
// Find an item, optionally promoting it
|
||||
iterator find(key_type const& key, bool promote = true) {
|
||||
auto it = index_.find(key);
|
||||
if (it == index_.end()) {
|
||||
return end();
|
||||
}
|
||||
if (promote) {
|
||||
move_to_front(it->second);
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
iterator erase(iterator pos, prune_hook_type custom_prune_hook = {}) {
|
||||
auto& key = pos->first;
|
||||
auto& value = pos->second;
|
||||
if (custom_prune_hook) {
|
||||
custom_prune_hook(key, std::move(value));
|
||||
} else if (prune_hook_) {
|
||||
prune_hook_(key, std::move(value));
|
||||
}
|
||||
index_.erase(key);
|
||||
return cache_.erase(pos);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
index_.clear();
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
bool empty() const { return cache_.empty(); }
|
||||
|
||||
size_t size() const { return cache_.size(); }
|
||||
|
||||
iterator begin() { return cache_.begin(); }
|
||||
iterator end() { return cache_.end(); }
|
||||
|
||||
const_iterator begin() const { return cache_.begin(); }
|
||||
const_iterator end() const { return cache_.end(); }
|
||||
|
||||
private:
|
||||
// Move the accessed item to the front of the cache (most recently used)
|
||||
void move_to_front(iterator it) { cache_.splice(cache_.begin(), cache_, it); }
|
||||
|
||||
// Evict the least recently used item
|
||||
void evict_lru(prune_hook_type custom_prune_hook = {}) {
|
||||
if (auto it = cache_.end(); it != cache_.begin()) {
|
||||
erase(--it, std::move(custom_prune_hook));
|
||||
}
|
||||
}
|
||||
|
||||
size_t max_size_;
|
||||
phmap::flat_hash_map<key_type, iterator> index_;
|
||||
std::list<value_type> cache_;
|
||||
prune_hook_type prune_hook_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs::reader::internal
|
@ -36,10 +36,10 @@
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
|
||||
#include <dwarfs/small_vector.h>
|
||||
|
||||
#include <dwarfs/reader/internal/lru_cache.h>
|
||||
|
||||
namespace dwarfs::reader::internal {
|
||||
|
||||
template <typename InodeT, typename FileOffsetT, typename ChunkIndexT,
|
||||
@ -205,7 +205,7 @@ class basic_offset_cache {
|
||||
}
|
||||
|
||||
private:
|
||||
using cache_type = folly::EvictingCacheMap<inode_type, value_type>;
|
||||
using cache_type = lru_cache<inode_type, value_type>;
|
||||
|
||||
cache_type mutable cache_;
|
||||
std::mutex mutable mx_;
|
||||
|
@ -41,7 +41,6 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
#include <folly/stats/Histogram.h>
|
||||
#include <folly/system/ThreadName.h>
|
||||
|
||||
@ -60,6 +59,7 @@
|
||||
#include <dwarfs/reader/internal/block_cache.h>
|
||||
#include <dwarfs/reader/internal/block_cache_byte_buffer_factory.h>
|
||||
#include <dwarfs/reader/internal/cached_block.h>
|
||||
#include <dwarfs/reader/internal/lru_cache.h>
|
||||
#include <dwarfs/reader/internal/periodic_executor.h>
|
||||
|
||||
namespace dwarfs::reader::internal {
|
||||
@ -100,7 +100,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
|
||||
void touch(size_t block_no) override {
|
||||
std::lock_guard lock(mx_);
|
||||
lru_.set(
|
||||
block_no, block_no, true,
|
||||
block_no, block_no, /* true, */
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
|
||||
[this](size_t, size_t&&) { is_sequential_.reset(); });
|
||||
}
|
||||
@ -132,7 +132,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
|
||||
}
|
||||
|
||||
private:
|
||||
using lru_type = folly::EvictingCacheMap<size_t, size_t>;
|
||||
using lru_type = lru_cache<size_t, size_t>;
|
||||
|
||||
std::mutex mutable mx_;
|
||||
lru_type lru_;
|
||||
@ -251,6 +251,11 @@ class block_cache_ final : public block_cache::impl {
|
||||
: hardware_concurrency(),
|
||||
static_cast<size_t>(1)));
|
||||
}
|
||||
cache_.set_prune_hook(
|
||||
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
|
||||
on_block_removed("evicted", block_no, std::move(block));
|
||||
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
~block_cache_() noexcept override {
|
||||
@ -328,7 +333,6 @@ class block_cache_ final : public block_cache::impl {
|
||||
}
|
||||
|
||||
void set_block_size(size_t size) override {
|
||||
// XXX: This currently inevitably clears the cache
|
||||
if (size == 0) {
|
||||
DWARFS_THROW(runtime_error, "block size is zero");
|
||||
}
|
||||
@ -339,13 +343,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
}
|
||||
|
||||
std::lock_guard lock(mx_);
|
||||
cache_.~lru_type();
|
||||
new (&cache_) lru_type(max_blocks);
|
||||
cache_.setPruneHook(
|
||||
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
|
||||
on_block_removed("evicted", block_no, std::move(block));
|
||||
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
|
||||
});
|
||||
cache_.set_max_size(max_blocks);
|
||||
}
|
||||
|
||||
void set_num_workers(size_t num) override {
|
||||
@ -390,7 +388,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
if (auto next = seq_access_detector_->prefetch()) {
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
if (cache_.findWithoutPromotion(*next) == cache_.end() &&
|
||||
if (cache_.find(*next, false) == cache_.end() &&
|
||||
active_.find(*next) == active_.end()) {
|
||||
sequential_prefetches_.fetch_add(1, std::memory_order_relaxed);
|
||||
LOG_TRACE << "prefetching block " << *next;
|
||||
@ -759,8 +757,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
}
|
||||
}
|
||||
|
||||
using lru_type =
|
||||
folly::EvictingCacheMap<size_t, std::shared_ptr<cached_block>>;
|
||||
using lru_type = lru_cache<size_t, std::shared_ptr<cached_block>>;
|
||||
template <typename Key, typename Value>
|
||||
using fast_map_type = phmap::flat_hash_map<Key, Value>;
|
||||
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
#include <folly/stats/Histogram.h>
|
||||
|
||||
#include <range/v3/view/enumerate.hpp>
|
||||
@ -50,6 +49,7 @@
|
||||
|
||||
#include <dwarfs/reader/internal/block_cache.h>
|
||||
#include <dwarfs/reader/internal/inode_reader_v2.h>
|
||||
#include <dwarfs/reader/internal/lru_cache.h>
|
||||
#include <dwarfs/reader/internal/offset_cache.h>
|
||||
|
||||
namespace dwarfs::reader::internal {
|
||||
@ -164,7 +164,7 @@ class inode_reader_ final : public inode_reader_v2::impl {
|
||||
offset_cache_chunk_index_interval,
|
||||
offset_cache_updater_max_inline_offsets>;
|
||||
|
||||
using readahead_cache_type = folly::EvictingCacheMap<uint32_t, file_off_t>;
|
||||
using readahead_cache_type = lru_cache<uint32_t, file_off_t>;
|
||||
|
||||
std::vector<std::future<block_range>>
|
||||
read_internal(uint32_t inode, size_t size, file_off_t offset, size_t maxiov,
|
||||
|
163
test/lru_cache_test.cpp
Normal file
163
test/lru_cache_test.cpp
Normal file
@ -0,0 +1,163 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <dwarfs/reader/internal/lru_cache.h>
|
||||
|
||||
namespace {
|
||||
|
||||
using dwarfs::reader::internal::lru_cache;
|
||||
using unique_str_ptr = std::unique_ptr<std::string>;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Test for integral keys and regular types (e.g., std::string)
|
||||
TEST(lru_cache_test, insert_and_retrieve_with_integral_key) {
|
||||
lru_cache<int, std::string> cache(3);
|
||||
|
||||
cache.set(1, "one");
|
||||
cache.set(2, "two");
|
||||
cache.set(3, "three");
|
||||
|
||||
// Retrieve and verify
|
||||
ASSERT_EQ(cache.find(1)->second, "one");
|
||||
ASSERT_EQ(cache.find(2)->second, "two");
|
||||
ASSERT_EQ(cache.find(3)->second, "three");
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, insert_eviction_with_integral_key) {
|
||||
lru_cache<int, std::string> cache(3);
|
||||
|
||||
cache.set(1, "one");
|
||||
cache.set(2, "two");
|
||||
cache.set(3, "three");
|
||||
|
||||
// Evict least recently used (key 1)
|
||||
cache.set(4, "four");
|
||||
|
||||
// Verify eviction
|
||||
ASSERT_EQ(cache.find(1), cache.end());
|
||||
ASSERT_EQ(cache.find(2)->second, "two");
|
||||
ASSERT_EQ(cache.find(3)->second, "three");
|
||||
ASSERT_EQ(cache.find(4)->second, "four");
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, find_with_promotion) {
|
||||
lru_cache<int, std::string> cache(3);
|
||||
|
||||
cache.set(1, "one");
|
||||
cache.set(2, "two");
|
||||
cache.set(3, "three");
|
||||
|
||||
// Access item to promote
|
||||
cache.find(2);
|
||||
|
||||
// Add a new item, evicting the least recently used (key 1)
|
||||
cache.set(4, "four");
|
||||
|
||||
// Verify promotion and eviction
|
||||
ASSERT_EQ(cache.find(2)->second, "two");
|
||||
ASSERT_EQ(cache.find(1), cache.end());
|
||||
ASSERT_EQ(cache.find(3)->second, "three");
|
||||
ASSERT_EQ(cache.find(4)->second, "four");
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, prune_hook) {
|
||||
lru_cache<int, std::string> cache(3);
|
||||
std::vector<std::pair<int, std::string>> evicted_items;
|
||||
|
||||
// Set a prune hook to capture evicted keys
|
||||
cache.set_prune_hook([&evicted_items](int key, std::string&& value) {
|
||||
evicted_items.emplace_back(key, std::move(value));
|
||||
});
|
||||
|
||||
cache.set(1, "one");
|
||||
cache.set(2, "two");
|
||||
cache.set(3, "three");
|
||||
cache.set(4, "four");
|
||||
|
||||
// Verify that the least recently used key is evicted
|
||||
ASSERT_EQ(evicted_items.size(), 1);
|
||||
EXPECT_EQ(evicted_items[0].first, 1);
|
||||
EXPECT_EQ(evicted_items[0].second, "one");
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, unique_ptr_key_type) {
|
||||
lru_cache<int, unique_str_ptr> cache(3);
|
||||
|
||||
cache.set(1, std::make_unique<std::string>("one"));
|
||||
cache.set(2, std::make_unique<std::string>("two"));
|
||||
cache.set(3, std::make_unique<std::string>("three"));
|
||||
|
||||
// Retrieve and verify unique_ptr values
|
||||
auto val1 = std::move(cache.find(1)->second);
|
||||
auto val2 = std::move(cache.find(2)->second);
|
||||
auto val3 = std::move(cache.find(3)->second);
|
||||
|
||||
ASSERT_EQ(*val1, "one");
|
||||
ASSERT_EQ(*val2, "two");
|
||||
ASSERT_EQ(*val3, "three");
|
||||
|
||||
// Add a new item, evicting the least recently used (key 1)
|
||||
cache.set(4, std::make_unique<std::string>("four"));
|
||||
|
||||
// Verify eviction of key 1
|
||||
ASSERT_EQ(cache.find(1), cache.end());
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, unique_ptr_eviction) {
|
||||
lru_cache<int, unique_str_ptr> cache(3);
|
||||
std::vector<std::pair<int, unique_str_ptr>> evicted_items;
|
||||
|
||||
// Set a prune hook to capture evicted values
|
||||
cache.set_prune_hook([&evicted_items](int key, unique_str_ptr&& value) {
|
||||
evicted_items.emplace_back(key, std::move(value));
|
||||
});
|
||||
|
||||
cache.set(1, std::make_unique<std::string>("one"));
|
||||
cache.set(2, std::make_unique<std::string>("two"));
|
||||
cache.set(3, std::make_unique<std::string>("three"));
|
||||
cache.set(4, std::make_unique<std::string>("four"));
|
||||
|
||||
// Verify that the least recently used key (key 1) was evicted
|
||||
ASSERT_EQ(evicted_items.size(), 1);
|
||||
EXPECT_EQ(evicted_items[0].first, 1);
|
||||
EXPECT_EQ(*evicted_items[0].second, "one");
|
||||
}
|
||||
|
||||
TEST(lru_cache_test, clear_cache) {
|
||||
lru_cache<int, std::string> cache(3);
|
||||
|
||||
cache.set(1, "one");
|
||||
cache.set(2, "two");
|
||||
cache.set(3, "three");
|
||||
|
||||
// Clear the cache
|
||||
cache.clear();
|
||||
|
||||
// Verify that the cache is empty
|
||||
ASSERT_TRUE(cache.empty());
|
||||
ASSERT_EQ(cache.size(), 0);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user