diff --git a/CMakeLists.txt b/CMakeLists.txt index b329d209..a7e4dc30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -506,6 +506,7 @@ if(WITH_TESTS) test/global_metadata_test.cpp test/integral_value_parser_test.cpp test/lazy_value_test.cpp + test/lru_cache_test.cpp test/metadata_requirements_test.cpp test/nilsimsa_test.cpp test/options_test.cpp @@ -603,6 +604,10 @@ if(WITH_TESTS) target_link_libraries(tool_main_test PRIVATE mkdwarfs_main dwarfsck_main dwarfsextract_main PkgConfig::LIBARCHIVE) endif() + if(TARGET dwarfs_unit_tests) + target_link_libraries(dwarfs_unit_tests PRIVATE phmap) + endif() + if(TARGET manpage_test) if(WITH_TOOLS) target_compile_definitions(manpage_test PRIVATE DWARFS_WITH_TOOLS) diff --git a/include/dwarfs/reader/internal/lru_cache.h b/include/dwarfs/reader/internal/lru_cache.h new file mode 100644 index 00000000..7560f681 --- /dev/null +++ b/include/dwarfs/reader/internal/lru_cache.h @@ -0,0 +1,142 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the “Software”), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include +#include +#include + +#include + +namespace dwarfs::reader::internal { + +template +class lru_cache { + public: + using key_type = KeyT; + using mapped_type = T; + using value_type = std::pair; + + using iterator = typename std::list::iterator; + using const_iterator = typename std::list::const_iterator; + + using prune_hook_type = std::function; + + lru_cache() = default; + + explicit lru_cache(size_t max_size) + : max_size_{max_size} { + index_.reserve(max_size_); + } + + // Set the maximum cache size + void set_max_size(size_t max_size) { + max_size_ = max_size; + while (cache_.size() > max_size_) { + evict_lru(); + } + index_.reserve(max_size_); + } + + // Set a custom prune hook + void set_prune_hook(prune_hook_type hook) { prune_hook_ = std::move(hook); } + + // Insert or update an item in the cache, promoting it + void set(key_type const& key, mapped_type value, + prune_hook_type custom_prune_hook = {}) { + auto it = index_.find(key); + if (it != index_.end()) { + it->second->second = std::move(value); + move_to_front(it->second); + } else { + if (index_.size() >= max_size_) { + evict_lru(std::move(custom_prune_hook)); + } + cache_.push_front(value_type(key, std::move(value))); + index_[key] = cache_.begin(); + } + } + + // Find an item, optionally promoting it + iterator find(key_type const& key, bool promote = true) { + auto it = index_.find(key); + if (it == index_.end()) { + return end(); + } + if (promote) { + move_to_front(it->second); + } + return it->second; + } + + iterator erase(iterator pos, prune_hook_type custom_prune_hook = {}) { + auto& key = pos->first; + auto& value = pos->second; + if (custom_prune_hook) { + custom_prune_hook(key, std::move(value)); + } else if (prune_hook_) { + prune_hook_(key, std::move(value)); + } + index_.erase(key); + return cache_.erase(pos); + } + + void clear() { + index_.clear(); + cache_.clear(); + } + + bool empty() const { return cache_.empty(); } + + size_t size() const { return cache_.size(); } + + iterator begin() { return cache_.begin(); } + iterator end() { return cache_.end(); } + + const_iterator begin() const { return cache_.begin(); } + const_iterator end() const { return cache_.end(); } + + private: + // Move the accessed item to the front of the cache (most recently used) + void move_to_front(iterator it) { cache_.splice(cache_.begin(), cache_, it); } + + // Evict the least recently used item + void evict_lru(prune_hook_type custom_prune_hook = {}) { + if (auto it = cache_.end(); it != cache_.begin()) { + erase(--it, std::move(custom_prune_hook)); + } + } + + size_t max_size_; + phmap::flat_hash_map index_; + std::list cache_; + prune_hook_type prune_hook_; +}; + +} // namespace dwarfs::reader::internal diff --git a/include/dwarfs/reader/internal/offset_cache.h b/include/dwarfs/reader/internal/offset_cache.h index 2248b9bf..cee26ef8 100644 --- a/include/dwarfs/reader/internal/offset_cache.h +++ b/include/dwarfs/reader/internal/offset_cache.h @@ -36,10 +36,10 @@ #include #include -#include - #include +#include + namespace dwarfs::reader::internal { template ; + using cache_type = lru_cache; cache_type mutable cache_; std::mutex mutable mx_; diff --git a/src/reader/internal/block_cache.cpp b/src/reader/internal/block_cache.cpp index 1add4127..1d0f6f11 100644 --- a/src/reader/internal/block_cache.cpp +++ b/src/reader/internal/block_cache.cpp @@ -41,7 +41,6 @@ #include -#include #include #include @@ -60,6 +59,7 @@ #include #include #include +#include #include namespace dwarfs::reader::internal { @@ -100,7 +100,7 @@ class lru_sequential_access_detector : public sequential_access_detector { void touch(size_t block_no) override { std::lock_guard lock(mx_); lru_.set( - block_no, block_no, true, + block_no, block_no, /* true, */ // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) [this](size_t, size_t&&) { is_sequential_.reset(); }); } @@ -132,7 +132,7 @@ class lru_sequential_access_detector : public sequential_access_detector { } private: - using lru_type = folly::EvictingCacheMap; + using lru_type = lru_cache; std::mutex mutable mx_; lru_type lru_; @@ -251,6 +251,11 @@ class block_cache_ final : public block_cache::impl { : hardware_concurrency(), static_cast(1))); } + cache_.set_prune_hook( + [this](size_t block_no, std::shared_ptr&& block) { + on_block_removed("evicted", block_no, std::move(block)); + blocks_evicted_.fetch_add(1, std::memory_order_relaxed); + }); } ~block_cache_() noexcept override { @@ -328,7 +333,6 @@ class block_cache_ final : public block_cache::impl { } void set_block_size(size_t size) override { - // XXX: This currently inevitably clears the cache if (size == 0) { DWARFS_THROW(runtime_error, "block size is zero"); } @@ -339,13 +343,7 @@ class block_cache_ final : public block_cache::impl { } std::lock_guard lock(mx_); - cache_.~lru_type(); - new (&cache_) lru_type(max_blocks); - cache_.setPruneHook( - [this](size_t block_no, std::shared_ptr&& block) { - on_block_removed("evicted", block_no, std::move(block)); - blocks_evicted_.fetch_add(1, std::memory_order_relaxed); - }); + cache_.set_max_size(max_blocks); } void set_num_workers(size_t num) override { @@ -390,7 +388,7 @@ class block_cache_ final : public block_cache::impl { if (auto next = seq_access_detector_->prefetch()) { std::lock_guard lock(mx_); - if (cache_.findWithoutPromotion(*next) == cache_.end() && + if (cache_.find(*next, false) == cache_.end() && active_.find(*next) == active_.end()) { sequential_prefetches_.fetch_add(1, std::memory_order_relaxed); LOG_TRACE << "prefetching block " << *next; @@ -759,8 +757,7 @@ class block_cache_ final : public block_cache::impl { } } - using lru_type = - folly::EvictingCacheMap>; + using lru_type = lru_cache>; template using fast_map_type = phmap::flat_hash_map; diff --git a/src/reader/internal/inode_reader_v2.cpp b/src/reader/internal/inode_reader_v2.cpp index 6b352a68..9b5479a1 100644 --- a/src/reader/internal/inode_reader_v2.cpp +++ b/src/reader/internal/inode_reader_v2.cpp @@ -36,7 +36,6 @@ #include #include -#include #include #include @@ -50,6 +49,7 @@ #include #include +#include #include namespace dwarfs::reader::internal { @@ -164,7 +164,7 @@ class inode_reader_ final : public inode_reader_v2::impl { offset_cache_chunk_index_interval, offset_cache_updater_max_inline_offsets>; - using readahead_cache_type = folly::EvictingCacheMap; + using readahead_cache_type = lru_cache; std::vector> read_internal(uint32_t inode, size_t size, file_off_t offset, size_t maxiov, diff --git a/test/lru_cache_test.cpp b/test/lru_cache_test.cpp new file mode 100644 index 00000000..52c2f054 --- /dev/null +++ b/test/lru_cache_test.cpp @@ -0,0 +1,163 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include + +#include +#include + +#include + +namespace { + +using dwarfs::reader::internal::lru_cache; +using unique_str_ptr = std::unique_ptr; + +} // namespace + +// Test for integral keys and regular types (e.g., std::string) +TEST(lru_cache_test, insert_and_retrieve_with_integral_key) { + lru_cache cache(3); + + cache.set(1, "one"); + cache.set(2, "two"); + cache.set(3, "three"); + + // Retrieve and verify + ASSERT_EQ(cache.find(1)->second, "one"); + ASSERT_EQ(cache.find(2)->second, "two"); + ASSERT_EQ(cache.find(3)->second, "three"); +} + +TEST(lru_cache_test, insert_eviction_with_integral_key) { + lru_cache cache(3); + + cache.set(1, "one"); + cache.set(2, "two"); + cache.set(3, "three"); + + // Evict least recently used (key 1) + cache.set(4, "four"); + + // Verify eviction + ASSERT_EQ(cache.find(1), cache.end()); + ASSERT_EQ(cache.find(2)->second, "two"); + ASSERT_EQ(cache.find(3)->second, "three"); + ASSERT_EQ(cache.find(4)->second, "four"); +} + +TEST(lru_cache_test, find_with_promotion) { + lru_cache cache(3); + + cache.set(1, "one"); + cache.set(2, "two"); + cache.set(3, "three"); + + // Access item to promote + cache.find(2); + + // Add a new item, evicting the least recently used (key 1) + cache.set(4, "four"); + + // Verify promotion and eviction + ASSERT_EQ(cache.find(2)->second, "two"); + ASSERT_EQ(cache.find(1), cache.end()); + ASSERT_EQ(cache.find(3)->second, "three"); + ASSERT_EQ(cache.find(4)->second, "four"); +} + +TEST(lru_cache_test, prune_hook) { + lru_cache cache(3); + std::vector> evicted_items; + + // Set a prune hook to capture evicted keys + cache.set_prune_hook([&evicted_items](int key, std::string&& value) { + evicted_items.emplace_back(key, std::move(value)); + }); + + cache.set(1, "one"); + cache.set(2, "two"); + cache.set(3, "three"); + cache.set(4, "four"); + + // Verify that the least recently used key is evicted + ASSERT_EQ(evicted_items.size(), 1); + EXPECT_EQ(evicted_items[0].first, 1); + EXPECT_EQ(evicted_items[0].second, "one"); +} + +TEST(lru_cache_test, unique_ptr_key_type) { + lru_cache cache(3); + + cache.set(1, std::make_unique("one")); + cache.set(2, std::make_unique("two")); + cache.set(3, std::make_unique("three")); + + // Retrieve and verify unique_ptr values + auto val1 = std::move(cache.find(1)->second); + auto val2 = std::move(cache.find(2)->second); + auto val3 = std::move(cache.find(3)->second); + + ASSERT_EQ(*val1, "one"); + ASSERT_EQ(*val2, "two"); + ASSERT_EQ(*val3, "three"); + + // Add a new item, evicting the least recently used (key 1) + cache.set(4, std::make_unique("four")); + + // Verify eviction of key 1 + ASSERT_EQ(cache.find(1), cache.end()); +} + +TEST(lru_cache_test, unique_ptr_eviction) { + lru_cache cache(3); + std::vector> evicted_items; + + // Set a prune hook to capture evicted values + cache.set_prune_hook([&evicted_items](int key, unique_str_ptr&& value) { + evicted_items.emplace_back(key, std::move(value)); + }); + + cache.set(1, std::make_unique("one")); + cache.set(2, std::make_unique("two")); + cache.set(3, std::make_unique("three")); + cache.set(4, std::make_unique("four")); + + // Verify that the least recently used key (key 1) was evicted + ASSERT_EQ(evicted_items.size(), 1); + EXPECT_EQ(evicted_items[0].first, 1); + EXPECT_EQ(*evicted_items[0].second, "one"); +} + +TEST(lru_cache_test, clear_cache) { + lru_cache cache(3); + + cache.set(1, "one"); + cache.set(2, "two"); + cache.set(3, "three"); + + // Clear the cache + cache.clear(); + + // Verify that the cache is empty + ASSERT_TRUE(cache.empty()); + ASSERT_EQ(cache.size(), 0); +}