From 656b7a7780b3d75046b4369d0be72dd327248e40 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 27 Oct 2021 14:35:22 +0200 Subject: [PATCH] Support for cache tidying --- doc/dwarfs.md | 31 +++++++++++++ include/dwarfs/options.h | 6 +++ src/dwarfs.cpp | 60 ++++++++++++++++++++++--- src/dwarfs/block_cache.cpp | 91 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 7 deletions(-) diff --git a/doc/dwarfs.md b/doc/dwarfs.md index 1755dc91..a2fb4491 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -110,6 +110,37 @@ options: mainly meant for debugging and the `debug` and `trace` levels in particular will slow down the driver. +* `-o tidy_strategy=`*name*: + Use one of the following strategies to tidy the block cache: + + - `none`: + This is the default strategy that never tidies the cache. + Blocks will only be evicted from the cache if the cache is + full and a more recently used block is added to the cache. + + - `time`: + Time based tidying strategy. Every `tidy_interval`, the block + cache is traversed and all blocks that have not been accessed + for more than `tidy_max_age` will be removed. + + - `swap`: + Swap based tidying strategy. Every `tidy_interval`, the block + cache is traversed and all blocks that have been fully or + partially swapped out by the kernel will be removed. + +* `-o tidy_interval=`*time*: + Used only if `tidy_strategy` is not `none`. This is the interval + at which the cache tidying thread wakes up to look for blocks + that can be removed from the cache. This must be an integer value. + Suffixes `ms`, `s`, `m`, `h` are supported. If no suffix is given, + the value will be assumed to be in seconds. + +* `-o tidy_max_age=`*time*: + Used only if `tidy_strategy` is `time`. A block will be removed + from the cache if it hasn't been used for this time span. This must + be an integer value. Suffixes `ms`, `s`, `m`, `h` are supported. + If no suffix is given, the value will be assumed to be in seconds.
+ There's two particular FUSE options that you'll likely need at some point, e.g. when trying to set up an `overlayfs` mount on top of a DwarFS image: diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 097be53b..f9b1df00 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -31,12 +32,17 @@ namespace dwarfs { enum class mlock_mode { NONE, TRY, MUST }; +enum class cache_tidy_strategy { NONE, EXPIRY_TIME, BLOCK_SWAPPED_OUT }; // selects if and how the block cache is tidied periodically + struct block_cache_options { size_t max_bytes{0}; size_t num_workers{0}; double decompress_ratio{1.0}; bool mm_release{true}; bool init_workers{true}; + cache_tidy_strategy tidy_strategy{cache_tidy_strategy::NONE}; + std::chrono::milliseconds tidy_interval{std::chrono::minutes(5)}; // wake-up period of the tidy thread; initialized so it is never read indeterminate + std::chrono::milliseconds tidy_expiry_time{std::chrono::minutes(10)}; // EXPIRY_TIME only: blocks unused for longer than this are removed }; struct metadata_options { diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp index fc2c5cee..08a9e1f5 100644 --- a/src/dwarfs.cpp +++ b/src/dwarfs.cpp @@ -20,13 +20,16 @@ */ #include +#include #include #include +#include +#include +#include #include #include #include -#include #include #include @@ -54,12 +57,15 @@ struct options { const char* progname{nullptr}; std::string fsimage; int seen_mountpoint{0}; - const char* cachesize_str{nullptr}; // TODO: const?? -> use string? - const char* debuglevel_str{nullptr}; // TODO: const?? -> use string? - const char* workers_str{nullptr}; // TODO: const?? -> use string? - const char* mlock_str{nullptr}; // TODO: const?? -> use string?
+ const char* decompress_ratio_str{nullptr}; // TODO: const?? -> use string? + const char* image_offset_str{nullptr}; // TODO: const?? -> use string? + const char* cache_tidy_strategy_str{nullptr}; // TODO: const?? -> use string? + const char* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string? + const char* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string? int enable_nlink{0}; int readonly{0}; int cache_image{0}; @@ -69,6 +75,9 @@ struct options { mlock_mode lock_mode{mlock_mode::NONE}; double decompress_ratio{0.0}; logger::level_type debuglevel{logger::level_type::ERROR}; + cache_tidy_strategy block_cache_tidy_strategy{cache_tidy_strategy::NONE}; + std::chrono::milliseconds block_cache_tidy_interval{std::chrono::minutes(5)}; + std::chrono::milliseconds block_cache_tidy_max_age{std::chrono::minutes{10}}; }; struct dwarfs_userdata { @@ -93,6 +102,9 @@ constexpr struct ::fuse_opt dwarfs_opts[] = { DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), DWARFS_OPT("offset=%s", image_offset_str, 0), + DWARFS_OPT("tidy_strategy=%s", cache_tidy_strategy_str, 0), + DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0), + DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0), DWARFS_OPT("enable_nlink", enable_nlink, 1), DWARFS_OPT("readonly", readonly, 1), DWARFS_OPT("cache_image", cache_image, 1), @@ -101,6 +113,13 @@ constexpr struct ::fuse_opt dwarfs_opts[] = { DWARFS_OPT("no_cache_files", cache_files, 0), FUSE_OPT_END}; +std::unordered_map const + cache_tidy_strategy_map{ + {"none", cache_tidy_strategy::NONE}, + {"time", cache_tidy_strategy::EXPIRY_TIME}, + {"swap", cache_tidy_strategy::BLOCK_SWAPPED_OUT}, + }; + #define dUSERDATA \ auto userdata = reinterpret_cast(fuse_req_userdata(req)) @@ -465,6 +484,9 @@ void usage(const char* progname) { << " -o (no_)cache_image (don't) keep image in kernel cache\n" << " -o (no_)cache_files (don't) keep files in kernel cache\n" << " -o debuglevel=NAME error, 
warn, (info), debug, trace\n" + << " -o tidy_strategy=NAME (none)|time|swap\n" + << " -o tidy_interval=TIME interval for cache tidying (5m)\n" + << " -o tidy_max_age=TIME tidy blocks after this time (10m)\n" << std::endl; #if FUSE_USE_VERSION >= 30 @@ -624,6 +646,9 @@ void load_filesystem(dwarfs_userdata& userdata) { fsopts.block_cache.decompress_ratio = opts.decompress_ratio; fsopts.block_cache.mm_release = !opts.cache_image; fsopts.block_cache.init_workers = false; + fsopts.block_cache.tidy_strategy = opts.block_cache_tidy_strategy; + fsopts.block_cache.tidy_interval = opts.block_cache_tidy_interval; + fsopts.block_cache.tidy_expiry_time = opts.block_cache_tidy_max_age; fsopts.metadata.enable_nlink = bool(opts.enable_nlink); fsopts.metadata.readonly = bool(opts.readonly); @@ -700,6 +725,27 @@ int run_dwarfs(int argc, char** argv) { opts.decompress_ratio = opts.decompress_ratio_str ? folly::to(opts.decompress_ratio_str) : 0.8; + + if (opts.cache_tidy_strategy_str) { + if (auto it = cache_tidy_strategy_map.find(opts.cache_tidy_strategy_str); + it != cache_tidy_strategy_map.end()) { + opts.block_cache_tidy_strategy = it->second; + } else { + std::cerr << "error: no such cache tidy strategy: " + << opts.cache_tidy_strategy_str << std::endl; + return 1; + } + + if (opts.cache_tidy_interval_str) { + opts.block_cache_tidy_interval = + parse_time_with_unit(opts.cache_tidy_interval_str); + } + + if (opts.cache_tidy_max_age_str) { + opts.block_cache_tidy_max_age = + parse_time_with_unit(opts.cache_tidy_max_age_str); + } + } } catch (runtime_error const& e) { std::cerr << "error: " << e.what() << std::endl; return 1; diff --git a/src/dwarfs/block_cache.cpp b/src/dwarfs/block_cache.cpp index 54a80d90..b209dbef 100644 --- a/src/dwarfs/block_cache.cpp +++ b/src/dwarfs/block_cache.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -33,10 +35,14 @@ #include #include +#include +#include + #include #include #include +#include 
#include "dwarfs/block_cache.h" #include "dwarfs/fs_section.h" @@ -98,6 +104,22 @@ class cached_block { : range_end_.load(); } + void touch() { last_access_ = std::chrono::steady_clock::now(); } // records "now" as the time of the last access + + bool last_used_before(std::chrono::steady_clock::time_point tp) const { // true if the last recorded access is older than tp + return last_access_ < tp; + } + + bool is_swapped_out(std::vector& tmp) const { // true if at least one page of data_ is not resident in memory; tmp is caller-provided scratch space + auto page_size = ::sysconf(_SC_PAGESIZE); + tmp.resize((data_.size() + page_size - 1) / page_size); // one status byte per page, rounded up, so no entry is left unwritten by mincore + if (::mincore(const_cast(data_.data()), data_.size(), + tmp.data()) == 0) { + return std::any_of(tmp.begin(), tmp.end(), [](auto i) { return (i & 1) == 0; }); // mincore sets bit 0 for resident pages; a clear bit means paged out + } + return false; // mincore failed: residency unknown, so report "not swapped out" + } + private: void try_release() { if (release_) { @@ -114,6 +136,7 @@ class cached_block { fs_section section_; LOG_PROXY_DECL(debug_logger_policy); bool const release_; + std::chrono::steady_clock::time_point last_access_; }; class block_request { @@ -214,11 +237,25 @@ class block_cache_ final : public block_cache::impl { : std::thread::hardware_concurrency(), static_cast(1))); } + + if (options.tidy_strategy != cache_tidy_strategy::NONE) { + tidy_running_ = true; + tidy_thread_ = std::thread(&block_cache_::tidy_thread, this); + } } ~block_cache_() noexcept override { LOG_DEBUG << "stopping cache workers"; + if (tidy_running_) { + { + std::lock_guard lock(mx_); + tidy_running_ = false; + } + tidy_cond_.notify_all(); + tidy_thread_.join(); + } + if (wg_) { wg_.stop(); } @@ -250,6 +287,7 @@ class block_cache_ final : public block_cache::impl { // number of evicted blocks outgrow the number of created blocks.
LOG_INFO << "blocks created: " << blocks_created_.load(); LOG_INFO << "blocks evicted: " << blocks_evicted_.load(); + LOG_INFO << "blocks tidied: " << blocks_tidied_.load(); LOG_INFO << "request sets merged: " << sets_merged_.load(); LOG_INFO << "total requests: " << range_requests_.load(); LOG_INFO << "active hits (fast): " << active_hits_fast_.load(); @@ -544,6 +582,10 @@ class block_cache_ final : public block_cache::impl { } } + if (options_.tidy_strategy == cache_tidy_strategy::EXPIRY_TIME) { + block->touch(); + } + // Finally, put the block into the cache; it might already be // in there, in which case we just promote it to the front of // the LRU queue. @@ -553,6 +595,51 @@ class block_cache_ final : public block_cache::impl { } } + template + void tidy_collect(Pred const& predicate) { // erases every entry of cache_ whose block satisfies predicate and counts it in blocks_tidied_ + auto it = cache_.begin(); + + while (it != cache_.end()) { + if (predicate(*it->second)) { + it = cache_.erase(it); + ++blocks_tidied_; + } else { + ++it; + } + } + } + + void tidy_thread() { // entry point of the cache-tidy thread; loops until tidy_running_ becomes false + folly::setThreadName("cache-tidy"); + + std::unique_lock lock(mx_); // wait_for() below releases mx_ while the thread is blocked + + while (tidy_running_) { + if (tidy_cond_.wait_for(lock, options_.tidy_interval) == + std::cv_status::timeout) { // tidy only after a full interval elapsed; a notification just re-checks tidy_running_ + switch (options_.tidy_strategy) { + case cache_tidy_strategy::EXPIRY_TIME: + tidy_collect( + [tp = std::chrono::steady_clock::now() - + options_.tidy_expiry_time](cached_block const& blk) { + return blk.last_used_before(tp); + }); + break; + + case cache_tidy_strategy::BLOCK_SWAPPED_OUT: { + std::vector tmp; + tidy_collect([&tmp](cached_block const& blk) { + return blk.is_swapped_out(tmp); + }); + } break; + + default: + break; + } + } + } + } + using lru_type = folly::EvictingCacheMap>; @@ -561,6 +648,9 @@ class block_cache_ final : public block_cache::impl { mutable folly::F14FastMap>> active_; + std::thread tidy_thread_; + std::condition_variable tidy_cond_; + bool tidy_running_{false}; mutable std::mutex mx_dec_; mutable folly::F14FastMap> @@ -577,6 +667,7 @@ class block_cache_ final : public
block_cache::impl { mutable std::atomic partially_decompressed_{0}; mutable std::atomic total_block_bytes_{0}; mutable std::atomic total_decompressed_bytes_{0}; + mutable std::atomic blocks_tidied_{0}; mutable std::shared_mutex mx_wg_; mutable worker_group wg_;