mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 08:49:29 -04:00
Support for cache tidying
This commit is contained in:
parent
923eda7a71
commit
656b7a7780
@ -110,6 +110,37 @@ options:
|
||||
mainly meant for debugging and the `debug` and `trace` levels
|
||||
in particular will slow down the driver.
|
||||
|
||||
* `-o tidy_strategy=`*name*:
|
||||
Use one of the following strategies to tidy the block cache:
|
||||
|
||||
- `none`:
|
||||
This is the default strategy that never tidies the cache.
|
||||
Blocks will only be evicted from the cache if the cache is
|
||||
full and a more recently used block is added to the cache.
|
||||
|
||||
- `time`:
|
||||
Time based tidying strategy. Every `tidy_interval`, the block
|
||||
cache is traversed and all blocks that have not been accessed
|
||||
for more than `tidy_max_age` will be removed.
|
||||
|
||||
- `swap`:
|
||||
Swap based tidying strategy. Every `tidy_interval`, the block
|
||||
cache is traversed and all blocks that have been fully or
|
||||
partially swapped out by the kernel will be removed.
|
||||
|
||||
* `-o tidy_interval=`*time*:
|
||||
Used only if `tidy_strategy` is not `none`. This is the interval
|
||||
at which the cache tidying thread wakes up to look for blocks
|
||||
that can be removed from the cache. This must be an integer value.
|
||||
Suffixes `ms`, `s`, `m`, `h` are supported. If no suffix is given,
|
||||
the value will be assumed to be in seconds.
|
||||
|
||||
* `-o tidy_max_age=`*time*:
|
||||
Used only if `tidy_strategy` is `time`. A block will be removed
|
||||
from the cache if it hasn't been used for this time span. This must
|
||||
be an integer value. Suffixes `ms`, `s`, `m`, `h` are supported.
|
||||
If no suffix is given, the value will be assumed to be in seconds.
|
||||
|
||||
There are two particular FUSE options that you'll likely need at some
|
||||
point, e.g. when trying to set up an `overlayfs` mount on top of
|
||||
a DwarFS image:
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
#include <optional>
|
||||
@ -31,12 +32,17 @@ namespace dwarfs {
|
||||
|
||||
enum class mlock_mode { NONE, TRY, MUST };

// Strategy used to proactively remove ("tidy") blocks from the block cache.
//
//   NONE              - never tidy; blocks only leave the cache via regular
//                       eviction.
//   EXPIRY_TIME       - remove blocks that have not been accessed for longer
//                       than `block_cache_options::tidy_expiry_time`.
//   BLOCK_SWAPPED_OUT - remove blocks that the kernel has fully or partially
//                       swapped out.
enum class cache_tidy_strategy { NONE, EXPIRY_TIME, BLOCK_SWAPPED_OUT };

// Configuration of the block cache.
struct block_cache_options {
  size_t max_bytes{0};
  size_t num_workers{0};
  double decompress_ratio{1.0};
  bool mm_release{true};
  bool init_workers{true};

  // Which tidying strategy to run; tidying is disabled by default.
  cache_tidy_strategy tidy_strategy{cache_tidy_strategy::NONE};

  // Interval at which the tidy thread wakes up to scan the cache. Only used
  // when `tidy_strategy` is not NONE. Explicitly initialized because a
  // default-initialized std::chrono::duration holds an indeterminate value.
  std::chrono::milliseconds tidy_interval{std::chrono::minutes(5)};

  // Maximum time a block may remain unused before it is tidied. Only used
  // when `tidy_strategy` is EXPIRY_TIME. Explicitly initialized for the same
  // reason as `tidy_interval`.
  std::chrono::milliseconds tidy_expiry_time{std::chrono::minutes(10)};
};
|
||||
|
||||
struct metadata_options {
|
||||
|
@ -20,13 +20,16 @@
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
|
||||
#include <folly/Conv.h>
|
||||
#include <folly/experimental/symbolizer/SignalHandler.h>
|
||||
@ -60,6 +63,9 @@ struct options {
|
||||
const char* mlock_str{nullptr}; // TODO: const?? -> use string?
|
||||
const char* decompress_ratio_str{nullptr}; // TODO: const?? -> use string?
|
||||
const char* image_offset_str{nullptr}; // TODO: const?? -> use string?
|
||||
const char* cache_tidy_strategy_str{nullptr}; // TODO: const?? -> use string?
|
||||
const char* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string?
|
||||
const char* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string?
|
||||
int enable_nlink{0};
|
||||
int readonly{0};
|
||||
int cache_image{0};
|
||||
@ -69,6 +75,9 @@ struct options {
|
||||
mlock_mode lock_mode{mlock_mode::NONE};
|
||||
double decompress_ratio{0.0};
|
||||
logger::level_type debuglevel{logger::level_type::ERROR};
|
||||
cache_tidy_strategy block_cache_tidy_strategy{cache_tidy_strategy::NONE};
|
||||
std::chrono::milliseconds block_cache_tidy_interval{std::chrono::minutes(5)};
|
||||
std::chrono::milliseconds block_cache_tidy_max_age{std::chrono::minutes{10}};
|
||||
};
|
||||
|
||||
struct dwarfs_userdata {
|
||||
@ -93,6 +102,9 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
|
||||
DWARFS_OPT("mlock=%s", mlock_str, 0),
|
||||
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
|
||||
DWARFS_OPT("offset=%s", image_offset_str, 0),
|
||||
DWARFS_OPT("tidy_strategy=%s", cache_tidy_strategy_str, 0),
|
||||
DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0),
|
||||
DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0),
|
||||
DWARFS_OPT("enable_nlink", enable_nlink, 1),
|
||||
DWARFS_OPT("readonly", readonly, 1),
|
||||
DWARFS_OPT("cache_image", cache_image, 1),
|
||||
@ -101,6 +113,13 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
|
||||
DWARFS_OPT("no_cache_files", cache_files, 0),
|
||||
FUSE_OPT_END};
|
||||
|
||||
std::unordered_map<std::string_view, cache_tidy_strategy> const
|
||||
cache_tidy_strategy_map{
|
||||
{"none", cache_tidy_strategy::NONE},
|
||||
{"time", cache_tidy_strategy::EXPIRY_TIME},
|
||||
{"swap", cache_tidy_strategy::BLOCK_SWAPPED_OUT},
|
||||
};
|
||||
|
||||
// Declares a local `userdata` pointer to the dwarfs_userdata object attached
// to the FUSE request named `req` in the enclosing scope.
// fuse_req_userdata() hands back a `void*`, so a static_cast is sufficient.
#define dUSERDATA                                                              \
  auto userdata = static_cast<dwarfs_userdata*>(fuse_req_userdata(req))
|
||||
|
||||
@ -465,6 +484,9 @@ void usage(const char* progname) {
|
||||
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
|
||||
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
|
||||
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n"
|
||||
<< " -o tidy_strategy=NAME (none)|time|swap\n"
|
||||
<< " -o tidy_interval=TIME interval for cache tidying (5m)\n"
|
||||
<< " -o tidy_max_age=TIME tidy blocks after this time (10m)\n"
|
||||
<< std::endl;
|
||||
|
||||
#if FUSE_USE_VERSION >= 30
|
||||
@ -624,6 +646,9 @@ void load_filesystem(dwarfs_userdata& userdata) {
|
||||
fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
|
||||
fsopts.block_cache.mm_release = !opts.cache_image;
|
||||
fsopts.block_cache.init_workers = false;
|
||||
fsopts.block_cache.tidy_strategy = opts.block_cache_tidy_strategy;
|
||||
fsopts.block_cache.tidy_interval = opts.block_cache_tidy_interval;
|
||||
fsopts.block_cache.tidy_expiry_time = opts.block_cache_tidy_max_age;
|
||||
fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
|
||||
fsopts.metadata.readonly = bool(opts.readonly);
|
||||
|
||||
@ -700,6 +725,27 @@ int run_dwarfs(int argc, char** argv) {
|
||||
opts.decompress_ratio = opts.decompress_ratio_str
|
||||
? folly::to<double>(opts.decompress_ratio_str)
|
||||
: 0.8;
|
||||
|
||||
if (opts.cache_tidy_strategy_str) {
|
||||
if (auto it = cache_tidy_strategy_map.find(opts.cache_tidy_strategy_str);
|
||||
it != cache_tidy_strategy_map.end()) {
|
||||
opts.block_cache_tidy_strategy = it->second;
|
||||
} else {
|
||||
std::cerr << "error: no such cache tidy strategy: "
|
||||
<< opts.cache_tidy_strategy_str << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (opts.cache_tidy_interval_str) {
|
||||
opts.block_cache_tidy_interval =
|
||||
parse_time_with_unit(opts.cache_tidy_interval_str);
|
||||
}
|
||||
|
||||
if (opts.cache_tidy_max_age_str) {
|
||||
opts.block_cache_tidy_max_age =
|
||||
parse_time_with_unit(opts.cache_tidy_max_age_str);
|
||||
}
|
||||
}
|
||||
} catch (runtime_error const& e) {
|
||||
std::cerr << "error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <deque>
|
||||
#include <exception>
|
||||
#include <future>
|
||||
@ -33,10 +35,14 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
#include <folly/container/F14Map.h>
|
||||
#include <folly/system/ThreadName.h>
|
||||
|
||||
#include "dwarfs/block_cache.h"
|
||||
#include "dwarfs/fs_section.h"
|
||||
@ -98,6 +104,22 @@ class cached_block {
|
||||
: range_end_.load();
|
||||
}
|
||||
|
||||
// Records "now" (steady clock) as the time of the most recent access.
void touch() {
  last_access_ = std::chrono::steady_clock::now();
}
|
||||
|
||||
bool last_used_before(std::chrono::steady_clock::time_point tp) const {
|
||||
return last_access_ < tp;
|
||||
}
|
||||
|
||||
// Reports whether at least one memory page backing this block's data is
// currently not resident in RAM.
//
// `tmp` is scratch space for the per-page residency vector written by
// mincore(); it is resized here so a single buffer can be reused across
// multiple blocks.
//
// Returns false if the block is empty or if residency cannot be determined
// (sysconf() or mincore() failure).
bool is_swapped_out(std::vector<uint8_t>& tmp) const {
  auto const sys_page_size = ::sysconf(_SC_PAGESIZE);

  if (sys_page_size <= 0 || data_.size() == 0) {
    return false;
  }

  auto const page_size = static_cast<size_t>(sys_page_size);

  // mincore() rejects start addresses that are not page-aligned, so extend
  // the queried range backwards to the start of the first page.
  auto const addr = reinterpret_cast<uintptr_t>(data_.data());
  auto const misalign = static_cast<size_t>(addr % page_size);
  auto const length = data_.size() + misalign;

  // One status byte per page, rounded up. An oversized vector would leave
  // trailing entries that mincore() never writes.
  tmp.resize((length + page_size - 1) / page_size);

  if (::mincore(reinterpret_cast<void*>(addr - misalign), length,
                tmp.data()) == 0) {
    // The least significant bit is set for pages that are resident; a
    // cleared bit therefore means the page is not in memory.
    return std::any_of(tmp.begin(), tmp.end(),
                       [](auto flags) { return (flags & 1) == 0; });
  }

  return false;
}
|
||||
|
||||
private:
|
||||
void try_release() {
|
||||
if (release_) {
|
||||
@ -114,6 +136,7 @@ class cached_block {
|
||||
fs_section section_;
|
||||
LOG_PROXY_DECL(debug_logger_policy);
|
||||
bool const release_;
|
||||
std::chrono::steady_clock::time_point last_access_;
|
||||
};
|
||||
|
||||
class block_request {
|
||||
@ -214,11 +237,25 @@ class block_cache_ final : public block_cache::impl {
|
||||
: std::thread::hardware_concurrency(),
|
||||
static_cast<size_t>(1)));
|
||||
}
|
||||
|
||||
if (options.tidy_strategy != cache_tidy_strategy::NONE) {
|
||||
tidy_running_ = true;
|
||||
tidy_thread_ = std::thread(&block_cache_::tidy_thread, this);
|
||||
}
|
||||
}
|
||||
|
||||
~block_cache_() noexcept override {
|
||||
LOG_DEBUG << "stopping cache workers";
|
||||
|
||||
if (tidy_running_) {
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
tidy_running_ = false;
|
||||
}
|
||||
tidy_cond_.notify_all();
|
||||
tidy_thread_.join();
|
||||
}
|
||||
|
||||
if (wg_) {
|
||||
wg_.stop();
|
||||
}
|
||||
@ -250,6 +287,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
// number of evicted blocks outgrow the number of created blocks.
|
||||
LOG_INFO << "blocks created: " << blocks_created_.load();
|
||||
LOG_INFO << "blocks evicted: " << blocks_evicted_.load();
|
||||
LOG_INFO << "blocks tidied: " << blocks_tidied_.load();
|
||||
LOG_INFO << "request sets merged: " << sets_merged_.load();
|
||||
LOG_INFO << "total requests: " << range_requests_.load();
|
||||
LOG_INFO << "active hits (fast): " << active_hits_fast_.load();
|
||||
@ -544,6 +582,10 @@ class block_cache_ final : public block_cache::impl {
|
||||
}
|
||||
}
|
||||
|
||||
if (options_.tidy_strategy == cache_tidy_strategy::EXPIRY_TIME) {
|
||||
block->touch();
|
||||
}
|
||||
|
||||
// Finally, put the block into the cache; it might already be
|
||||
// in there, in which case we just promote it to the front of
|
||||
// the LRU queue.
|
||||
@ -553,6 +595,51 @@ class block_cache_ final : public block_cache::impl {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Pred>
|
||||
void tidy_collect(Pred const& predicate) {
|
||||
auto it = cache_.begin();
|
||||
|
||||
while (it != cache_.end()) {
|
||||
if (predicate(*it->second)) {
|
||||
it = cache_.erase(it);
|
||||
++blocks_tidied_;
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void tidy_thread() {
|
||||
folly::setThreadName("cache-tidy");
|
||||
|
||||
std::unique_lock lock(mx_);
|
||||
|
||||
while (tidy_running_) {
|
||||
if (tidy_cond_.wait_for(lock, options_.tidy_interval) ==
|
||||
std::cv_status::timeout) {
|
||||
switch (options_.tidy_strategy) {
|
||||
case cache_tidy_strategy::EXPIRY_TIME:
|
||||
tidy_collect(
|
||||
[tp = std::chrono::steady_clock::now() -
|
||||
options_.tidy_expiry_time](cached_block const& blk) {
|
||||
return blk.last_used_before(tp);
|
||||
});
|
||||
break;
|
||||
|
||||
case cache_tidy_strategy::BLOCK_SWAPPED_OUT: {
|
||||
std::vector<uint8_t> tmp;
|
||||
tidy_collect([&tmp](cached_block const& blk) {
|
||||
return blk.is_swapped_out(tmp);
|
||||
});
|
||||
} break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using lru_type =
|
||||
folly::EvictingCacheMap<size_t, std::shared_ptr<cached_block>>;
|
||||
|
||||
@ -561,6 +648,9 @@ class block_cache_ final : public block_cache::impl {
|
||||
mutable folly::F14FastMap<size_t,
|
||||
std::deque<std::weak_ptr<block_request_set>>>
|
||||
active_;
|
||||
std::thread tidy_thread_;
|
||||
std::condition_variable tidy_cond_;
|
||||
bool tidy_running_{false};
|
||||
|
||||
mutable std::mutex mx_dec_;
|
||||
mutable folly::F14FastMap<size_t, std::weak_ptr<block_request_set>>
|
||||
@ -577,6 +667,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
mutable std::atomic<size_t> partially_decompressed_{0};
|
||||
mutable std::atomic<size_t> total_block_bytes_{0};
|
||||
mutable std::atomic<size_t> total_decompressed_bytes_{0};
|
||||
mutable std::atomic<size_t> blocks_tidied_{0};
|
||||
|
||||
mutable std::shared_mutex mx_wg_;
|
||||
mutable worker_group wg_;
|
||||
|
Loading…
x
Reference in New Issue
Block a user