feat(dwarfs): add analysis_file option for profiling

This commit is contained in:
Marcus Holland-Moritz 2025-04-04 22:16:50 +02:00
parent 09068bfada
commit 5717afa562
2 changed files with 93 additions and 0 deletions

View File

@ -151,6 +151,13 @@ options:
in particular will slow down the driver. This defaults to `info`
in foreground mode (`-f`, `-d`) and to `warn` in background mode.
- `-o analysis_file=`*file*:
Record the path of every file that is opened while the file system
image is mounted and write these paths to *file*, one per line. The
resulting list can be used as the set of "hot" files for the `hotness`
categorizer in `mkdwarfs`. See the `mkdwarfs` documentation for details
on producing images optimized for fast access times after mounting.
- `-o tidy_strategy=none`|`time`|`swap`:
Use one of the following strategies to tidy the block cache.
`none` is the default strategy that never tidies the cache. Blocks

View File

@ -27,12 +27,15 @@
*/
#include <filesystem>
#include <fstream>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <cstddef>
@ -180,6 +183,7 @@ struct options {
char const* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string?
char const* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string?
char const* seq_detector_thresh_str{nullptr}; // TODO: const?? -> use string?
char const* analysis_file_str{nullptr}; // TODO: const?? -> use string?
#ifndef _WIN32
char const* uid_str{nullptr}; // TODO: const?? -> use string?
char const* gid_str{nullptr}; // TODO: const?? -> use string?
@ -217,6 +221,68 @@ struct options {
static_assert(std::is_standard_layout_v<options>);
class dwarfs_analysis {
public:
explicit dwarfs_analysis(std::filesystem::path const& path)
: path_{path} {}
~dwarfs_analysis() {
if (!path_.empty()) {
write_analysis();
}
}
void write_analysis() {
std::cerr << "Writing analysis to " << path_ << '\n';
std::ofstream ofs{path_};
if (!ofs) {
throw std::system_error{errno, std::system_category()};
}
std::unordered_set<fuse_ino_t> opened_inodes;
std::vector<std::string> opened;
{
std::lock_guard lock{mx_};
std::cerr << "Opened inodes: " << open_.size() << '\n';
std::cerr << "Lookup inodes: " << lookup_.size() << '\n';
for (auto ino : open_) {
if (opened_inodes.insert(ino).second) {
opened.push_back(lookup_.at(ino));
}
}
}
for (auto const& path : opened) {
ofs << path << '\n';
}
path_.clear();
}
void add_lookup(fuse_ino_t ino, std::string const& path) {
std::lock_guard lock{mx_};
std::cerr << "Lookup: " << ino << " -> " << path << '\n';
lookup_.try_emplace(ino, path);
}
void add_open(fuse_ino_t ino) {
std::lock_guard lock{mx_};
std::cerr << "Open: " << ino << '\n';
open_.push_back(ino);
}
private:
std::filesystem::path path_;
std::mutex mx_;
std::unordered_map<fuse_ino_t, std::string> lookup_;
std::vector<fuse_ino_t> open_;
};
struct dwarfs_userdata {
explicit dwarfs_userdata(iolayer const& iol)
: lgr{iol.term, iol.err}
@ -230,6 +296,7 @@ struct dwarfs_userdata {
stream_logger lgr;
reader::filesystem_v2 fs;
iolayer const& iol;
std::optional<dwarfs_analysis> analysis;
std::shared_ptr<performance_monitor> perfmon;
PERFMON_EXT_PROXY_DECL
PERFMON_EXT_TIMER_DECL(op_init)
@ -268,6 +335,7 @@ constexpr std::array dwarfs_opts{
DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0),
DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0),
DWARFS_OPT("seq_detector=%s", seq_detector_thresh_str, 0),
DWARFS_OPT("analysis_file=%s", analysis_file_str, 0),
DWARFS_OPT("preload_category=%s", preload_category_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1),
DWARFS_OPT("readonly", readonly, 1),
@ -436,6 +504,13 @@ void op_lookup(fuse_req_t req, fuse_ino_t parent, char const* name) {
return ENOENT;
}
// When analysis is enabled, remember the path of every regular file that
// is looked up, keyed by its inode number. Inodes that are not regular
// files are not recorded.
if (userdata.analysis) {
auto iv = dev->inode();
if (iv.is_regular_file()) {
userdata.analysis->add_lookup(iv.inode_num(), dev->path());
}
}
std::error_code ec;
auto stbuf = userdata.fs.getattr(dev->inode(), ec);
@ -604,6 +679,10 @@ int op_open_common(LogProxy& log_, dwarfs_userdata& userdata,
return EACCES;
}
// When analysis is enabled, record that this inode was opened.
if (userdata.analysis) {
userdata.analysis->add_open(iv->inode_num());
}
fi->fh = iv->inode_num();
fi->direct_io = !userdata.opts.cache_files;
fi->keep_cache = userdata.opts.cache_files;
@ -1209,6 +1288,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) {
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
<< " -o debuglevel=NAME " << logger::all_level_names() << "\n"
<< " -o analysis_file=FILE write accessed files to this file\n"
<< " -o tidy_strategy=NAME (none)|time|swap\n"
<< " -o tidy_interval=TIME interval for cache tidying (5m)\n"
<< " -o tidy_max_age=TIME tidy blocks after this time (10m)\n"
@ -1483,6 +1563,12 @@ void load_filesystem(dwarfs_userdata& userdata) {
PERFMON_EXT_TIMER_SETUP(userdata, op_getxattr, "inode")
PERFMON_EXT_TIMER_SETUP(userdata, op_listxattr, "inode")
// Enable open-file analysis when the `analysis_file` option was given.
// The option string is interpreted as UTF-8 when building the path.
// NOTE(review): the path is passed through `canonical()` although the
// output file may not exist yet -- confirm that the os layer's
// `canonical()` accepts non-existent paths.
if (opts.analysis_file_str) {
auto file = userdata.iol.os->canonical(std::filesystem::path(
reinterpret_cast<char8_t const*>(opts.analysis_file_str)));
userdata.analysis.emplace(file);
}
auto fsimage = userdata.iol.os->canonical(std::filesystem::path(
reinterpret_cast<char8_t const*>(opts.fsimage->data())));