From 5717afa56211f3223167610db5d26b9451aea882 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Fri, 4 Apr 2025 22:16:50 +0200 Subject: [PATCH] feat(dwarfs): add `analysis_file` option for profiling --- doc/dwarfs.md | 7 ++++ tools/src/dwarfs_main.cpp | 86 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/doc/dwarfs.md b/doc/dwarfs.md index 9aab1e36..55ba1d65 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -151,6 +151,13 @@ options: in particular will slow down the driver. This defaults to `info` in foreground mode (`-f`, `-d`) and to `warn` in background mode. +- `-o analysis_file=`*file*: + Write the paths of all files that were opened while the file system + image was mounted to this file. This can be used as a set of "hot" + files for the `hotness` categorizer in `mkdwarfs`. See the `mkdwarfs` + documentation for details on producing images optimized for fast + access times after mounting. + - `-o tidy_strategy=none`|`time`|`swap`: Use one of the following strategies to tidy the block cache. `none` is the default strategy that never tidies the cache. Blocks diff --git a/tools/src/dwarfs_main.cpp b/tools/src/dwarfs_main.cpp index 7a8c834c..bd482284 100644 --- a/tools/src/dwarfs_main.cpp +++ b/tools/src/dwarfs_main.cpp @@ -27,12 +27,15 @@ */ #include +#include #include +#include #include #include #include #include #include +#include #include #include @@ -180,6 +183,7 @@ struct options { char const* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string? char const* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string? char const* seq_detector_thresh_str{nullptr}; // TODO: const?? -> use string? + char const* analysis_file_str{nullptr}; // TODO: const?? -> use string? #ifndef _WIN32 char const* uid_str{nullptr}; // TODO: const?? -> use string? char const* gid_str{nullptr}; // TODO: const?? -> use string? 
@@ -217,6 +221,95 @@ struct options {
 
 static_assert(std::is_standard_layout_v);
 
+// Records which inodes are opened while the image is mounted and writes
+// their paths, one per line and in first-open order, to a file.
+//
+// `add_lookup()` and `add_open()` may be called concurrently; the recorded
+// inode data (`lookup_`, `open_`) is guarded by `mx_`.
+class dwarfs_analysis {
+ public:
+  explicit dwarfs_analysis(std::filesystem::path const& path)
+      : path_{path} {}
+
+  // Writes the analysis unless `write_analysis()` already completed.
+  // Destructors are implicitly `noexcept`, so a failure to write must be
+  // reported here instead of escaping and terminating the process.
+  ~dwarfs_analysis() {
+    if (!path_.empty()) {
+      try {
+        write_analysis();
+      } catch (std::exception const& e) {
+        std::cerr << "Failed to write analysis to " << path_ << ": "
+                  << e.what() << '\n';
+      } catch (...) {
+        std::cerr << "Failed to write analysis to " << path_ << '\n';
+      }
+    }
+  }
+
+  // Writes the path of every opened inode once, in the order the inodes
+  // were first opened. Inodes for which no lookup was recorded are skipped.
+  // Throws `std::system_error` if the output file cannot be opened.
+  void write_analysis() {
+    std::cerr << "Writing analysis to " << path_ << '\n';
+
+    std::ofstream ofs{path_};
+
+    if (!ofs) {
+      throw std::system_error{errno, std::system_category()};
+    }
+
+    std::unordered_set<fuse_ino_t> opened_inodes;
+    std::vector<std::string> opened;
+
+    {
+      std::lock_guard lock{mx_};
+
+      std::cerr << "Opened inodes: " << open_.size() << '\n';
+      std::cerr << "Lookup inodes: " << lookup_.size() << '\n';
+
+      for (auto ino : open_) {
+        if (!opened_inodes.insert(ino).second) {
+          continue;
+        }
+
+        // `at()` would throw `std::out_of_range` for an inode that was
+        // opened without a recorded lookup and abort the whole report.
+        if (auto it = lookup_.find(ino); it != lookup_.end()) {
+          opened.push_back(it->second);
+        }
+      }
+    }
+
+    for (auto const& path : opened) {
+      ofs << path << '\n';
+    }
+
+    // An empty path tells the destructor that nothing is left to write.
+    path_.clear();
+  }
+
+  // Remembers the path of `ino`; the first path seen for an inode wins.
+  void add_lookup(fuse_ino_t ino, std::string const& path) {
+    std::lock_guard lock{mx_};
+    std::cerr << "Lookup: " << ino << " -> " << path << '\n';
+    lookup_.try_emplace(ino, path);
+  }
+
+  // Appends `ino` to the list of opened inodes (duplicates are allowed).
+  void add_open(fuse_ino_t ino) {
+    std::lock_guard lock{mx_};
+    std::cerr << "Open: " << ino << '\n';
+    open_.push_back(ino);
+  }
+
+ private:
+  std::filesystem::path path_;
+  std::mutex mx_;
+  std::unordered_map<fuse_ino_t, std::string> lookup_;
+  std::vector<fuse_ino_t> open_;
+};
+
 struct dwarfs_userdata {
   explicit dwarfs_userdata(iolayer const& iol)
       : lgr{iol.term, iol.err}
@@ -230,6 +323,7 @@ struct dwarfs_userdata {
   stream_logger lgr;
   reader::filesystem_v2 fs;
   iolayer const& iol;
+  std::optional analysis;
   std::shared_ptr perfmon;
   PERFMON_EXT_PROXY_DECL
   PERFMON_EXT_TIMER_DECL(op_init)
@@ -268,6 +362,7 @@ constexpr std::array dwarfs_opts{
     DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0),
     DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0),
     DWARFS_OPT("seq_detector=%s", seq_detector_thresh_str, 0),
+    DWARFS_OPT("analysis_file=%s", analysis_file_str, 0),
     DWARFS_OPT("preload_category=%s", preload_category_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1), DWARFS_OPT("readonly", readonly, 1), @@ -436,6 +504,13 @@ void op_lookup(fuse_req_t req, fuse_ino_t parent, char const* name) { return ENOENT; } + if (userdata.analysis) { + auto iv = dev->inode(); + if (iv.is_regular_file()) { + userdata.analysis->add_lookup(iv.inode_num(), dev->path()); + } + } + std::error_code ec; auto stbuf = userdata.fs.getattr(dev->inode(), ec); @@ -604,6 +679,10 @@ int op_open_common(LogProxy& log_, dwarfs_userdata& userdata, return EACCES; } + if (userdata.analysis) { + userdata.analysis->add_open(iv->inode_num()); + } + fi->fh = iv->inode_num(); fi->direct_io = !userdata.opts.cache_files; fi->keep_cache = userdata.opts.cache_files; @@ -1209,6 +1288,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) { << " -o (no_)cache_image (don't) keep image in kernel cache\n" << " -o (no_)cache_files (don't) keep files in kernel cache\n" << " -o debuglevel=NAME " << logger::all_level_names() << "\n" + << " -o analysis_file=FILE write accessed files to this file\n" << " -o tidy_strategy=NAME (none)|time|swap\n" << " -o tidy_interval=TIME interval for cache tidying (5m)\n" << " -o tidy_max_age=TIME tidy blocks after this time (10m)\n" @@ -1483,6 +1563,12 @@ void load_filesystem(dwarfs_userdata& userdata) { PERFMON_EXT_TIMER_SETUP(userdata, op_getxattr, "inode") PERFMON_EXT_TIMER_SETUP(userdata, op_listxattr, "inode") + if (opts.analysis_file_str) { + auto file = userdata.iol.os->canonical(std::filesystem::path( + reinterpret_cast(opts.analysis_file_str))); + userdata.analysis.emplace(file); + } + auto fsimage = userdata.iol.os->canonical(std::filesystem::path( reinterpret_cast(opts.fsimage->data())));