From 80f84c6f44a150e34a2e9a19056f72e6fc1e5494 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 27 Apr 2024 15:22:35 +0200 Subject: [PATCH] feat(perfmon): add tracing support --- include/dwarfs/performance_monitor.h | 68 +++++++--- src/dwarfs/performance_monitor.cpp | 191 +++++++++++++++++++++++++-- 2 files changed, 232 insertions(+), 27 deletions(-) diff --git a/include/dwarfs/performance_monitor.h b/include/dwarfs/performance_monitor.h index 11f0c903..ac953195 100644 --- a/include/dwarfs/performance_monitor.h +++ b/include/dwarfs/performance_monitor.h @@ -22,29 +22,43 @@ #pragma once #include +#include +#include #include #include +#include +#include #include #include +#include + namespace dwarfs { +class file_access; + class performance_monitor { public: using timer_id = size_t; using time_type = uint64_t; + static inline constexpr size_t kNumInlineContext{3}; static std::unique_ptr - create(std::unordered_set const& enabled_namespaces); + create(std::unordered_set const& enabled_namespaces, + std::shared_ptr fa = nullptr, + std::optional trace_file = std::nullopt); virtual ~performance_monitor() = default; virtual time_type now() const = 0; - virtual void add_sample(timer_id id, time_type start) const = 0; + virtual void add_sample(timer_id id, time_type start, + std::span context) const = 0; virtual void summarize(std::ostream& os) const = 0; virtual bool is_enabled(std::string const& ns) const = 0; virtual timer_id - setup_timer(std::string const& ns, std::string const& name) const = 0; + setup_timer(std::string const& ns, std::string const& name, + std::initializer_list context) const = 0; + virtual bool wants_context() const = 0; }; class performance_monitor_proxy { @@ -57,11 +71,22 @@ class performance_monitor_proxy { performance_monitor::timer_id id) : mon_{mon} , id_{id} - , start_{mon_->now()} {} + , start_{mon_->now()} { + if (mon_->wants_context()) { + context_.emplace(); + } + } + + void set_context(std::initializer_list ctx) { + if (context_) { + context_->assign(ctx); + } + } ~section_timer() { if (mon_) { - mon_->add_sample(id_, start_); + mon_->add_sample(id_, start_, + context_ ? *context_ : std::span{}); } } @@ -69,6 +94,9 @@ class performance_monitor_proxy { performance_monitor const* mon_{nullptr}; performance_monitor::timer_id id_; performance_monitor::time_type start_; + std::optional< + folly::small_vector> + context_; }; performance_monitor_proxy() = default; @@ -76,8 +104,10 @@ class performance_monitor_proxy { performance_monitor_proxy(std::shared_ptr mon, std::string const& proxy_namespace); - performance_monitor::timer_id setup_timer(std::string const& name) const { - return mon_ ? mon_->setup_timer(namespace_, name) : 0; + performance_monitor::timer_id + setup_timer(std::string const& name, + std::initializer_list context) const { + return mon_ ? mon_->setup_timer(namespace_, name, context) : 0; } section_timer scoped_section(performance_monitor::timer_id id) const { @@ -105,8 +135,8 @@ class performance_monitor_proxy { , instname { monitor, name_space } #define PERFMON_TIMER_DECL(id) \ performance_monitor::timer_id const perfmon_##id##_id_; -#define PERFMON_TIMER_INIT(instname, id) \ - , perfmon_##id##_id_ { instname.setup_timer(#id) } +#define PERFMON_TIMER_INIT(instname, id, ...) \ + , perfmon_##id##_id_ { instname.setup_timer(#id, {__VA_ARGS__}) } #define PERFMON_SCOPED_SECTION(instname, id) \ auto perfmon_scoped_section_ = instname.scoped_section(perfmon_##id##_id_); #define PERFMON_PROXY_SETUP(instname, monitor, name_space) \ @@ -117,8 +147,9 @@ class performance_monitor_proxy { #define PERFMON_EXT_PROXY_DECL PERFMON_PROXY_DECL(PERFMON_PROXY_INSTNAME) #define PERFMON_EXT_TIMER_DECL(id) \ performance_monitor::timer_id perfmon_##id##_id_; -#define PERFMON_EXT_TIMER_SETUP(scope, id) \ - (scope).perfmon_##id##_id_ = (scope).PERFMON_PROXY_INSTNAME.setup_timer(#id); +#define PERFMON_EXT_TIMER_SETUP(scope, id, ...) \ + (scope).perfmon_##id##_id_ = \ + (scope).PERFMON_PROXY_INSTNAME.setup_timer(#id, {__VA_ARGS__}); #define PERFMON_EXT_SCOPED_SECTION(scope, id) \ auto perfmon_scoped_section_ = \ (scope).PERFMON_PROXY_INSTNAME.scoped_section( \ @@ -130,11 +161,14 @@ class performance_monitor_proxy { #define PERFMON_CLS_PROXY_INIT(monitor, name_space) \ PERFMON_PROXY_INIT(PERFMON_PROXY_INSTNAME, monitor, name_space) #define PERFMON_CLS_TIMER_DECL(id) PERFMON_TIMER_DECL(id) -#define PERFMON_CLS_TIMER_INIT(id) \ - PERFMON_TIMER_INIT(PERFMON_PROXY_INSTNAME, id) +#define PERFMON_CLS_TIMER_INIT(id, ...) \ + PERFMON_TIMER_INIT(PERFMON_PROXY_INSTNAME, id, __VA_ARGS__) #define PERFMON_CLS_SCOPED_SECTION(id) \ PERFMON_SCOPED_SECTION(PERFMON_PROXY_INSTNAME, id) +#define PERFMON_SET_CONTEXT(...) \ + perfmon_scoped_section_.set_context({__VA_ARGS__}); + #else #define PERFMON_ARG(monitor) nullptr @@ -143,22 +177,24 @@ class performance_monitor_proxy { #define PERFMON_PROXY_DECL(instname) #define PERFMON_PROXY_INIT(instname, monitor, name_space) #define PERFMON_TIMER_DECL(id) -#define PERFMON_TIMER_INIT(instname, id) +#define PERFMON_TIMER_INIT(instname, id, ...) #define PERFMON_SCOPED_SECTION(instname, id) #define PERFMON_PROXY_SETUP(instname, monitor, name_space) #define PERFMON_EXT_PROXY_DECL #define PERFMON_EXT_TIMER_DECL(id) -#define PERFMON_EXT_TIMER_SETUP(scope, id) +#define PERFMON_EXT_TIMER_SETUP(scope, id, ...) #define PERFMON_EXT_SCOPED_SECTION(scope, id) #define PERFMON_EXT_PROXY_SETUP(scope, monitor, name_space) #define PERFMON_CLS_PROXY_DECL #define PERFMON_CLS_PROXY_INIT(monitor, name_space) #define PERFMON_CLS_TIMER_DECL(id) -#define PERFMON_CLS_TIMER_INIT(id) +#define PERFMON_CLS_TIMER_INIT(id, ...) #define PERFMON_CLS_SCOPED_SECTION(id) +#define PERFMON_SET_CONTEXT(...) + #endif } // namespace dwarfs diff --git a/src/dwarfs/performance_monitor.cpp b/src/dwarfs/performance_monitor.cpp index bf7e85d1..a0d0d749 100644 --- a/src/dwarfs/performance_monitor.cpp +++ b/src/dwarfs/performance_monitor.cpp @@ -24,6 +24,8 @@ #include #include #include +#include +#include #ifdef _WIN32 #include @@ -31,9 +33,14 @@ #include #endif +#include #include +#include #include +#include + +#include "dwarfs/file_access.h" #include "dwarfs/performance_monitor.h" #include "dwarfs/util.h" @@ -45,10 +52,12 @@ class single_timer { public: static constexpr uint64_t const histogram_interval = 1; - single_timer(std::string const& name_space, std::string const& name) + single_timer(std::string const& name_space, std::string const& name, + std::initializer_list context) : log_hist_{1, 0, 64} , namespace_{name_space} - , name_{name} { + , name_{name} + , context_{context.begin(), context.end()} { total_time_.store(0); } @@ -98,6 +107,8 @@ class single_timer { os << " p99 latency: " << time_with_unit(p99) << "\n\n"; } + std::span context() const { return context_; } + private: std::atomic samples_{}; std::atomic total_time_{}; @@ -105,6 +116,7 @@ class single_timer { std::mutex mutable log_hist_mutex_; std::string const namespace_; std::string const name_; + std::vector const context_; }; } // namespace @@ -114,16 +126,42 @@ class performance_monitor_impl : public performance_monitor { using timer_id = performance_monitor::timer_id; using time_type = performance_monitor::time_type; + struct trace_event { + trace_event(timer_id id, time_type start, time_type end, + std::span ctx) + : id{id} + , start{start} + , end{end} + , context{ctx.begin(), ctx.end()} {} + + timer_id id; + time_type start; + time_type end; + folly::small_vector context; + }; + explicit performance_monitor_impl( - std::unordered_set enabled_namespaces) + std::unordered_set enabled_namespaces, + std::shared_ptr fa, + std::optional trace_file) : timebase_{get_timebase()} - , enabled_namespaces_{std::move(enabled_namespaces)} {} + , enabled_namespaces_{std::move(enabled_namespaces)} + , start_time_{now()} + , trace_file_{std::move(trace_file)} + , fa_{std::move(fa)} {} + + ~performance_monitor_impl() override { + if (trace_file_) { + write_trace_events(*trace_file_); + } + } timer_id - setup_timer(std::string const& ns, std::string const& name) const override { + setup_timer(std::string const& ns, std::string const& name, + std::initializer_list context) const override { std::lock_guard lock(timers_mx_); timer_id rv = timers_.size(); - timers_.emplace_back(ns, name); + timers_.emplace_back(ns, name, context); return rv; } @@ -139,11 +177,28 @@ class performance_monitor_impl : public performance_monitor { #endif } - void add_sample(timer_id id, time_type start) const override { - auto elapsed = now() - start; + void add_sample(timer_id id, time_type start, + std::span context) const override { + auto end = now(); // No need to acquire the mutex here as existing timers // never move in the deque. - timers_[id].add_sample(elapsed); + auto& t = timers_[id]; + t.add_sample(end - start); + + if (trace_file_) { + std::vector* events; + + { + std::lock_guard lock(events_mx_); + auto& evp = trace_events_[std::this_thread::get_id()]; + if (!evp) { + evp = std::make_unique>(); + } + events = evp.get(); + } + + events->emplace_back(id, start, end, context); + } } void summarize(std::ostream& os) const override { @@ -176,6 +231,8 @@ class performance_monitor_impl : public performance_monitor { return enabled_namespaces_.find(ns) != enabled_namespaces_.end(); } + bool wants_context() const override { return trace_file_.has_value(); } + private: static double get_timebase() { #ifdef _WIN32 @@ -187,10 +244,119 @@ class performance_monitor_impl : public performance_monitor { #endif } + struct json_trace_event { + json_trace_event(timer_id id, int tid, char ph, time_type ts, + std::span ctxt = {}) + : id{id} + , tid{tid} + , ph{ph} + , ts{ts} + , args{ctxt.begin(), ctxt.end()} {} + + timer_id id; + int tid; + char ph; + time_type ts; + std::vector args; + }; + + void write_trace_events(std::filesystem::path const& path) const { + std::error_code ec; + auto output = fa_->open_output(path, ec); + + if (ec) { + return; // Meh. + } + + std::vector events; + std::unordered_map thread_ids; + + { + std::lock_guard lock(events_mx_); + int max_tid{0}; + + for (auto const& [tid, evs] : trace_events_) { + auto it = thread_ids.find(tid); + if (it == thread_ids.end()) { + it = thread_ids.emplace(tid, ++max_tid).first; + } + + for (auto const& ev : *evs) { + events.emplace_back(ev.id, it->second, 'B', ev.start - start_time_, + ev.context); + events.emplace_back(ev.id, it->second, 'E', ev.end - start_time_); + } + } + } + + std::sort(events.begin(), events.end(), + [](auto const& a, auto const& b) { return a.ts < b.ts; }); + + bool first = true; + auto const pid = ::getpid(); + + auto& os = output->os(); + + os << "[\n"; + + std::lock_guard lock(timers_mx_); + + for (auto const& ev : events) { + if (!first) { + os << ",\n"; + } + first = false; + + auto& t = timers_[ev.id]; + std::string name; + + if (!ev.args.empty()) { + name = fmt::format("{}({})", t.name(), fmt::join(ev.args, ", ")); + } else { + name = t.name(); + } + + os << fmt::format(" {{\n \"name\": \"{}\",\n \"cat\": \"{}\",\n", + name, t.get_namespace()); + os << fmt::format(" \"ph\": \"{}\",\n \"ts\": {:.3f},\n", ev.ph, + 1e6 * ev.ts * timebase_); + os << fmt::format(" \"pid\": {},\n \"tid\": {}", pid, ev.tid); + if (!ev.args.empty()) { + auto ctx_names = t.context(); + os << ",\n \"args\": {"; + for (auto [i, arg] : folly::enumerate(ev.args)) { + if (i > 0) { + os << ", "; + } + std::string arg_name; + if (i < ctx_names.size()) { + arg_name = ctx_names[i]; + } else { + arg_name = fmt::format("arg{}", i); + } + os << fmt::format("\"{}\": {}", arg_name, arg); + } + os << "}"; + } + os << "\n }"; + } + + os << "\n]\n"; + + output->close(); + } + std::deque mutable timers_; std::mutex mutable timers_mx_; double const timebase_; std::unordered_set const enabled_namespaces_; + time_type const start_time_; + std::optional const trace_file_; + std::mutex mutable events_mx_; + std::unordered_map< + std::thread::id, + std::unique_ptr>> mutable trace_events_; + std::shared_ptr fa_; }; performance_monitor_proxy::performance_monitor_proxy( @@ -200,11 +366,14 @@ performance_monitor_proxy::performance_monitor_proxy( , namespace_{mon_namespace} {} std::unique_ptr performance_monitor::create( - std::unordered_set const& enabled_namespaces) { + std::unordered_set const& enabled_namespaces, + std::shared_ptr fa, + std::optional trace_file) { return enabled_namespaces.empty() ? nullptr : std::make_unique( - std::move(enabled_namespaces)); + std::move(enabled_namespaces), std::move(fa), + std::move(trace_file)); } } // namespace dwarfs