feat(perfmon): add tracing support

This commit is contained in:
Marcus Holland-Moritz 2024-04-27 15:22:35 +02:00
parent 37b67eebbd
commit 80f84c6f44
2 changed files with 232 additions and 27 deletions

View File

@ -22,29 +22,43 @@
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#include <filesystem>
#include <initializer_list>
#include <iosfwd> #include <iosfwd>
#include <memory> #include <memory>
#include <optional>
#include <span>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <folly/small_vector.h>
namespace dwarfs { namespace dwarfs {
class file_access;
class performance_monitor { class performance_monitor {
public: public:
using timer_id = size_t; using timer_id = size_t;
using time_type = uint64_t; using time_type = uint64_t;
static inline constexpr size_t kNumInlineContext{3};
static std::unique_ptr<performance_monitor> static std::unique_ptr<performance_monitor>
create(std::unordered_set<std::string> const& enabled_namespaces); create(std::unordered_set<std::string> const& enabled_namespaces,
std::shared_ptr<file_access const> fa = nullptr,
std::optional<std::filesystem::path> trace_file = std::nullopt);
virtual ~performance_monitor() = default; virtual ~performance_monitor() = default;
virtual time_type now() const = 0; virtual time_type now() const = 0;
virtual void add_sample(timer_id id, time_type start) const = 0; virtual void add_sample(timer_id id, time_type start,
std::span<uint64_t const> context) const = 0;
virtual void summarize(std::ostream& os) const = 0; virtual void summarize(std::ostream& os) const = 0;
virtual bool is_enabled(std::string const& ns) const = 0; virtual bool is_enabled(std::string const& ns) const = 0;
virtual timer_id virtual timer_id
setup_timer(std::string const& ns, std::string const& name) const = 0; setup_timer(std::string const& ns, std::string const& name,
std::initializer_list<std::string_view> context) const = 0;
virtual bool wants_context() const = 0;
}; };
class performance_monitor_proxy { class performance_monitor_proxy {
@ -57,11 +71,22 @@ class performance_monitor_proxy {
performance_monitor::timer_id id) performance_monitor::timer_id id)
: mon_{mon} : mon_{mon}
, id_{id} , id_{id}
, start_{mon_->now()} {} , start_{mon_->now()} {
if (mon_->wants_context()) {
context_.emplace();
}
}
void set_context(std::initializer_list<uint64_t> ctx) {
if (context_) {
context_->assign(ctx);
}
}
~section_timer() { ~section_timer() {
if (mon_) { if (mon_) {
mon_->add_sample(id_, start_); mon_->add_sample(id_, start_,
context_ ? *context_ : std::span<uint64_t const>{});
} }
} }
@ -69,6 +94,9 @@ class performance_monitor_proxy {
performance_monitor const* mon_{nullptr}; performance_monitor const* mon_{nullptr};
performance_monitor::timer_id id_; performance_monitor::timer_id id_;
performance_monitor::time_type start_; performance_monitor::time_type start_;
std::optional<
folly::small_vector<uint64_t, performance_monitor::kNumInlineContext>>
context_;
}; };
performance_monitor_proxy() = default; performance_monitor_proxy() = default;
@ -76,8 +104,10 @@ class performance_monitor_proxy {
performance_monitor_proxy(std::shared_ptr<performance_monitor const> mon, performance_monitor_proxy(std::shared_ptr<performance_monitor const> mon,
std::string const& proxy_namespace); std::string const& proxy_namespace);
performance_monitor::timer_id setup_timer(std::string const& name) const { performance_monitor::timer_id
return mon_ ? mon_->setup_timer(namespace_, name) : 0; setup_timer(std::string const& name,
std::initializer_list<std::string_view> context) const {
return mon_ ? mon_->setup_timer(namespace_, name, context) : 0;
} }
section_timer scoped_section(performance_monitor::timer_id id) const { section_timer scoped_section(performance_monitor::timer_id id) const {
@ -105,8 +135,8 @@ class performance_monitor_proxy {
, instname { monitor, name_space } , instname { monitor, name_space }
#define PERFMON_TIMER_DECL(id) \ #define PERFMON_TIMER_DECL(id) \
performance_monitor::timer_id const perfmon_##id##_id_; performance_monitor::timer_id const perfmon_##id##_id_;
#define PERFMON_TIMER_INIT(instname, id) \ #define PERFMON_TIMER_INIT(instname, id, ...) \
, perfmon_##id##_id_ { instname.setup_timer(#id) } , perfmon_##id##_id_ { instname.setup_timer(#id, {__VA_ARGS__}) }
#define PERFMON_SCOPED_SECTION(instname, id) \ #define PERFMON_SCOPED_SECTION(instname, id) \
auto perfmon_scoped_section_ = instname.scoped_section(perfmon_##id##_id_); auto perfmon_scoped_section_ = instname.scoped_section(perfmon_##id##_id_);
#define PERFMON_PROXY_SETUP(instname, monitor, name_space) \ #define PERFMON_PROXY_SETUP(instname, monitor, name_space) \
@ -117,8 +147,9 @@ class performance_monitor_proxy {
#define PERFMON_EXT_PROXY_DECL PERFMON_PROXY_DECL(PERFMON_PROXY_INSTNAME) #define PERFMON_EXT_PROXY_DECL PERFMON_PROXY_DECL(PERFMON_PROXY_INSTNAME)
#define PERFMON_EXT_TIMER_DECL(id) \ #define PERFMON_EXT_TIMER_DECL(id) \
performance_monitor::timer_id perfmon_##id##_id_; performance_monitor::timer_id perfmon_##id##_id_;
#define PERFMON_EXT_TIMER_SETUP(scope, id) \ #define PERFMON_EXT_TIMER_SETUP(scope, id, ...) \
(scope).perfmon_##id##_id_ = (scope).PERFMON_PROXY_INSTNAME.setup_timer(#id); (scope).perfmon_##id##_id_ = \
(scope).PERFMON_PROXY_INSTNAME.setup_timer(#id, {__VA_ARGS__});
#define PERFMON_EXT_SCOPED_SECTION(scope, id) \ #define PERFMON_EXT_SCOPED_SECTION(scope, id) \
auto perfmon_scoped_section_ = \ auto perfmon_scoped_section_ = \
(scope).PERFMON_PROXY_INSTNAME.scoped_section( \ (scope).PERFMON_PROXY_INSTNAME.scoped_section( \
@ -130,11 +161,14 @@ class performance_monitor_proxy {
#define PERFMON_CLS_PROXY_INIT(monitor, name_space) \ #define PERFMON_CLS_PROXY_INIT(monitor, name_space) \
PERFMON_PROXY_INIT(PERFMON_PROXY_INSTNAME, monitor, name_space) PERFMON_PROXY_INIT(PERFMON_PROXY_INSTNAME, monitor, name_space)
#define PERFMON_CLS_TIMER_DECL(id) PERFMON_TIMER_DECL(id) #define PERFMON_CLS_TIMER_DECL(id) PERFMON_TIMER_DECL(id)
#define PERFMON_CLS_TIMER_INIT(id) \ #define PERFMON_CLS_TIMER_INIT(id, ...) \
PERFMON_TIMER_INIT(PERFMON_PROXY_INSTNAME, id) PERFMON_TIMER_INIT(PERFMON_PROXY_INSTNAME, id, __VA_ARGS__)
#define PERFMON_CLS_SCOPED_SECTION(id) \ #define PERFMON_CLS_SCOPED_SECTION(id) \
PERFMON_SCOPED_SECTION(PERFMON_PROXY_INSTNAME, id) PERFMON_SCOPED_SECTION(PERFMON_PROXY_INSTNAME, id)
#define PERFMON_SET_CONTEXT(...) \
perfmon_scoped_section_.set_context({__VA_ARGS__});
#else #else
#define PERFMON_ARG(monitor) nullptr #define PERFMON_ARG(monitor) nullptr
@ -143,22 +177,24 @@ class performance_monitor_proxy {
#define PERFMON_PROXY_DECL(instname) #define PERFMON_PROXY_DECL(instname)
#define PERFMON_PROXY_INIT(instname, monitor, name_space) #define PERFMON_PROXY_INIT(instname, monitor, name_space)
#define PERFMON_TIMER_DECL(id) #define PERFMON_TIMER_DECL(id)
#define PERFMON_TIMER_INIT(instname, id) #define PERFMON_TIMER_INIT(instname, id, ...)
#define PERFMON_SCOPED_SECTION(instname, id) #define PERFMON_SCOPED_SECTION(instname, id)
#define PERFMON_PROXY_SETUP(instname, monitor, name_space) #define PERFMON_PROXY_SETUP(instname, monitor, name_space)
#define PERFMON_EXT_PROXY_DECL #define PERFMON_EXT_PROXY_DECL
#define PERFMON_EXT_TIMER_DECL(id) #define PERFMON_EXT_TIMER_DECL(id)
#define PERFMON_EXT_TIMER_SETUP(scope, id) #define PERFMON_EXT_TIMER_SETUP(scope, id, ...)
#define PERFMON_EXT_SCOPED_SECTION(scope, id) #define PERFMON_EXT_SCOPED_SECTION(scope, id)
#define PERFMON_EXT_PROXY_SETUP(scope, monitor, name_space) #define PERFMON_EXT_PROXY_SETUP(scope, monitor, name_space)
#define PERFMON_CLS_PROXY_DECL #define PERFMON_CLS_PROXY_DECL
#define PERFMON_CLS_PROXY_INIT(monitor, name_space) #define PERFMON_CLS_PROXY_INIT(monitor, name_space)
#define PERFMON_CLS_TIMER_DECL(id) #define PERFMON_CLS_TIMER_DECL(id)
#define PERFMON_CLS_TIMER_INIT(id) #define PERFMON_CLS_TIMER_INIT(id, ...)
#define PERFMON_CLS_SCOPED_SECTION(id) #define PERFMON_CLS_SCOPED_SECTION(id)
#define PERFMON_SET_CONTEXT(...)
#endif #endif
} // namespace dwarfs } // namespace dwarfs

View File

@ -24,6 +24,8 @@
#include <deque> #include <deque>
#include <mutex> #include <mutex>
#include <string_view> #include <string_view>
#include <thread>
#include <vector>
#ifdef _WIN32 #ifdef _WIN32
#include <folly/portability/Windows.h> #include <folly/portability/Windows.h>
@ -31,9 +33,14 @@
#include <sys/time.h> #include <sys/time.h>
#endif #endif
#include <folly/container/Enumerate.h>
#include <folly/lang/Bits.h> #include <folly/lang/Bits.h>
#include <folly/portability/Unistd.h>
#include <folly/stats/Histogram.h> #include <folly/stats/Histogram.h>
#include <fmt/format.h>
#include "dwarfs/file_access.h"
#include "dwarfs/performance_monitor.h" #include "dwarfs/performance_monitor.h"
#include "dwarfs/util.h" #include "dwarfs/util.h"
@ -45,10 +52,12 @@ class single_timer {
public: public:
static constexpr uint64_t const histogram_interval = 1; static constexpr uint64_t const histogram_interval = 1;
single_timer(std::string const& name_space, std::string const& name) single_timer(std::string const& name_space, std::string const& name,
std::initializer_list<std::string_view> context)
: log_hist_{1, 0, 64} : log_hist_{1, 0, 64}
, namespace_{name_space} , namespace_{name_space}
, name_{name} { , name_{name}
, context_{context.begin(), context.end()} {
total_time_.store(0); total_time_.store(0);
} }
@ -98,6 +107,8 @@ class single_timer {
os << " p99 latency: " << time_with_unit(p99) << "\n\n"; os << " p99 latency: " << time_with_unit(p99) << "\n\n";
} }
std::span<std::string const> context() const { return context_; }
private: private:
std::atomic<uint64_t> samples_{}; std::atomic<uint64_t> samples_{};
std::atomic<uint64_t> total_time_{}; std::atomic<uint64_t> total_time_{};
@ -105,6 +116,7 @@ class single_timer {
std::mutex mutable log_hist_mutex_; std::mutex mutable log_hist_mutex_;
std::string const namespace_; std::string const namespace_;
std::string const name_; std::string const name_;
std::vector<std::string> const context_;
}; };
} // namespace } // namespace
@ -114,16 +126,42 @@ class performance_monitor_impl : public performance_monitor {
using timer_id = performance_monitor::timer_id; using timer_id = performance_monitor::timer_id;
using time_type = performance_monitor::time_type; using time_type = performance_monitor::time_type;
struct trace_event {
trace_event(timer_id id, time_type start, time_type end,
std::span<uint64_t const> ctx)
: id{id}
, start{start}
, end{end}
, context{ctx.begin(), ctx.end()} {}
timer_id id;
time_type start;
time_type end;
folly::small_vector<uint64_t, kNumInlineContext> context;
};
explicit performance_monitor_impl( explicit performance_monitor_impl(
std::unordered_set<std::string> enabled_namespaces) std::unordered_set<std::string> enabled_namespaces,
std::shared_ptr<file_access const> fa,
std::optional<std::filesystem::path> trace_file)
: timebase_{get_timebase()} : timebase_{get_timebase()}
, enabled_namespaces_{std::move(enabled_namespaces)} {} , enabled_namespaces_{std::move(enabled_namespaces)}
, start_time_{now()}
, trace_file_{std::move(trace_file)}
, fa_{std::move(fa)} {}
~performance_monitor_impl() override {
if (trace_file_) {
write_trace_events(*trace_file_);
}
}
timer_id timer_id
setup_timer(std::string const& ns, std::string const& name) const override { setup_timer(std::string const& ns, std::string const& name,
std::initializer_list<std::string_view> context) const override {
std::lock_guard lock(timers_mx_); std::lock_guard lock(timers_mx_);
timer_id rv = timers_.size(); timer_id rv = timers_.size();
timers_.emplace_back(ns, name); timers_.emplace_back(ns, name, context);
return rv; return rv;
} }
@ -139,11 +177,28 @@ class performance_monitor_impl : public performance_monitor {
#endif #endif
} }
void add_sample(timer_id id, time_type start) const override { void add_sample(timer_id id, time_type start,
auto elapsed = now() - start; std::span<uint64_t const> context) const override {
auto end = now();
// No need to acquire the mutex here as existing timers // No need to acquire the mutex here as existing timers
// never move in the deque. // never move in the deque.
timers_[id].add_sample(elapsed); auto& t = timers_[id];
t.add_sample(end - start);
if (trace_file_) {
std::vector<trace_event>* events;
{
std::lock_guard lock(events_mx_);
auto& evp = trace_events_[std::this_thread::get_id()];
if (!evp) {
evp = std::make_unique<std::vector<trace_event>>();
}
events = evp.get();
}
events->emplace_back(id, start, end, context);
}
} }
void summarize(std::ostream& os) const override { void summarize(std::ostream& os) const override {
@ -176,6 +231,8 @@ class performance_monitor_impl : public performance_monitor {
return enabled_namespaces_.find(ns) != enabled_namespaces_.end(); return enabled_namespaces_.find(ns) != enabled_namespaces_.end();
} }
bool wants_context() const override { return trace_file_.has_value(); }
private: private:
static double get_timebase() { static double get_timebase() {
#ifdef _WIN32 #ifdef _WIN32
@ -187,10 +244,119 @@ class performance_monitor_impl : public performance_monitor {
#endif #endif
} }
struct json_trace_event {
json_trace_event(timer_id id, int tid, char ph, time_type ts,
std::span<uint64_t const> ctxt = {})
: id{id}
, tid{tid}
, ph{ph}
, ts{ts}
, args{ctxt.begin(), ctxt.end()} {}
timer_id id;
int tid;
char ph;
time_type ts;
std::vector<uint64_t> args;
};
void write_trace_events(std::filesystem::path const& path) const {
std::error_code ec;
auto output = fa_->open_output(path, ec);
if (ec) {
return; // Meh.
}
std::vector<json_trace_event> events;
std::unordered_map<std::thread::id, int> thread_ids;
{
std::lock_guard lock(events_mx_);
int max_tid{0};
for (auto const& [tid, evs] : trace_events_) {
auto it = thread_ids.find(tid);
if (it == thread_ids.end()) {
it = thread_ids.emplace(tid, ++max_tid).first;
}
for (auto const& ev : *evs) {
events.emplace_back(ev.id, it->second, 'B', ev.start - start_time_,
ev.context);
events.emplace_back(ev.id, it->second, 'E', ev.end - start_time_);
}
}
}
std::sort(events.begin(), events.end(),
[](auto const& a, auto const& b) { return a.ts < b.ts; });
bool first = true;
auto const pid = ::getpid();
auto& os = output->os();
os << "[\n";
std::lock_guard lock(timers_mx_);
for (auto const& ev : events) {
if (!first) {
os << ",\n";
}
first = false;
auto& t = timers_[ev.id];
std::string name;
if (!ev.args.empty()) {
name = fmt::format("{}({})", t.name(), fmt::join(ev.args, ", "));
} else {
name = t.name();
}
os << fmt::format(" {{\n \"name\": \"{}\",\n \"cat\": \"{}\",\n",
name, t.get_namespace());
os << fmt::format(" \"ph\": \"{}\",\n \"ts\": {:.3f},\n", ev.ph,
1e6 * ev.ts * timebase_);
os << fmt::format(" \"pid\": {},\n \"tid\": {}", pid, ev.tid);
if (!ev.args.empty()) {
auto ctx_names = t.context();
os << ",\n \"args\": {";
for (auto [i, arg] : folly::enumerate(ev.args)) {
if (i > 0) {
os << ", ";
}
std::string arg_name;
if (i < ctx_names.size()) {
arg_name = ctx_names[i];
} else {
arg_name = fmt::format("arg{}", i);
}
os << fmt::format("\"{}\": {}", arg_name, arg);
}
os << "}";
}
os << "\n }";
}
os << "\n]\n";
output->close();
}
std::deque<single_timer> mutable timers_; std::deque<single_timer> mutable timers_;
std::mutex mutable timers_mx_; std::mutex mutable timers_mx_;
double const timebase_; double const timebase_;
std::unordered_set<std::string> const enabled_namespaces_; std::unordered_set<std::string> const enabled_namespaces_;
time_type const start_time_;
std::optional<std::filesystem::path> const trace_file_;
std::mutex mutable events_mx_;
std::unordered_map<
std::thread::id,
std::unique_ptr<std::vector<trace_event>>> mutable trace_events_;
std::shared_ptr<file_access const> fa_;
}; };
performance_monitor_proxy::performance_monitor_proxy( performance_monitor_proxy::performance_monitor_proxy(
@ -200,11 +366,14 @@ performance_monitor_proxy::performance_monitor_proxy(
, namespace_{mon_namespace} {} , namespace_{mon_namespace} {}
std::unique_ptr<performance_monitor> performance_monitor::create( std::unique_ptr<performance_monitor> performance_monitor::create(
std::unordered_set<std::string> const& enabled_namespaces) { std::unordered_set<std::string> const& enabled_namespaces,
std::shared_ptr<file_access const> fa,
std::optional<std::filesystem::path> trace_file) {
return enabled_namespaces.empty() return enabled_namespaces.empty()
? nullptr ? nullptr
: std::make_unique<performance_monitor_impl>( : std::make_unique<performance_monitor_impl>(
std::move(enabled_namespaces)); std::move(enabled_namespaces), std::move(fa),
std::move(trace_file));
} }
} // namespace dwarfs } // namespace dwarfs