pstats: Add support for profiling thread context switches

Disabled by default, enable with `pstats-thread-profiling true` in Config.prc
This commit is contained in:
rdb 2022-02-22 17:28:57 +01:00
parent b4d51c24e9
commit fd033e66f1
25 changed files with 402 additions and 1 deletions

View File

@ -39,7 +39,11 @@ INLINE ConditionVarPosixImpl::
INLINE void ConditionVarPosixImpl::
wait() {
TAU_PROFILE("ConditionVarPosixImpl::wait()", " ", TAU_USER);
#ifdef DO_PSTATS
int result = _wait_func(&_cvar, &_mutex._lock);
#else
int result = pthread_cond_wait(&_cvar, &_mutex._lock);
#endif
#ifndef NDEBUG
if (result != 0) {
pipeline_cat.error()

View File

@ -18,6 +18,12 @@
#include "conditionVarPosixImpl.h"
#include <sys/time.h>
int (*ConditionVarPosixImpl::_wait_func)(pthread_cond_t *,
pthread_mutex_t *) = &pthread_cond_wait;
int (*ConditionVarPosixImpl::_timedwait_func)(pthread_cond_t *,
pthread_mutex_t *,
const struct timespec *) = &pthread_cond_timedwait;
/**
*
*/
@ -41,7 +47,11 @@ wait(double timeout) {
++ts.tv_sec;
}
#ifdef DO_PSTATS
int result = _timedwait_func(&_cvar, &_mutex._lock, &ts);
#else
int result = pthread_cond_timedwait(&_cvar, &_mutex._lock, &ts);
#endif
#ifndef NDEBUG
if (result != 0 && result != ETIMEDOUT) {
pipeline_cat.error()

View File

@ -43,6 +43,11 @@ public:
private:
MutexPosixImpl &_mutex;
pthread_cond_t _cvar;
static int (*_wait_func)(pthread_cond_t *, pthread_mutex_t *);
static int (*_timedwait_func)(pthread_cond_t *, pthread_mutex_t *,
const struct timespec *);
friend class PStatClientImpl;
};
#include "conditionVarPosixImpl.I"

View File

@ -23,7 +23,11 @@ ConditionVarWin32Impl(MutexWin32Impl &mutex) : _mutex(mutex) {
*/
INLINE void ConditionVarWin32Impl::
wait() {
#ifdef DO_PSTATS
_wait_func(&_cvar, &_mutex._lock, INFINITE, 0);
#else
SleepConditionVariableSRW(&_cvar, &_mutex._lock, INFINITE, 0);
#endif
}
/**
@ -31,7 +35,11 @@ wait() {
*/
INLINE void ConditionVarWin32Impl::
wait(double timeout) {
#ifdef DO_PSTATS
_wait_func(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0);
#else
SleepConditionVariableSRW(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0);
#endif
}
/**

View File

@ -17,4 +17,7 @@
#include "conditionVarWin32Impl.h"
// This function gets replaced by PStats to measure the time spent waiting.
BOOL (*ConditionVarWin32Impl::_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG) = &SleepConditionVariableSRW;
#endif // _WIN32

View File

@ -40,6 +40,9 @@ public:
private:
MutexWin32Impl &_mutex;
CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT;
static BOOL (*_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG);
friend class PStatClientImpl;
};
#include "conditionVarWin32Impl.I"

View File

@ -191,7 +191,11 @@ is_simple_threads() {
INLINE void Thread::
sleep(double seconds) {
TAU_PROFILE("void Thread::sleep(double)", " ", TAU_USER);
#ifdef DO_PSTATS
_sleep_func(seconds);
#else
ThreadImpl::sleep(seconds);
#endif
}
/**
@ -200,7 +204,11 @@ sleep(double seconds) {
INLINE void Thread::
force_yield() {
TAU_PROFILE("void Thread::yield()", " ", TAU_USER);
#ifdef DO_PSTATS
_yield_func();
#else
ThreadImpl::yield();
#endif
}
/**
@ -214,6 +222,18 @@ consider_yield() {
ThreadImpl::consider_yield();
}
/**
* Returns thread statistics. The first number is the total number of context
* switches reported by the OS, and the second number is the number of
* involuntary context switches (ie. the thread was scheduled out by the OS),
* if known.
*/
INLINE bool Thread::
get_context_switches(size_t &total, size_t &involuntary) {
return ThreadImpl::get_context_switches(total, involuntary);
}
/**
* Returns true if the thread has been started, false if it has not, or if
* join() has already been called.

View File

@ -22,6 +22,9 @@ Thread *Thread::_main_thread;
Thread *Thread::_external_thread;
TypeHandle Thread::_type_handle;
void (*Thread::_sleep_func)(double) = &ThreadImpl::sleep;
void (*Thread::_yield_func)() = &ThreadImpl::yield;
/**
* Creates a new Thread object, but does not immediately start executing it.
* This gives the caller a chance to store it in a PT(Thread) object, if

View File

@ -81,6 +81,8 @@ PUBLISHED:
BLOCKING INLINE static void force_yield();
BLOCKING INLINE static void consider_yield();
INLINE static bool get_context_switches(size_t &total, size_t &involuntary);
virtual void output(std::ostream &out) const;
void output_blocker(std::ostream &out) const;
static void write_status(std::ostream &out);
@ -162,6 +164,10 @@ private:
static Thread *_main_thread;
static Thread *_external_thread;
static void (*_sleep_func)(double);
static void (*_yield_func)();
friend class PStatClientImpl;
public:
static TypeHandle get_class_type() {
return _type_handle;

View File

@ -124,3 +124,11 @@ yield() {
INLINE void ThreadDummyImpl::
consider_yield() {
}
/**
*
*/
INLINE bool ThreadDummyImpl::
get_context_switches(size_t &, size_t &) {
return false;
}

View File

@ -57,6 +57,8 @@ public:
INLINE static void sleep(double seconds);
INLINE static void yield();
INLINE static void consider_yield();
INLINE static bool get_context_switches(size_t &, size_t &);
};
#include "threadDummyImpl.I"

View File

@ -21,6 +21,11 @@
#include "config_pipeline.h"
#include <sched.h>
// Used for getrusage().
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifdef ANDROID
#include "config_express.h"
#include <jni.h>
@ -248,6 +253,26 @@ bind_java_thread() {
}
#endif // ANDROID
/**
* Returns the number of context switches that occurred on the current thread.
* The first number is the total number of context switches reported by the OS,
* and the second number is the number of involuntary context switches (ie. the
* thread was scheduled out by the OS), if known, otherwise zero.
* Returns true if context switch information was available, false otherwise.
*/
bool ThreadPosixImpl::
get_context_switches(size_t &total, size_t &involuntary) {
#ifdef RUSAGE_THREAD
struct rusage usage;
if (getrusage(RUSAGE_THREAD, &usage) == 0) {
total = (size_t)usage.ru_nvcsw;
involuntary = (size_t)usage.ru_nivcsw;
return true;
}
#endif
return false;
}
/**
* The entry point of each thread.
*/

View File

@ -63,6 +63,8 @@ public:
static void bind_java_thread();
#endif
static bool get_context_switches(size_t &total, size_t &involuntary);
private:
static void *root_func(void *data);
static Thread *init_current_thread();

View File

@ -55,6 +55,9 @@ ThreadSimpleImpl(Thread *parent_obj) :
#ifdef WIN32
_win32_system_thread_id = 0;
#endif
_context_switches = 0;
_involuntary_context_switches = 0;
}
/**
@ -229,6 +232,32 @@ yield_this(bool volunteer) {
}
_manager->enqueue_ready(this, true);
_manager->next_context();
if (!volunteer) {
// Technically all context switches are voluntary here, but to make this
// distinction meaningful, we count it as involuntary on consider_yield().
++_involuntary_context_switches;
}
}
/**
* Returns the number of context switches that occurred on the current thread.
* The first number is the total number of context switches reported by the OS,
* and the second number is the number of involuntary context switches (ie. the
* thread was scheduled out by the OS), if known, otherwise zero.
* Returns true if context switch information was available, false otherwise.
*/
bool ThreadSimpleImpl::
get_context_switches(size_t &total, size_t &involuntary) {
ThreadSimpleManager *manager = ThreadSimpleManager::get_global_ptr();
if (manager->is_same_system_thread()) {
ThreadSimpleImpl *thread = manager->get_current_thread();
if (thread != nullptr) {
total = thread->_context_switches;
involuntary = thread->_involuntary_context_switches;
return true;
}
}
return false;
}
/**

View File

@ -77,6 +77,8 @@ public:
INLINE static void write_status(std::ostream &out);
static bool get_context_switches(size_t &total, size_t &involuntary);
private:
static void st_begin_thread(void *data);
void begin_thread();
@ -142,6 +144,9 @@ private:
DWORD _win32_system_thread_id;
#endif
size_t _context_switches;
size_t _involuntary_context_switches;
friend class ThreadSimpleManager;
};

View File

@ -233,6 +233,8 @@ next_context() {
unref_delete(finished_thread->_parent_obj);
}
++_current_thread->_context_switches;
// Mark the current thread's resume point.
#ifdef HAVE_PYTHON

View File

@ -17,7 +17,8 @@
INLINE ThreadWin32Impl::
ThreadWin32Impl(Thread *parent_obj) :
_cv(_mutex),
_parent_obj(parent_obj)
_parent_obj(parent_obj),
_profiling(0)
{
_thread = 0;
_joinable = false;

View File

@ -20,9 +20,34 @@
#include "pointerTo.h"
#include "config_pipeline.h"
#include <windows.h>
static thread_local Thread *_current_thread = nullptr;
static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
#if _WIN32_WINNT < 0x0601
// Requires Windows 7.
static DWORD (*EnableThreadProfiling)(HANDLE, DWORD, DWORD64, HANDLE *) = nullptr;
static DWORD (*DisableThreadProfiling)(HANDLE) = nullptr;
static DWORD (*ReadThreadProfilingData)(HANDLE, DWORD, PPERFORMANCE_DATA data) = nullptr;
static bool init_thread_profiling() {
static bool inited = false;
if (!inited) {
HMODULE kernel32 = GetModuleHandleA("kernel32.dll");
EnableThreadProfiling = (decltype(EnableThreadProfiling))GetProcAddress(kernel32, "EnableThreadProfiling");
DisableThreadProfiling = (decltype(DisableThreadProfiling))GetProcAddress(kernel32, "DisableThreadProfiling");
ReadThreadProfilingData = (decltype(ReadThreadProfilingData))GetProcAddress(kernel32, "ReadThreadProfilingData");
inited = true;
}
return (EnableThreadProfiling && DisableThreadProfiling && ReadThreadProfilingData);
}
#else
static bool init_thread_profiling() {
return true;
}
#endif
/**
* Called by get_current_thread() if the current thread pointer is null; checks
* whether it might be the main thread.
@ -170,6 +195,35 @@ bind_thread(Thread *thread) {
_current_thread = thread;
}
/**
* Returns the number of context switches that occurred on the current thread.
* The first number is the total number of context switches reported by the OS,
* and the second number is the number of involuntary context switches (ie. the
* thread was scheduled out by the OS), if known, otherwise zero.
* Returns true if context switch information was available, false otherwise.
*/
bool ThreadWin32Impl::
get_context_switches(size_t &total, size_t &involuntary) {
Thread *thread = get_current_thread();
ThreadWin32Impl *self = &thread->_impl;
if (!self->_profiling && init_thread_profiling()) {
DWORD result = EnableThreadProfiling(GetCurrentThread(), THREAD_PROFILING_FLAG_DISPATCH, 0, &self->_profiling);
if (result != ERROR_SUCCESS) {
self->_profiling = 0;
return false;
}
}
PERFORMANCE_DATA data = {sizeof(PERFORMANCE_DATA), PERFORMANCE_DATA_VERSION};
if (ReadThreadProfilingData(self->_profiling, READ_THREAD_PROFILING_FLAG_DISPATCHING, &data) == ERROR_SUCCESS) {
total = data.ContextSwitchCount;
involuntary = 0;
return true;
}
return false;
}
/**
* The entry point of each thread.
*/
@ -212,6 +266,11 @@ root_func(LPVOID data) {
self->_mutex.unlock();
}
if (self->_profiling != 0) {
DisableThreadProfiling(self->_profiling);
self->_profiling = 0;
}
// Now drop the parent object reference that we grabbed in start(). This
// might delete the parent object, and in turn, delete the ThreadWin32Impl
// object.

View File

@ -52,6 +52,8 @@ public:
INLINE static void yield();
INLINE static void consider_yield();
static bool get_context_switches(size_t &total, size_t &involuntary);
private:
static DWORD WINAPI root_func(LPVOID data);
@ -69,6 +71,7 @@ private:
DWORD _thread_id;
bool _joinable;
Status _status;
HANDLE _profiling;
};
#include "threadWin32Impl.I"

View File

@ -82,6 +82,11 @@ ConfigVariableBool pstats_gpu_timing
"is not usually an accurate reflectino of how long the actual "
"operation takes on the video card."));
ConfigVariableBool pstats_thread_profiling
("pstats-thread-profiling", false,
PRC_DESC("Set this true to query the system for thread statistics, such as "
"the number of context switches and time spent waiting."));
// The rest are different in that they directly control the server, not the
// client.
ConfigVariableBool pstats_scroll_mode

View File

@ -38,6 +38,7 @@ extern EXPCL_PANDA_PSTATCLIENT ConfigVariableString pstats_host;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_thread_profiling;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode;
extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history;

View File

@ -905,6 +905,38 @@ stop(int collector_index, int thread_index, double as_of) {
}
}
/**
* Adds a pair of start/stop times in the given collector. Used in low-level
* code that knows there will not be any other collectors started and stopped
* in the meantime, and can be more efficient than a pair of start/stop times.
*/
void PStatClient::
start_stop(int collector_index, int thread_index, double start, double stop) {
if (!client_is_connected()) {
return;
}
#ifdef _DEBUG
nassertv(collector_index >= 0 && collector_index < get_num_collectors());
nassertv(thread_index >= 0 && thread_index < get_num_threads());
#endif
Collector *collector = get_collector_ptr(collector_index);
InternalThread *thread = get_thread_ptr(thread_index);
if (collector->is_active() && thread->_is_active) {
LightMutexHolder holder(thread->_thread_lock);
if (collector->_per_thread[thread_index]._nested_count == 0) {
// This collector wasn't already started in this thread; record a new
// data point.
if (thread->_thread_active) {
thread->_frame_data.add_start(collector_index, start);
thread->_frame_data.add_stop(collector_index, stop);
}
}
}
}
/**
* Removes the level value from the indicated collector. The collector will
* no longer be reported as having any particular level value.

View File

@ -129,6 +129,7 @@ private:
void start(int collector_index, int thread_index, double as_of);
void stop(int collector_index, int thread_index);
void stop(int collector_index, int thread_index, double as_of);
void start_stop(int collector_index, int thread_index, double start, double stop);
void clear_level(int collector_index, int thread_index);
void set_level(int collector_index, int thread_index, double level);
@ -216,6 +217,8 @@ private:
bool _is_active;
int _frame_number;
double _next_packet;
size_t _context_switches = 0;
size_t _involuntary_context_switches = 0;
bool _thread_active;
@ -321,6 +324,7 @@ private:
void start(int collector_index, int thread_index, double as_of);
void stop(int collector_index, int thread_index);
void stop(int collector_index, int thread_index, double as_of);
void start_stop(int collector_index, int thread_index, double start, double stop);
void clear_level(int collector_index, int thread_index);
void set_level(int collector_index, int thread_index, double level);

View File

@ -24,6 +24,8 @@
#include "config_pstatclient.h"
#include "pStatProperties.h"
#include "cmath.h"
#include "conditionVarWin32Impl.h"
#include "conditionVarPosixImpl.h"
#include <algorithm>
@ -32,6 +34,15 @@
#include <windows.h>
#endif
static PStatCollector _cswitch_pcollector("Context Switches");
static PStatCollector _cswitch_sleep_pcollector("Context Switches:Sleep");
static PStatCollector _cswitch_yield_pcollector("Context Switches:Yields");
static PStatCollector _cswitch_cvar_pcollector("Context Switches:Condition Variable");
static PStatCollector _cswitch_involuntary_pcollector("Context Switches:Involuntary");
static PStatCollector _wait_sleep_pcollector("Wait:Sleep");
static PStatCollector _wait_yield_pcollector("Wait:Yield");
static PStatCollector _wait_cvar_pcollector("Wait:Condition Variable");
/**
*
*/
@ -124,6 +135,106 @@ client_connect(std::string hostname, int port) {
MutexDebug::increment_pstats();
#endif // DEBUG_THREADS
if (pstats_thread_profiling) {
// Replace the condition variable wait function with one that does PStats
// statistics.
_thread_profiling = true;
Thread::_sleep_func = [] (double seconds) {
Thread *current_thread = Thread::get_current_thread();
int thread_index = current_thread->get_pstats_index();
if (thread_index >= 0) {
PStatClient *client = PStatClient::get_global_pstats();
double start = client->get_real_time();
ThreadImpl::sleep(seconds);
double stop = client->get_real_time();
client->start_stop(_wait_sleep_pcollector.get_index(), thread_index, start, stop);
client->add_level(_cswitch_sleep_pcollector.get_index(), thread_index, 1);
}
else {
ThreadImpl::sleep(seconds);
}
};
Thread::_yield_func = [] () {
Thread *current_thread = Thread::get_current_thread();
int thread_index = current_thread->get_pstats_index();
if (thread_index >= 0) {
PStatClient *client = PStatClient::get_global_pstats();
double start = client->get_real_time();
ThreadImpl::yield();
double stop = client->get_real_time();
client->start_stop(_wait_yield_pcollector.get_index(), thread_index, start, stop);
client->add_level(_cswitch_yield_pcollector.get_index(), thread_index, 1);
}
else {
ThreadImpl::yield();
}
};
#ifdef _WIN32
ConditionVarWin32Impl::_wait_func =
[] (PCONDITION_VARIABLE cvar, PSRWLOCK lock, DWORD time, ULONG flags) {
Thread *current_thread = Thread::get_current_thread();
int thread_index = current_thread->get_pstats_index();
BOOL result;
if (thread_index >= 0) {
PStatClient *client = PStatClient::get_global_pstats();
double start = client->get_real_time();
result = SleepConditionVariableSRW(cvar, lock, time, flags);
double stop = client->get_real_time();
client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
}
else {
result = SleepConditionVariableSRW(cvar, lock, time, flags);
}
return result;
};
#endif
#ifdef HAVE_POSIX_THREADS
ConditionVarPosixImpl::_wait_func =
[] (pthread_cond_t *cvar, pthread_mutex_t *lock) {
Thread *current_thread = Thread::get_current_thread();
int thread_index = current_thread->get_pstats_index();
int result;
if (thread_index >= 0) {
PStatClient *client = PStatClient::get_global_pstats();
double start = client->get_real_time();
result = pthread_cond_wait(cvar, lock);
double stop = client->get_real_time();
client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
}
else {
result = pthread_cond_wait(cvar, lock);
}
return result;
};
ConditionVarPosixImpl::_timedwait_func =
[] (pthread_cond_t *cvar, pthread_mutex_t *lock,
const struct timespec *ts) {
Thread *current_thread = Thread::get_current_thread();
int thread_index = current_thread->get_pstats_index();
int result;
if (thread_index >= 0) {
PStatClient *client = PStatClient::get_global_pstats();
double start = client->get_real_time();
result = pthread_cond_timedwait(cvar, lock, ts);
double stop = client->get_real_time();
client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
}
else {
result = pthread_cond_timedwait(cvar, lock, ts);
}
return result;
};
#endif
}
// Wait for the server hello.
while (!_got_udp_port) {
transmit_control_data();
@ -137,6 +248,23 @@ client_connect(std::string hostname, int port) {
*/
void PStatClientImpl::
client_disconnect() {
if (_thread_profiling) {
// Switch the functions back to what they were.
Thread::_sleep_func = &ThreadImpl::sleep;
Thread::_yield_func = &ThreadImpl::yield;
#ifdef _WIN32
ConditionVarWin32Impl::_wait_func = &SleepConditionVariableSRW;
#endif
#ifdef HAVE_POSIX_THREADS
ConditionVarPosixImpl::_wait_func = &pthread_cond_wait;
ConditionVarPosixImpl::_timedwait_func = &pthread_cond_timedwait;
#endif
_thread_profiling = false;
}
if (_is_connected) {
#ifdef DEBUG_THREADS
MutexDebug::decrement_pstats();
@ -207,6 +335,37 @@ new_frame(int thread_index, int frame_number) {
if (frame_number == -1) {
frame_number = pthread->_frame_number;
}
// Record the number of context switches on this thread.
if (_thread_profiling) {
size_t total, involuntary;
PT(Thread) thread = pthread->_thread.lock();
if (thread != nullptr && thread->get_context_switches(total, involuntary)) {
size_t total_this_frame = total - pthread->_context_switches;
pthread->_context_switches = total;
frame_data.add_level(_cswitch_pcollector.get_index(), total_this_frame);
if (involuntary != 0) {
size_t involuntary_this_frame = involuntary - pthread->_involuntary_context_switches;
pthread->_involuntary_context_switches = involuntary;
frame_data.add_level(_cswitch_involuntary_pcollector.get_index(), involuntary_this_frame);
}
}
_client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index);
_client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index);
_client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index);
}
}
else if (_thread_profiling) {
// Record the initial number of context switches.
PT(Thread) thread = pthread->_thread.lock();
if (thread != nullptr) {
thread->get_context_switches(pthread->_context_switches,
pthread->_involuntary_context_switches);
}
_client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index);
_client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index);
_client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index);
}
pthread->_frame_data.clear();

View File

@ -111,6 +111,8 @@ private:
double _udp_count_factor;
unsigned int _tcp_count;
unsigned int _udp_count;
bool _thread_profiling = false;
};
#include "pStatClientImpl.I"