diff --git a/panda/src/pipeline/conditionVarPosixImpl.I b/panda/src/pipeline/conditionVarPosixImpl.I index 0b3e533717..2f8e73e5f5 100644 --- a/panda/src/pipeline/conditionVarPosixImpl.I +++ b/panda/src/pipeline/conditionVarPosixImpl.I @@ -39,7 +39,11 @@ INLINE ConditionVarPosixImpl:: INLINE void ConditionVarPosixImpl:: wait() { TAU_PROFILE("ConditionVarPosixImpl::wait()", " ", TAU_USER); +#ifdef DO_PSTATS + int result = _wait_func(&_cvar, &_mutex._lock); +#else int result = pthread_cond_wait(&_cvar, &_mutex._lock); +#endif #ifndef NDEBUG if (result != 0) { pipeline_cat.error() diff --git a/panda/src/pipeline/conditionVarPosixImpl.cxx b/panda/src/pipeline/conditionVarPosixImpl.cxx index e2e527c369..0d48b3034b 100644 --- a/panda/src/pipeline/conditionVarPosixImpl.cxx +++ b/panda/src/pipeline/conditionVarPosixImpl.cxx @@ -18,6 +18,12 @@ #include "conditionVarPosixImpl.h" #include +int (*ConditionVarPosixImpl::_wait_func)(pthread_cond_t *, + pthread_mutex_t *) = &pthread_cond_wait; +int (*ConditionVarPosixImpl::_timedwait_func)(pthread_cond_t *, + pthread_mutex_t *, + const struct timespec *) = &pthread_cond_timedwait; + /** * */ @@ -41,7 +47,11 @@ wait(double timeout) { ++ts.tv_sec; } +#ifdef DO_PSTATS + int result = _timedwait_func(&_cvar, &_mutex._lock, &ts); +#else int result = pthread_cond_timedwait(&_cvar, &_mutex._lock, &ts); +#endif #ifndef NDEBUG if (result != 0 && result != ETIMEDOUT) { pipeline_cat.error() diff --git a/panda/src/pipeline/conditionVarPosixImpl.h b/panda/src/pipeline/conditionVarPosixImpl.h index e39a5a122b..56bc3e9acc 100644 --- a/panda/src/pipeline/conditionVarPosixImpl.h +++ b/panda/src/pipeline/conditionVarPosixImpl.h @@ -43,6 +43,11 @@ public: private: MutexPosixImpl &_mutex; pthread_cond_t _cvar; + + static int (*_wait_func)(pthread_cond_t *, pthread_mutex_t *); + static int (*_timedwait_func)(pthread_cond_t *, pthread_mutex_t *, + const struct timespec *); + friend class PStatClientImpl; }; #include "conditionVarPosixImpl.I" diff --git a/panda/src/pipeline/conditionVarWin32Impl.I b/panda/src/pipeline/conditionVarWin32Impl.I index 275acff593..56fb036266 100644 --- a/panda/src/pipeline/conditionVarWin32Impl.I +++ b/panda/src/pipeline/conditionVarWin32Impl.I @@ -23,7 +23,11 @@ ConditionVarWin32Impl(MutexWin32Impl &mutex) : _mutex(mutex) { */ INLINE void ConditionVarWin32Impl:: wait() { +#ifdef DO_PSTATS + _wait_func(&_cvar, &_mutex._lock, INFINITE, 0); +#else SleepConditionVariableSRW(&_cvar, &_mutex._lock, INFINITE, 0); +#endif } /** @@ -31,7 +35,11 @@ wait() { */ INLINE void ConditionVarWin32Impl:: wait(double timeout) { +#ifdef DO_PSTATS + _wait_func(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0); +#else SleepConditionVariableSRW(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0); +#endif } /** diff --git a/panda/src/pipeline/conditionVarWin32Impl.cxx b/panda/src/pipeline/conditionVarWin32Impl.cxx index 5ba4165a0c..6f4776dfa0 100644 --- a/panda/src/pipeline/conditionVarWin32Impl.cxx +++ b/panda/src/pipeline/conditionVarWin32Impl.cxx @@ -17,4 +17,7 @@ #include "conditionVarWin32Impl.h" +// This function gets replaced by PStats to measure the time spent waiting. +BOOL (*ConditionVarWin32Impl::_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG) = &SleepConditionVariableSRW; + #endif // _WIN32 diff --git a/panda/src/pipeline/conditionVarWin32Impl.h b/panda/src/pipeline/conditionVarWin32Impl.h index 5c8d4a91d3..12cbe053f7 100644 --- a/panda/src/pipeline/conditionVarWin32Impl.h +++ b/panda/src/pipeline/conditionVarWin32Impl.h @@ -40,6 +40,9 @@ public: private: MutexWin32Impl &_mutex; CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT; + + static BOOL (*_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG); + friend class PStatClientImpl; }; #include "conditionVarWin32Impl.I" diff --git a/panda/src/pipeline/thread.I b/panda/src/pipeline/thread.I index dc7001fd06..7fce66e6d5 100644 --- a/panda/src/pipeline/thread.I +++ b/panda/src/pipeline/thread.I @@ -191,7 +191,11 @@ is_simple_threads() { INLINE void Thread:: sleep(double seconds) { TAU_PROFILE("void Thread::sleep(double)", " ", TAU_USER); +#ifdef DO_PSTATS + _sleep_func(seconds); +#else ThreadImpl::sleep(seconds); +#endif } /** @@ -200,7 +204,11 @@ sleep(double seconds) { INLINE void Thread:: force_yield() { TAU_PROFILE("void Thread::yield()", " ", TAU_USER); +#ifdef DO_PSTATS + _yield_func(); +#else ThreadImpl::yield(); +#endif } /** @@ -214,6 +222,18 @@ consider_yield() { ThreadImpl::consider_yield(); } + +/** + * Returns thread statistics. The first number is the total number of context + * switches reported by the OS, and the second number is the number of + * involuntary context switches (ie. the thread was scheduled out by the OS), + * if known. + */ +INLINE bool Thread:: +get_context_switches(size_t &total, size_t &involuntary) { + return ThreadImpl::get_context_switches(total, involuntary); +} + /** * Returns true if the thread has been started, false if it has not, or if * join() has already been called. diff --git a/panda/src/pipeline/thread.cxx b/panda/src/pipeline/thread.cxx index 81152a8095..5f2cafda3a 100644 --- a/panda/src/pipeline/thread.cxx +++ b/panda/src/pipeline/thread.cxx @@ -22,6 +22,9 @@ Thread *Thread::_main_thread; Thread *Thread::_external_thread; TypeHandle Thread::_type_handle; +void (*Thread::_sleep_func)(double) = &ThreadImpl::sleep; +void (*Thread::_yield_func)() = &ThreadImpl::yield; + /** * Creates a new Thread object, but does not immediately start executing it. * This gives the caller a chance to store it in a PT(Thread) object, if diff --git a/panda/src/pipeline/thread.h b/panda/src/pipeline/thread.h index 64f7622d99..55ea0cbb09 100644 --- a/panda/src/pipeline/thread.h +++ b/panda/src/pipeline/thread.h @@ -81,6 +81,8 @@ PUBLISHED: BLOCKING INLINE static void force_yield(); BLOCKING INLINE static void consider_yield(); + INLINE static bool get_context_switches(size_t &total, size_t &involuntary); + virtual void output(std::ostream &out) const; void output_blocker(std::ostream &out) const; static void write_status(std::ostream &out); @@ -162,6 +164,10 @@ private: static Thread *_main_thread; static Thread *_external_thread; + static void (*_sleep_func)(double); + static void (*_yield_func)(); + friend class PStatClientImpl; + public: static TypeHandle get_class_type() { return _type_handle; diff --git a/panda/src/pipeline/threadDummyImpl.I b/panda/src/pipeline/threadDummyImpl.I index 7bab1ae408..64a02bd5a0 100644 --- a/panda/src/pipeline/threadDummyImpl.I +++ b/panda/src/pipeline/threadDummyImpl.I @@ -124,3 +124,11 @@ yield() { INLINE void ThreadDummyImpl:: consider_yield() { } + +/** + * + */ +INLINE bool ThreadDummyImpl:: +get_context_switches(size_t &, size_t &) { + return false; +} diff --git a/panda/src/pipeline/threadDummyImpl.h b/panda/src/pipeline/threadDummyImpl.h index c7cea7a3ca..ce3dea76bb 100644 --- a/panda/src/pipeline/threadDummyImpl.h +++ b/panda/src/pipeline/threadDummyImpl.h @@ -57,6 +57,8 @@ public: INLINE static void sleep(double seconds); INLINE static void yield(); INLINE static void consider_yield(); + + INLINE static bool get_context_switches(size_t &, size_t &); }; #include "threadDummyImpl.I" diff --git a/panda/src/pipeline/threadPosixImpl.cxx b/panda/src/pipeline/threadPosixImpl.cxx index d46660194f..9108825422 100644 --- a/panda/src/pipeline/threadPosixImpl.cxx +++ b/panda/src/pipeline/threadPosixImpl.cxx @@ -21,6 +21,11 @@ #include "config_pipeline.h" #include +// Used for getrusage(). +#include +#include +#include + #ifdef ANDROID #include "config_express.h" #include @@ -248,6 +253,26 @@ bind_java_thread() { } #endif // ANDROID +/** + * Returns the number of context switches that occurred on the current thread. + * The first number is the total number of context switches reported by the OS, + * and the second number is the number of involuntary context switches (ie. the + * thread was scheduled out by the OS), if known, otherwise zero. + * Returns true if context switch information was available, false otherwise. + */ +bool ThreadPosixImpl:: +get_context_switches(size_t &total, size_t &involuntary) { +#ifdef RUSAGE_THREAD + struct rusage usage; + if (getrusage(RUSAGE_THREAD, &usage) == 0) { + total = (size_t)usage.ru_nvcsw; + involuntary = (size_t)usage.ru_nivcsw; + return true; + } +#endif + return false; +} + /** * The entry point of each thread. */ diff --git a/panda/src/pipeline/threadPosixImpl.h b/panda/src/pipeline/threadPosixImpl.h index aa9a4e02ad..65d7f951ca 100644 --- a/panda/src/pipeline/threadPosixImpl.h +++ b/panda/src/pipeline/threadPosixImpl.h @@ -63,6 +63,8 @@ public: static void bind_java_thread(); #endif + static bool get_context_switches(size_t &total, size_t &involuntary); + private: static void *root_func(void *data); static Thread *init_current_thread(); diff --git a/panda/src/pipeline/threadSimpleImpl.cxx b/panda/src/pipeline/threadSimpleImpl.cxx index 14d6f7561f..aeefd34d7e 100644 --- a/panda/src/pipeline/threadSimpleImpl.cxx +++ b/panda/src/pipeline/threadSimpleImpl.cxx @@ -55,6 +55,9 @@ ThreadSimpleImpl(Thread *parent_obj) : #ifdef WIN32 _win32_system_thread_id = 0; #endif + + _context_switches = 0; + _involuntary_context_switches = 0; } /** @@ -229,6 +232,32 @@ yield_this(bool volunteer) { } _manager->enqueue_ready(this, true); _manager->next_context(); + if (!volunteer) { + // Technically all context switches are voluntary here, but to make this + // distinction meaningful, we count it as involuntary on consider_yield(). + ++_involuntary_context_switches; + } +} + +/** + * Returns the number of context switches that occurred on the current thread. + * The first number is the total number of context switches reported by the OS, + * and the second number is the number of involuntary context switches (ie. the + * thread was scheduled out by the OS), if known, otherwise zero. + * Returns true if context switch information was available, false otherwise. + */ +bool ThreadSimpleImpl:: +get_context_switches(size_t &total, size_t &involuntary) { + ThreadSimpleManager *manager = ThreadSimpleManager::get_global_ptr(); + if (manager->is_same_system_thread()) { + ThreadSimpleImpl *thread = manager->get_current_thread(); + if (thread != nullptr) { + total = thread->_context_switches; + involuntary = thread->_involuntary_context_switches; + return true; + } + } + return false; } /** diff --git a/panda/src/pipeline/threadSimpleImpl.h b/panda/src/pipeline/threadSimpleImpl.h index 5bd6af3fdc..5e747ea917 100644 --- a/panda/src/pipeline/threadSimpleImpl.h +++ b/panda/src/pipeline/threadSimpleImpl.h @@ -77,6 +77,8 @@ public: INLINE static void write_status(std::ostream &out); + static bool get_context_switches(size_t &total, size_t &involuntary); + private: static void st_begin_thread(void *data); void begin_thread(); @@ -142,6 +144,9 @@ private: DWORD _win32_system_thread_id; #endif + size_t _context_switches; + size_t _involuntary_context_switches; + friend class ThreadSimpleManager; }; diff --git a/panda/src/pipeline/threadSimpleManager.cxx b/panda/src/pipeline/threadSimpleManager.cxx index 861e19cd77..d28a4372f6 100644 --- a/panda/src/pipeline/threadSimpleManager.cxx +++ b/panda/src/pipeline/threadSimpleManager.cxx @@ -233,6 +233,8 @@ next_context() { unref_delete(finished_thread->_parent_obj); } + ++_current_thread->_context_switches; + // Mark the current thread's resume point. #ifdef HAVE_PYTHON diff --git a/panda/src/pipeline/threadWin32Impl.I b/panda/src/pipeline/threadWin32Impl.I index 1ed6ed50ae..14f912f917 100644 --- a/panda/src/pipeline/threadWin32Impl.I +++ b/panda/src/pipeline/threadWin32Impl.I @@ -17,7 +17,8 @@ INLINE ThreadWin32Impl:: ThreadWin32Impl(Thread *parent_obj) : _cv(_mutex), - _parent_obj(parent_obj) + _parent_obj(parent_obj), + _profiling(0) { _thread = 0; _joinable = false; diff --git a/panda/src/pipeline/threadWin32Impl.cxx b/panda/src/pipeline/threadWin32Impl.cxx index c151589a47..552c8b0453 100644 --- a/panda/src/pipeline/threadWin32Impl.cxx +++ b/panda/src/pipeline/threadWin32Impl.cxx @@ -20,9 +20,34 @@ #include "pointerTo.h" #include "config_pipeline.h" +#include + static thread_local Thread *_current_thread = nullptr; static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT; +#if _WIN32_WINNT < 0x0601 +// Requires Windows 7. +static DWORD (*EnableThreadProfiling)(HANDLE, DWORD, DWORD64, HANDLE *) = nullptr; +static DWORD (*DisableThreadProfiling)(HANDLE) = nullptr; +static DWORD (*ReadThreadProfilingData)(HANDLE, DWORD, PPERFORMANCE_DATA data) = nullptr; + +static bool init_thread_profiling() { + static bool inited = false; + if (!inited) { + HMODULE kernel32 = GetModuleHandleA("kernel32.dll"); + EnableThreadProfiling = (decltype(EnableThreadProfiling))GetProcAddress(kernel32, "EnableThreadProfiling"); + DisableThreadProfiling = (decltype(DisableThreadProfiling))GetProcAddress(kernel32, "DisableThreadProfiling"); + ReadThreadProfilingData = (decltype(ReadThreadProfilingData))GetProcAddress(kernel32, "ReadThreadProfilingData"); + inited = true; + } + return (EnableThreadProfiling && DisableThreadProfiling && ReadThreadProfilingData); +} +#else +static bool init_thread_profiling() { + return true; +} +#endif + /** * Called by get_current_thread() if the current thread pointer is null; checks * whether it might be the main thread. @@ -170,6 +195,35 @@ bind_thread(Thread *thread) { _current_thread = thread; } +/** + * Returns the number of context switches that occurred on the current thread. + * The first number is the total number of context switches reported by the OS, + * and the second number is the number of involuntary context switches (ie. the + * thread was scheduled out by the OS), if known, otherwise zero. + * Returns true if context switch information was available, false otherwise. + */ +bool ThreadWin32Impl:: +get_context_switches(size_t &total, size_t &involuntary) { + Thread *thread = get_current_thread(); + ThreadWin32Impl *self = &thread->_impl; + + if (!self->_profiling && init_thread_profiling()) { + DWORD result = EnableThreadProfiling(GetCurrentThread(), THREAD_PROFILING_FLAG_DISPATCH, 0, &self->_profiling); + if (result != ERROR_SUCCESS) { + self->_profiling = 0; + return false; + } + } + + PERFORMANCE_DATA data = {sizeof(PERFORMANCE_DATA), PERFORMANCE_DATA_VERSION}; + if (ReadThreadProfilingData(self->_profiling, READ_THREAD_PROFILING_FLAG_DISPATCHING, &data) == ERROR_SUCCESS) { + total = data.ContextSwitchCount; + involuntary = 0; + return true; + } + return false; +} + /** * The entry point of each thread. */ @@ -212,6 +266,11 @@ root_func(LPVOID data) { self->_mutex.unlock(); } + if (self->_profiling != 0) { + DisableThreadProfiling(self->_profiling); + self->_profiling = 0; + } + // Now drop the parent object reference that we grabbed in start(). This // might delete the parent object, and in turn, delete the ThreadWin32Impl // object. diff --git a/panda/src/pipeline/threadWin32Impl.h b/panda/src/pipeline/threadWin32Impl.h index 64f64f31b6..3c9cfd275e 100644 --- a/panda/src/pipeline/threadWin32Impl.h +++ b/panda/src/pipeline/threadWin32Impl.h @@ -52,6 +52,8 @@ public: INLINE static void yield(); INLINE static void consider_yield(); + static bool get_context_switches(size_t &total, size_t &involuntary); + private: static DWORD WINAPI root_func(LPVOID data); @@ -69,6 +71,7 @@ private: DWORD _thread_id; bool _joinable; Status _status; + HANDLE _profiling; }; #include "threadWin32Impl.I" diff --git a/panda/src/pstatclient/config_pstatclient.cxx b/panda/src/pstatclient/config_pstatclient.cxx index 1335254590..5b97075bcc 100644 --- a/panda/src/pstatclient/config_pstatclient.cxx +++ b/panda/src/pstatclient/config_pstatclient.cxx @@ -82,6 +82,11 @@ ConfigVariableBool pstats_gpu_timing "is not usually an accurate reflectino of how long the actual " "operation takes on the video card.")); +ConfigVariableBool pstats_thread_profiling +("pstats-thread-profiling", false, + PRC_DESC("Set this true to query the system for thread statistics, such as " + "the number of context switches and time spent waiting.")); + // The rest are different in that they directly control the server, not the // client. ConfigVariableBool pstats_scroll_mode diff --git a/panda/src/pstatclient/config_pstatclient.h b/panda/src/pstatclient/config_pstatclient.h index 6de3c5c120..856031e1f2 100644 --- a/panda/src/pstatclient/config_pstatclient.h +++ b/panda/src/pstatclient/config_pstatclient.h @@ -38,6 +38,7 @@ extern EXPCL_PANDA_PSTATCLIENT ConfigVariableString pstats_host; extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port; extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate; extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing; +extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_thread_profiling; extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode; extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history; diff --git a/panda/src/pstatclient/pStatClient.cxx b/panda/src/pstatclient/pStatClient.cxx index 593ebf8f20..e08dff1605 100644 --- a/panda/src/pstatclient/pStatClient.cxx +++ b/panda/src/pstatclient/pStatClient.cxx @@ -905,6 +905,38 @@ stop(int collector_index, int thread_index, double as_of) { } } +/** + * Adds a pair of start/stop times in the given collector. Used in low-level + * code that knows there will not be any other collectors started and stopped + * in the meantime, and can be more efficient than a pair of start/stop times. + */ +void PStatClient:: +start_stop(int collector_index, int thread_index, double start, double stop) { + if (!client_is_connected()) { + return; + } + +#ifdef _DEBUG + nassertv(collector_index >= 0 && collector_index < get_num_collectors()); + nassertv(thread_index >= 0 && thread_index < get_num_threads()); +#endif + + Collector *collector = get_collector_ptr(collector_index); + InternalThread *thread = get_thread_ptr(thread_index); + + if (collector->is_active() && thread->_is_active) { + LightMutexHolder holder(thread->_thread_lock); + if (collector->_per_thread[thread_index]._nested_count == 0) { + // This collector wasn't already started in this thread; record a new + // data point. + if (thread->_thread_active) { + thread->_frame_data.add_start(collector_index, start); + thread->_frame_data.add_stop(collector_index, stop); + } + } + } +} + /** * Removes the level value from the indicated collector. The collector will * no longer be reported as having any particular level value. diff --git a/panda/src/pstatclient/pStatClient.h b/panda/src/pstatclient/pStatClient.h index 9b17f208f3..d164cb29b1 100644 --- a/panda/src/pstatclient/pStatClient.h +++ b/panda/src/pstatclient/pStatClient.h @@ -129,6 +129,7 @@ private: void start(int collector_index, int thread_index, double as_of); void stop(int collector_index, int thread_index); void stop(int collector_index, int thread_index, double as_of); + void start_stop(int collector_index, int thread_index, double start, double stop); void clear_level(int collector_index, int thread_index); void set_level(int collector_index, int thread_index, double level); @@ -216,6 +217,8 @@ private: bool _is_active; int _frame_number; double _next_packet; + size_t _context_switches = 0; + size_t _involuntary_context_switches = 0; bool _thread_active; @@ -321,6 +324,7 @@ private: void start(int collector_index, int thread_index, double as_of); void stop(int collector_index, int thread_index); void stop(int collector_index, int thread_index, double as_of); + void start_stop(int collector_index, int thread_index, double start, double stop); void clear_level(int collector_index, int thread_index); void set_level(int collector_index, int thread_index, double level); diff --git a/panda/src/pstatclient/pStatClientImpl.cxx b/panda/src/pstatclient/pStatClientImpl.cxx index 619c8fd33e..69415569fb 100644 --- a/panda/src/pstatclient/pStatClientImpl.cxx +++ b/panda/src/pstatclient/pStatClientImpl.cxx @@ -24,6 +24,8 @@ #include "config_pstatclient.h" #include "pStatProperties.h" #include "cmath.h" +#include "conditionVarWin32Impl.h" +#include "conditionVarPosixImpl.h" #include @@ -32,6 +34,15 @@ #include #endif +static PStatCollector _cswitch_pcollector("Context Switches"); +static PStatCollector _cswitch_sleep_pcollector("Context Switches:Sleep"); +static PStatCollector _cswitch_yield_pcollector("Context Switches:Yields"); +static PStatCollector _cswitch_cvar_pcollector("Context Switches:Condition Variable"); +static PStatCollector _cswitch_involuntary_pcollector("Context Switches:Involuntary"); +static PStatCollector _wait_sleep_pcollector("Wait:Sleep"); +static PStatCollector _wait_yield_pcollector("Wait:Yield"); +static PStatCollector _wait_cvar_pcollector("Wait:Condition Variable"); + /** * */ @@ -124,6 +135,106 @@ client_connect(std::string hostname, int port) { MutexDebug::increment_pstats(); #endif // DEBUG_THREADS + if (pstats_thread_profiling) { + // Replace the condition variable wait function with one that does PStats + // statistics. + _thread_profiling = true; + + Thread::_sleep_func = [] (double seconds) { + Thread *current_thread = Thread::get_current_thread(); + int thread_index = current_thread->get_pstats_index(); + if (thread_index >= 0) { + PStatClient *client = PStatClient::get_global_pstats(); + double start = client->get_real_time(); + ThreadImpl::sleep(seconds); + double stop = client->get_real_time(); + client->start_stop(_wait_sleep_pcollector.get_index(), thread_index, start, stop); + client->add_level(_cswitch_sleep_pcollector.get_index(), thread_index, 1); + } + else { + ThreadImpl::sleep(seconds); + } + }; + + Thread::_yield_func = [] () { + Thread *current_thread = Thread::get_current_thread(); + int thread_index = current_thread->get_pstats_index(); + if (thread_index >= 0) { + PStatClient *client = PStatClient::get_global_pstats(); + double start = client->get_real_time(); + ThreadImpl::yield(); + double stop = client->get_real_time(); + client->start_stop(_wait_yield_pcollector.get_index(), thread_index, start, stop); + client->add_level(_cswitch_yield_pcollector.get_index(), thread_index, 1); + } + else { + ThreadImpl::yield(); + } + }; + +#ifdef _WIN32 + ConditionVarWin32Impl::_wait_func = + [] (PCONDITION_VARIABLE cvar, PSRWLOCK lock, DWORD time, ULONG flags) { + Thread *current_thread = Thread::get_current_thread(); + int thread_index = current_thread->get_pstats_index(); + BOOL result; + if (thread_index >= 0) { + PStatClient *client = PStatClient::get_global_pstats(); + double start = client->get_real_time(); + result = SleepConditionVariableSRW(cvar, lock, time, flags); + double stop = client->get_real_time(); + client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop); + client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1); + } + else { + result = SleepConditionVariableSRW(cvar, lock, time, flags); + } + return result; + }; +#endif + +#ifdef HAVE_POSIX_THREADS + ConditionVarPosixImpl::_wait_func = + [] (pthread_cond_t *cvar, pthread_mutex_t *lock) { + Thread *current_thread = Thread::get_current_thread(); + int thread_index = current_thread->get_pstats_index(); + int result; + if (thread_index >= 0) { + PStatClient *client = PStatClient::get_global_pstats(); + double start = client->get_real_time(); + result = pthread_cond_wait(cvar, lock); + double stop = client->get_real_time(); + client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop); + client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1); + } + else { + result = pthread_cond_wait(cvar, lock); + } + return result; + }; + + ConditionVarPosixImpl::_timedwait_func = + [] (pthread_cond_t *cvar, pthread_mutex_t *lock, + const struct timespec *ts) { + Thread *current_thread = Thread::get_current_thread(); + int thread_index = current_thread->get_pstats_index(); + int result; + if (thread_index >= 0) { + PStatClient *client = PStatClient::get_global_pstats(); + double start = client->get_real_time(); + result = pthread_cond_timedwait(cvar, lock, ts); + double stop = client->get_real_time(); + client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop); + client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1); + } + else { + result = pthread_cond_timedwait(cvar, lock, ts); + } + return result; + }; +#endif + } + // Wait for the server hello. while (!_got_udp_port) { transmit_control_data(); @@ -137,6 +248,23 @@ client_connect(std::string hostname, int port) { */ void PStatClientImpl:: client_disconnect() { + if (_thread_profiling) { + // Switch the functions back to what they were. + Thread::_sleep_func = &ThreadImpl::sleep; + Thread::_yield_func = &ThreadImpl::yield; + +#ifdef _WIN32 + ConditionVarWin32Impl::_wait_func = &SleepConditionVariableSRW; +#endif + +#ifdef HAVE_POSIX_THREADS + ConditionVarPosixImpl::_wait_func = &pthread_cond_wait; + ConditionVarPosixImpl::_timedwait_func = &pthread_cond_timedwait; +#endif + + _thread_profiling = false; + } + if (_is_connected) { #ifdef DEBUG_THREADS MutexDebug::decrement_pstats(); @@ -207,6 +335,37 @@ new_frame(int thread_index, int frame_number) { if (frame_number == -1) { frame_number = pthread->_frame_number; } + + // Record the number of context switches on this thread. + if (_thread_profiling) { + size_t total, involuntary; + PT(Thread) thread = pthread->_thread.lock(); + if (thread != nullptr && thread->get_context_switches(total, involuntary)) { + size_t total_this_frame = total - pthread->_context_switches; + pthread->_context_switches = total; + frame_data.add_level(_cswitch_pcollector.get_index(), total_this_frame); + + if (involuntary != 0) { + size_t involuntary_this_frame = involuntary - pthread->_involuntary_context_switches; + pthread->_involuntary_context_switches = involuntary; + frame_data.add_level(_cswitch_involuntary_pcollector.get_index(), involuntary_this_frame); + } + } + _client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index); + _client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index); + _client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index); + } + } + else if (_thread_profiling) { + // Record the initial number of context switches. + PT(Thread) thread = pthread->_thread.lock(); + if (thread != nullptr) { + thread->get_context_switches(pthread->_context_switches, + pthread->_involuntary_context_switches); + } + _client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index); + _client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index); + _client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index); } pthread->_frame_data.clear(); diff --git a/panda/src/pstatclient/pStatClientImpl.h b/panda/src/pstatclient/pStatClientImpl.h index 2b767838bc..f758d4b262 100644 --- a/panda/src/pstatclient/pStatClientImpl.h +++ b/panda/src/pstatclient/pStatClientImpl.h @@ -111,6 +111,8 @@ private: double _udp_count_factor; unsigned int _tcp_count; unsigned int _udp_count; + + bool _thread_profiling = false; }; #include "pStatClientImpl.I"