pipeline: Improve performance of Thread::get_current_thread() substantially

Speedup is realised by using thread-local variables.  Note that on Windows we can't inline get_current_thread, but it's still faster this way than calling TlsGetValue.

In theory the cache line alignment should help avoid false sharing but I have not profiled that extensively.
This commit is contained in:
rdb 2022-02-04 22:50:59 +01:00
parent 39d69f13de
commit 46a1ad3544
7 changed files with 81 additions and 114 deletions

View File

@ -42,7 +42,8 @@ class AsyncTask;
* object will automatically be destructed if no other pointers are
* referencing it.
*/
class EXPCL_PANDA_PIPELINE Thread : public TypedReferenceCount, public Namable {
// Due to a GCC bug, we can't use alignas() together with an attribute.
class ALIGN_64BYTE EXPCL_PANDA_PIPELINE Thread : public TypedReferenceCount, public Namable {
protected:
Thread(const std::string &name, const std::string &sync_name);
Thread(const Thread &copy) = delete;

View File

@ -46,26 +46,8 @@ prepare_for_exit() {
INLINE Thread *ThreadPosixImpl::
get_current_thread() {
TAU_PROFILE("Thread *ThreadPosixImpl::get_current_thread()", " ", TAU_USER);
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
return (Thread *)pthread_getspecific(_pt_ptr_index);
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
INLINE void ThreadPosixImpl::
bind_thread(Thread *thread) {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
int result = pthread_setspecific(_pt_ptr_index, thread);
nassertv(result == 0);
#ifdef ANDROID
bind_java_thread();
#endif
Thread *thread = _current_thread;
return (thread != nullptr) ? thread : init_current_thread();
}
/**

View File

@ -28,8 +28,8 @@
static JavaVM *java_vm = nullptr;
#endif
pthread_key_t ThreadPosixImpl::_pt_ptr_index = 0;
bool ThreadPosixImpl::_got_pt_ptr_index = false;
__thread Thread *ThreadPosixImpl::_current_thread = nullptr;
static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
/**
*
@ -80,10 +80,6 @@ start(ThreadPriority priority, bool joinable) {
_status = S_start_called;
_detached = false;
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
pthread_attr_t attr;
pthread_attr_init(&attr);
@ -186,6 +182,21 @@ get_unique_id() const {
return strm.str();
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
void ThreadPosixImpl::
bind_thread(Thread *thread) {
if (_current_thread == nullptr && thread == Thread::get_main_thread()) {
_main_thread_known.test_and_set(std::memory_order_relaxed);
}
_current_thread = thread;
#ifdef ANDROID
bind_java_thread();
#endif
}
#ifdef ANDROID
/**
* Attaches the thread to the Java virtual machine. If this returns true, a
@ -247,8 +258,7 @@ root_func(void *data) {
// TAU_PROFILE("void ThreadPosixImpl::root_func()", " ", TAU_USER);
ThreadPosixImpl *self = (ThreadPosixImpl *)data;
int result = pthread_setspecific(_pt_ptr_index, self->_parent_obj);
nassertr(result == 0, nullptr);
_current_thread = self->_parent_obj;
{
self->_mutex.lock();
@ -302,27 +312,18 @@ root_func(void *data) {
}
/**
* Allocate a new index to store the Thread parent pointer as a piece of per-
* thread private data.
* Called by get_current_thread() if the current therad pointer is null; checks
* whether it might be the main thread.
*/
void ThreadPosixImpl::
init_pt_ptr_index() {
nassertv(!_got_pt_ptr_index);
int result = pthread_key_create(&_pt_ptr_index, nullptr);
if (result != 0) {
thread_cat->error()
<< "Unable to associate Thread pointers with threads.\n";
return;
Thread *ThreadPosixImpl::
init_current_thread() {
Thread *thread = _current_thread;
if (!_main_thread_known.test_and_set(std::memory_order_relaxed)) {
thread = Thread::get_main_thread();
_current_thread = thread;
}
_got_pt_ptr_index = true;
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
Thread *main_thread_obj = Thread::get_main_thread();
result = pthread_setspecific(_pt_ptr_index, main_thread_obj);
nassertv(result == 0);
nassertr(thread != nullptr, nullptr);
return thread;
}
#ifdef ANDROID

View File

@ -49,7 +49,7 @@ public:
INLINE static void prepare_for_exit();
INLINE static Thread *get_current_thread();
INLINE static void bind_thread(Thread *thread);
static void bind_thread(Thread *thread);
INLINE static bool is_threading_supported();
INLINE static bool is_true_threads();
INLINE static bool is_simple_threads();
@ -65,7 +65,7 @@ public:
private:
static void *root_func(void *data);
static void init_pt_ptr_index();
static Thread *init_current_thread();
// There appears to be a name collision with the word "Status".
enum PStatus {
@ -86,8 +86,7 @@ private:
JNIEnv *_jni_env;
#endif
static pthread_key_t _pt_ptr_index;
static bool _got_pt_ptr_index;
static __thread Thread *_current_thread;
};
#include "threadPosixImpl.I"

View File

@ -38,30 +38,6 @@ INLINE void ThreadWin32Impl::
prepare_for_exit() {
}
/**
*
*/
INLINE Thread *ThreadWin32Impl::
get_current_thread() {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
return (Thread *)TlsGetValue(_pt_ptr_index);
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
INLINE void ThreadWin32Impl::
bind_thread(Thread *thread) {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
BOOL result = TlsSetValue(_pt_ptr_index, thread);
nassertv(result);
}
/**
*
*/

View File

@ -20,8 +20,28 @@
#include "pointerTo.h"
#include "config_pipeline.h"
DWORD ThreadWin32Impl::_pt_ptr_index = 0;
bool ThreadWin32Impl::_got_pt_ptr_index = false;
static thread_local Thread *_current_thread = nullptr;
static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
/**
* Called by get_current_thread() if the current thread pointer is null; checks
* whether it might be the main thread.
* Note that adding noinline speeds up this call *significantly*, don't remove!
*/
static __declspec(noinline) Thread *
init_current_thread() {
Thread *thread = _current_thread;
if (!_main_thread_known.test_and_set(std::memory_order_relaxed)) {
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
thread = Thread::get_main_thread();
_current_thread = thread;
}
// If this assertion triggers, you are making Panda calls from a thread
// that has not first been registered using Thread::bind_thread().
nassertr(thread != nullptr, nullptr);
return thread;
}
/**
*
@ -62,10 +82,6 @@ start(ThreadPriority priority, bool joinable) {
_joinable = joinable;
_status = S_start_called;
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
// Increment the parent object's reference count first. The thread will
// eventually decrement it when it terminates.
_parent_obj->ref();
@ -133,6 +149,27 @@ get_unique_id() const {
return strm.str();
}
/**
*
*/
Thread *ThreadWin32Impl::
get_current_thread() {
Thread *thread = _current_thread;
return (thread != nullptr) ? thread : init_current_thread();
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
void ThreadWin32Impl::
bind_thread(Thread *thread) {
if (_current_thread == nullptr && thread == Thread::get_main_thread()) {
_main_thread_known.test_and_set(std::memory_order_relaxed);
}
_current_thread = thread;
}
/**
* The entry point of each thread.
*/
@ -143,8 +180,7 @@ root_func(LPVOID data) {
// TAU_PROFILE("void ThreadWin32Impl::root_func()", " ", TAU_USER);
ThreadWin32Impl *self = (ThreadWin32Impl *)data;
BOOL result = TlsSetValue(_pt_ptr_index, self->_parent_obj);
nassertr(result, 1);
_current_thread = self->_parent_obj;
{
self->_mutex.lock();
@ -185,28 +221,4 @@ root_func(LPVOID data) {
return 0;
}
/**
* Allocate a new index to store the Thread parent pointer as a piece of per-
* thread private data.
*/
void ThreadWin32Impl::
init_pt_ptr_index() {
nassertv(!_got_pt_ptr_index);
_pt_ptr_index = TlsAlloc();
if (_pt_ptr_index == TLS_OUT_OF_INDEXES) {
thread_cat->error()
<< "Unable to associate Thread pointers with threads.\n";
return;
}
_got_pt_ptr_index = true;
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
Thread *main_thread_obj = Thread::get_main_thread();
BOOL result = TlsSetValue(_pt_ptr_index, main_thread_obj);
nassertv(result);
}
#endif // THREAD_WIN32_IMPL

View File

@ -43,8 +43,8 @@ public:
INLINE static void prepare_for_exit();
INLINE static Thread *get_current_thread();
INLINE static void bind_thread(Thread *thread);
static Thread *get_current_thread();
static void bind_thread(Thread *thread);
INLINE static bool is_threading_supported();
INLINE static bool is_true_threads();
INLINE static bool is_simple_threads();
@ -54,7 +54,6 @@ public:
private:
static DWORD WINAPI root_func(LPVOID data);
static void init_pt_ptr_index();
enum Status {
S_new,
@ -70,9 +69,6 @@ private:
DWORD _thread_id;
bool _joinable;
Status _status;
static DWORD _pt_ptr_index;
static bool _got_pt_ptr_index;
};
#include "threadWin32Impl.I"