pipeline: Improve performance of Thread::get_current_thread() substantially

Speedup is realised by using thread-local variables.  Note that on Windows we can't inline get_current_thread, but it's still faster this way than calling TlsGetValue.

In theory, the cache-line alignment should help avoid false sharing, but I have not profiled that extensively.
This commit is contained in:
rdb 2022-02-04 22:50:59 +01:00
parent 39d69f13de
commit 46a1ad3544
7 changed files with 81 additions and 114 deletions

View File

@ -42,7 +42,8 @@ class AsyncTask;
* object will automatically be destructed if no other pointers are * object will automatically be destructed if no other pointers are
* referencing it. * referencing it.
*/ */
class EXPCL_PANDA_PIPELINE Thread : public TypedReferenceCount, public Namable { // Due to a GCC bug, we can't use alignas() together with an attribute.
class ALIGN_64BYTE EXPCL_PANDA_PIPELINE Thread : public TypedReferenceCount, public Namable {
protected: protected:
Thread(const std::string &name, const std::string &sync_name); Thread(const std::string &name, const std::string &sync_name);
Thread(const Thread &copy) = delete; Thread(const Thread &copy) = delete;

View File

@ -46,26 +46,8 @@ prepare_for_exit() {
INLINE Thread *ThreadPosixImpl:: INLINE Thread *ThreadPosixImpl::
get_current_thread() { get_current_thread() {
TAU_PROFILE("Thread *ThreadPosixImpl::get_current_thread()", " ", TAU_USER); TAU_PROFILE("Thread *ThreadPosixImpl::get_current_thread()", " ", TAU_USER);
if (!_got_pt_ptr_index) { Thread *thread = _current_thread;
init_pt_ptr_index(); return (thread != nullptr) ? thread : init_current_thread();
}
return (Thread *)pthread_getspecific(_pt_ptr_index);
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
INLINE void ThreadPosixImpl::
bind_thread(Thread *thread) {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
int result = pthread_setspecific(_pt_ptr_index, thread);
nassertv(result == 0);
#ifdef ANDROID
bind_java_thread();
#endif
} }
/** /**

View File

@ -28,8 +28,8 @@
static JavaVM *java_vm = nullptr; static JavaVM *java_vm = nullptr;
#endif #endif
pthread_key_t ThreadPosixImpl::_pt_ptr_index = 0; __thread Thread *ThreadPosixImpl::_current_thread = nullptr;
bool ThreadPosixImpl::_got_pt_ptr_index = false; static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
/** /**
* *
@ -80,10 +80,6 @@ start(ThreadPriority priority, bool joinable) {
_status = S_start_called; _status = S_start_called;
_detached = false; _detached = false;
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
pthread_attr_t attr; pthread_attr_t attr;
pthread_attr_init(&attr); pthread_attr_init(&attr);
@ -186,6 +182,21 @@ get_unique_id() const {
return strm.str(); return strm.str();
} }
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
void ThreadPosixImpl::
bind_thread(Thread *thread) {
if (_current_thread == nullptr && thread == Thread::get_main_thread()) {
_main_thread_known.test_and_set(std::memory_order_relaxed);
}
_current_thread = thread;
#ifdef ANDROID
bind_java_thread();
#endif
}
#ifdef ANDROID #ifdef ANDROID
/** /**
* Attaches the thread to the Java virtual machine. If this returns true, a * Attaches the thread to the Java virtual machine. If this returns true, a
@ -247,8 +258,7 @@ root_func(void *data) {
// TAU_PROFILE("void ThreadPosixImpl::root_func()", " ", TAU_USER); // TAU_PROFILE("void ThreadPosixImpl::root_func()", " ", TAU_USER);
ThreadPosixImpl *self = (ThreadPosixImpl *)data; ThreadPosixImpl *self = (ThreadPosixImpl *)data;
int result = pthread_setspecific(_pt_ptr_index, self->_parent_obj); _current_thread = self->_parent_obj;
nassertr(result == 0, nullptr);
{ {
self->_mutex.lock(); self->_mutex.lock();
@ -302,27 +312,18 @@ root_func(void *data) {
} }
/** /**
* Allocate a new index to store the Thread parent pointer as a piece of per- * Called by get_current_thread() if the current thread pointer is null; checks
* thread private data. * whether it might be the main thread.
*/ */
void ThreadPosixImpl:: Thread *ThreadPosixImpl::
init_pt_ptr_index() { init_current_thread() {
nassertv(!_got_pt_ptr_index); Thread *thread = _current_thread;
if (!_main_thread_known.test_and_set(std::memory_order_relaxed)) {
int result = pthread_key_create(&_pt_ptr_index, nullptr); thread = Thread::get_main_thread();
if (result != 0) { _current_thread = thread;
thread_cat->error()
<< "Unable to associate Thread pointers with threads.\n";
return;
} }
nassertr(thread != nullptr, nullptr);
_got_pt_ptr_index = true; return thread;
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
Thread *main_thread_obj = Thread::get_main_thread();
result = pthread_setspecific(_pt_ptr_index, main_thread_obj);
nassertv(result == 0);
} }
#ifdef ANDROID #ifdef ANDROID

View File

@ -49,7 +49,7 @@ public:
INLINE static void prepare_for_exit(); INLINE static void prepare_for_exit();
INLINE static Thread *get_current_thread(); INLINE static Thread *get_current_thread();
INLINE static void bind_thread(Thread *thread); static void bind_thread(Thread *thread);
INLINE static bool is_threading_supported(); INLINE static bool is_threading_supported();
INLINE static bool is_true_threads(); INLINE static bool is_true_threads();
INLINE static bool is_simple_threads(); INLINE static bool is_simple_threads();
@ -65,7 +65,7 @@ public:
private: private:
static void *root_func(void *data); static void *root_func(void *data);
static void init_pt_ptr_index(); static Thread *init_current_thread();
// There appears to be a name collision with the word "Status". // There appears to be a name collision with the word "Status".
enum PStatus { enum PStatus {
@ -86,8 +86,7 @@ private:
JNIEnv *_jni_env; JNIEnv *_jni_env;
#endif #endif
static pthread_key_t _pt_ptr_index; static __thread Thread *_current_thread;
static bool _got_pt_ptr_index;
}; };
#include "threadPosixImpl.I" #include "threadPosixImpl.I"

View File

@ -38,30 +38,6 @@ INLINE void ThreadWin32Impl::
prepare_for_exit() { prepare_for_exit() {
} }
/**
*
*/
INLINE Thread *ThreadWin32Impl::
get_current_thread() {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
return (Thread *)TlsGetValue(_pt_ptr_index);
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
INLINE void ThreadWin32Impl::
bind_thread(Thread *thread) {
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
BOOL result = TlsSetValue(_pt_ptr_index, thread);
nassertv(result);
}
/** /**
* *
*/ */

View File

@ -20,8 +20,28 @@
#include "pointerTo.h" #include "pointerTo.h"
#include "config_pipeline.h" #include "config_pipeline.h"
DWORD ThreadWin32Impl::_pt_ptr_index = 0; static thread_local Thread *_current_thread = nullptr;
bool ThreadWin32Impl::_got_pt_ptr_index = false; static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
/**
* Called by get_current_thread() if the current thread pointer is null; checks
* whether it might be the main thread.
* Note that adding noinline speeds up this call *significantly*, don't remove!
*/
static __declspec(noinline) Thread *
init_current_thread() {
Thread *thread = _current_thread;
if (!_main_thread_known.test_and_set(std::memory_order_relaxed)) {
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
thread = Thread::get_main_thread();
_current_thread = thread;
}
// If this assertion triggers, you are making Panda calls from a thread
// that has not first been registered using Thread::bind_thread().
nassertr(thread != nullptr, nullptr);
return thread;
}
/** /**
* *
@ -62,10 +82,6 @@ start(ThreadPriority priority, bool joinable) {
_joinable = joinable; _joinable = joinable;
_status = S_start_called; _status = S_start_called;
if (!_got_pt_ptr_index) {
init_pt_ptr_index();
}
// Increment the parent object's reference count first. The thread will // Increment the parent object's reference count first. The thread will
// eventually decrement it when it terminates. // eventually decrement it when it terminates.
_parent_obj->ref(); _parent_obj->ref();
@ -133,6 +149,27 @@ get_unique_id() const {
return strm.str(); return strm.str();
} }
/**
*
*/
Thread *ThreadWin32Impl::
get_current_thread() {
Thread *thread = _current_thread;
return (thread != nullptr) ? thread : init_current_thread();
}
/**
* Associates the indicated Thread object with the currently-executing thread.
* You should not call this directly; use Thread::bind_thread() instead.
*/
void ThreadWin32Impl::
bind_thread(Thread *thread) {
if (_current_thread == nullptr && thread == Thread::get_main_thread()) {
_main_thread_known.test_and_set(std::memory_order_relaxed);
}
_current_thread = thread;
}
/** /**
* The entry point of each thread. * The entry point of each thread.
*/ */
@ -143,8 +180,7 @@ root_func(LPVOID data) {
// TAU_PROFILE("void ThreadWin32Impl::root_func()", " ", TAU_USER); // TAU_PROFILE("void ThreadWin32Impl::root_func()", " ", TAU_USER);
ThreadWin32Impl *self = (ThreadWin32Impl *)data; ThreadWin32Impl *self = (ThreadWin32Impl *)data;
BOOL result = TlsSetValue(_pt_ptr_index, self->_parent_obj); _current_thread = self->_parent_obj;
nassertr(result, 1);
{ {
self->_mutex.lock(); self->_mutex.lock();
@ -185,28 +221,4 @@ root_func(LPVOID data) {
return 0; return 0;
} }
/**
* Allocate a new index to store the Thread parent pointer as a piece of per-
* thread private data.
*/
void ThreadWin32Impl::
init_pt_ptr_index() {
nassertv(!_got_pt_ptr_index);
_pt_ptr_index = TlsAlloc();
if (_pt_ptr_index == TLS_OUT_OF_INDEXES) {
thread_cat->error()
<< "Unable to associate Thread pointers with threads.\n";
return;
}
_got_pt_ptr_index = true;
// Assume that we must be in the main thread, since this method must be
// called before the first thread is spawned.
Thread *main_thread_obj = Thread::get_main_thread();
BOOL result = TlsSetValue(_pt_ptr_index, main_thread_obj);
nassertv(result);
}
#endif // THREAD_WIN32_IMPL #endif // THREAD_WIN32_IMPL

View File

@ -43,8 +43,8 @@ public:
INLINE static void prepare_for_exit(); INLINE static void prepare_for_exit();
INLINE static Thread *get_current_thread(); static Thread *get_current_thread();
INLINE static void bind_thread(Thread *thread); static void bind_thread(Thread *thread);
INLINE static bool is_threading_supported(); INLINE static bool is_threading_supported();
INLINE static bool is_true_threads(); INLINE static bool is_true_threads();
INLINE static bool is_simple_threads(); INLINE static bool is_simple_threads();
@ -54,7 +54,6 @@ public:
private: private:
static DWORD WINAPI root_func(LPVOID data); static DWORD WINAPI root_func(LPVOID data);
static void init_pt_ptr_index();
enum Status { enum Status {
S_new, S_new,
@ -70,9 +69,6 @@ private:
DWORD _thread_id; DWORD _thread_id;
bool _joinable; bool _joinable;
Status _status; Status _status;
static DWORD _pt_ptr_index;
static bool _got_pt_ptr_index;
}; };
#include "threadWin32Impl.I" #include "threadWin32Impl.I"