From 809f9b04f65865348bf8bb680da6a8a52be6d01d Mon Sep 17 00:00:00 2001 From: rdb Date: Tue, 4 Sep 2018 11:53:27 +0200 Subject: [PATCH] Fix problems with spinlock mutex/cvar implementation This reimplements the spinlock on top of std::atomic_flag, which is guaranteed to be lockless. It also inserts the PAUSE (REP NOP) instruction which is strongly recommended to be placed in busy-wait loops by Intel. This also includes a recursive spinlock implementation. The spinlock implementation is disabled by default, but can be enabled by adding the --override MUTEX_SPINLOCK=1 flag to makepanda. --- dtool/src/dtoolbase/mutexSpinlockImpl.I | 11 +--- dtool/src/dtoolbase/mutexSpinlockImpl.cxx | 11 +++- dtool/src/dtoolbase/mutexSpinlockImpl.h | 8 ++- makepanda/makepanda.py | 1 + .../src/pipeline/conditionVarSpinlockImpl.cxx | 31 +++++++++- panda/src/pipeline/conditionVarSpinlockImpl.h | 1 + panda/src/pipeline/lightReMutexDirect.I | 15 ----- panda/src/pipeline/lightReMutexDirect.h | 4 +- panda/src/pipeline/mutexTrueImpl.h | 7 +++ panda/src/pipeline/p3pipeline_composite2.cxx | 1 + panda/src/pipeline/reMutexDirect.I | 12 ++++ panda/src/pipeline/reMutexDirect.cxx | 4 +- panda/src/pipeline/reMutexDirect.h | 4 +- panda/src/pipeline/reMutexSpinlockImpl.I | 23 ++++++++ panda/src/pipeline/reMutexSpinlockImpl.cxx | 57 +++++++++++++++++++ panda/src/pipeline/reMutexSpinlockImpl.h | 54 ++++++++++++++++++ 16 files changed, 208 insertions(+), 36 deletions(-) create mode 100644 panda/src/pipeline/reMutexSpinlockImpl.I create mode 100644 panda/src/pipeline/reMutexSpinlockImpl.cxx create mode 100644 panda/src/pipeline/reMutexSpinlockImpl.h diff --git a/dtool/src/dtoolbase/mutexSpinlockImpl.I b/dtool/src/dtoolbase/mutexSpinlockImpl.I index b3eb084181..53ce89445f 100644 --- a/dtool/src/dtoolbase/mutexSpinlockImpl.I +++ b/dtool/src/dtoolbase/mutexSpinlockImpl.I @@ -11,13 +11,6 @@ * @date 2006-04-11 */ -/** - * - */ -constexpr MutexSpinlockImpl:: -MutexSpinlockImpl() : _lock(0) { -} - /** * */ @@ -33,7 +26,7 @@ lock() { */ INLINE bool MutexSpinlockImpl:: try_lock() { - return (AtomicAdjust::compare_and_exchange(_lock, 0, 1) == 0); + return !_flag.test_and_set(std::memory_order_acquire); } /** @@ -41,5 +34,5 @@ try_lock() { */ INLINE void MutexSpinlockImpl:: unlock() { - AtomicAdjust::set(_lock, 0); + _flag.clear(std::memory_order_release); } diff --git a/dtool/src/dtoolbase/mutexSpinlockImpl.cxx b/dtool/src/dtoolbase/mutexSpinlockImpl.cxx index 32a84fa28f..73f2683ff2 100644 --- a/dtool/src/dtoolbase/mutexSpinlockImpl.cxx +++ b/dtool/src/dtoolbase/mutexSpinlockImpl.cxx @@ -17,12 +17,21 @@ #include "mutexSpinlockImpl.h" +#if defined(__i386__) || defined(__x86_64) || defined(_M_IX86) || defined(_M_X64) +#include +#define PAUSE() _mm_pause() +#else +#define PAUSE() +#endif + /** * */ void MutexSpinlockImpl:: do_lock() { - while (AtomicAdjust::compare_and_exchange(_lock, 0, 1) != 0) { + // Loop until we changed the flag from 0 to 1 (and it wasn't already 1). + while (_flag.test_and_set(std::memory_order_acquire)) { + PAUSE(); } } diff --git a/dtool/src/dtoolbase/mutexSpinlockImpl.h b/dtool/src/dtoolbase/mutexSpinlockImpl.h index c7dfb72cf2..cd858f5551 100644 --- a/dtool/src/dtoolbase/mutexSpinlockImpl.h +++ b/dtool/src/dtoolbase/mutexSpinlockImpl.h @@ -19,7 +19,9 @@ #ifdef MUTEX_SPINLOCK -#include "atomicAdjust.h" +#ifdef PHAVE_ATOMIC +#include +#endif /** * Uses a simple user-space spinlock to implement a mutex. It is usually not @@ -29,7 +31,7 @@ */ class EXPCL_DTOOL_DTOOLBASE MutexSpinlockImpl { public: - constexpr MutexSpinlockImpl(); + constexpr MutexSpinlockImpl() noexcept = default; MutexSpinlockImpl(const MutexSpinlockImpl ©) = delete; MutexSpinlockImpl &operator = (const MutexSpinlockImpl ©) = delete; @@ -42,7 +44,7 @@ public: private: void do_lock(); - TVOLATILE AtomicAdjust::Integer _lock; + std::atomic_flag _flag = ATOMIC_FLAG_INIT; }; #include "mutexSpinlockImpl.I" diff --git a/makepanda/makepanda.py b/makepanda/makepanda.py index 00dd2c3cd3..e371fbd3e8 100755 --- a/makepanda/makepanda.py +++ b/makepanda/makepanda.py @@ -2271,6 +2271,7 @@ DTOOL_CONFIG=[ ("OS_SIMPLE_THREADS", '1', '1'), ("DEBUG_THREADS", 'UNDEF', 'UNDEF'), ("HAVE_POSIX_THREADS", 'UNDEF', '1'), + ("MUTEX_SPINLOCK", 'UNDEF', 'UNDEF'), ("HAVE_AUDIO", '1', '1'), ("NOTIFY_DEBUG", 'UNDEF', 'UNDEF'), ("DO_PSTATS", 'UNDEF', 'UNDEF'), diff --git a/panda/src/pipeline/conditionVarSpinlockImpl.cxx b/panda/src/pipeline/conditionVarSpinlockImpl.cxx index f36bc83125..d35d8cd1f7 100644 --- a/panda/src/pipeline/conditionVarSpinlockImpl.cxx +++ b/panda/src/pipeline/conditionVarSpinlockImpl.cxx @@ -16,6 +16,14 @@ #ifdef MUTEX_SPINLOCK #include "conditionVarSpinlockImpl.h" +#include "trueClock.h" + +#if defined(__i386__) || defined(__x86_64) || defined(_M_IX86) || defined(_M_X64) +#include +#define PAUSE() _mm_pause() +#else +#define PAUSE() +#endif /** * @@ -23,12 +31,31 @@ void ConditionVarSpinlockImpl:: wait() { AtomicAdjust::Integer current = _event; - _mutex.release(); + _mutex.unlock(); while (AtomicAdjust::get(_event) == current) { + PAUSE(); } - _mutex.acquire(); + _mutex.lock(); +} + +/** + * + */ +void ConditionVarSpinlockImpl:: +wait(double timeout) { + TrueClock *clock = TrueClock::get_global_ptr(); + double end_time = clock->get_short_time() + timeout; + + AtomicAdjust::Integer current = _event; + _mutex.unlock(); + + while (AtomicAdjust::get(_event) == current && clock->get_short_time() < end_time) { + PAUSE(); + } + + _mutex.lock(); } #endif // MUTEX_SPINLOCK diff --git a/panda/src/pipeline/conditionVarSpinlockImpl.h b/panda/src/pipeline/conditionVarSpinlockImpl.h index 5d8da7bbf7..34b61645f8 100644 --- a/panda/src/pipeline/conditionVarSpinlockImpl.h +++ b/panda/src/pipeline/conditionVarSpinlockImpl.h @@ -37,6 +37,7 @@ public: INLINE ~ConditionVarSpinlockImpl(); void wait(); + void wait(double timeout); INLINE void notify(); INLINE void notify_all(); diff --git a/panda/src/pipeline/lightReMutexDirect.I b/panda/src/pipeline/lightReMutexDirect.I index 08bb0daa67..8484092ecc 100644 --- a/panda/src/pipeline/lightReMutexDirect.I +++ b/panda/src/pipeline/lightReMutexDirect.I @@ -11,21 +11,6 @@ * @date 2008-10-08 */ -/** - * - */ -INLINE LightReMutexDirect:: -LightReMutexDirect() -#ifndef HAVE_REMUTEXIMPL - : _cvar_impl(_lock_impl) -#endif -{ -#ifndef HAVE_REMUTEXIMPL - _locking_thread = nullptr; - _lock_count = 0; -#endif -} - /** * Alias for acquire() to match C++11 semantics. * @see acquire() diff --git a/panda/src/pipeline/lightReMutexDirect.h b/panda/src/pipeline/lightReMutexDirect.h index 56371cc443..d1bc022740 100644 --- a/panda/src/pipeline/lightReMutexDirect.h +++ b/panda/src/pipeline/lightReMutexDirect.h @@ -29,7 +29,7 @@ class Thread; */ class EXPCL_PANDA_PIPELINE LightReMutexDirect { protected: - INLINE LightReMutexDirect(); + LightReMutexDirect() = default; LightReMutexDirect(const LightReMutexDirect ©) = delete; ~LightReMutexDirect() = default; @@ -57,7 +57,7 @@ PUBLISHED: private: #ifdef HAVE_REMUTEXTRUEIMPL - mutable ReMutexImpl _impl; + mutable ReMutexTrueImpl _impl; #else // If we don't have a reentrant mutex, use the one we hand-rolled in diff --git a/panda/src/pipeline/mutexTrueImpl.h b/panda/src/pipeline/mutexTrueImpl.h index 2366159a38..5a4850a4a0 100644 --- a/panda/src/pipeline/mutexTrueImpl.h +++ b/panda/src/pipeline/mutexTrueImpl.h @@ -43,6 +43,13 @@ typedef MutexImpl MutexTrueImpl; #if HAVE_REMUTEXIMPL typedef ReMutexImpl ReMutexTrueImpl; #define HAVE_REMUTEXTRUEIMPL 1 + +#elif MUTEX_SPINLOCK +// This is defined here because it needs code from pipeline. +#include "reMutexSpinlockImpl.h" +typedef ReMutexSpinlockImpl ReMutexTrueImpl; +#define HAVE_REMUTEXTRUEIMPL 1 + #else #undef HAVE_REMUTEXTRUEIMPL #endif // HAVE_REMUTEXIMPL diff --git a/panda/src/pipeline/p3pipeline_composite2.cxx b/panda/src/pipeline/p3pipeline_composite2.cxx index e6607a6ebf..e9e928ee4c 100644 --- a/panda/src/pipeline/p3pipeline_composite2.cxx +++ b/panda/src/pipeline/p3pipeline_composite2.cxx @@ -13,6 +13,7 @@ #include "reMutex.cxx" #include "reMutexDirect.cxx" #include "reMutexHolder.cxx" +#include "reMutexSpinlockImpl.cxx" #include "thread.cxx" #include "threadDummyImpl.cxx" #include "threadPosixImpl.cxx" diff --git a/panda/src/pipeline/reMutexDirect.I b/panda/src/pipeline/reMutexDirect.I index 0785473e7c..e7fa3a6fce 100644 --- a/panda/src/pipeline/reMutexDirect.I +++ b/panda/src/pipeline/reMutexDirect.I @@ -33,7 +33,11 @@ ReMutexDirect() INLINE void ReMutexDirect:: lock() { TAU_PROFILE("void ReMutexDirect::acquire()", " ", TAU_USER); +#ifdef HAVE_REMUTEXTRUEIMPL _impl.lock(); +#else + ((ReMutexDirect *)this)->do_lock(); +#endif // HAVE_REMUTEXTRUEIMPL } /** @@ -43,7 +47,11 @@ lock() { INLINE bool ReMutexDirect:: try_lock() { TAU_PROFILE("void ReMutexDirect::try_acquire()", " ", TAU_USER); +#ifdef HAVE_REMUTEXTRUEIMPL return _impl.try_lock(); +#else + return ((ReMutexDirect *)this)->do_try_lock(); +#endif // HAVE_REMUTEXTRUEIMPL } /** @@ -53,7 +61,11 @@ try_lock() { INLINE void ReMutexDirect:: unlock() { TAU_PROFILE("void ReMutexDirect::unlock()", " ", TAU_USER); +#ifdef HAVE_REMUTEXTRUEIMPL _impl.unlock(); +#else + ((ReMutexDirect *)this)->do_unlock(); +#endif // HAVE_REMUTEXTRUEIMPL } /** diff --git a/panda/src/pipeline/reMutexDirect.cxx b/panda/src/pipeline/reMutexDirect.cxx index 7bfdcb8f8d..f6cd59e4de 100644 --- a/panda/src/pipeline/reMutexDirect.cxx +++ b/panda/src/pipeline/reMutexDirect.cxx @@ -141,11 +141,11 @@ do_elevate_lock() { * mutex). */ void ReMutexDirect:: -do_unlock() { +do_unlock(Thread *current_thread) { _lock_impl.lock(); #ifdef _DEBUG - if (_locking_thread != Thread::get_current_thread()) { + if (_locking_thread != current_thread) { std::ostringstream ostr; ostr << *_locking_thread << " attempted to release " << *this << " which it does not own"; diff --git a/panda/src/pipeline/reMutexDirect.h b/panda/src/pipeline/reMutexDirect.h index b6f5215319..faf46d0fb0 100644 --- a/panda/src/pipeline/reMutexDirect.h +++ b/panda/src/pipeline/reMutexDirect.h @@ -59,7 +59,7 @@ PUBLISHED: private: #ifdef HAVE_REMUTEXTRUEIMPL - mutable ReMutexImpl _impl; + mutable ReMutexTrueImpl _impl; #else // If we don't have a reentrant mutex, we have to hand-roll one. @@ -68,7 +68,7 @@ private: INLINE bool do_try_lock(); bool do_try_lock(Thread *current_thread); void do_elevate_lock(); - void do_unlock(); + void do_unlock(Thread *current_thread = Thread::get_current_thread()); Thread *_locking_thread; int _lock_count; diff --git a/panda/src/pipeline/reMutexSpinlockImpl.I b/panda/src/pipeline/reMutexSpinlockImpl.I new file mode 100644 index 0000000000..5f5e2d89c3 --- /dev/null +++ b/panda/src/pipeline/reMutexSpinlockImpl.I @@ -0,0 +1,23 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file reMutexSpinlockImpl.I + * @author rdb + * @date 2018-09-03 + */ + +/** + * + */ +INLINE void ReMutexSpinlockImpl:: +unlock() { + assert(_counter > 0); + if (!--_counter) { + AtomicAdjust::set_ptr(_locking_thread, nullptr); + } +} diff --git a/panda/src/pipeline/reMutexSpinlockImpl.cxx b/panda/src/pipeline/reMutexSpinlockImpl.cxx new file mode 100644 index 0000000000..0de12f986f --- /dev/null +++ b/panda/src/pipeline/reMutexSpinlockImpl.cxx @@ -0,0 +1,57 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file reMutexSpinlockImpl.cxx + * @author rdb + * @date 2018-09-03 + */ + +#include "selectThreadImpl.h" + +#ifdef MUTEX_SPINLOCK + +#include "reMutexSpinlockImpl.h" +#include "thread.h" + +#if defined(__i386__) || defined(__x86_64) || defined(_M_IX86) || defined(_M_X64) +#include +#define PAUSE() _mm_pause() +#else +#define PAUSE() +#endif + +/** + * + */ +void ReMutexSpinlockImpl:: +lock() { + Thread *current_thread = Thread::get_current_thread(); + Thread *locking_thread = (Thread *)AtomicAdjust::compare_and_exchange_ptr(_locking_thread, nullptr, current_thread); + while (locking_thread != nullptr && locking_thread != current_thread) { + PAUSE(); + locking_thread = (Thread *)AtomicAdjust::compare_and_exchange_ptr(_locking_thread, nullptr, current_thread); + } + ++_counter; +} + +/** + * + */ +bool ReMutexSpinlockImpl:: +try_lock() { + Thread *current_thread = Thread::get_current_thread(); + Thread *locking_thread = (Thread *)AtomicAdjust::compare_and_exchange_ptr(_locking_thread, nullptr, current_thread); + if (locking_thread == nullptr || locking_thread == current_thread) { + ++_counter; + return true; + } else { + return false; + } +} + +#endif // MUTEX_SPINLOCK diff --git a/panda/src/pipeline/reMutexSpinlockImpl.h b/panda/src/pipeline/reMutexSpinlockImpl.h new file mode 100644 index 0000000000..666ed832e3 --- /dev/null +++ b/panda/src/pipeline/reMutexSpinlockImpl.h @@ -0,0 +1,54 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file reMutexSpinlockImpl.h + * @author rdb + * @date 2018-09-03 + */ + +#ifndef REMUTEXSPINLOCKIMPL_H +#define REMUTEXSPINLOCKIMPL_H + +#include "dtoolbase.h" +#include "selectThreadImpl.h" + +#ifdef MUTEX_SPINLOCK + +#include "atomicAdjust.h" + +class Thread; + +/** + * Uses a simple user-space spinlock to implement a mutex. It is usually not + * a good idea to use this implementation, unless you are building Panda for a + * specific application on a specific SMP machine, and you are confident that + * you have at least as many CPU's as you have threads. + */ +class EXPCL_PANDA_PIPELINE ReMutexSpinlockImpl { +public: + constexpr ReMutexSpinlockImpl() noexcept = default; + ReMutexSpinlockImpl(const ReMutexSpinlockImpl ©) = delete; + + ReMutexSpinlockImpl &operator = (const ReMutexSpinlockImpl ©) = delete; + +public: + void lock(); + bool try_lock(); + INLINE void unlock(); + +private: + AtomicAdjust::Pointer _locking_thread = nullptr; + unsigned int _counter = 0; +}; + + +#include "reMutexSpinlockImpl.I" + +#endif // MUTEX_SPINLOCK + +#endif