dtoolbase: new mutex and condition variable impl on Windows

This uses SRWLock on Windows Vista and above, and falls back to a hand-rolled implementation on Windows XP built on Events (plus a spinlock on multi-core systems).

Since SRWLocks aren't recursive, ReMutexWin32Impl has been added to implement recursive mutexes, using old-fashioned critical sections.

MutexImpl now has a constexpr constructor on all implementations.
rdb 2019-02-06 23:55:46 +01:00
parent 7e0952e367
commit 89392f0e2d
8 changed files with 492 additions and 89 deletions
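
As an illustration of the last point, here is a minimal sketch (hypothetical names; std::mutex stands in for MutexImpl) of why a constexpr constructor matters for a global mutex: the object gets constant initialization, so it lives in zeroed static storage before any dynamic initializer runs and can be locked from other translation units' static constructors regardless of initialization order.

#include <mutex>   // std::mutex also has a constexpr default constructor

struct Registry {
  std::mutex _lock;   // stand-in for MutexImpl in this sketch
  int _count = 0;
  void add() { std::lock_guard<std::mutex> g(_lock); ++_count; }
};

static Registry registry;           // constant-initialized, usable immediately

struct Registrar {
  Registrar() { registry.add(); }   // runs at static init time, order unknown
};

static Registrar r1, r2;            // safe regardless of initialization order

int main() {}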

View File

@@ -34,7 +34,7 @@ typedef MutexSpinlockImpl MutexImpl;
#include "mutexWin32Impl.h"
typedef MutexWin32Impl MutexImpl;
typedef MutexWin32Impl ReMutexImpl; // Win32 Mutexes are always reentrant.
typedef ReMutexWin32Impl ReMutexImpl;
#define HAVE_REMUTEXIMPL 1
#elif defined(THREAD_POSIX_IMPL)
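
A usage sketch of what these typedefs buy downstream code (hypothetical consumer code; the guard class and the assumed header name are not part of the commit):

#include "mutexImpl.h"   // assumed header providing MutexImpl / ReMutexImpl

class ScopedLock {
public:
  explicit ScopedLock(MutexImpl &m) : _m(m) { _m.lock(); }
  ~ScopedLock() { _m.unlock(); }
private:
  MutexImpl &_m;
};

static MutexImpl table_lock;     // now constexpr-constructible, safe as a global

void update_table() {
  ScopedLock guard(table_lock);  // unlocked automatically on scope exit
  // ... mutate shared state ...
}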

View File

@@ -16,7 +16,11 @@
*/
INLINE MutexWin32Impl::
~MutexWin32Impl() {
DeleteCriticalSection(&_lock);
// If the lock has been contended, and we use the Windows XP implementation,
// we have a handle to close. Otherwise, this field will be null.
if (_lock[1] != nullptr) {
CloseHandle(_lock[1]);
}
}
/**
@@ -24,7 +28,7 @@ INLINE MutexWin32Impl::
*/
INLINE void MutexWin32Impl::
lock() {
EnterCriticalSection(&_lock);
_funcs._lock(_lock);
}
/**
@@ -32,13 +36,45 @@ lock() {
*/
INLINE bool MutexWin32Impl::
try_lock() {
return (TryEnterCriticalSection(&_lock) != 0);
return _funcs._try_lock(_lock);
}
/**
*
*/
INLINE void MutexWin32Impl::
unlock() {
_funcs._unlock(_lock);
}
/**
*
*/
INLINE ReMutexWin32Impl::
~ReMutexWin32Impl() {
DeleteCriticalSection(&_lock);
}
/**
*
*/
INLINE void ReMutexWin32Impl::
lock() {
EnterCriticalSection(&_lock);
}
/**
*
*/
INLINE bool ReMutexWin32Impl::
try_lock() {
return (TryEnterCriticalSection(&_lock) != 0);
}
/**
*
*/
INLINE void ReMutexWin32Impl::
unlock() {
LeaveCriticalSection(&_lock);
}
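
For reference, the recursive half of the change boils down to the classic CRITICAL_SECTION pattern used by ReMutexWin32Impl above. A standalone sketch (hypothetical wrapper name, illustration only):

#include <windows.h>

class RecursiveLock {
public:
  RecursiveLock()  { InitializeCriticalSectionAndSpinCount(&_cs, 4000); }
  ~RecursiveLock() { DeleteCriticalSection(&_cs); }
  void lock()      { EnterCriticalSection(&_cs); }
  bool try_lock()  { return TryEnterCriticalSection(&_cs) != 0; }
  void unlock()    { LeaveCriticalSection(&_cs); }
private:
  CRITICAL_SECTION _cs;
};

void demo(RecursiveLock &m) {
  m.lock();
  m.lock();     // critical sections are recursive: the same thread may re-enter
  m.unlock();
  m.unlock();   // each lock() needs a matching unlock()
}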

View File

@@ -13,16 +13,384 @@
#include "selectThreadImpl.h"
#ifdef WIN32_VC
#if defined(WIN32_VC) && !defined(CPPPARSER)
#include "mutexWin32Impl.h"
// If this is true, we will use SRWLock on Windows Vista and above instead of
// our own implementation.
static const bool prefer_srwlock = true;
// These configure our own Windows XP implementation.
static const uintptr_t lock_bit = 0x40000000;
static const unsigned int spin_count = 4000;
// This gets set to spin_count if we are on a multi-core system.
static unsigned int effective_spin_count = 0;
/**
* Windows XP implementation of lock(), which uses a combination of a spinlock
* and an event.
*/
static void __stdcall lock_xp(volatile PVOID *lock) {
// In the Windows XP case, lock consists of two words: the first one is a
// number of waiters plus a bit to indicate that it is locked, the second
// one is the handle of an event that is created in case of contention.
// The first word can be in the following states:
//
// lock bit | waiters | meaning
// ---------|---------|---------
// unset | 0 | unlocked
// set | 0 | locked, nobody waiting on event
// set | >0 | locked, at least one thread waiting on event
// unset | >0 | handing off lock to one of waiters
//
// The last state is a little subtle: at this point, the thread that was
// holding the lock has stopped holding it, but is about to fire off a
// signal to a waiting thread, which will attempt to grab the lock. In this
// case, the waiting thread has first dibs on the lock, and any new threads
// will still treat it as locked and wait until there are no more waiters.
// First try to acquire the lock without suspending the thread. This only
// works if the waiter count is 0; this way, we give priority to threads
// that are already waiting for the event.
if (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == nullptr) {
// Got the lock on the first try.
return;
}
// On multi-core systems, we keep trying for the configured spin_count.
const unsigned int max_spins = effective_spin_count;
for (unsigned int spins = 0; spins < max_spins; ++spins) {
if (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == nullptr) {
// We managed to acquire the lock.
return;
}
// Emit the pause instruction. This is NOT a thread yield.
YieldProcessor();
}
// Looks like we might have to go to sleep for a while using an event.
HANDLE event = lock[1];
if (event == nullptr) {
// We don't have an event yet. Create an auto-reset event.
HANDLE new_event = CreateEvent(nullptr, false, false, nullptr);
while (new_event == nullptr) {
// Hmm, out of memory? Just yield to another thread for now until the
// lock is either freed or until we can create an event.
Sleep(1);
if (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == 0) {
return;
}
new_event = CreateEvent(nullptr, false, false, nullptr);
}
// Push the new event.
event = InterlockedCompareExchangePointer(lock + 1, new_event, nullptr);
if (event == nullptr) {
// Set successfully.
event = new_event;
} else {
// Another thread created an event; delete ours and use that one instead.
CloseHandle(new_event);
}
}
// OK, now we have an event. We need to let the unlock() function know that
// we are waiting.
while (true) {
uintptr_t waiters = (uintptr_t)lock[0];
if (waiters == 0) {
// It became unlocked while we were creating an event. Quick, grab it.
if (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == nullptr) {
return;
}
}
// If the lock bit gets unset while we try this, just keep trying. It
// would be dangerous to increment this while the lock bit is unset.
waiters |= lock_bit;
uintptr_t new_waiters = (uintptr_t)InterlockedCompareExchangePointer(lock, (void *)(waiters + 1), (void *)waiters);
if (new_waiters == waiters) {
// Made the change successfully.
break;
} else if (new_waiters == 0) {
// It just became unlocked. Quick, grab it.
if (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == nullptr) {
return;
}
}
YieldProcessor();
}
// Sleep well, thread.
while (true) {
WaitForSingleObjectEx(event, INFINITE, FALSE);
// We were woken up. Does that mean the lock can be ours?
while (true) {
uintptr_t waiters = (uintptr_t)lock[0];
if (waiters & lock_bit) {
// False alarm. Go back to sleep.
break;
}
assert(waiters > 0);
// Grab the lock immediately, and simultaneously tell it that we are no
// longer waiting.
uintptr_t new_waiters = (uintptr_t)InterlockedCompareExchangePointer(lock, (void *)((waiters - 1) | lock_bit), (void *)waiters);
if (new_waiters == waiters) {
// The lock is ours.
return;
} else if (new_waiters & lock_bit) {
// Another thread beat us to it. Go back to sleep.
break;
}
YieldProcessor();
}
}
// Never supposed to get here.
assert(false);
}
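
A short worked example of the lock-word encoding described in the table at the top of lock_xp (illustration only, reusing the lock_bit value defined above):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t lock_bit = 0x40000000;

  uintptr_t word = 0;            // unlocked, no waiters
  word = lock_bit;               // locked, nobody waiting on the event
  word = lock_bit | 2;           // locked, two threads asleep on the event
  word &= ~lock_bit;             // unlock_xp clears the bit; waiters remain
  assert(word == 2);             // hand-off state: a woken waiter will CAS the
                                 // bit back in while decrementing the count
  return 0;
}

The CAS in the fast path only succeeds against the value 0, which is what gives already-sleeping waiters priority over newly arriving threads.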
/**
* Windows XP implementation of try_lock().
*/
static BOOL __stdcall try_lock_xp(volatile PVOID *lock) {
return (InterlockedCompareExchangePointer(lock, (void *)lock_bit, nullptr) == nullptr);
}
/**
* Windows XP implementation of unlock().
*/
static void __stdcall unlock_xp(volatile PVOID *lock) {
// Clear the lock flag.
#ifdef _WIN64
uintptr_t waiters = _InterlockedAnd64((volatile __int64 *)lock, ~lock_bit);
#else
uintptr_t waiters = _InterlockedAnd((volatile long *)lock, ~lock_bit);
#endif
// If this triggers, the lock wasn't held to begin with.
assert((waiters & lock_bit) != 0);
// Have any threads begun to sleep (or are about to) waiting for this lock?
if ((waiters & ~lock_bit) == 0) {
// No contention, nothing to do.
return;
} else {
// By signalling the auto-resetting event, we wake up one waiting thread.
HANDLE event = lock[1];
assert(event != nullptr);
SetEvent(event);
}
}
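
A rough portable analogue (sketch only, not part of the commit) of what the _InterlockedAnd call above accomplishes: clear the lock bit and fetch the previous value in a single atomic step, so the waiter count can be inspected without a second, racy read.

#include <atomic>
#include <cstdint>

constexpr uintptr_t kLockBit = 0x40000000;   // mirrors lock_bit above

// Returns the waiter count observed at the instant the lock was released;
// a nonzero result is the cue to signal the event and wake one waiter.
uintptr_t release(std::atomic<uintptr_t> &word) {
  uintptr_t prev = word.fetch_and(~kLockBit); // old value, like _InterlockedAnd
  return prev & ~kLockBit;
}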
/**
* Windows XP implementation to wait for a condition variable.
*/
static BOOL __stdcall
cvar_wait_xp(volatile PVOID *cvar, volatile PVOID *lock, DWORD timeout, ULONG) {
// Increment the number of waiters.
#ifdef _WIN64
_InterlockedIncrement64((volatile __int64 *)cvar);
#else
_InterlockedIncrement((volatile long *)cvar);
#endif
// Make sure we have two events created: one auto-reset event and one
// manual-reset, to handle signal and broadcast, respectively.
if (cvar[1] == nullptr) {
cvar[1] = CreateEvent(nullptr, false, false, nullptr);
}
if (cvar[2] == nullptr) {
cvar[2] = CreateEvent(nullptr, true, false, nullptr);
}
// It's ok to release the external_mutex here since Win32 manual-reset
// events maintain state when used with SetEvent(). This avoids the "lost
// wakeup" bug...
unlock_xp(lock);
// Wait for either event to become signaled due to notify() being called or
// notify_all() being called.
int result = WaitForMultipleObjects(2, (const HANDLE *)(cvar + 1), FALSE, timeout);
// Decrement the counter. If it reached zero, we were the last waiter.
#ifdef _WIN64
bool nonzero = (_InterlockedDecrement64((volatile __int64 *)cvar) != 0);
#else
bool nonzero = (_InterlockedDecrement((volatile long *)cvar) != 0);
#endif
bool last_waiter = (result == WAIT_OBJECT_0 + 1 && !nonzero);
// Some thread called notify_all().
if (last_waiter) {
// We're the last waiter to be notified or to stop waiting, so reset the
// manual event.
ResetEvent(cvar[2]);
}
// Reacquire the <external_mutex>.
lock_xp(lock);
return TRUE;
}
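
The last_waiter test above leans on the return-value convention of WaitForMultipleObjects: with bWaitAll set to FALSE it returns WAIT_OBJECT_0 plus the index of the handle that became signaled, so WAIT_OBJECT_0 + 1 means the second handle, the manual-reset broadcast event, is what woke us. A tiny standalone demonstration (illustration only):

#include <windows.h>
#include <cstdio>

int main() {
  HANDLE events[2];
  events[0] = CreateEvent(nullptr, FALSE, FALSE, nullptr); // auto-reset "signal"
  events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);  // manual-reset "broadcast"

  SetEvent(events[1]);
  DWORD result = WaitForMultipleObjects(2, events, FALSE, 0);
  if (result == WAIT_OBJECT_0 + 1) {
    std::printf("woken by the broadcast event\n");
  }

  CloseHandle(events[0]);
  CloseHandle(events[1]);
  return 0;
}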
/**
* Wakes one thread waiting for a condition variable.
*/
static void __stdcall
cvar_notify_one_xp(volatile PVOID *cvar) {
// If there are any waiters, signal one of them to wake up by signalling the
// auto-reset event.
if ((uintptr_t)cvar[0] > 0) {
SetEvent(cvar[1]);
}
}
/**
* Wakes all threads waiting for a condition variable.
*/
static void __stdcall
cvar_notify_all_xp(volatile PVOID *cvar) {
// If there are any waiters, signal the manual-reset event, which will be
// reset by the last thread to wake up.
if ((uintptr_t)cvar[0] > 0) {
SetEvent(cvar[2]);
}
}
/**
* This is put initially in the _lock slot; it makes sure that the lock
* functions get initialized the first time someone tries to grab a lock.
*/
void __stdcall MutexWin32Impl::
lock_initially(volatile PVOID *lock) {
MutexWin32Impl::init_lock_funcs();
MutexWin32Impl::_funcs._lock(lock);
}
/**
* This is put initially in the _try_lock slot; it makes sure that the lock
* functions get initialized the first time someone tries to grab a lock.
*/
BOOL __stdcall MutexWin32Impl::
try_lock_initially(volatile PVOID *lock) {
MutexWin32Impl::init_lock_funcs();
return MutexWin32Impl::_funcs._try_lock(lock);
}
#ifndef NDEBUG
/**
* This gets put initially in the _unlock slot and should never be called,
* since the initial lock/try_lock implementation will replace the pointers.
*/
void __stdcall MutexWin32Impl::
unlock_initially(volatile PVOID *) {
std::cerr << "Attempt to release a mutex at static init time before acquiring it!\n";
assert(false);
}
#endif
/**
* Does nothing.
*/
static void __stdcall
noop(volatile PVOID *) {
}
// We initially set the function pointers to functions that do initialization
// first.
MutexWin32Impl::LockFunctions MutexWin32Impl::_funcs = {
&MutexWin32Impl::lock_initially,
&MutexWin32Impl::try_lock_initially,
#ifndef NDEBUG
&MutexWin32Impl::unlock_initially,
#else
&noop,
#endif
&cvar_wait_xp,
#ifndef NDEBUG
&MutexWin32Impl::unlock_initially,
&MutexWin32Impl::unlock_initially,
#else
&noop,
&noop,
#endif
};
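
The bootstrap trick above, function pointers that start out pointing at stubs which initialize the table and then forward the call, can be shown in isolation with a minimal sketch (hypothetical names):

#include <cstdio>

static void real_impl() {
  std::printf("real implementation\n");
}

static void bootstrap_impl();             // forward declaration
static void (*func)() = &bootstrap_impl;  // constant-initialized pointer

static void bootstrap_impl() {
  func = &real_impl;  // one-time switch; fine while only one thread exists
  func();             // forward the call that triggered initialization
}

int main() {
  func();  // first call goes through the bootstrap
  func();  // later calls hit real_impl directly
}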
/**
* Called the first time a lock is grabbed.
*/
void MutexWin32Impl::
init_lock_funcs() {
if (MutexWin32Impl::_funcs._lock != &MutexWin32Impl::lock_initially) {
// Already initialized.
return;
}
// We don't need to be very thread safe here. This can only ever be called
// at static init time, when there is still only one thread.
if (prefer_srwlock) {
HMODULE module = GetModuleHandleA("kernel32");
if (module != nullptr) {
_funcs._lock = (LockFunc)GetProcAddress(module, "AcquireSRWLockExclusive");
_funcs._try_lock = (TryLockFunc)GetProcAddress(module, "TryAcquireSRWLockExclusive");
_funcs._unlock = (LockFunc)GetProcAddress(module, "ReleaseSRWLockExclusive");
_funcs._cvar_wait = (CondWaitFunc)GetProcAddress(module, "SleepConditionVariableSRW");
_funcs._cvar_notify_one = (LockFunc)GetProcAddress(module, "WakeConditionVariable");
_funcs._cvar_notify_all = (LockFunc)GetProcAddress(module, "WakeAllConditionVariable");
if (_funcs._lock != nullptr &&
_funcs._try_lock != nullptr &&
_funcs._unlock != nullptr &&
_funcs._cvar_wait != nullptr &&
_funcs._cvar_notify_one != nullptr &&
_funcs._cvar_notify_all != nullptr) {
return;
}
}
}
// Fall back to our custom Event-based implementation on Windows XP.
_funcs._lock = &lock_xp;
_funcs._try_lock = &try_lock_xp;
_funcs._unlock = &unlock_xp;
_funcs._cvar_wait = &cvar_wait_xp;
_funcs._cvar_notify_one = &cvar_notify_one_xp;
_funcs._cvar_notify_all = &cvar_notify_all_xp;
// Are we on a multi-core system? If so, enable the spinlock.
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
if (sysinfo.dwNumberOfProcessors > 1) {
effective_spin_count = spin_count;
} else {
effective_spin_count = 0;
}
}
/**
* Ensures that the lock functions are initialized at static init time. This
* prevents us from having to implement synchronization in our initialization.
*/
class LockFuncsInitializer {
public:
LockFuncsInitializer() {
MutexWin32Impl::init_lock_funcs();
}
};
static LockFuncsInitializer _lock_funcs_init;
/**
*
*/
MutexWin32Impl::
MutexWin32Impl() {
InitializeCriticalSectionAndSpinCount(&_lock, 4000);
ReMutexWin32Impl::
ReMutexWin32Impl() {
InitializeCriticalSectionAndSpinCount(&_lock, spin_count);
}
#endif // WIN32_VC

View File

@@ -24,11 +24,12 @@
#include <windows.h>
/**
* Uses Windows native calls to implement a mutex.
* Uses SRWLock on Vista and above to implement a mutex. Older versions of
* Windows fall back to a combination of a spinlock and an Event object.
*/
class EXPCL_DTOOL_DTOOLBASE MutexWin32Impl {
public:
MutexWin32Impl();
constexpr MutexWin32Impl() = default;
MutexWin32Impl(const MutexWin32Impl &copy) = delete;
INLINE ~MutexWin32Impl();
@@ -39,12 +40,63 @@ public:
INLINE bool try_lock();
INLINE void unlock();
static void init_lock_funcs();
private:
CRITICAL_SECTION _lock;
#ifndef CPPPARSER
// Store function pointers; these point directly to the SRWLock Win32 API
// functions on Vista and above, or to our own implementation on Windows XP.
typedef void (__stdcall *LockFunc)(volatile PVOID *lock);
typedef BOOL (__stdcall *TryLockFunc)(volatile PVOID *lock);
typedef BOOL (__stdcall *CondWaitFunc)(volatile PVOID *cvar, volatile PVOID *lock, DWORD, ULONG);
struct LockFunctions {
LockFunc _lock;
TryLockFunc _try_lock;
LockFunc _unlock;
CondWaitFunc _cvar_wait;
LockFunc _cvar_notify_one;
LockFunc _cvar_notify_all;
};
static LockFunctions _funcs;
static void __stdcall lock_initially(volatile PVOID *lock);
static BOOL __stdcall try_lock_initially(volatile PVOID *lock);
static void __stdcall unlock_initially(volatile PVOID *lock);
#endif
private:
// In the SRWLock implementation, only the first field is used. On Windows
// XP, the first field contains a waiter count and lock bit, and the second
// field contains an Event handle if contention has occurred.
volatile PVOID _lock[2] = {nullptr, nullptr};
friend class ConditionVarWin32Impl;
friend class ConditionVarFullWin32Impl;
};
/**
* Uses CRITICAL_SECTION to implement a recursive mutex.
*/
class EXPCL_DTOOL_DTOOLBASE ReMutexWin32Impl {
public:
ReMutexWin32Impl();
ReMutexWin32Impl(const ReMutexWin32Impl &copy) = delete;
INLINE ~ReMutexWin32Impl();
ReMutexWin32Impl &operator = (const ReMutexWin32Impl &copy) = delete;
public:
INLINE void lock();
INLINE bool try_lock();
INLINE void unlock();
private:
CRITICAL_SECTION _lock;
};
#include "mutexWin32Impl.I"
#endif // WIN32_VC
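
Why a pair of zero-initialized PVOIDs is enough storage here: SRWLOCK is defined in winnt.h as a struct holding a single PVOID, and SRWLOCK_INIT is all zeroes, so _lock[0] starts out in exactly the state the SRWLock functions expect. A sketch of that assumption (illustration only):

#include <windows.h>

static_assert(sizeof(SRWLOCK) == sizeof(PVOID),
              "SRWLOCK occupies one pointer-sized word");

int main() {
  PVOID slot = nullptr;                      // same state as _lock[0] after constexpr init
  AcquireSRWLockExclusive((PSRWLOCK)&slot);  // a zeroed word is a valid, unlocked SRWLOCK
  ReleaseSRWLockExclusive((PSRWLOCK)&slot);
  return 0;
}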

View File

@@ -15,14 +15,7 @@
*
*/
INLINE ConditionVarFullWin32Impl::
ConditionVarFullWin32Impl(MutexWin32Impl &mutex) {
_external_mutex = &mutex._lock;
// Create an auto-reset event and a manual-reset event.
_event_signal = CreateEvent(nullptr, false, false, nullptr);
_event_broadcast = CreateEvent(nullptr, true, false, nullptr);
_waiters_count = 0;
ConditionVarFullWin32Impl(MutexWin32Impl &mutex) : _mutex(mutex) {
}
/**
@@ -30,8 +23,14 @@ ConditionVarFullWin32Impl(MutexWin32Impl &mutex) {
*/
INLINE ConditionVarFullWin32Impl::
~ConditionVarFullWin32Impl() {
CloseHandle(_event_signal);
CloseHandle(_event_broadcast);
// These fields are only set in the Windows XP implementation, in which case
// they both contain event handles.
if (_cvar[1] != nullptr) {
CloseHandle(_cvar[1]);
}
if (_cvar[2] != nullptr) {
CloseHandle(_cvar[2]);
}
}
/**
@@ -39,29 +38,7 @@ INLINE ConditionVarFullWin32Impl::
*/
INLINE void ConditionVarFullWin32Impl::
wait() {
AtomicAdjust::inc(_waiters_count);
// It's ok to release the external_mutex here since Win32 manual-reset
// events maintain state when used with SetEvent(). This avoids the "lost
// wakeup" bug...
LeaveCriticalSection(_external_mutex);
// Wait for either event to become signaled due to notify() being called or
// notify_all() being called.
int result = WaitForMultipleObjects(2, &_event_signal, FALSE, INFINITE);
bool nonzero = AtomicAdjust::dec(_waiters_count);
bool last_waiter = (result == WAIT_OBJECT_0 + 1 && !nonzero);
// Some thread called notify_all().
if (last_waiter) {
// We're the last waiter to be notified or to stop waiting, so reset the
// manual event.
ResetEvent(_event_broadcast);
}
// Reacquire the <external_mutex>.
EnterCriticalSection(_external_mutex);
MutexWin32Impl::_funcs._cvar_wait(_cvar, _mutex._lock, INFINITE, 0);
}
/**
@@ -69,29 +46,7 @@ wait() {
*/
INLINE void ConditionVarFullWin32Impl::
wait(double timeout) {
AtomicAdjust::inc(_waiters_count);
// It's ok to release the external_mutex here since Win32 manual-reset
// events maintain state when used with SetEvent(). This avoids the "lost
// wakeup" bug...
LeaveCriticalSection(_external_mutex);
// Wait for either event to become signaled due to notify() being called or
// notify_all() being called.
int result = WaitForMultipleObjects(2, &_event_signal, FALSE, (DWORD)(timeout * 1000.0));
bool nonzero = AtomicAdjust::dec(_waiters_count);
bool last_waiter = (result == WAIT_OBJECT_0 + 1 && !nonzero);
// Some thread called notify_all().
if (last_waiter) {
// We're the last waiter to be notified or to stop waiting, so reset the
// manual event.
ResetEvent(_event_broadcast);
}
// Reacquire the <external_mutex>.
EnterCriticalSection(_external_mutex);
MutexWin32Impl::_funcs._cvar_wait(_cvar, _mutex._lock, (DWORD)(timeout * 1000.0), 0);
}
/**
@@ -99,11 +54,7 @@ wait(double timeout) {
*/
INLINE void ConditionVarFullWin32Impl::
notify() {
bool have_waiters = AtomicAdjust::get(_waiters_count) > 0;
if (have_waiters) {
SetEvent(_event_signal);
}
MutexWin32Impl::_funcs._cvar_notify_one(_cvar);
}
/**
@@ -111,9 +62,5 @@ notify() {
*/
INLINE void ConditionVarFullWin32Impl::
notify_all() {
bool have_waiters = AtomicAdjust::get(_waiters_count) > 0;
if (have_waiters) {
SetEvent(_event_broadcast);
}
MutexWin32Impl::_funcs._cvar_notify_all(_cvar);
}
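
A usage sketch (hypothetical consumer code, assumed header name) for the interface above: the caller holds the mutex around wait() and re-checks its predicate in a loop, since both the XP path and SleepConditionVariableSRW allow spurious wakeups and stolen signals.

#include "conditionVarFullWin32Impl.h"   // assumed header name

static MutexWin32Impl queue_lock;
static ConditionVarFullWin32Impl queue_cvar(queue_lock);
static int pending = 0;

void producer() {
  queue_lock.lock();
  ++pending;
  queue_cvar.notify();       // wake one waiting consumer
  queue_lock.unlock();
}

void consumer() {
  queue_lock.lock();
  while (pending == 0) {     // predicate re-checked after every wakeup
    queue_cvar.wait();       // releases the lock while asleep, reacquires before returning
  }
  --pending;
  queue_lock.unlock();
}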

View File

@@ -50,10 +50,12 @@
INLINE void notify_all();
private:
CRITICAL_SECTION *_external_mutex;
HANDLE _event_signal;
HANDLE _event_broadcast;
TVOLATILE AtomicAdjust::Integer _waiters_count;
MutexWin32Impl &_mutex;
// On Windows XP, the first field contains a waiter count, the second a
// signal (auto-reset) event, and the third a broadcast (manual-reset) event.
// On Windows Vista and above, only the first field is used, as the
// CONDITION_VARIABLE.
volatile PVOID _cvar[3] = {nullptr, nullptr, nullptr};
};
#include "conditionVarFullWin32Impl.I"

View File

@@ -15,9 +15,7 @@
*
*/
INLINE ConditionVarWin32Impl::
ConditionVarWin32Impl(MutexWin32Impl &mutex) {
_external_mutex = &mutex._lock;
ConditionVarWin32Impl(MutexWin32Impl &mutex) : _mutex(mutex) {
// Create an auto-reset event.
_event_signal = CreateEvent(nullptr, false, false, nullptr);
}
@@ -35,12 +33,12 @@ INLINE ConditionVarWin32Impl::
*/
INLINE void ConditionVarWin32Impl::
wait() {
LeaveCriticalSection(_external_mutex);
_mutex.unlock();
DWORD result = WaitForSingleObject(_event_signal, INFINITE);
nassertv(result == WAIT_OBJECT_0);
EnterCriticalSection(_external_mutex);
_mutex.lock();
}
/**
@@ -48,12 +46,12 @@ wait() {
*/
INLINE void ConditionVarWin32Impl::
wait(double timeout) {
LeaveCriticalSection(_external_mutex);
_mutex.unlock();
DWORD result = WaitForSingleObject(_event_signal, (DWORD)(timeout * 1000.0));
nassertv(result == WAIT_OBJECT_0 || result == WAIT_TIMEOUT);
EnterCriticalSection(_external_mutex);
_mutex.lock();
}
/**

View File

@@ -44,7 +44,7 @@ public:
INLINE void notify();
private:
CRITICAL_SECTION *_external_mutex;
MutexWin32Impl &_mutex;
HANDLE _event_signal;
};