dtoolbase: Add atomic wait and notify operations from C++20

Adds patomic_signed_lock_free, patomic_unsigned_lock_free, and patomic_flag with wait/notify methods modelled after C++20.  Implemented using futexes, falling back to a mutex+condition variable hash table if not supported.  (Currently the hash table has a fixed size of 64, which we could increase if necessary, but we really shouldn't even have a fraction of that number of simultaneously sleeping threads...)

Other atomic types are unaffected at the moment, in part because futexes are really restricted to 32-bit ints on Linux anyway.
This commit is contained in:
rdb 2022-02-23 18:40:04 +01:00
parent 5196719f29
commit c3ce8164bc
5 changed files with 466 additions and 27 deletions

View File

@ -46,6 +46,7 @@ set(P3DTOOLBASE_HEADERS
typeRegistryNode.I typeRegistryNode.h
typedObject.I typedObject.h
pallocator.T pallocator.h
patomic.h patomic.I
pdeque.h plist.h pmap.h pset.h
pvector.h epvector.h
lookup3.h
@ -70,6 +71,7 @@ set(P3DTOOLBASE_SOURCES
mutexWin32Impl.cxx
mutexSpinlockImpl.cxx
neverFreeMemory.cxx
patomic.cxx
pdtoa.cxx
pstrtod.cxx
register_type.cxx

View File

@ -1,3 +1,4 @@
#include "patomic.cxx"
#include "mutexPosixImpl.cxx"
#include "mutexWin32Impl.cxx"
#include "mutexSpinlockImpl.cxx"

View File

@ -11,6 +11,7 @@
* @date 2022-01-28
*/
#if defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
/**
* Value initializer.
*/
@ -247,15 +248,99 @@ operator ^=(T arg) noexcept {
return _value ^= arg;
}
#endif // defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
/**
* Sets the flag to true and returns the previous value.
* Initializes the variable to zero (according to C++20 semantics, NOT C++11
* semantics!)
*/
ALWAYS_INLINE bool patomic_flag::
test_and_set(std::memory_order order) noexcept {
bool value = __internal_flag;
__internal_flag = true;
return value;
constexpr patomic_unsigned_lock_free::
patomic_unsigned_lock_free() noexcept :
patomic<uint32_t>(0u) {
}
/**
* Initializes the variable to the given value.
*/
constexpr patomic_unsigned_lock_free::
patomic_unsigned_lock_free(uint32_t desired) noexcept :
patomic<uint32_t>(desired) {
}
/**
 * Blocks until the stored value differs from `old`.
 *
 * The value is first loaded with the requested memory order; if it already
 * differs, the call returns right away.  Otherwise the wait is delegated to
 * patomic_wait() on the address of this object.
 */
ALWAYS_INLINE void patomic_unsigned_lock_free::
wait(uint32_t old, std::memory_order order) const noexcept {
  const uint32_t current = load(order);
  if (current != old) {
    // Already changed; nothing to wait for.
    return;
  }
  patomic_wait(reinterpret_cast<const volatile uint32_t *>(this), old);
}
/**
 * Wakes up at least one of the threads blocked in wait() on this object.
 */
ALWAYS_INLINE void patomic_unsigned_lock_free::
notify_one() noexcept {
  volatile uint32_t *const word = reinterpret_cast<volatile uint32_t *>(this);
  patomic_notify_one(word);
}
/**
 * Wakes up all of the threads blocked in wait() on this object.
 */
ALWAYS_INLINE void patomic_unsigned_lock_free::
notify_all() noexcept {
  volatile uint32_t *const word = reinterpret_cast<volatile uint32_t *>(this);
  patomic_notify_all(word);
}
/**
* Initializes the variable to zero (according to C++20 semantics, NOT C++11
* semantics!)
*/
constexpr patomic_signed_lock_free::
patomic_signed_lock_free() noexcept :
patomic<int32_t>(0) {
}
/**
* Initializes the variable to the given value.
*/
constexpr patomic_signed_lock_free::
patomic_signed_lock_free(int32_t desired) noexcept :
patomic<int32_t>(desired) {
}
/**
 * Blocks until the stored value differs from `old`.
 *
 * The value is first loaded with the requested memory order; if it already
 * differs, the call returns right away.  Otherwise the wait is delegated to
 * patomic_wait() on the address of this object.
 */
ALWAYS_INLINE void patomic_signed_lock_free::
wait(int32_t old, std::memory_order order) const noexcept {
  const int32_t current = load(order);
  if (current != old) {
    // Already changed; nothing to wait for.
    return;
  }
  patomic_wait(reinterpret_cast<const volatile int32_t *>(this), old);
}
/**
 * Wakes up at least one of the threads blocked in wait() on this object.
 */
ALWAYS_INLINE void patomic_signed_lock_free::
notify_one() noexcept {
  volatile int32_t *const word = reinterpret_cast<volatile int32_t *>(this);
  patomic_notify_one(word);
}
/**
 * Wakes up all of the threads blocked in wait() on this object.
 */
ALWAYS_INLINE void patomic_signed_lock_free::
notify_all() noexcept {
  volatile int32_t *const word = reinterpret_cast<volatile int32_t *>(this);
  patomic_notify_all(word);
}
/**
 * Constructs the flag in the given state, which makes it possible to
 * initialize a patomic_flag from ATOMIC_FLAG_INIT.
 */
constexpr patomic_flag::
patomic_flag(bool desired) noexcept
  : _value(static_cast<uint32_t>(desired)) {
}
/**
@ -263,5 +348,125 @@ test_and_set(std::memory_order order) noexcept {
*/
// Resets the flag to false, storing 0 into _value with the given memory order.
// NOTE(review): `__internal_flag` belongs to the older dummy patomic_flag
// struct; the patomic_flag class that owns `_value` does not declare it.  The
// first assignment looks like a leftover from the previous implementation --
// confirm and drop it.
ALWAYS_INLINE void patomic_flag::
clear(std::memory_order order) noexcept {
__internal_flag = false;
_value.store(0u, order);
}
/**
 * Atomically sets the flag and reports the state it had just before the call.
 */
ALWAYS_INLINE bool patomic_flag::
test_and_set(std::memory_order order) noexcept {
  const uint32_t previous = _value.exchange(1u, order);
  return previous != 0u;
}
/**
 * Reads the flag with the given memory order, without modifying it.
 */
ALWAYS_INLINE bool patomic_flag::
test(std::memory_order order) const noexcept {
  const uint32_t current = _value.load(order);
  return current != 0u;
}
/**
 * Blocks until the flag no longer has the state given by `old`.
 */
ALWAYS_INLINE void patomic_flag::
wait(bool old, std::memory_order order) const noexcept {
  // The flag is stored as a 32-bit word holding 0 or 1.
  const uint32_t expected = old;
  _value.wait(expected, order);
}
/**
 * Wakes up at least one of the threads blocked in wait() on this flag.
 */
ALWAYS_INLINE void patomic_flag::
notify_one() noexcept {
  // Simply forwarded to the underlying 32-bit atomic.
  _value.notify_one();
}
/**
 * Wakes up all of the threads blocked in wait() on this flag.
 */
ALWAYS_INLINE void patomic_flag::
notify_all() noexcept {
  // Simply forwarded to the underlying 32-bit atomic.
  _value.notify_all();
}
/**
 * Signed overload of patomic_wait(): reinterprets the word as unsigned and
 * forwards to the uint32_t implementation.
 */
ALWAYS_INLINE void
patomic_wait(const volatile int32_t *value, int32_t old) {
  const volatile uint32_t *const word =
    reinterpret_cast<const volatile uint32_t *>(value);
  const uint32_t expected = static_cast<uint32_t>(old);
  patomic_wait(word, expected);
}
/**
 * Signed overload of patomic_notify_one(): forwards to the uint32_t
 * implementation using the same address.
 */
ALWAYS_INLINE void
patomic_notify_one(volatile int32_t *value) {
  volatile uint32_t *const word = reinterpret_cast<volatile uint32_t *>(value);
  patomic_notify_one(word);
}
/**
 * Signed overload of patomic_notify_all(): forwards to the uint32_t
 * implementation using the same address.
 */
ALWAYS_INLINE void
patomic_notify_all(volatile int32_t *value) {
  volatile uint32_t *const word = reinterpret_cast<volatile uint32_t *>(value);
  patomic_notify_all(word);
}
/**
 * Blocks the calling thread for as long as the 32-bit word at `value` still
 * holds `old`.
 *
 * On Linux this sleeps on a private futex; on Windows it calls through the
 * _patomic_wait_func pointer; on other platforms with POSIX threads it uses
 * the _patomic_wait() fallback; otherwise it busy-waits.  Every variant
 * re-checks the word in a loop, so early or spurious returns from the
 * underlying primitive are harmless.
 */
ALWAYS_INLINE void
patomic_wait(const volatile uint32_t *value, uint32_t old) {
#ifdef __linux__
  // The first futex argument is the ADDRESS of the futex word; the third is
  // the value the kernel compares against before putting the thread to sleep.
  while (__atomic_load_n(value, __ATOMIC_SEQ_CST) == old) {
    syscall(SYS_futex, value, FUTEX_WAIT_PRIVATE, old, 0, 0, 0);
  }
//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
//  while (*value == old) {
//    WaitOnAddress((volatile void *)value, &old, sizeof(uint32_t), INFINITE);
//  }
#elif defined(_WIN32)
  // Resolved at runtime to WaitOnAddress or to the emulated fallback.
  while (*value == old) {
    _patomic_wait_func((volatile void *)value, &old, sizeof(uint32_t), INFINITE);
  }
#elif defined(HAVE_POSIX_THREADS)
  _patomic_wait(value, old);
#else
  // No blocking primitive available: spin until the value changes.
  while (*value == old);
#endif
}
/**
 * Wakes a thread that is blocked in patomic_wait() on the given address.
 *
 * Linux issues FUTEX_WAKE_PRIVATE with a wake count of 1; Windows calls
 * through the _patomic_wake_one_func pointer; other platforms with POSIX
 * threads call _patomic_notify_all().  If none of these branches is compiled
 * in, the function body is empty.
 */
ALWAYS_INLINE void
patomic_notify_one(volatile uint32_t *value) {
#ifdef __linux__
// Third argument is the maximum number of waiters to wake.
syscall(SYS_futex, value, FUTEX_WAKE_PRIVATE, 1, 0, 0, 0);
//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
// WakeByAddressSingle((void *)value);
#elif defined(_WIN32)
_patomic_wake_one_func((void *)value);
#elif defined(HAVE_POSIX_THREADS)
// The pthread fallback only offers a wake-everyone operation; waiters
// re-check the value after waking, so waking more than one is still correct.
_patomic_notify_all(value);
#endif
}
/**
 * Wakes every thread that is blocked in patomic_wait() on the given address.
 *
 * Linux issues FUTEX_WAKE_PRIVATE with a wake count of INT_MAX; Windows calls
 * through the _patomic_wake_all_func pointer; other platforms with POSIX
 * threads call _patomic_notify_all().  If none of these branches is compiled
 * in, the function body is empty.
 */
ALWAYS_INLINE void
patomic_notify_all(volatile uint32_t *value) {
#ifdef __linux__
// INT_MAX as the wake count means "wake all waiters".
syscall(SYS_futex, value, FUTEX_WAKE_PRIVATE, INT_MAX, 0, 0, 0);
//#elif _WIN32_WINNT >= _WIN32_WINNT_WIN8
// WakeByAddressAll((void *)value);
#elif defined(_WIN32)
_patomic_wake_all_func((void *)value);
#elif defined(HAVE_POSIX_THREADS)
_patomic_notify_all(value);
#endif
}

View File

@ -0,0 +1,168 @@
/**
* PANDA 3D SOFTWARE
* Copyright (c) Carnegie Mellon University. All rights reserved.
*
* All use of this software is subject to the terms of the revised BSD
* license. You should have received a copy of this license along
* with this source code in a file named "LICENSE."
*
* @file patomic.cxx
* @author rdb
* @date 2022-02-23
*/
#include "patomic.h"
#include <functional>
// The wait/notify methods cast `this` directly to a (u)int32_t pointer and the
// signed overloads reinterpret int32_t words as uint32_t, so all of these
// types must have the same size.
static_assert(sizeof(patomic_unsigned_lock_free) == sizeof(uint32_t),
"expected atomic uint32_t to have same size as uint32_t");
static_assert(sizeof(patomic_signed_lock_free) == sizeof(int32_t),
"expected atomic int32_t to have same size as int32_t");
static_assert(sizeof(uint32_t) == sizeof(int32_t),
"expected int32_t to have same size as uint32_t");
#if !defined(CPPPARSER) && defined(_WIN32)
// On Windows 7, we try to load the Windows 8 functions dynamically, and
// fall back to a condition variable table if they aren't available.
static BOOL initialize_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout);
// No-op wake function used until the real wake functions have been chosen.
// NOTE(review): presumably safe because no thread can be sleeping before the
// first wait call has run initialize_wait() -- confirm.
static void dummy_wake(PVOID addr) {}
// The wait slot starts out pointing at initialize_wait(), which replaces all
// three pointers the first time any thread waits.
BOOL (*_patomic_wait_func)(volatile VOID *, PVOID, SIZE_T, DWORD) = &initialize_wait;
void (*_patomic_wake_one_func)(PVOID) = &dummy_wake;
void (*_patomic_wake_all_func)(PVOID) = &dummy_wake;
// Randomly pick an entry into the wait table based on the hash of the address.
// It's possible to get hash collision, but that's not so bad, it just means
// that the other thread will get a spurious wakeup.
// Each entry is aligned to 64 bytes (presumably one cache line, to avoid false
// sharing between neighbouring entries -- confirm for the target CPUs).
struct alignas(64) WaitTableEntry {
// Guards _waiters and is the lock the condition variable sleeps with.
SRWLOCK _lock = SRWLOCK_INIT;
CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT;
// Number of threads currently inside emulated_wait() on this entry.
DWORD _waiters = 0;
};
static WaitTableEntry _wait_table[64] = {};
// Table size minus one; the table size must stay a power of two for the
// hash-and-mask indexing to cover every entry.
static const size_t _wait_hash_mask = 63;
/**
 * Emulates WakeByAddressSingle and WakeByAddressAll for Windows Vista and 7.
 * Both wake slots are pointed at this function by initialize_wait().
 */
static void
emulated_wake(PVOID addr) {
  const size_t slot_count = sizeof(_wait_table) / sizeof(WaitTableEntry);
  const size_t slot_index = std::hash<volatile void *>{}(addr) & (slot_count - 1);
  WaitTableEntry &slot = _wait_table[slot_index];

  // Sample the waiter count while holding the lock.
  AcquireSRWLockExclusive(&slot._lock);
  const DWORD sleepers = slot._waiters;
  ReleaseSRWLockExclusive(&slot._lock);

  if (sleepers == 0) {
    // Nobody is sleeping on this table entry.
    return;
  }

  // Several addresses can share this entry, so every sleeper has to be woken
  // even if only one of them waits on this address.  The others merely see a
  // spurious wakeup, which is harmless.
  WakeAllConditionVariable(&slot._cvar);
}
/**
 * Emulates WaitOnAddress for Windows Vista and 7.  Only supports aligned
 * 32-bit values.
 *
 * Sleeps on the wait-table entry selected by hashing the address until the
 * word at `addr` no longer equals the value pointed to by `cmp`.
 *
 * @param addr     address of the 32-bit word being watched
 * @param cmp      pointer to the value the word is compared against
 * @param size     must be sizeof(LONG)
 * @param timeout  timeout in milliseconds, or INFINITE
 * @return TRUE once the word differs from the comparison value, FALSE if the
 *         underlying sleep failed or timed out first.
 */
static BOOL
emulated_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout) {
  assert(size == sizeof(LONG));

  const LONG cmpval = *(LONG *)cmp;
  // Keep the volatile qualifier so that every check really re-reads memory.
  volatile LONG *target = (volatile LONG *)addr;
  if (*target != cmpval) {
    return TRUE;
  }

  size_t i = std::hash<volatile void *>{}(addr) & _wait_hash_mask;
  WaitTableEntry &entry = _wait_table[i];
  AcquireSRWLockExclusive(&entry._lock);
  ++entry._waiters;
  while (*target == cmpval) {
    // SleepConditionVariableSRW returns nonzero on success and zero on
    // failure, which includes the timeout elapsing.  A successful wakeup may
    // be meant for another address sharing this entry, so loop and re-check.
    if (SleepConditionVariableSRW(&entry._cvar, &entry._lock, timeout, 0) == 0) {
      // Timeout (or other failure).
      --entry._waiters;
      ReleaseSRWLockExclusive(&entry._lock);
      return FALSE;
    }
  }
  --entry._waiters;
  ReleaseSRWLockExclusive(&entry._lock);
  return TRUE;
}
/**
 * Initially assigned to the wait function slot to initialize the function
 * pointers.
 *
 * Looks up WaitOnAddress, WakeByAddressSingle and WakeByAddressAll in the
 * synch API-set module.  If all three are found they are installed in the
 * three function-pointer slots; otherwise the emulated versions are installed.
 * In both cases the wait that triggered the call is then carried out with the
 * chosen wait function, and its result is returned.
 */
static BOOL
initialize_wait(volatile VOID *addr, PVOID cmp, SIZE_T size, DWORD timeout) {
// There's a chance of a race here, with two threads trying to initialize the
// functions at the same time. That's OK, because they should all produce
// the same results, and the stores to the function pointers are atomic.
HMODULE lib = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
if (lib) {
auto wait_func = (decltype(_patomic_wait_func))GetProcAddress(lib, "WaitOnAddress");
auto wake_one_func = (decltype(_patomic_wake_one_func))GetProcAddress(lib, "WakeByAddressSingle");
auto wake_all_func = (decltype(_patomic_wake_all_func))GetProcAddress(lib, "WakeByAddressAll");
// Only switch to the native functions if the complete set is available.
if (wait_func && wake_one_func && wake_all_func) {
// Make sure that the wake function is guaranteed to be visible to other
// threads by the time we assign the wait function.
_patomic_wake_one_func = wake_one_func;
_patomic_wake_all_func = wake_all_func;
patomic_thread_fence(std::memory_order_release);
_patomic_wait_func = wait_func;
return wait_func(addr, cmp, size, timeout);
}
}
// We don't have Windows 8's functions, use the emulated wait and wake funcs.
// The same emulated wake function serves both the wake-one and wake-all slots.
_patomic_wake_one_func = &emulated_wake;
_patomic_wake_all_func = &emulated_wake;
// Same publication order as above: wake functions first, then the wait slot.
patomic_thread_fence(std::memory_order_release);
_patomic_wait_func = &emulated_wait;
return emulated_wait(addr, cmp, size, timeout);
}
#elif !defined(CPPPARSER) && !defined(__linux__) && defined(HAVE_POSIX_THREADS)
// Same as above, but using pthreads: a fixed table of mutex + condition
// variable pairs, indexed by a hash of the address being waited on.
struct alignas(64) WaitTableEntry {
// Guards _waiters and is the mutex the condition variable sleeps with.
pthread_mutex_t _lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t _cvar = PTHREAD_COND_INITIALIZER;
// Number of threads currently inside _patomic_wait() on this entry.
unsigned int _waiters = 0;
};
static WaitTableEntry _wait_table[64];
// Table size minus one; the table size must stay a power of two for the
// hash-and-mask indexing to cover every entry.
static const size_t _wait_hash_mask = 63;
/**
*
*/
void
_patomic_wait(const volatile uint32_t *value, uint32_t old) {
WaitTableEntry &entry = _wait_table[std::hash<const volatile void *>{}(value) & _wait_hash_mask];
pthread_mutex_lock(&entry._lock);
++entry._waiters;
while (__atomic_load_n(value, __ATOMIC_SEQ_CST) == old) {
pthread_cond_wait(&entry._cvar, &entry._lock);
}
--entry._waiters;
pthread_mutex_unlock(&entry._lock);
}
/**
*
*/
void
_patomic_notify_all(volatile uint32_t *value) {
WaitTableEntry &entry = _wait_table[std::hash<const volatile void *>{}(value) & _wait_hash_mask];
pthread_mutex_lock(&entry._lock);
unsigned int num_waiters = entry._waiters;
pthread_mutex_unlock(&entry._lock);
if (num_waiters > 0) {
pthread_cond_broadcast(&entry._cvar);
}
}
#endif

View File

@ -19,6 +19,19 @@
#include <atomic>
#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN 1
#endif
#include <windows.h>
#endif
#ifdef __linux__
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if defined(THREAD_DUMMY_IMPL) || defined(THREAD_SIMPLE_IMPL)
/**
@ -73,36 +86,86 @@ private:
T _value;
};
/**
 * Dummy implementation of std::atomic_flag that does not do any atomic
 * operations.  The state is kept in a plain bool that starts out false.
 */
struct EXPCL_DTOOL_DTOOLBASE patomic_flag {
constexpr patomic_flag() noexcept = default;
// Not copyable, like std::atomic_flag.
patomic_flag(const patomic_flag &) = delete;
patomic_flag &operator=(const patomic_flag &) = delete;
// Sets the flag and returns its previous state.
ALWAYS_INLINE bool test_and_set(std::memory_order order = std::memory_order_seq_cst) noexcept;
// Resets the flag to false.
ALWAYS_INLINE void clear(std::memory_order order = std::memory_order_seq_cst) noexcept;
// Backing storage for the flag.
bool __internal_flag = false;
};
// With the dummy/simple thread implementations a fence only has to constrain
// the compiler, so it maps onto std::atomic_signal_fence.
#define patomic_thread_fence(order) (std::atomic_signal_fence((order)))
#include "patomic.I"
#else
// We're using real threading, so use the real implementation.
// patomic<T> is simply an alias for std::atomic<T> in this configuration.
template<class T>
using patomic = std::atomic<T>;
typedef std::atomic_flag patomic_flag;
// A real inter-thread fence.
#define patomic_thread_fence(order) (std::atomic_thread_fence((order)))
#endif
/**
 * Implementation of atomic_unsigned_lock_free with C++20 semantics.
 *
 * A patomic<uint32_t> extended with wait/notify operations, which are
 * implemented by the patomic_wait/patomic_notify_* free functions.
 */
class EXPCL_DTOOL_DTOOLBASE patomic_unsigned_lock_free : public patomic<uint32_t> {
public:
// Initializes the value to zero.
constexpr patomic_unsigned_lock_free() noexcept;
// Initializes the value to `desired`.
constexpr patomic_unsigned_lock_free(uint32_t desired) noexcept;
// Blocks until the value is no longer equal to `old`.
// NOTE(review): declared INLINE here but defined ALWAYS_INLINE in patomic.I
// -- confirm which one is intended.
INLINE void wait(uint32_t old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
// Wakes at least one thread blocked in wait().
ALWAYS_INLINE void notify_one() noexcept;
// Wakes all threads blocked in wait().
ALWAYS_INLINE void notify_all() noexcept;
};
/**
 * Implementation of atomic_signed_lock_free with C++20 semantics.
 *
 * A patomic<int32_t> extended with wait/notify operations, which are
 * implemented by the patomic_wait/patomic_notify_* free functions.
 */
class EXPCL_DTOOL_DTOOLBASE patomic_signed_lock_free : public patomic<int32_t> {
public:
// Initializes the value to zero.
constexpr patomic_signed_lock_free() noexcept;
// Initializes the value to `desired`.
constexpr patomic_signed_lock_free(int32_t desired) noexcept;
// Blocks until the value is no longer equal to `old`.
// NOTE(review): declared INLINE here but defined ALWAYS_INLINE in patomic.I
// -- confirm which one is intended.
INLINE void wait(int32_t old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
// Wakes at least one thread blocked in wait().
ALWAYS_INLINE void notify_one() noexcept;
// Wakes all threads blocked in wait().
ALWAYS_INLINE void notify_all() noexcept;
};
/**
 * Implementation of atomic_flag with C++20 semantics.
 *
 * The flag is stored in a patomic_unsigned_lock_free holding 0 (clear) or
 * 1 (set), which is what provides the wait/notify operations.
 */
class EXPCL_DTOOL_DTOOLBASE patomic_flag {
public:
// Default-constructed flags start out clear (see the _value initializer).
constexpr patomic_flag() noexcept = default;
// Constructs the flag in the given state.
constexpr patomic_flag(bool desired) noexcept;
// Not copyable.
patomic_flag(const patomic_flag &) = delete;
patomic_flag &operator=(const patomic_flag &) = delete;
// Resets the flag to false.
ALWAYS_INLINE void clear(std::memory_order order = std::memory_order_seq_cst) noexcept;
// Sets the flag and returns its previous state.
ALWAYS_INLINE bool test_and_set(std::memory_order order = std::memory_order_seq_cst) noexcept;
// Returns the current state without changing it.
ALWAYS_INLINE bool test(std::memory_order order = std::memory_order_seq_cst) const noexcept;
// Blocks until the flag no longer has the state `old`.
ALWAYS_INLINE void wait(bool old, std::memory_order order = std::memory_order_seq_cst) const noexcept;
// Wakes at least one thread blocked in wait().
ALWAYS_INLINE void notify_one() noexcept;
// Wakes all threads blocked in wait().
ALWAYS_INLINE void notify_all() noexcept;
private:
patomic_unsigned_lock_free _value { 0u };
};
#ifndef CPPPARSER
// Low-level wait/notify primitives operating directly on a 32-bit word.  The
// int32_t overloads forward to the uint32_t ones.
ALWAYS_INLINE void patomic_wait(const volatile int32_t *value, int32_t old);
ALWAYS_INLINE void patomic_notify_one(volatile int32_t *value);
ALWAYS_INLINE void patomic_notify_all(volatile int32_t *value);
ALWAYS_INLINE void patomic_wait(const volatile uint32_t *value, uint32_t old);
ALWAYS_INLINE void patomic_notify_one(volatile uint32_t *value);
ALWAYS_INLINE void patomic_notify_all(volatile uint32_t *value);
#ifdef _WIN32
// Function pointers defined in patomic.cxx; the first wait call points them
// at either the Windows 8 functions or the emulated fallbacks.
EXPCL_DTOOL_DTOOLBASE extern BOOL (*_patomic_wait_func)(volatile VOID *, PVOID, SIZE_T, DWORD);
EXPCL_DTOOL_DTOOLBASE extern void (*_patomic_wake_one_func)(PVOID);
EXPCL_DTOOL_DTOOLBASE extern void (*_patomic_wake_all_func)(PVOID);
#elif !defined(__linux__) && defined(HAVE_POSIX_THREADS)
// Mutex + condition variable fallback, defined in patomic.cxx.  There is no
// separate notify-one entry point.
EXPCL_DTOOL_DTOOLBASE void _patomic_wait(const volatile uint32_t *value, uint32_t old);
EXPCL_DTOOL_DTOOLBASE void _patomic_notify_all(volatile uint32_t *value);
#endif
#include "patomic.I"
#endif // CPPPARSER
#endif