sync.stdatomic: add atomic_thread_fence(), cpu_relax() (#24690)

kbkpbot 2025-06-11 23:48:23 +08:00 committed by GitHub
parent 2bc5887398
commit a9f4a942fb
4 changed files with 97 additions and 6 deletions


@@ -15,12 +15,46 @@
#include "atomic_cpp.h"
#endif
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
/* x86 architecture: uses PAUSE instruction for efficient spinning */
#define cpu_relax() __asm__ __volatile__ ("pause")
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#if defined(__TINYC__)
/* TCC limitation: inline assembly is not supported on ARM */
#define cpu_relax()
#else
/* ARM architecture: uses YIELD instruction for power-efficient spinning */
#define cpu_relax() __asm__ __volatile__ ("yield" ::: "memory")
#endif
#elif defined(__riscv) && __riscv_xlen == 64
/* RISC-V 64-bit: no universally available PAUSE hint, so use a fence plus a dummy ALU op */
#define cpu_relax() __asm__ __volatile__ ( \
"fence rw, rw\n\t" /* Full memory barrier (read-write ordering) */ \
"andi a0, a0, 0\n\t" /* Dummy arithmetic instruction (always sets a0 = 0) */ \
::: "memory", "a0") /* Clobbers memory and a0 register to prevent optimizations */
#elif defined(__powerpc64__) || defined(__ppc64__)
/* PowerPC 64-bit: "or 1,1,1" lowers SMT thread priority while spinning */
#define cpu_relax() __asm__ __volatile__ ("or 1,1,1\n\t" ::: "memory")
#elif defined(__mips64)
/* MIPS 64-bit: use a short series of superscalar NOPs (ssnop) */
#define cpu_relax() __asm__ __volatile__ ("ssnop\n\tssnop\n\tssnop\n\t" ::: "memory")
#else
/* Fallback implementation for unsupported architectures */
#define cpu_relax() __asm__ __volatile__ ( \
"nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" /* Series of no-operation instructions */ \
::: "memory") /* Memory clobber to prevent instruction reordering */
#endif
#ifdef __TINYC__
typedef volatile long long atomic_llong;
typedef volatile unsigned long long atomic_ullong;
typedef volatile uintptr_t atomic_uintptr_t;
extern void atomic_thread_fence (int memory_order);
extern void __atomic_thread_fence (int memory_order);
#define atomic_thread_fence(order) __atomic_thread_fence (order)
// use the 64, 32 and 8 bit functions from libatomic directly,
// since tcc is not capable of using "generic" C functions
// there is no header file for libatomic, so we provide the function declarations here
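For context (not part of the diff): cpu_relax() is meant to be called inside busy-wait loops, where the PAUSE/YIELD hint reduces power draw and pipeline pressure while spinning. A minimal spin-lock sketch, assuming the cpu_relax() macro above is in scope and a C11 toolchain with <stdatomic.h>; the lock and function names here are illustrative, not from the commit:

#include <stdatomic.h>

static atomic_flag lock_flag = ATOMIC_FLAG_INIT;

static void spin_lock(void) {
	/* keep retrying until the flag was previously clear */
	while (atomic_flag_test_and_set_explicit(&lock_flag, memory_order_acquire)) {
		cpu_relax(); /* PAUSE/YIELD/NOPs: tell the core we are busy-waiting */
	}
}

static void spin_unlock(void) {
	atomic_flag_clear_explicit(&lock_flag, memory_order_release);
}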


@@ -24,7 +24,10 @@
#include <stdint.h>
#include <windows.h>
#ifdef __TINYC__
#ifdef _MSC_VER
#define cpu_relax() _mm_pause()
#else
#define cpu_relax() __asm__ __volatile__ ("pause")
#endif
#define ATOMIC_FLAG_INIT 0
@@ -39,10 +42,59 @@
#define kill_dependency(y) ((void)0)
// memory order policies - we use "sequentially consistent" by default
#define memory_order_relaxed 0
#define memory_order_consume 1
#define memory_order_acquire 2
#define memory_order_release 3
#define memory_order_acq_rel 4
#define memory_order_seq_cst 5
#ifdef _MSC_VER
/* MSVC: map each memory order onto the corresponding compiler-barrier intrinsics */
#define atomic_thread_fence(order) do { \
switch (order) { \
case memory_order_release: \
_WriteBarrier(); \
_ReadWriteBarrier(); \
break; \
case memory_order_acquire: \
_ReadBarrier(); \
_ReadWriteBarrier(); \
break; \
case memory_order_acq_rel: \
_ReadBarrier(); \
_WriteBarrier(); \
_ReadWriteBarrier(); \
break; \
case memory_order_seq_cst: \
MemoryBarrier(); \
break; \
default: /* relaxed, consume */ \
break; \
} \
} while (0)
#else
/* non-MSVC (GCC/Clang/TCC on x86): a compiler barrier covers acquire/release orderings; seq_cst needs a full mfence */
#define atomic_thread_fence(order) do { \
switch (order) { \
case memory_order_relaxed: \
break; \
case memory_order_acquire: \
case memory_order_consume: \
case memory_order_release: \
case memory_order_acq_rel: \
__asm__ __volatile__ ("" : : : "memory"); \
break; \
case memory_order_seq_cst: \
__asm__ __volatile__ ("mfence" : : : "memory"); \
break; \
default: \
__asm__ __volatile__ ("mfence" : : : "memory"); \
break; \
} \
} while (0)
#endif
#define atomic_signal_fence(order) \
((void)0)
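For context (not part of the diff): these fences are typically paired, a release fence on the writer side and an acquire fence on the reader side, so that data written before a flag is published becomes visible once the flag is observed. A sketch of that publication pattern, written against a standard C11 <stdatomic.h> toolchain and using cpu_relax() from the header above; all names are illustrative:

#include <stdatomic.h>

static int payload;
static atomic_int ready;

static void producer(void) {
	payload = 42;
	atomic_thread_fence(memory_order_release); /* earlier writes may not sink below this point */
	atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

static int consumer(void) {
	while (atomic_load_explicit(&ready, memory_order_relaxed) == 0) {
		cpu_relax(); /* spin politely until the flag is published */
	}
	atomic_thread_fence(memory_order_acquire); /* later reads may not hoist above this point */
	return payload; /* guaranteed to observe 42 */
}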


@@ -14,8 +14,6 @@ $if windows {
$if linux {
$if tinyc {
$if amd64 {
// Debian/Ubuntu:
#flag $when_first_existing('/usr/lib/gcc/x86_64-linux-gnu/6/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/7/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/8/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/9/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/10/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/11/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/12/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/13/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/14/libatomic.a')
// Redhat/CentOS:
#flag $when_first_existing('/usr/lib/gcc/x86_64-redhat-linux/6/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/7/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/8/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/9/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/10/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/11/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/12/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/13/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/14/libatomic.a')
// Gentoo:
@@ -108,3 +106,6 @@ fn C.atomic_compare_exchange_strong_u64(voidptr, voidptr, u64) bool
fn C.atomic_exchange_u64(voidptr, u64) u64
fn C.atomic_fetch_add_u64(voidptr, u64) u64
fn C.atomic_fetch_sub_u64(voidptr, u64) u64
fn C.atomic_thread_fence(int)
fn C.cpu_relax()


@@ -271,4 +271,8 @@ fn test_atomic_vals() {
assert v_voidptr.swap(ptr_2) == ptr_1
assert v_voidptr.compare_and_swap(ptr_2, ptr_1) == true
assert v_voidptr.load() == ptr_1
// just check that these compile and link
C.atomic_thread_fence(C.memory_order_relaxed)
C.cpu_relax()
}