diff --git a/thirdparty/stdatomic/nix/atomic.h b/thirdparty/stdatomic/nix/atomic.h
index e583a431f0..80b5a884ee 100644
--- a/thirdparty/stdatomic/nix/atomic.h
+++ b/thirdparty/stdatomic/nix/atomic.h
@@ -15,12 +15,46 @@
 #include "atomic_cpp.h"
 #endif
 
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
+	/* x86 architecture: uses PAUSE instruction for efficient spinning */
+	#define cpu_relax() __asm__ __volatile__ ("pause")
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
+	#if defined(__TINYC__)
+		/* TCC compiler limitation: assembly not supported on ARM */
+		#define cpu_relax()
+	#else
+		/* ARM architecture: uses YIELD instruction for power-efficient spinning */
+		#define cpu_relax() __asm__ __volatile__ ("yield" ::: "memory")
+	#endif
+#elif defined(__riscv) && __riscv_xlen == 64
+	/* RISC-V 64-bit: no dedicated pause instruction, using alternative sequence */
+	#define cpu_relax() __asm__ __volatile__ ( \
+		"fence rw, rw\n\t"   /* Full memory barrier (read-write ordering) */ \
+		"andi a0, a0, 0\n\t" /* Dummy arithmetic instruction (always sets a0 = 0) */ \
+		::: "memory", "a0")  /* Clobbers memory and a0 register to prevent optimizations */
+#elif defined(__powerpc64__) || defined(__ppc64__)
+	/* PowerPC 64-bit: use OR instruction for synchronization */
+	#define cpu_relax() __asm__ __volatile__ ("or 1,1,1\n\t" ::: "memory")
+#elif defined(__mips64)
+	/* MIPS 64-bit: use series of super-scalar NOPs */
+	#define cpu_relax() __asm__ __volatile__ ("ssnop\n\tssnop\n\tssnop\n\t" ::: "memory")
+#else
+	/* Fallback implementation for unsupported architectures */
+	#define cpu_relax() __asm__ __volatile__ ( \
+		"nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t" /* Series of no-operation instructions */ \
+		::: "memory") /* Memory clobber to prevent instruction reordering */
+#endif
+
 #ifdef __TINYC__
 typedef volatile long long atomic_llong;
 typedef volatile unsigned long long atomic_ullong;
 typedef volatile uintptr_t atomic_uintptr_t;
 
+extern void atomic_thread_fence (int memory_order);
+extern void __atomic_thread_fence (int memory_order);
+#define atomic_thread_fence(order) __atomic_thread_fence (order)
+
 // use functions for 64, 32 and 8 bit from libatomic directly
 // since tcc is not capible to use "generic" C functions
 // there is no header file for libatomic so we provide function declarations here
 
diff --git a/thirdparty/stdatomic/win/atomic.h b/thirdparty/stdatomic/win/atomic.h
index 9ee7f30e0f..0ffda08a7b 100644
--- a/thirdparty/stdatomic/win/atomic.h
+++ b/thirdparty/stdatomic/win/atomic.h
@@ -24,7 +24,10 @@
 #include 
 #include 
 
-#ifdef __TINYC__
+#ifdef _MSC_VER
+#define cpu_relax() _mm_pause()
+#else
+#define cpu_relax() __asm__ __volatile__ ("pause")
 #endif
 
 #define ATOMIC_FLAG_INIT 0
@@ -39,10 +42,59 @@
 
 #define kill_dependency(y) ((void)0)
 
+// memory order policies - we use "sequentially consistent" by default
+
+#define memory_order_relaxed 0
+#define memory_order_consume 1
+#define memory_order_acquire 2
+#define memory_order_release 3
+#define memory_order_acq_rel 4
+#define memory_order_seq_cst 5
+
+#ifdef _MSC_VER
 #define atomic_thread_fence(order) \
-	((order) == memory_order_seq_cst ? MemoryBarrier() : \
-	 (order) == memory_order_release ? WriteBarrier() : \
-	 (order) == memory_order_acquire ? ReadBarrier() : (void)0);
+	do { \
+		switch (order) { \
+		case memory_order_release: \
+			_WriteBarrier(); \
+			_ReadWriteBarrier(); \
+			break; \
+		case memory_order_acquire: \
+			_ReadBarrier(); \
+			_ReadWriteBarrier(); \
+			break; \
+		case memory_order_acq_rel: \
+			_ReadBarrier(); \
+			_WriteBarrier(); \
+			_ReadWriteBarrier(); \
+			break; \
+		case memory_order_seq_cst: \
+			MemoryBarrier(); \
+			break; \
+		default: /* relaxed, consume */ \
+			break; \
+		} \
+	} while (0)
+#else
+#define atomic_thread_fence(order) do { \
+	switch (order) { \
+	case memory_order_relaxed: \
+		break; \
+	case memory_order_acquire: \
+	case memory_order_consume: \
+	case memory_order_release: \
+	case memory_order_acq_rel: \
+		__asm__ __volatile__ ("" : : : "memory"); \
+		break; \
+	case memory_order_seq_cst: \
+		__asm__ __volatile__ ("mfence" : : : "memory"); \
+		break; \
+	default: \
+		__asm__ __volatile__ ("mfence" : : : "memory"); \
+		break; \
+	} \
+} while (0)
+#endif
 
 #define atomic_signal_fence(order) \
 	((void)0)
diff --git a/vlib/sync/stdatomic/1.declarations.c.v b/vlib/sync/stdatomic/1.declarations.c.v
index 4c789e6454..661778a7c4 100644
--- a/vlib/sync/stdatomic/1.declarations.c.v
+++ b/vlib/sync/stdatomic/1.declarations.c.v
@@ -14,8 +14,6 @@ $if windows {
 $if linux {
 	$if tinyc {
 		$if amd64 {
-			// Debian/Ubuntu:
-			#flag $when_first_existing('/usr/lib/gcc/x86_64-linux-gnu/6/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/7/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/8/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/9/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/10/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/11/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/12/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/13/libatomic.a','/usr/lib/gcc/x86_64-linux-gnu/14/libatomic.a')
 			// Redhat/CentOS:
 			#flag $when_first_existing('/usr/lib/gcc/x86_64-redhat-linux/6/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/7/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/8/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/9/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/10/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/11/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/12/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/13/libatomic.a','/usr/lib/gcc/x86_64-redhat-linux/14/libatomic.a')
 			// Gentoo:
@@ -108,3 +106,6 @@ fn C.atomic_compare_exchange_strong_u64(voidptr, voidptr, u64) bool
 fn C.atomic_exchange_u64(voidptr, u64) u64
 fn C.atomic_fetch_add_u64(voidptr, u64) u64
 fn C.atomic_fetch_sub_u64(voidptr, u64) u64
+
+fn C.atomic_thread_fence(int)
+fn C.cpu_relax()
diff --git a/vlib/sync/stdatomic/atomic_test.v b/vlib/sync/stdatomic/atomic_test.v
index aa37dcca6e..4f1e8667c4 100644
--- a/vlib/sync/stdatomic/atomic_test.v
+++ b/vlib/sync/stdatomic/atomic_test.v
@@ -271,4 +271,8 @@ fn test_atomic_vals() {
 	assert v_voidptr.swap(ptr_2) == ptr_1
 	assert v_voidptr.compare_and_swap(ptr_2, ptr_1) == true
 	assert v_voidptr.load() == ptr_1
+
+	// just for compile
+	C.atomic_thread_fence(C.memory_order_relaxed)
+	C.cpu_relax()
 }
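
Usage note (illustrative only, not part of the patch): the new cpu_relax() macro is meant to be called inside busy-wait loops, where the x86 PAUSE / ARM YIELD hint reduces power draw and memory-bus contention while a thread spins. A minimal C sketch, assuming a C11 toolchain with <stdatomic.h> and the patched atomic.h providing cpu_relax(); spinlock_t, spin_lock and spin_unlock are hypothetical names used only for this example:

#include <stdatomic.h>

typedef struct {
	atomic_int locked; /* 0 = free, 1 = held */
} spinlock_t;

static void spin_lock(spinlock_t* l) {
	/* try to take the lock; while it is held, relax the core before retrying */
	while (atomic_exchange_explicit(&l->locked, 1, memory_order_acquire) != 0) {
		cpu_relax(); /* PAUSE/YIELD hint from the patched header */
	}
}

static void spin_unlock(spinlock_t* l) {
	atomic_store_explicit(&l->locked, 0, memory_order_release);
}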
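
Similarly, the atomic_thread_fence(order) mapping added for Windows (_ReadBarrier/_WriteBarrier/_ReadWriteBarrier/MemoryBarrier under MSVC, a compiler barrier or mfence otherwise) is intended to provide the usual C11 fence semantics. A sketch of the classic message-passing pattern such a fence has to support, again illustrative only and assuming a C11 toolchain; payload and ready are hypothetical variables:

#include <stdatomic.h>

static int payload;      /* plain data published by the producer */
static atomic_int ready; /* 0 = not yet published, 1 = published */

void producer(void) {
	payload = 42;
	atomic_thread_fence(memory_order_release); /* keep the payload write before the flag write */
	atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

int consumer(void) {
	while (atomic_load_explicit(&ready, memory_order_relaxed) == 0) {
		cpu_relax(); /* spin politely until the flag flips */
	}
	atomic_thread_fence(memory_order_acquire); /* keep the payload read after the flag read */
	return payload; /* observes 42 once ready == 1 */
}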