libdeflate/common/compiler_gcc.h
Eric Biggers 0f5238f0ad lib: remove the "packed struct" approach to unaligned memory access
gcc 10 is miscompiling libdeflate on x86_64 at -O3 due to a regression
in how packed structs are handled
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).

Work around this by just always using memcpy() for unaligned accesses.
It's unclear that the "packed struct" approach is worthwhile to maintain
anymore.  Currently I'm only aware that it's useful with old gcc's on
arm32.  Hopefully, compilers are good enough now that we can simply use
memcpy() everywhere.

Update https://github.com/ebiggers/libdeflate/issues/64
2020-05-08 23:03:58 -07:00

141 lines
4.6 KiB
C

/*
* compiler_gcc.h - definitions for the GNU C Compiler. This also handles clang
* and the Intel C Compiler (icc).
*
* TODO: icc is not well tested, so some things are currently disabled even
* though they could probably be enabled on some icc versions.
*/
/*
 * GCC_PREREQ(major, minor) - gcc version check, usable in #if.
 *
 * Evaluates to 1 when the gcc version is at least major.minor and to 0
 * otherwise.  clang and icc are excluded explicitly and always get 0.
 */
#if defined(__clang__) || defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor) 0
#else
#  define GCC_PREREQ(major, minor) \
        (__GNUC__ >= (major) && \
         (__GNUC__ > (major) || __GNUC_MINOR__ >= (minor)))
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version) - clang version check, usable
 * in #if.
 *
 * Caution: only test the clang version when there is truly no alternative!
 * "Vendors" such as Apple can use different version numbers, which is why
 * builds that define __apple_build_version__ are compared against
 * 'apple_version' instead of against major.minor.  Always 0 when the
 * compiler is not clang.
 */
#if !defined(__clang__)
#  define CLANG_PREREQ(major, minor, apple_version) 0
#elif defined(__apple_build_version__)
#  define CLANG_PREREQ(major, minor, apple_version) \
        (__apple_build_version__ >= (apple_version))
#else
#  define CLANG_PREREQ(major, minor, apple_version) \
        (__clang_major__ >= (major) && \
         (__clang_major__ > (major) || __clang_minor__ >= (minor)))
#endif
/*
 * Fallbacks for compilers that lack the __has_*() feature-test macros: each
 * missing one is defined to evaluate to 0 so that it can still appear in #if
 * expressions.
 */
#if !defined(__has_attribute)
#  define __has_attribute(attribute) 0
#endif
#if !defined(__has_builtin)
#  define __has_builtin(builtin) 0
#endif
#if !defined(__has_feature)
#  define __has_feature(feature) 0
#endif
/*
 * LIBEXPORT - marks a symbol as exported from the library: dllexport when
 * building for Windows, default symbol visibility everywhere else.
 */
#ifndef _WIN32
#  define LIBEXPORT __attribute__((visibility("default")))
#else
#  define LIBEXPORT __declspec(dllexport)
#endif
/* Keyword spellings for gcc-compatible compilers */
#define inline                  inline
#define forceinline             inline __attribute__((always_inline))
#define restrict                __restrict__

/* Branch prediction hints; the condition is normalized to 0 or 1 first */
#define likely(cond)            __builtin_expect(!!(cond), 1)
#define unlikely(cond)          __builtin_expect(!!(cond), 0)

/* Cache prefetch hints; __builtin_prefetch() takes 0 for read, 1 for write */
#define prefetchr(ptr)          __builtin_prefetch((ptr), 0)
#define prefetchw(ptr)          __builtin_prefetch((ptr), 1)

/* Request an alignment of at least 'n' for a variable or type */
#define _aligned_attribute(n)   __attribute__((aligned(n)))
/*
 * Whether the 'target' function attribute is available: assumed for
 * gcc >= 4.4, otherwise probed with __has_attribute().
 */
#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \
        (GCC_PREREQ(4, 4) || __has_attribute(target))

#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE

#  if defined(__x86_64__) || defined(__i386__)

     /*
      * x86: per-extension availability, keyed on the gcc version or on the
      * presence of a representative builtin.
      */
#    define COMPILER_SUPPORTS_PCLMUL_TARGET \
        (GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128))
#    define COMPILER_SUPPORTS_AVX_TARGET \
        (GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256))
#    define COMPILER_SUPPORTS_BMI2_TARGET \
        (GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di))
#    define COMPILER_SUPPORTS_AVX2_TARGET \
        (GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_psadbw256))
#    define COMPILER_SUPPORTS_AVX512BW_TARGET \
        (GCC_PREREQ(5, 1) || __has_builtin(__builtin_ia32_psadbw512))

     /*
      * Before gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics that
      * are unavailable in the main target could not be used inside 'target'
      * attribute functions.  clang has no feature test macro for this, so
      * unfortunately its version has to be checked.
      */
#    if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000)
#      define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1
#      define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \
                COMPILER_SUPPORTS_PCLMUL_TARGET
#      define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \
                COMPILER_SUPPORTS_AVX2_TARGET
#      define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS \
                COMPILER_SUPPORTS_AVX512BW_TARGET
#    endif

#  elif defined(__aarch64__) || (defined(__arm__) && defined(__ARM_FP))

     /* 32-bit arm: including arm_neon.h requires hardware fp support */

     /*
      * Before gcc 6.1 (r230411 for arm, r226563 for aarch64), NEON and
      * crypto intrinsics that are unavailable in the main target could not
      * be used inside 'target' attribute functions.
      *
      * clang as of 5.0.1 still doesn't allow it.  It does, however, seem to
      * allow the pmull intrinsics if only __ARM_NEON is enabled.
      */
#    define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS GCC_PREREQ(6, 1)
#    if defined(__ARM_NEON)
#      define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS \
                (GCC_PREREQ(6, 1) || __has_builtin(__builtin_neon_vmull_p64))
#    else
#      define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS \
                (GCC_PREREQ(6, 1))
#    endif

#  endif

#endif /* COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE */
/*
 * CPU_IS_LITTLE_ENDIAN() - target byte order, taken from the predefined
 * __BYTE_ORDER__ macro.  Newer gcc supports __BYTE_ORDER__; older gcc
 * doesn't, in which case nothing is defined here.
 */
#if defined(__BYTE_ORDER__)
#  define CPU_IS_LITTLE_ENDIAN() (__ORDER_LITTLE_ENDIAN__ == __BYTE_ORDER__)
#endif
/*
 * Byte swap builtins.  Each bswapN is defined only when the matching builtin
 * is expected to exist: gcc >= 4.3 for the 32-bit and 64-bit variants,
 * gcc >= 4.8 for the 16-bit variant, or whenever __has_builtin() reports it.
 */
#if __has_builtin(__builtin_bswap64) || GCC_PREREQ(4, 3)
#  define bswap64 __builtin_bswap64
#endif
#if __has_builtin(__builtin_bswap32) || GCC_PREREQ(4, 3)
#  define bswap32 __builtin_bswap32
#endif
#if __has_builtin(__builtin_bswap16) || GCC_PREREQ(4, 8)
#  define bswap16 __builtin_bswap16
#endif
/*
 * UNALIGNED_ACCESS_IS_FAST - defined to 1 for x86 (32-bit and 64-bit),
 * 64-bit PowerPC, and ARM targets that define __ARM_FEATURE_UNALIGNED.
 * Left undefined on every other target.
 */
#if defined(__i386__) || defined(__x86_64__) || \
    defined(__powerpc64__) || defined(__ARM_FEATURE_UNALIGNED)
#  define UNALIGNED_ACCESS_IS_FAST 1
#endif
/*
 * Bit scans built on the count-leading-zeros / count-trailing-zeros builtins:
 *   bsf32(v), bsf64(v) - number of trailing zero bits, i.e. the index of the
 *                        least significant set bit
 *   bsr32(v), bsr64(v) - 31 (resp. 63) minus the number of leading zero bits,
 *                        i.e. the index of the most significant set bit
 * bsr32() assumes the 'unsigned int' taken by __builtin_clz() is 32 bits wide.
 * Per the gcc documentation, the builtins' results are undefined for v == 0.
 */
#define bsf32(v)        __builtin_ctz(v)
#define bsf64(v)        __builtin_ctzll(v)
#define bsr32(v)        (31 - __builtin_clz(v))
#define bsr64(v)        (63 - __builtin_clzll(v))