From 161720608697e320e496c5015fbab84bac051674 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 18 Feb 2018 23:03:26 -0800 Subject: [PATCH] lib/x86: allow choosing adler32_sse2() at runtime Now that we detect CPU features on 32-bit x86, allow the SSE2 implementation of Adler-32 to be selected at runtime based on the presence of the SSE2 feature. --- common/common_defs.h | 3 +++ common/compiler_gcc.h | 1 + lib/x86/adler32_impl.h | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/common/common_defs.h b/common/common_defs.h index 2f00357..dbaee51 100644 --- a/common/common_defs.h +++ b/common/common_defs.h @@ -133,6 +133,9 @@ typedef size_t machine_word_t; * Which targets are supported with the 'target' function attribute and have * intrinsics that work within 'target'-ed functions? */ +#ifndef COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS +# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 0 +#endif #ifndef COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS # define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0 #endif diff --git a/common/compiler_gcc.h b/common/compiler_gcc.h index 73e4d17..ad68eba 100644 --- a/common/compiler_gcc.h +++ b/common/compiler_gcc.h @@ -80,6 +80,7 @@ * for this so we have to check its version. 
*/ # if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) +# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1 # define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \ COMPILER_SUPPORTS_PCLMUL_TARGET # define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \ diff --git a/lib/x86/adler32_impl.h b/lib/x86/adler32_impl.h index d5140a6..ad057dd 100644 --- a/lib/x86/adler32_impl.h +++ b/lib/x86/adler32_impl.h @@ -81,7 +81,10 @@ adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2) #endif /* AVX2 implementation */ /* SSE2 implementation */ -#if !defined(DEFAULT_IMPL) && defined(__SSE2__) +#undef DISPATCH_SSE2 +#if !defined(DEFAULT_IMPL) && \ + (defined(__SSE2__) || (X86_CPU_FEATURES_ENABLED && \ + COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS)) # define FUNCNAME adler32_sse2 # define FUNCNAME_CHUNK adler32_sse2_chunk # define IMPL_ALIGNMENT 16 @@ -92,8 +95,14 @@ adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2) * would behave incorrectly. */ # define IMPL_MAX_CHUNK_SIZE (32 * (0x7FFF / 0xFF)) -# define ATTRIBUTES -# define DEFAULT_IMPL adler32_sse2 +# ifdef __SSE2__ +# define ATTRIBUTES +# define DEFAULT_IMPL adler32_sse2 +# else +# define ATTRIBUTES __attribute__((target("sse2"))) +# define DISPATCH 1 +# define DISPATCH_SSE2 1 +# endif # include <emmintrin.h> static forceinline ATTRIBUTES void adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2) @@ -179,6 +188,10 @@ arch_select_adler32_func(void) #ifdef DISPATCH_AVX2 if (features & X86_CPU_FEATURE_AVX2) return adler32_avx2; +#endif +#ifdef DISPATCH_SSE2 + if (features & X86_CPU_FEATURE_SSE2) + return adler32_sse2; #endif return NULL; }