mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-10 12:58:30 -04:00
lib/x86: allow choosing adler32_sse2() at runtime
Now that we detect CPU features on 32-bit x86, allow the SSE2 implementation of Adler-32 to be selected at runtime based on the presence of the SSE2 feature.
This commit is contained in:
parent
0d1260be99
commit
1617206086
@@ -133,6 +133,9 @@ typedef size_t machine_word_t;
|
|||||||
* Which targets are supported with the 'target' function attribute and have
|
* Which targets are supported with the 'target' function attribute and have
|
||||||
* intrinsics that work within 'target'-ed functions?
|
* intrinsics that work within 'target'-ed functions?
|
||||||
*/
|
*/
|
||||||
|
#ifndef COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS
|
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS
|
||||||
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0
|
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0
|
||||||
#endif
|
#endif
|
||||||
|
@@ -80,6 +80,7 @@
|
|||||||
* for this so we have to check its version.
|
* for this so we have to check its version.
|
||||||
*/
|
*/
|
||||||
# if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000)
|
# if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000)
|
||||||
|
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1
|
||||||
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \
|
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \
|
||||||
COMPILER_SUPPORTS_PCLMUL_TARGET
|
COMPILER_SUPPORTS_PCLMUL_TARGET
|
||||||
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \
|
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \
|
||||||
|
@@ -81,7 +81,10 @@ adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2)
|
|||||||
#endif /* AVX2 implementation */
|
#endif /* AVX2 implementation */
|
||||||
|
|
||||||
/* SSE2 implementation */
|
/* SSE2 implementation */
|
||||||
#if !defined(DEFAULT_IMPL) && defined(__SSE2__)
|
#undef DISPATCH_SSE2
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__SSE2__) || (X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS))
|
||||||
# define FUNCNAME adler32_sse2
|
# define FUNCNAME adler32_sse2
|
||||||
# define FUNCNAME_CHUNK adler32_sse2_chunk
|
# define FUNCNAME_CHUNK adler32_sse2_chunk
|
||||||
# define IMPL_ALIGNMENT 16
|
# define IMPL_ALIGNMENT 16
|
||||||
@@ -92,8 +95,14 @@ adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2)
|
|||||||
* would behave incorrectly.
|
* would behave incorrectly.
|
||||||
*/
|
*/
|
||||||
# define IMPL_MAX_CHUNK_SIZE (32 * (0x7FFF / 0xFF))
|
# define IMPL_MAX_CHUNK_SIZE (32 * (0x7FFF / 0xFF))
|
||||||
|
# ifdef __SSE2__
|
||||||
# define ATTRIBUTES
|
# define ATTRIBUTES
|
||||||
# define DEFAULT_IMPL adler32_sse2
|
# define DEFAULT_IMPL adler32_sse2
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("sse2")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_SSE2 1
|
||||||
|
# endif
|
||||||
# include <emmintrin.h>
|
# include <emmintrin.h>
|
||||||
static forceinline ATTRIBUTES void
|
static forceinline ATTRIBUTES void
|
||||||
adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2)
|
adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2)
|
||||||
@@ -179,6 +188,10 @@ arch_select_adler32_func(void)
|
|||||||
#ifdef DISPATCH_AVX2
|
#ifdef DISPATCH_AVX2
|
||||||
if (features & X86_CPU_FEATURE_AVX2)
|
if (features & X86_CPU_FEATURE_AVX2)
|
||||||
return adler32_avx2;
|
return adler32_avx2;
|
||||||
|
#endif
|
||||||
|
#ifdef DISPATCH_SSE2
|
||||||
|
if (features & X86_CPU_FEATURE_SSE2)
|
||||||
|
return adler32_sse2;
|
||||||
#endif
|
#endif
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user