mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-08-04 02:06:31 -04:00
lib/arm/crc32: add support for ARM CRC32 instructions
Add a CRC32 implementation that uses the ARM CRC32 instructions. This is simpler and faster than the PMULL implementation. On AWS Graviton2, the performance improvement is about 70%. On Hikey960, the performance improvement is about 30% for the Cortex-A53 cores or about 5% for the Cortex-A73 cores. Based on work by Greg V <greg@unrelenting.technology> (https://github.com/ebiggers/libdeflate/pull/45) and Andrew Steinborn <git@steinborn.me> (https://github.com/ebiggers/libdeflate/pull/76).
This commit is contained in:
parent
2eeaa9282e
commit
ea88fa822f
@ -28,6 +28,80 @@
|
|||||||
|
|
||||||
#include "cpu_features.h"
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/*
 * Implementation using ARM CRC32 instructions
 *
 * crc32_arm() is compiled when no DEFAULT_IMPL has been chosen yet and either:
 *
 *   - the build target already has the CRC32 extension enabled
 *     (__ARM_FEATURE_CRC32 is defined), in which case crc32_arm() becomes
 *     DEFAULT_IMPL and needs no function attribute; or
 *
 *   - ARM CPU feature detection is enabled and the compiler supports the CRC32
 *     intrinsics via __attribute__((target)), in which case DISPATCH and
 *     DISPATCH_ARM are defined so that the dispatcher can select crc32_arm()
 *     based on the detected CPU features.
 *
 * The spelling of the target attribute differs between arm32 and arm64, and
 * between clang and other compilers, hence the nested conditionals below.
 */
#undef DISPATCH_ARM
#if !defined(DEFAULT_IMPL) && \
	(defined(__ARM_FEATURE_CRC32) || \
	 (ARM_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS))
#  ifdef __ARM_FEATURE_CRC32
#    define ATTRIBUTES
#    define DEFAULT_IMPL crc32_arm
#  else
#    ifdef __arm__
#      ifdef __clang__
#        define ATTRIBUTES __attribute__((target("armv8-a,crc")))
#      else
#        define ATTRIBUTES __attribute__((target("arch=armv8-a+crc")))
#      endif
#    else
#      ifdef __clang__
#        define ATTRIBUTES __attribute__((target("crc")))
#      else
#        define ATTRIBUTES __attribute__((target("+crc")))
#      endif
#    endif
#    define DISPATCH 1
#    define DISPATCH_ARM 1
#  endif

/*
 * gcc's (as of 10.1) version of arm_acle.h for arm32, and clang's (as of
 * 10.0.1) version of arm_acle.h for both arm32 and arm64, have a bug where they
 * only define the CRC32 functions like __crc32b() when __ARM_FEATURE_CRC32 is
 * defined.  That prevents them from being used via __attribute__((target)) when
 * the main target doesn't have CRC32 support enabled.  The actual built-ins
 * like __builtin_arm_crc32b() are available and work, however; it's just the
 * wrappers in arm_acle.h like __crc32b() that erroneously don't get defined.
 * Work around this by manually defining __ARM_FEATURE_CRC32.
 */
#ifndef __ARM_FEATURE_CRC32
#  define __ARM_FEATURE_CRC32 1
#endif
#include <arm_acle.h>

/*
 * Fold 'size' bytes starting at 'p' into the CRC-32 value 'remainder' using
 * the __crc32b() and __crc32d() intrinsics, and return the updated value.
 *
 * The buffer is consumed in four phases:
 *   1. single bytes until 'p' is 8-byte aligned (or the input runs out);
 *   2. 32 bytes per iteration, as four 64-bit words;
 *   3. the remaining whole 64-bit words, one per iteration;
 *   4. the remaining tail bytes (fewer than 8), one at a time.
 *
 * Each 64-bit word is passed through le64_bswap() (defined elsewhere;
 * presumably it converts a little-endian word to CPU byte order) before being
 * fed to __crc32d().
 */
static u32 ATTRIBUTES
crc32_arm(u32 remainder, const u8 *p, size_t size)
{
	const u64 *words;

	/* Phase 1: advance byte-by-byte until 'p' is 8-byte aligned. */
	while (size != 0 && ((uintptr_t)p & 7) != 0) {
		remainder = __crc32b(remainder, *p++);
		size--;
	}

	/* Phase 2: 32 bytes per iteration.  'p' is aligned from here on. */
	while (size >= 32) {
		/* Keep the const qualifier when reinterpreting the buffer. */
		words = (const u64 *)p;
		remainder = __crc32d(remainder, le64_bswap(words[0]));
		remainder = __crc32d(remainder, le64_bswap(words[1]));
		remainder = __crc32d(remainder, le64_bswap(words[2]));
		remainder = __crc32d(remainder, le64_bswap(words[3]));
		p += 32;
		size -= 32;
	}

	/* Phase 3: any remaining whole 64-bit words. */
	while (size >= 8) {
		words = (const u64 *)p;
		remainder = __crc32d(remainder, le64_bswap(words[0]));
		p += 8;
		size -= 8;
	}

	/* Phase 4: tail bytes. */
	while (size != 0) {
		remainder = __crc32b(remainder, *p++);
		size--;
	}

	return remainder;
}
#undef ATTRIBUTES
#endif /* Implementation using ARM CRC32 instructions */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CRC-32 folding with ARM Crypto extension-PMULL
|
* CRC-32 folding with ARM Crypto extension-PMULL
|
||||||
*
|
*
|
||||||
@ -155,6 +229,10 @@ arch_select_crc32_func(void)
|
|||||||
{
|
{
|
||||||
u32 features = get_cpu_features();
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_ARM
|
||||||
|
if (features & ARM_CPU_FEATURE_CRC32)
|
||||||
|
return crc32_arm;
|
||||||
|
#endif
|
||||||
#ifdef DISPATCH_PMULL
|
#ifdef DISPATCH_PMULL
|
||||||
if (features & ARM_CPU_FEATURE_PMULL)
|
if (features & ARM_CPU_FEATURE_PMULL)
|
||||||
return crc32_pmull;
|
return crc32_pmull;
|
||||||
|
@ -112,9 +112,13 @@ i386|x86_64)
|
|||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
arm*|aarch*)
|
arm*|aarch*)
|
||||||
|
if have_cpu_feature crc32; then
|
||||||
|
do_benchmark "ARM"
|
||||||
|
disable_cpu_feature "crc32" "-march=armv8-a+nocrc"
|
||||||
|
fi
|
||||||
if have_cpu_feature pmull; then
|
if have_cpu_feature pmull; then
|
||||||
do_benchmark "PMULL"
|
do_benchmark "PMULL"
|
||||||
disable_cpu_feature "pmull" "-march=armv8-a+nocrypto"
|
disable_cpu_feature "pmull" "-march=armv8-a+nocrc+nocrypto"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
Loading…
x
Reference in New Issue
Block a user