From ea88fa822f0cd7bc205ea6b23c1bfb486234d2ff Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sat, 10 Oct 2020 23:02:50 -0700
Subject: [PATCH] lib/arm/crc32: add support for ARM CRC32 instructions

Add a CRC32 implementation that uses the ARM CRC32 instructions.  This is
simpler and faster than the PMULL implementation.

On AWS Graviton2, the performance improvement is about 70%.

On Hikey960, the performance improvement is about 30% for the Cortex-A53
cores or about 5% for the Cortex-A73 cores.

Based on work by Greg V (https://github.com/ebiggers/libdeflate/pull/45)
and Andrew Steinborn (https://github.com/ebiggers/libdeflate/pull/76).
---
 lib/arm/crc32_impl.h         | 78 ++++++++++++++++++++++++++++++++++++
 tools/checksum_benchmarks.sh |  6 ++-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/lib/arm/crc32_impl.h b/lib/arm/crc32_impl.h
index d5af63c..e1d94a5 100644
--- a/lib/arm/crc32_impl.h
+++ b/lib/arm/crc32_impl.h
@@ -28,6 +28,80 @@
 
 #include "cpu_features.h"
 
+/* Implementation using ARM CRC32 instructions */
+#undef DISPATCH_ARM
+#if !defined(DEFAULT_IMPL) && \
+    (defined(__ARM_FEATURE_CRC32) || \
+     (ARM_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS))
+#  ifdef __ARM_FEATURE_CRC32
+#    define ATTRIBUTES
+#    define DEFAULT_IMPL	crc32_arm
+#  else
+#    ifdef __arm__
+#      ifdef __clang__
+#        define ATTRIBUTES	__attribute__((target("armv8-a,crc")))
+#      else
+#        define ATTRIBUTES	__attribute__((target("arch=armv8-a+crc")))
+#      endif
+#    else
+#      ifdef __clang__
+#        define ATTRIBUTES	__attribute__((target("crc")))
+#      else
+#        define ATTRIBUTES	__attribute__((target("+crc")))
+#      endif
+#    endif
+#    define DISPATCH		1
+#    define DISPATCH_ARM	1
+#  endif
+
+/*
+ * gcc's (as of 10.1) version of arm_acle.h for arm32, and clang's (as of
+ * 10.0.1) version of arm_acle.h for both arm32 and arm64, have a bug where they
+ * only define the CRC32 functions like __crc32b() when __ARM_FEATURE_CRC32 is
+ * defined.  That prevents them from being used via __attribute__((target)) when
+ * the main target doesn't have CRC32 support enabled.  The actual built-ins
+ * like __builtin_arm_crc32b() are available and work, however; it's just the
+ * wrappers in arm_acle.h like __crc32b() that erroneously don't get defined.
+ * Work around this by manually defining __ARM_FEATURE_CRC32.
+ */
+#ifndef __ARM_FEATURE_CRC32
+#  define __ARM_FEATURE_CRC32	1
+#endif
+#include <arm_acle.h>
+
+static u32 ATTRIBUTES
+crc32_arm(u32 remainder, const u8 *p, size_t size)
+{
+	while (size != 0 && (uintptr_t)p & 7) {
+		remainder = __crc32b(remainder, *p++);
+		size--;
+	}
+
+	while (size >= 32) {
+		remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 0)));
+		remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 1)));
+		remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 2)));
+		remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 3)));
+		p += 32;
+		size -= 32;
+	}
+
+	while (size >= 8) {
+		remainder = __crc32d(remainder, le64_bswap(*(u64 *)p));
+		p += 8;
+		size -= 8;
+	}
+
+	while (size != 0) {
+		remainder = __crc32b(remainder, *p++);
+		size--;
+	}
+
+	return remainder;
+}
+#undef ATTRIBUTES
+#endif /* Implementation using ARM CRC32 instructions */
+
 /*
  * CRC-32 folding with ARM Crypto extension-PMULL
  *
@@ -155,6 +229,10 @@ arch_select_crc32_func(void)
 {
 	u32 features = get_cpu_features();
 
+#ifdef DISPATCH_ARM
+	if (features & ARM_CPU_FEATURE_CRC32)
+		return crc32_arm;
+#endif
 #ifdef DISPATCH_PMULL
 	if (features & ARM_CPU_FEATURE_PMULL)
 		return crc32_pmull;
diff --git a/tools/checksum_benchmarks.sh b/tools/checksum_benchmarks.sh
index 5d797a9..e025e5f 100755
--- a/tools/checksum_benchmarks.sh
+++ b/tools/checksum_benchmarks.sh
@@ -112,9 +112,13 @@ i386|x86_64)
 	fi
 	;;
 arm*|aarch*)
+	if have_cpu_feature crc32; then
+		do_benchmark "ARM"
+		disable_cpu_feature "crc32" "-march=armv8-a+nocrc"
+	fi
 	if have_cpu_feature pmull; then
 		do_benchmark "PMULL"
-		disable_cpu_feature "pmull" "-march=armv8-a+nocrypto"
+		disable_cpu_feature "pmull" "-march=armv8-a+nocrc+nocrypto"
 	fi
 	;;
 esac