From d747d2a0c3d0afeadd6d287d2dd0158dc470a368 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Dec 2015 01:07:22 -0600 Subject: [PATCH] Add ARM NEON support --- src/matchfinder_common.h | 18 ++++++++++++ src/matchfinder_neon.h | 61 ++++++++++++++++++++++++++++++++++++++++ tools/android_test.sh | 2 +- 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 src/matchfinder_neon.h diff --git a/src/matchfinder_common.h b/src/matchfinder_common.h index 2858d5a..2d7ffd9 100644 --- a/src/matchfinder_common.h +++ b/src/matchfinder_common.h @@ -36,6 +36,14 @@ typedef s16 mf_pos_t; # endif #endif +#ifdef __ARM_NEON__ +# include "matchfinder_neon.h" +# if MATCHFINDER_ALIGNMENT < 16 +# undef MATCHFINDER_ALIGNMENT +# define MATCHFINDER_ALIGNMENT 16 +# endif +#endif + /* * Initialize the hash table portion of the matchfinder. * @@ -58,6 +66,11 @@ matchfinder_init(mf_pos_t *data, size_t num_entries) return; #endif +#if defined(__ARM_NEON__) && defined(_aligned_attribute) + if (matchfinder_init_neon(data, size)) + return; +#endif + for (size_t i = 0; i < num_entries; i++) data[i] = MATCHFINDER_INITVAL; } @@ -98,6 +111,11 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries) return; #endif +#if defined(__ARM_NEON__) && defined(_aligned_attribute) + if (matchfinder_rebase_neon(data, size)) + return; +#endif + if (MATCHFINDER_WINDOW_SIZE == 32768) { /* Branchless version for 32768 byte windows. 
If the value was * already negative, clear all bits except the sign bit; this
diff --git a/src/matchfinder_neon.h b/src/matchfinder_neon.h
new file mode 100644
index 0000000..42ec662
--- /dev/null
+++ b/src/matchfinder_neon.h
@@ -0,0 +1,90 @@
+/*
+ * matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
+ * SIMD) instructions
+ */
+
+#include <arm_neon.h>
+
+/*
+ * Fill 'size' bytes of matchfinder table at 'data' with MATCHFINDER_INITVAL,
+ * storing four 128-bit vectors (64 bytes) per loop iteration.
+ *
+ * Returns true if the table was filled.  Returns false, leaving 'data'
+ * untouched, if 'size' is zero or not a multiple of 64 bytes; the caller is
+ * then expected to fall back to its generic loop.
+ *
+ * 'data' is accessed through an int16x8_t pointer, so it is assumed to be
+ * suitably aligned for 16-byte vector accesses.
+ */
+static forceinline bool
+matchfinder_init_neon(mf_pos_t *data, size_t size)
+{
+	int16x8_t v, *p;
+	size_t n;
+
+	/*
+	 * The stride must be parenthesized: '%' and '*' bind equally and
+	 * left-to-right, so "size % 16 * 4" would only test for a multiple of
+	 * 16.  Zero is rejected because the do-while below runs at least once.
+	 */
+	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
+		return false;
+
+	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+	v = (int16x8_t) {
+		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+	};
+	p = (int16x8_t *)data;
+	n = size / (sizeof(int16x8_t) * 4);
+	do {
+		p[0] = v;
+		p[1] = v;
+		p[2] = v;
+		p[3] = v;
+		p += 4;
+	} while (--n);
+	return true;
+}
+
+/*
+ * Rebase 'size' bytes of matchfinder table at 'data' by adding
+ * -MATCHFINDER_WINDOW_SIZE (as a 16-bit value) to every entry with the
+ * saturating add vqaddq_s16(), four 128-bit vectors per loop iteration.
+ *
+ * Returns true if the table was rebased.  Returns false, leaving 'data'
+ * untouched, if 'size' is zero or not a multiple of 64 bytes; the caller is
+ * then expected to fall back to its generic code.
+ *
+ * 'data' is accessed through an int16x8_t pointer, so it is assumed to be
+ * suitably aligned for 16-byte vector accesses.
+ */
+static forceinline bool
+matchfinder_rebase_neon(mf_pos_t *data, size_t size)
+{
+	int16x8_t v, *p;
+	size_t n;
+
+	/* Same stride check as in matchfinder_init_neon(). */
+	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
+		return false;
+
+	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+	v = (int16x8_t) {
+		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+	};
+	p = (int16x8_t *)data;
+	n = size / (sizeof(int16x8_t) * 4);
+	do {
+		p[0] = vqaddq_s16(p[0], v);
+		p[1] = vqaddq_s16(p[1], v);
+		p[2] = vqaddq_s16(p[2], v);
+		p[3] = vqaddq_s16(p[3], v);
+		p += 4;
+	} while (--n);
+	return true;
+}
diff --git a/tools/android_test.sh b/tools/android_test.sh
index fd73cb2..1414221 100755
--- a/tools/android_test.sh
+++ b/tools/android_test.sh
@@ -7,7 +7,7 @@ NDKDIR=/opt/android-ndk
 make clean
 make -j4 BUILD_SHARED_LIBRARY=no BUILD_BENCHMARK_PROGRAM=yes \
CC="$NDKDIR/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc" \ - CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a" + CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a -fPIC -pie -mfpu=neon -mfloat-abi=softfp" adb push benchmark /data/local/tmp adb shell /data/local/tmp/benchmark /data/local/tmp/testdata "$@"