Add ARM NEON support

This commit is contained in:
Eric Biggers 2015-12-04 01:07:22 -06:00
parent 06923e432f
commit d747d2a0c3
3 changed files with 80 additions and 1 deletions

View File

@ -36,6 +36,14 @@ typedef s16 mf_pos_t;
# endif
#endif
/*
 * When compiling for ARM NEON, pull in the NEON-optimized matchfinder routines
 * and make sure MATCHFINDER_ALIGNMENT is at least 16.
 * NOTE(review): 16 presumably matches sizeof(int16x8_t), the vector type the
 * NEON routines use to access the matchfinder data -- confirm.
 */
#ifdef __ARM_NEON__
# include "matchfinder_neon.h"
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
#endif
/*
* Initialize the hash table portion of the matchfinder.
*
@ -58,6 +66,11 @@ matchfinder_init(mf_pos_t *data, size_t num_entries)
return;
#endif
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
if (matchfinder_init_neon(data, size))
return;
#endif
for (size_t i = 0; i < num_entries; i++)
data[i] = MATCHFINDER_INITVAL;
}
@ -98,6 +111,11 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
return;
#endif
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
if (matchfinder_rebase_neon(data, size))
return;
#endif
if (MATCHFINDER_WINDOW_SIZE == 32768) {
/* Branchless version for 32768 byte windows. If the value was
* already negative, clear all bits except the sign bit; this

61
src/matchfinder_neon.h Normal file
View File

@ -0,0 +1,61 @@
/*
* matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
* SIMD) instructions
*/
#include <arm_neon.h>
/*
 * Fill the matchfinder data at 'data' with MATCHFINDER_INITVAL using NEON
 * vector stores, four int16x8_t vectors per loop iteration.
 *
 * 'size' is the size of the data in bytes.
 *
 * Returns true if the data was initialized.  Returns false, leaving 'data'
 * untouched, if 'size' is zero or is not a multiple of the number of bytes
 * written per iteration (4 * sizeof(int16x8_t)); the caller must then use
 * another implementation.
 *
 * 'data' is accessed through an int16x8_t pointer, so it must be suitably
 * aligned for that type (presumably guaranteed by MATCHFINDER_ALIGNMENT --
 * confirm against the caller).
 */
static forceinline bool
matchfinder_init_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	/*
	 * The parentheses matter: '%' and '*' have equal precedence and
	 * associate left-to-right, so "size % sizeof(int16x8_t) * 4" would only
	 * test for a multiple of sizeof(int16x8_t).  Zero is rejected as well
	 * because the do-while loop below always runs at least once.
	 */
	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	/* Each vector lane must hold exactly one mf_pos_t. */
	STATIC_ASSERT(sizeof(mf_pos_t) == 2);

	v = (int16x8_t) {
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
	};
	p = (int16x8_t *)data;
	n = size / (sizeof(int16x8_t) * 4);	/* n >= 1 after the check above */
	do {
		p[0] = v;
		p[1] = v;
		p[2] = v;
		p[3] = v;
		p += 4;
	} while (--n);
	return true;
}
/*
 * Rebase the matchfinder data at 'data' using NEON: a constant vector built
 * from (u16)-MATCHFINDER_WINDOW_SIZE is added to every 16-bit entry with
 * vqaddq_s16 (signed saturating add), four int16x8_t vectors per loop
 * iteration.
 *
 * 'size' is the size of the data in bytes.
 *
 * Returns true if the data was rebased.  Returns false, leaving 'data'
 * untouched, if 'size' is zero or is not a multiple of the number of bytes
 * processed per iteration (4 * sizeof(int16x8_t)); the caller must then use
 * another implementation.
 *
 * 'data' is accessed through an int16x8_t pointer, so it must be suitably
 * aligned for that type (presumably guaranteed by MATCHFINDER_ALIGNMENT --
 * confirm against the caller).
 */
static forceinline bool
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	/*
	 * The parentheses matter: '%' and '*' have equal precedence and
	 * associate left-to-right, so "size % sizeof(int16x8_t) * 4" would only
	 * test for a multiple of sizeof(int16x8_t).  Zero is rejected as well
	 * because the do-while loop below always runs at least once.
	 */
	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	/* Each vector lane must hold exactly one mf_pos_t. */
	STATIC_ASSERT(sizeof(mf_pos_t) == 2);

	v = (int16x8_t) {
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
	};
	p = (int16x8_t *)data;
	n = size / (sizeof(int16x8_t) * 4);	/* n >= 1 after the check above */
	do {
		p[0] = vqaddq_s16(p[0], v);
		p[1] = vqaddq_s16(p[1], v);
		p[2] = vqaddq_s16(p[2], v);
		p[3] = vqaddq_s16(p[3], v);
		p += 4;
	} while (--n);
	return true;
}

View File

@ -7,7 +7,7 @@ NDKDIR=/opt/android-ndk
make clean
make -j4 BUILD_SHARED_LIBRARY=no BUILD_BENCHMARK_PROGRAM=yes \
CC="$NDKDIR/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc" \
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a"
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a -fPIC -pie -mfpu=neon -mfloat-abi=softfp"
adb push benchmark /data/local/tmp
adb shell /data/local/tmp/benchmark /data/local/tmp/testdata "$@"