mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-11 21:39:32 -04:00
Add ARM NEON support
This commit is contained in:
parent
06923e432f
commit
d747d2a0c3
@ -36,6 +36,14 @@ typedef s16 mf_pos_t;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
# include "matchfinder_neon.h"
|
||||
# if MATCHFINDER_ALIGNMENT < 16
|
||||
# undef MATCHFINDER_ALIGNMENT
|
||||
# define MATCHFINDER_ALIGNMENT 16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Initialize the hash table portion of the matchfinder.
|
||||
*
|
||||
@ -58,6 +66,11 @@ matchfinder_init(mf_pos_t *data, size_t num_entries)
|
||||
return;
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
|
||||
if (matchfinder_init_neon(data, size))
|
||||
return;
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i < num_entries; i++)
|
||||
data[i] = MATCHFINDER_INITVAL;
|
||||
}
|
||||
@ -98,6 +111,11 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
|
||||
return;
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
|
||||
if (matchfinder_rebase_neon(data, size))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (MATCHFINDER_WINDOW_SIZE == 32768) {
|
||||
/* Branchless version for 32768 byte windows. If the value was
|
||||
* already negative, clear all bits except the sign bit; this
|
||||
|
61
src/matchfinder_neon.h
Normal file
61
src/matchfinder_neon.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
|
||||
* SIMD) instructions
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static forceinline bool
|
||||
matchfinder_init_neon(mf_pos_t *data, size_t size)
|
||||
{
|
||||
int16x8_t v, *p;
|
||||
size_t n;
|
||||
|
||||
if (size % sizeof(int16x8_t) * 4)
|
||||
return false;
|
||||
|
||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||
v = (int16x8_t) {
|
||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||
};
|
||||
p = (int16x8_t *)data;
|
||||
n = size / (sizeof(int16x8_t) * 4);
|
||||
do {
|
||||
p[0] = v;
|
||||
p[1] = v;
|
||||
p[2] = v;
|
||||
p[3] = v;
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
||||
|
||||
static forceinline bool
|
||||
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
|
||||
{
|
||||
int16x8_t v, *p;
|
||||
size_t n;
|
||||
|
||||
if ((size % sizeof(int16x8_t) * 4 != 0))
|
||||
return false;
|
||||
|
||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||
v = (int16x8_t) {
|
||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||
};
|
||||
p = (int16x8_t *)data;
|
||||
n = size / (sizeof(int16x8_t) * 4);
|
||||
do {
|
||||
p[0] = vqaddq_s16(p[0], v);
|
||||
p[1] = vqaddq_s16(p[1], v);
|
||||
p[2] = vqaddq_s16(p[2], v);
|
||||
p[3] = vqaddq_s16(p[3], v);
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
@ -7,7 +7,7 @@ NDKDIR=/opt/android-ndk
|
||||
make clean
|
||||
make -j4 BUILD_SHARED_LIBRARY=no BUILD_BENCHMARK_PROGRAM=yes \
|
||||
CC="$NDKDIR/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc" \
|
||||
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a"
|
||||
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a -fPIC -pie -mfpu=neon -mfloat-abi=softfp"
|
||||
|
||||
adb push benchmark /data/local/tmp
|
||||
adb shell /data/local/tmp/benchmark /data/local/tmp/testdata "$@"
|
||||
|
Loading…
x
Reference in New Issue
Block a user