mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-11 21:39:32 -04:00
Add ARM NEON support
This commit is contained in:
parent
06923e432f
commit
d747d2a0c3
@ -36,6 +36,14 @@ typedef s16 mf_pos_t;
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
# include "matchfinder_neon.h"
|
||||||
|
# if MATCHFINDER_ALIGNMENT < 16
|
||||||
|
# undef MATCHFINDER_ALIGNMENT
|
||||||
|
# define MATCHFINDER_ALIGNMENT 16
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the hash table portion of the matchfinder.
|
* Initialize the hash table portion of the matchfinder.
|
||||||
*
|
*
|
||||||
@ -58,6 +66,11 @@ matchfinder_init(mf_pos_t *data, size_t num_entries)
|
|||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
|
||||||
|
if (matchfinder_init_neon(data, size))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (size_t i = 0; i < num_entries; i++)
|
for (size_t i = 0; i < num_entries; i++)
|
||||||
data[i] = MATCHFINDER_INITVAL;
|
data[i] = MATCHFINDER_INITVAL;
|
||||||
}
|
}
|
||||||
@ -98,6 +111,11 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
|
|||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__ARM_NEON__) && defined(_aligned_attribute)
|
||||||
|
if (matchfinder_rebase_neon(data, size))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (MATCHFINDER_WINDOW_SIZE == 32768) {
|
if (MATCHFINDER_WINDOW_SIZE == 32768) {
|
||||||
/* Branchless version for 32768 byte windows. If the value was
|
/* Branchless version for 32768 byte windows. If the value was
|
||||||
* already negative, clear all bits except the sign bit; this
|
* already negative, clear all bits except the sign bit; this
|
||||||
|
61
src/matchfinder_neon.h
Normal file
61
src/matchfinder_neon.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
|
||||||
|
* SIMD) instructions
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
static forceinline bool
|
||||||
|
matchfinder_init_neon(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
int16x8_t v, *p;
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
if (size % sizeof(int16x8_t) * 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
v = (int16x8_t) {
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
};
|
||||||
|
p = (int16x8_t *)data;
|
||||||
|
n = size / (sizeof(int16x8_t) * 4);
|
||||||
|
do {
|
||||||
|
p[0] = v;
|
||||||
|
p[1] = v;
|
||||||
|
p[2] = v;
|
||||||
|
p[3] = v;
|
||||||
|
p += 4;
|
||||||
|
} while (--n);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline bool
|
||||||
|
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
int16x8_t v, *p;
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
if ((size % sizeof(int16x8_t) * 4 != 0))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
v = (int16x8_t) {
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
};
|
||||||
|
p = (int16x8_t *)data;
|
||||||
|
n = size / (sizeof(int16x8_t) * 4);
|
||||||
|
do {
|
||||||
|
p[0] = vqaddq_s16(p[0], v);
|
||||||
|
p[1] = vqaddq_s16(p[1], v);
|
||||||
|
p[2] = vqaddq_s16(p[2], v);
|
||||||
|
p[3] = vqaddq_s16(p[3], v);
|
||||||
|
p += 4;
|
||||||
|
} while (--n);
|
||||||
|
return true;
|
||||||
|
}
|
@ -7,7 +7,7 @@ NDKDIR=/opt/android-ndk
|
|||||||
make clean
|
make clean
|
||||||
make -j4 BUILD_SHARED_LIBRARY=no BUILD_BENCHMARK_PROGRAM=yes \
|
make -j4 BUILD_SHARED_LIBRARY=no BUILD_BENCHMARK_PROGRAM=yes \
|
||||||
CC="$NDKDIR/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc" \
|
CC="$NDKDIR/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc" \
|
||||||
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a"
|
CFLAGS="--sysroot=$NDKDIR/platforms/android-12/arch-arm -march=armv7-a -fPIC -pie -mfpu=neon -mfloat-abi=softfp"
|
||||||
|
|
||||||
adb push benchmark /data/local/tmp
|
adb push benchmark /data/local/tmp
|
||||||
adb shell /data/local/tmp/benchmark /data/local/tmp/testdata "$@"
|
adb shell /data/local/tmp/benchmark /data/local/tmp/testdata "$@"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user