mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-08-04 02:06:31 -04:00
lib/matchfinder: simplify init and rebase
Remove the ability of matchfinder_init() and matchfinder_rebase() to fail due to the matchfinder memory size being misaligned. Instead, require that the size always be 128-byte aligned -- which is already the case. Also, make the matchfinder memory always be 32-byte aligned -- which doesn't really have any downside.
This commit is contained in:
parent
f2f6a6e396
commit
ff8634427b
@ -26,68 +26,56 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef __ARM_NEON
|
#ifdef __ARM_NEON
|
||||||
# if MATCHFINDER_ALIGNMENT < 16
|
|
||||||
# undef MATCHFINDER_ALIGNMENT
|
|
||||||
# define MATCHFINDER_ALIGNMENT 16
|
|
||||||
# endif
|
|
||||||
# include <arm_neon.h>
|
# include <arm_neon.h>
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_init_neon(mf_pos_t *data, size_t size)
|
matchfinder_init_neon(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
int16x8_t v, *p;
|
int16x8_t *p = (int16x8_t *)data;
|
||||||
size_t n;
|
int16x8_t v = (int16x8_t) {
|
||||||
|
|
||||||
if (size % (sizeof(int16x8_t) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
||||||
v = (int16x8_t) {
|
|
||||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
};
|
};
|
||||||
p = (int16x8_t *)data;
|
|
||||||
n = size / (sizeof(int16x8_t) * 4);
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
p[0] = v;
|
p[0] = v;
|
||||||
p[1] = v;
|
p[1] = v;
|
||||||
p[2] = v;
|
p[2] = v;
|
||||||
p[3] = v;
|
p[3] = v;
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
#undef arch_matchfinder_init
|
#define matchfinder_init matchfinder_init_neon
|
||||||
#define arch_matchfinder_init matchfinder_init_neon
|
|
||||||
|
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
|
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
int16x8_t v, *p;
|
int16x8_t *p = (int16x8_t *)data;
|
||||||
size_t n;
|
int16x8_t v = (int16x8_t) {
|
||||||
|
|
||||||
if (size % (sizeof(int16x8_t) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
||||||
v = (int16x8_t) {
|
|
||||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
};
|
};
|
||||||
p = (int16x8_t *)data;
|
|
||||||
n = size / (sizeof(int16x8_t) * 4);
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
p[0] = vqaddq_s16(p[0], v);
|
p[0] = vqaddq_s16(p[0], v);
|
||||||
p[1] = vqaddq_s16(p[1], v);
|
p[1] = vqaddq_s16(p[1], v);
|
||||||
p[2] = vqaddq_s16(p[2], v);
|
p[2] = vqaddq_s16(p[2], v);
|
||||||
p[3] = vqaddq_s16(p[3], v);
|
p[3] = vqaddq_s16(p[3], v);
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
#undef arch_matchfinder_rebase
|
#define matchfinder_rebase matchfinder_rebase_neon
|
||||||
#define arch_matchfinder_rebase matchfinder_rebase_neon
|
|
||||||
|
|
||||||
#endif /* __ARM_NEON */
|
#endif /* __ARM_NEON */
|
||||||
|
@ -71,9 +71,9 @@
|
|||||||
#define BT_MATCHFINDER_HASH3_WAYS 2
|
#define BT_MATCHFINDER_HASH3_WAYS 2
|
||||||
#define BT_MATCHFINDER_HASH4_ORDER 16
|
#define BT_MATCHFINDER_HASH4_ORDER 16
|
||||||
|
|
||||||
#define BT_MATCHFINDER_TOTAL_HASH_LENGTH \
|
#define BT_MATCHFINDER_TOTAL_HASH_SIZE \
|
||||||
((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
|
(((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
|
||||||
(1UL << BT_MATCHFINDER_HASH4_ORDER))
|
(1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
|
||||||
|
|
||||||
/* Representation of a match found by the bt_matchfinder */
|
/* Representation of a match found by the bt_matchfinder */
|
||||||
struct lz_match {
|
struct lz_match {
|
||||||
@ -101,7 +101,7 @@ struct bt_matchfinder {
|
|||||||
|
|
||||||
}
|
}
|
||||||
#ifdef _aligned_attribute
|
#ifdef _aligned_attribute
|
||||||
_aligned_attribute(MATCHFINDER_ALIGNMENT)
|
_aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -109,14 +109,18 @@ _aligned_attribute(MATCHFINDER_ALIGNMENT)
|
|||||||
static forceinline void
|
static forceinline void
|
||||||
bt_matchfinder_init(struct bt_matchfinder *mf)
|
bt_matchfinder_init(struct bt_matchfinder *mf)
|
||||||
{
|
{
|
||||||
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_LENGTH);
|
STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
|
||||||
|
MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static forceinline void
|
static forceinline void
|
||||||
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
||||||
{
|
{
|
||||||
matchfinder_rebase((mf_pos_t *)mf,
|
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
sizeof(struct bt_matchfinder) / sizeof(mf_pos_t));
|
|
||||||
|
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
||||||
}
|
}
|
||||||
|
|
||||||
static forceinline mf_pos_t *
|
static forceinline mf_pos_t *
|
||||||
|
@ -2704,7 +2704,7 @@ libdeflate_alloc_compressor(int compression_level)
|
|||||||
size += sizeof(c->p.g);
|
size += sizeof(c->p.g);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size);
|
c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size);
|
||||||
if (!c)
|
if (!c)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -111,9 +111,9 @@
|
|||||||
#define HC_MATCHFINDER_HASH3_ORDER 15
|
#define HC_MATCHFINDER_HASH3_ORDER 15
|
||||||
#define HC_MATCHFINDER_HASH4_ORDER 16
|
#define HC_MATCHFINDER_HASH4_ORDER 16
|
||||||
|
|
||||||
#define HC_MATCHFINDER_TOTAL_HASH_LENGTH \
|
#define HC_MATCHFINDER_TOTAL_HASH_SIZE \
|
||||||
((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
|
(((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
|
||||||
(1UL << HC_MATCHFINDER_HASH4_ORDER))
|
(1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
|
||||||
|
|
||||||
struct hc_matchfinder {
|
struct hc_matchfinder {
|
||||||
|
|
||||||
@ -130,7 +130,7 @@ struct hc_matchfinder {
|
|||||||
|
|
||||||
}
|
}
|
||||||
#ifdef _aligned_attribute
|
#ifdef _aligned_attribute
|
||||||
_aligned_attribute(MATCHFINDER_ALIGNMENT)
|
_aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -138,14 +138,18 @@ struct hc_matchfinder {
|
|||||||
static forceinline void
|
static forceinline void
|
||||||
hc_matchfinder_init(struct hc_matchfinder *mf)
|
hc_matchfinder_init(struct hc_matchfinder *mf)
|
||||||
{
|
{
|
||||||
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_LENGTH);
|
STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
|
||||||
|
MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static forceinline void
|
static forceinline void
|
||||||
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
||||||
{
|
{
|
||||||
matchfinder_rebase((mf_pos_t *)mf,
|
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
sizeof(struct hc_matchfinder) / sizeof(mf_pos_t));
|
|
||||||
|
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -18,11 +18,15 @@ typedef s16 mf_pos_t;
|
|||||||
|
|
||||||
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
|
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||||
|
|
||||||
#define MATCHFINDER_ALIGNMENT 8
|
/*
|
||||||
|
* Required alignment of the matchfinder buffer pointer and size. The values
|
||||||
#define arch_matchfinder_init(data, size) false
|
* here come from the AVX-2 implementation, which is the worst case.
|
||||||
#define arch_matchfinder_rebase(data, size) false
|
*/
|
||||||
|
#define MATCHFINDER_MEM_ALIGNMENT 32
|
||||||
|
#define MATCHFINDER_SIZE_ALIGNMENT 128
|
||||||
|
|
||||||
|
#undef matchfinder_init
|
||||||
|
#undef matchfinder_rebase
|
||||||
#ifdef _aligned_attribute
|
#ifdef _aligned_attribute
|
||||||
# if defined(__arm__) || defined(__aarch64__)
|
# if defined(__arm__) || defined(__aarch64__)
|
||||||
# include "arm/matchfinder_impl.h"
|
# include "arm/matchfinder_impl.h"
|
||||||
@ -36,19 +40,20 @@ typedef s16 mf_pos_t;
|
|||||||
*
|
*
|
||||||
* Essentially, this is an optimized memset().
|
* Essentially, this is an optimized memset().
|
||||||
*
|
*
|
||||||
* 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
|
* 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
|
||||||
|
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
|
||||||
*/
|
*/
|
||||||
|
#ifndef matchfinder_init
|
||||||
static forceinline void
|
static forceinline void
|
||||||
matchfinder_init(mf_pos_t *data, size_t num_entries)
|
matchfinder_init(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
|
size_t num_entries = size / sizeof(*data);
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
if (arch_matchfinder_init(data, num_entries * sizeof(data[0])))
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (i = 0; i < num_entries; i++)
|
for (i = 0; i < num_entries; i++)
|
||||||
data[i] = MATCHFINDER_INITVAL;
|
data[i] = MATCHFINDER_INITVAL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Slide the matchfinder by WINDOW_SIZE bytes.
|
* Slide the matchfinder by WINDOW_SIZE bytes.
|
||||||
@ -71,14 +76,13 @@ matchfinder_init(mf_pos_t *data, size_t num_entries)
|
|||||||
* of "hash chains", and 2-ary in the case of "binary trees". In either case,
|
* of "hash chains", and 2-ary in the case of "binary trees". In either case,
|
||||||
* the links need to be rebased in the same way.
|
* the links need to be rebased in the same way.
|
||||||
*/
|
*/
|
||||||
|
#ifndef matchfinder_rebase
|
||||||
static forceinline void
|
static forceinline void
|
||||||
matchfinder_rebase(mf_pos_t *data, size_t num_entries)
|
matchfinder_rebase(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
|
size_t num_entries = size / sizeof(*data);
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
if (arch_matchfinder_rebase(data, num_entries * sizeof(data[0])))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (MATCHFINDER_WINDOW_SIZE == 32768) {
|
if (MATCHFINDER_WINDOW_SIZE == 32768) {
|
||||||
/* Branchless version for 32768 byte windows. If the value was
|
/* Branchless version for 32768 byte windows. If the value was
|
||||||
* already negative, clear all bits except the sign bit; this
|
* already negative, clear all bits except the sign bit; this
|
||||||
@ -101,6 +105,7 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
|
|||||||
data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
|
data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The hash function: given a sequence prefix held in the low-order bits of a
|
* The hash function: given a sequence prefix held in the low-order bits of a
|
||||||
|
@ -26,47 +26,38 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
# if MATCHFINDER_ALIGNMENT < 32
|
|
||||||
# undef MATCHFINDER_ALIGNMENT
|
|
||||||
# define MATCHFINDER_ALIGNMENT 32
|
|
||||||
# endif
|
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_init_avx2(mf_pos_t *data, size_t size)
|
matchfinder_init_avx2(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
__m256i v, *p;
|
__m256i *p = (__m256i *)data;
|
||||||
size_t n;
|
__m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
||||||
|
|
||||||
if (size % (sizeof(__m256i) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
|
||||||
p = (__m256i *)data;
|
|
||||||
n = size / (sizeof(__m256i) * 4);
|
|
||||||
do {
|
do {
|
||||||
p[0] = v;
|
p[0] = v;
|
||||||
p[1] = v;
|
p[1] = v;
|
||||||
p[2] = v;
|
p[2] = v;
|
||||||
p[3] = v;
|
p[3] = v;
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
|
#define matchfinder_init matchfinder_init_avx2
|
||||||
|
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
|
matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
__m256i v, *p;
|
__m256i *p = (__m256i *)data;
|
||||||
size_t n;
|
__m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
||||||
|
|
||||||
if (size % (sizeof(__m256i) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
|
||||||
p = (__m256i *)data;
|
|
||||||
n = size / (sizeof(__m256i) * 4);
|
|
||||||
do {
|
do {
|
||||||
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||||
p[0] = _mm256_adds_epi16(p[0], v);
|
p[0] = _mm256_adds_epi16(p[0], v);
|
||||||
@ -74,53 +65,44 @@ matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
|
|||||||
p[2] = _mm256_adds_epi16(p[2], v);
|
p[2] = _mm256_adds_epi16(p[2], v);
|
||||||
p[3] = _mm256_adds_epi16(p[3], v);
|
p[3] = _mm256_adds_epi16(p[3], v);
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
#endif /* __AVX2__ */
|
#define matchfinder_rebase matchfinder_rebase_avx2
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#elif defined(__SSE2__)
|
||||||
# if MATCHFINDER_ALIGNMENT < 16
|
|
||||||
# undef MATCHFINDER_ALIGNMENT
|
|
||||||
# define MATCHFINDER_ALIGNMENT 16
|
|
||||||
# endif
|
|
||||||
# include <emmintrin.h>
|
# include <emmintrin.h>
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_init_sse2(mf_pos_t *data, size_t size)
|
matchfinder_init_sse2(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
__m128i v, *p;
|
__m128i *p = (__m128i *)data;
|
||||||
size_t n;
|
__m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
||||||
|
|
||||||
if (size % (sizeof(__m128i) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
|
||||||
p = (__m128i *)data;
|
|
||||||
n = size / (sizeof(__m128i) * 4);
|
|
||||||
do {
|
do {
|
||||||
p[0] = v;
|
p[0] = v;
|
||||||
p[1] = v;
|
p[1] = v;
|
||||||
p[2] = v;
|
p[2] = v;
|
||||||
p[3] = v;
|
p[3] = v;
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
|
#define matchfinder_init matchfinder_init_sse2
|
||||||
|
|
||||||
static forceinline bool
|
static forceinline void
|
||||||
matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
|
matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
|
||||||
{
|
{
|
||||||
__m128i v, *p;
|
__m128i *p = (__m128i *)data;
|
||||||
size_t n;
|
__m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
||||||
|
|
||||||
if (size % (sizeof(__m128i) * 4) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
|
||||||
p = (__m128i *)data;
|
|
||||||
n = size / (sizeof(__m128i) * 4);
|
|
||||||
do {
|
do {
|
||||||
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||||
p[0] = _mm_adds_epi16(p[0], v);
|
p[0] = _mm_adds_epi16(p[0], v);
|
||||||
@ -128,37 +110,8 @@ matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
|
|||||||
p[2] = _mm_adds_epi16(p[2], v);
|
p[2] = _mm_adds_epi16(p[2], v);
|
||||||
p[3] = _mm_adds_epi16(p[3], v);
|
p[3] = _mm_adds_epi16(p[3], v);
|
||||||
p += 4;
|
p += 4;
|
||||||
} while (--n);
|
size -= 4 * sizeof(*p);
|
||||||
return true;
|
} while (size != 0);
|
||||||
}
|
}
|
||||||
|
#define matchfinder_rebase matchfinder_rebase_sse2
|
||||||
#endif /* __SSE2__ */
|
#endif /* __SSE2__ */
|
||||||
|
|
||||||
#undef arch_matchfinder_init
|
|
||||||
static forceinline bool
|
|
||||||
arch_matchfinder_init(mf_pos_t *data, size_t size)
|
|
||||||
{
|
|
||||||
#ifdef __AVX2__
|
|
||||||
if (matchfinder_init_avx2(data, size))
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
#ifdef __SSE2__
|
|
||||||
if (matchfinder_init_sse2(data, size))
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef arch_matchfinder_rebase
|
|
||||||
static forceinline bool
|
|
||||||
arch_matchfinder_rebase(mf_pos_t *data, size_t size)
|
|
||||||
{
|
|
||||||
#ifdef __AVX2__
|
|
||||||
if (matchfinder_rebase_avx2(data, size))
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
#ifdef __SSE2__
|
|
||||||
if (matchfinder_rebase_sse2(data, size))
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user