lib/matchfinder: simplify init and rebase

Remove the ability of matchfinder_init() and matchfinder_rebase() to
fail due to the matchfinder memory size being misaligned.  Instead,
require that the size always be 128-byte aligned -- which is already the
case.  Also, make the matchfinder memory always be 32-byte aligned --
which doesn't really have any downside.
This commit is contained in:
Eric Biggers 2020-10-25 21:59:37 -07:00
parent f2f6a6e396
commit ff8634427b
6 changed files with 100 additions and 146 deletions

View File

@ -26,68 +26,56 @@
*/ */
#ifdef __ARM_NEON #ifdef __ARM_NEON
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
# include <arm_neon.h> # include <arm_neon.h>
static forceinline bool static forceinline void
matchfinder_init_neon(mf_pos_t *data, size_t size) matchfinder_init_neon(mf_pos_t *data, size_t size)
{ {
int16x8_t v, *p; int16x8_t *p = (int16x8_t *)data;
size_t n; int16x8_t v = (int16x8_t) {
if (size % (sizeof(int16x8_t) * 4) != 0)
return false;
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = (int16x8_t) {
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
}; };
p = (int16x8_t *)data;
n = size / (sizeof(int16x8_t) * 4); STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
do { do {
p[0] = v; p[0] = v;
p[1] = v; p[1] = v;
p[2] = v; p[2] = v;
p[3] = v; p[3] = v;
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#undef arch_matchfinder_init #define matchfinder_init matchfinder_init_neon
#define arch_matchfinder_init matchfinder_init_neon
static forceinline bool static forceinline void
matchfinder_rebase_neon(mf_pos_t *data, size_t size) matchfinder_rebase_neon(mf_pos_t *data, size_t size)
{ {
int16x8_t v, *p; int16x8_t *p = (int16x8_t *)data;
size_t n; int16x8_t v = (int16x8_t) {
if (size % (sizeof(int16x8_t) * 4) != 0)
return false;
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = (int16x8_t) {
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
}; };
p = (int16x8_t *)data;
n = size / (sizeof(int16x8_t) * 4); STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
do { do {
p[0] = vqaddq_s16(p[0], v); p[0] = vqaddq_s16(p[0], v);
p[1] = vqaddq_s16(p[1], v); p[1] = vqaddq_s16(p[1], v);
p[2] = vqaddq_s16(p[2], v); p[2] = vqaddq_s16(p[2], v);
p[3] = vqaddq_s16(p[3], v); p[3] = vqaddq_s16(p[3], v);
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#undef arch_matchfinder_rebase #define matchfinder_rebase matchfinder_rebase_neon
#define arch_matchfinder_rebase matchfinder_rebase_neon
#endif /* __ARM_NEON */ #endif /* __ARM_NEON */

View File

@ -71,9 +71,9 @@
#define BT_MATCHFINDER_HASH3_WAYS 2 #define BT_MATCHFINDER_HASH3_WAYS 2
#define BT_MATCHFINDER_HASH4_ORDER 16 #define BT_MATCHFINDER_HASH4_ORDER 16
#define BT_MATCHFINDER_TOTAL_HASH_LENGTH \ #define BT_MATCHFINDER_TOTAL_HASH_SIZE \
((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \ (((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
(1UL << BT_MATCHFINDER_HASH4_ORDER)) (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
/* Representation of a match found by the bt_matchfinder */ /* Representation of a match found by the bt_matchfinder */
struct lz_match { struct lz_match {
@ -101,7 +101,7 @@ struct bt_matchfinder {
} }
#ifdef _aligned_attribute #ifdef _aligned_attribute
_aligned_attribute(MATCHFINDER_ALIGNMENT) _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
#endif #endif
; ;
@ -109,14 +109,18 @@ _aligned_attribute(MATCHFINDER_ALIGNMENT)
static forceinline void static forceinline void
bt_matchfinder_init(struct bt_matchfinder *mf) bt_matchfinder_init(struct bt_matchfinder *mf)
{ {
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_LENGTH); STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
} }
static forceinline void static forceinline void
bt_matchfinder_slide_window(struct bt_matchfinder *mf) bt_matchfinder_slide_window(struct bt_matchfinder *mf)
{ {
matchfinder_rebase((mf_pos_t *)mf, STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
sizeof(struct bt_matchfinder) / sizeof(mf_pos_t));
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
} }
static forceinline mf_pos_t * static forceinline mf_pos_t *

View File

@ -2704,7 +2704,7 @@ libdeflate_alloc_compressor(int compression_level)
size += sizeof(c->p.g); size += sizeof(c->p.g);
#endif #endif
c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size); c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size);
if (!c) if (!c)
return NULL; return NULL;

View File

@ -111,9 +111,9 @@
#define HC_MATCHFINDER_HASH3_ORDER 15 #define HC_MATCHFINDER_HASH3_ORDER 15
#define HC_MATCHFINDER_HASH4_ORDER 16 #define HC_MATCHFINDER_HASH4_ORDER 16
#define HC_MATCHFINDER_TOTAL_HASH_LENGTH \ #define HC_MATCHFINDER_TOTAL_HASH_SIZE \
((1UL << HC_MATCHFINDER_HASH3_ORDER) + \ (((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
(1UL << HC_MATCHFINDER_HASH4_ORDER)) (1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
struct hc_matchfinder { struct hc_matchfinder {
@ -130,7 +130,7 @@ struct hc_matchfinder {
} }
#ifdef _aligned_attribute #ifdef _aligned_attribute
_aligned_attribute(MATCHFINDER_ALIGNMENT) _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
#endif #endif
; ;
@ -138,14 +138,18 @@ struct hc_matchfinder {
static forceinline void static forceinline void
hc_matchfinder_init(struct hc_matchfinder *mf) hc_matchfinder_init(struct hc_matchfinder *mf)
{ {
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_LENGTH); STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
} }
static forceinline void static forceinline void
hc_matchfinder_slide_window(struct hc_matchfinder *mf) hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{ {
matchfinder_rebase((mf_pos_t *)mf, STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
sizeof(struct hc_matchfinder) / sizeof(mf_pos_t));
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
} }
/* /*

View File

@ -18,11 +18,15 @@ typedef s16 mf_pos_t;
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE) #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
#define MATCHFINDER_ALIGNMENT 8 /*
* Required alignment of the matchfinder buffer pointer and size. The values
#define arch_matchfinder_init(data, size) false * here come from the AVX-2 implementation, which is the worst case.
#define arch_matchfinder_rebase(data, size) false */
#define MATCHFINDER_MEM_ALIGNMENT 32
#define MATCHFINDER_SIZE_ALIGNMENT 128
#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute #ifdef _aligned_attribute
# if defined(__arm__) || defined(__aarch64__) # if defined(__arm__) || defined(__aarch64__)
# include "arm/matchfinder_impl.h" # include "arm/matchfinder_impl.h"
@ -36,19 +40,20 @@ typedef s16 mf_pos_t;
* *
* Essentially, this is an optimized memset(). * Essentially, this is an optimized memset().
* *
* 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary. * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
*/ */
#ifndef matchfinder_init
static forceinline void static forceinline void
matchfinder_init(mf_pos_t *data, size_t num_entries) matchfinder_init(mf_pos_t *data, size_t size)
{ {
size_t num_entries = size / sizeof(*data);
size_t i; size_t i;
if (arch_matchfinder_init(data, num_entries * sizeof(data[0])))
return;
for (i = 0; i < num_entries; i++) for (i = 0; i < num_entries; i++)
data[i] = MATCHFINDER_INITVAL; data[i] = MATCHFINDER_INITVAL;
} }
#endif
/* /*
* Slide the matchfinder by WINDOW_SIZE bytes. * Slide the matchfinder by WINDOW_SIZE bytes.
@ -71,14 +76,13 @@ matchfinder_init(mf_pos_t *data, size_t num_entries)
* of "hash chains", and 2-ary in the case of "binary trees". In either case, * of "hash chains", and 2-ary in the case of "binary trees". In either case,
* the links need to be rebased in the same way. * the links need to be rebased in the same way.
*/ */
#ifndef matchfinder_rebase
static forceinline void static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t num_entries) matchfinder_rebase(mf_pos_t *data, size_t size)
{ {
size_t num_entries = size / sizeof(*data);
size_t i; size_t i;
if (arch_matchfinder_rebase(data, num_entries * sizeof(data[0])))
return;
if (MATCHFINDER_WINDOW_SIZE == 32768) { if (MATCHFINDER_WINDOW_SIZE == 32768) {
/* Branchless version for 32768 byte windows. If the value was /* Branchless version for 32768 byte windows. If the value was
* already negative, clear all bits except the sign bit; this * already negative, clear all bits except the sign bit; this
@ -101,6 +105,7 @@ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
} }
} }
#endif
/* /*
* The hash function: given a sequence prefix held in the low-order bits of a * The hash function: given a sequence prefix held in the low-order bits of a

View File

@ -26,47 +26,38 @@
*/ */
#ifdef __AVX2__ #ifdef __AVX2__
# if MATCHFINDER_ALIGNMENT < 32
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 32
# endif
# include <immintrin.h> # include <immintrin.h>
static forceinline bool static forceinline void
matchfinder_init_avx2(mf_pos_t *data, size_t size) matchfinder_init_avx2(mf_pos_t *data, size_t size)
{ {
__m256i v, *p; __m256i *p = (__m256i *)data;
size_t n; __m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
if (size % (sizeof(__m256i) * 4) != 0)
return false;
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2); STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
p = (__m256i *)data;
n = size / (sizeof(__m256i) * 4);
do { do {
p[0] = v; p[0] = v;
p[1] = v; p[1] = v;
p[2] = v; p[2] = v;
p[3] = v; p[3] = v;
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#define matchfinder_init matchfinder_init_avx2
static forceinline bool static forceinline void
matchfinder_rebase_avx2(mf_pos_t *data, size_t size) matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
{ {
__m256i v, *p; __m256i *p = (__m256i *)data;
size_t n; __m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
if (size % (sizeof(__m256i) * 4) != 0)
return false;
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2); STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
p = (__m256i *)data;
n = size / (sizeof(__m256i) * 4);
do { do {
/* PADDSW: Add Packed Signed Integers With Signed Saturation */ /* PADDSW: Add Packed Signed Integers With Signed Saturation */
p[0] = _mm256_adds_epi16(p[0], v); p[0] = _mm256_adds_epi16(p[0], v);
@ -74,53 +65,44 @@ matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
p[2] = _mm256_adds_epi16(p[2], v); p[2] = _mm256_adds_epi16(p[2], v);
p[3] = _mm256_adds_epi16(p[3], v); p[3] = _mm256_adds_epi16(p[3], v);
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#endif /* __AVX2__ */ #define matchfinder_rebase matchfinder_rebase_avx2
#ifdef __SSE2__ #elif defined(__SSE2__)
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
# include <emmintrin.h> # include <emmintrin.h>
static forceinline bool static forceinline void
matchfinder_init_sse2(mf_pos_t *data, size_t size) matchfinder_init_sse2(mf_pos_t *data, size_t size)
{ {
__m128i v, *p; __m128i *p = (__m128i *)data;
size_t n; __m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
if (size % (sizeof(__m128i) * 4) != 0)
return false;
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2); STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
p = (__m128i *)data;
n = size / (sizeof(__m128i) * 4);
do { do {
p[0] = v; p[0] = v;
p[1] = v; p[1] = v;
p[2] = v; p[2] = v;
p[3] = v; p[3] = v;
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#define matchfinder_init matchfinder_init_sse2
static forceinline bool static forceinline void
matchfinder_rebase_sse2(mf_pos_t *data, size_t size) matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
{ {
__m128i v, *p; __m128i *p = (__m128i *)data;
size_t n; __m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
if (size % (sizeof(__m128i) * 4) != 0)
return false;
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
STATIC_ASSERT(sizeof(mf_pos_t) == 2); STATIC_ASSERT(sizeof(mf_pos_t) == 2);
v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
p = (__m128i *)data;
n = size / (sizeof(__m128i) * 4);
do { do {
/* PADDSW: Add Packed Signed Integers With Signed Saturation */ /* PADDSW: Add Packed Signed Integers With Signed Saturation */
p[0] = _mm_adds_epi16(p[0], v); p[0] = _mm_adds_epi16(p[0], v);
@ -128,37 +110,8 @@ matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
p[2] = _mm_adds_epi16(p[2], v); p[2] = _mm_adds_epi16(p[2], v);
p[3] = _mm_adds_epi16(p[3], v); p[3] = _mm_adds_epi16(p[3], v);
p += 4; p += 4;
} while (--n); size -= 4 * sizeof(*p);
return true; } while (size != 0);
} }
#define matchfinder_rebase matchfinder_rebase_sse2
#endif /* __SSE2__ */ #endif /* __SSE2__ */
#undef arch_matchfinder_init
static forceinline bool
arch_matchfinder_init(mf_pos_t *data, size_t size)
{
#ifdef __AVX2__
if (matchfinder_init_avx2(data, size))
return true;
#endif
#ifdef __SSE2__
if (matchfinder_init_sse2(data, size))
return true;
#endif
return false;
}
#undef arch_matchfinder_rebase
static forceinline bool
arch_matchfinder_rebase(mf_pos_t *data, size_t size)
{
#ifdef __AVX2__
if (matchfinder_rebase_avx2(data, size))
return true;
#endif
#ifdef __SSE2__
if (matchfinder_rebase_sse2(data, size))
return true;
#endif
return false;
}