mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-08-03 09:46:04 -04:00
matchfinder: introduce the ht_matchfinder
Add a new matchfinder optimized for very fast compression.
This commit is contained in:
parent
8012927541
commit
19816c5e26
234
lib/ht_matchfinder.h
Normal file
234
lib/ht_matchfinder.h
Normal file
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
|
||||
*
|
||||
* Copyright 2022 Eric Biggers
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
* This is a Hash Table (ht) matchfinder.
|
||||
*
|
||||
* This is a variant of the Hash Chains (hc) matchfinder that is optimized for
|
||||
* very fast compression. The ht_matchfinder stores the hash chains inline in
|
||||
* the hash table, whereas the hc_matchfinder stores them in a separate array.
|
||||
* Storing the hash chains inline is the faster method when max_search_depth
|
||||
* (the maximum chain length) is very small. It is not appropriate when
|
||||
* max_search_depth is larger, as then it uses too much memory.
|
||||
*
|
||||
* Due to its focus on speed, the ht_matchfinder doesn't support length 3
|
||||
* matches. It also doesn't allow max_search_depth to vary at runtime; it is
|
||||
* fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
|
||||
*
|
||||
* See hc_matchfinder.h for more information.
|
||||
*/
|
||||
|
||||
#ifndef LIB_HT_MATCHFINDER_H
|
||||
#define LIB_HT_MATCHFINDER_H
|
||||
|
||||
#include "matchfinder_common.h"
|
||||
|
||||
#define HT_MATCHFINDER_HASH_ORDER 15
|
||||
#define HT_MATCHFINDER_BUCKET_SIZE 2
|
||||
|
||||
#define HT_MATCHFINDER_MIN_MATCH_LEN 4
|
||||
/* Minimum value of max_len for ht_matchfinder_longest_match() */
|
||||
#define HT_MATCHFINDER_REQUIRED_NBYTES 5
|
||||
|
||||
struct ht_matchfinder {
|
||||
mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
|
||||
[HT_MATCHFINDER_BUCKET_SIZE];
|
||||
} MATCHFINDER_ALIGNED;
|
||||
|
||||
static forceinline void
|
||||
ht_matchfinder_init(struct ht_matchfinder *mf)
|
||||
{
|
||||
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||
|
||||
matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
|
||||
}
|
||||
|
||||
static forceinline void
|
||||
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
|
||||
{
|
||||
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
||||
}
|
||||
|
||||
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
|
||||
static forceinline u32
|
||||
ht_matchfinder_longest_match(struct ht_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
const u8 * const restrict in_next,
|
||||
const u32 max_len,
|
||||
const u32 nice_len,
|
||||
u32 * const restrict next_hash,
|
||||
u32 * const restrict offset_ret)
|
||||
{
|
||||
u32 best_len = 0;
|
||||
const u8 *best_matchptr = in_next;
|
||||
u32 cur_pos = in_next - *in_base_p;
|
||||
const u8 *in_base;
|
||||
mf_pos_t cutoff;
|
||||
u32 hash;
|
||||
u32 seq;
|
||||
mf_pos_t cur_node;
|
||||
const u8 *matchptr;
|
||||
#if HT_MATCHFINDER_BUCKET_SIZE > 1
|
||||
mf_pos_t to_insert;
|
||||
u32 len;
|
||||
#endif
|
||||
#if HT_MATCHFINDER_BUCKET_SIZE > 2
|
||||
int i;
|
||||
#endif
|
||||
|
||||
/* This is assumed throughout this function. */
|
||||
STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
|
||||
|
||||
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
|
||||
ht_matchfinder_slide_window(mf);
|
||||
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
||||
cur_pos = 0;
|
||||
}
|
||||
in_base = *in_base_p;
|
||||
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
|
||||
|
||||
hash = *next_hash;
|
||||
STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
|
||||
*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
|
||||
HT_MATCHFINDER_HASH_ORDER);
|
||||
seq = load_u32_unaligned(in_next);
|
||||
prefetchw(&mf->hash_tab[*next_hash]);
|
||||
#if HT_MATCHFINDER_BUCKET_SIZE == 1
|
||||
/* Hand-unrolled version for BUCKET_SIZE == 1 */
|
||||
cur_node = mf->hash_tab[hash][0];
|
||||
mf->hash_tab[hash][0] = cur_pos;
|
||||
if (cur_node <= cutoff)
|
||||
goto out;
|
||||
matchptr = &in_base[cur_node];
|
||||
if (load_u32_unaligned(matchptr) == seq) {
|
||||
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
||||
best_matchptr = matchptr;
|
||||
}
|
||||
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
|
||||
/*
|
||||
* Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
|
||||
* differs slightly in that it copies the first entry to the second even
|
||||
* if nice_len is reached on the first, as this can be slightly faster.
|
||||
*/
|
||||
cur_node = mf->hash_tab[hash][0];
|
||||
mf->hash_tab[hash][0] = cur_pos;
|
||||
if (cur_node <= cutoff)
|
||||
goto out;
|
||||
matchptr = &in_base[cur_node];
|
||||
|
||||
to_insert = cur_node;
|
||||
cur_node = mf->hash_tab[hash][1];
|
||||
mf->hash_tab[hash][1] = to_insert;
|
||||
|
||||
if (load_u32_unaligned(matchptr) == seq) {
|
||||
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
||||
best_matchptr = matchptr;
|
||||
if (cur_node <= cutoff || best_len >= nice_len)
|
||||
goto out;
|
||||
matchptr = &in_base[cur_node];
|
||||
if (load_u32_unaligned(matchptr) == seq &&
|
||||
load_u32_unaligned(matchptr + best_len - 3) ==
|
||||
load_u32_unaligned(in_next + best_len - 3)) {
|
||||
len = lz_extend(in_next, matchptr, 4, max_len);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
best_matchptr = matchptr;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (cur_node <= cutoff)
|
||||
goto out;
|
||||
matchptr = &in_base[cur_node];
|
||||
if (load_u32_unaligned(matchptr) == seq) {
|
||||
best_len = lz_extend(in_next, matchptr, 4, max_len);
|
||||
best_matchptr = matchptr;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
|
||||
to_insert = cur_pos;
|
||||
for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
|
||||
cur_node = mf->hash_tab[hash][i];
|
||||
mf->hash_tab[hash][i] = to_insert;
|
||||
if (cur_node <= cutoff)
|
||||
goto out;
|
||||
matchptr = &in_base[cur_node];
|
||||
if (load_u32_unaligned(matchptr) == seq) {
|
||||
len = lz_extend(in_next, matchptr, 4, max_len);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
best_matchptr = matchptr;
|
||||
if (best_len >= nice_len)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
to_insert = cur_node;
|
||||
}
|
||||
#endif
|
||||
out:
|
||||
*offset_ret = in_next - best_matchptr;
|
||||
return best_len;
|
||||
}
|
||||
|
||||
static forceinline void
|
||||
ht_matchfinder_skip_bytes(struct ht_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
const u8 *in_next,
|
||||
const u8 * const in_end,
|
||||
const u32 count,
|
||||
u32 * const restrict next_hash)
|
||||
{
|
||||
s32 cur_pos = in_next - *in_base_p;
|
||||
u32 hash;
|
||||
u32 remaining = count;
|
||||
int i;
|
||||
|
||||
if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
|
||||
return;
|
||||
|
||||
if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
|
||||
ht_matchfinder_slide_window(mf);
|
||||
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
||||
cur_pos -= MATCHFINDER_WINDOW_SIZE;
|
||||
}
|
||||
|
||||
hash = *next_hash;
|
||||
do {
|
||||
for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
|
||||
mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
|
||||
mf->hash_tab[hash][0] = cur_pos;
|
||||
|
||||
hash = lz_hash(get_unaligned_le32(++in_next),
|
||||
HT_MATCHFINDER_HASH_ORDER);
|
||||
cur_pos++;
|
||||
} while (--remaining);
|
||||
|
||||
prefetchw(&mf->hash_tab[hash]);
|
||||
*next_hash = hash;
|
||||
}
|
||||
|
||||
#endif /* LIB_HT_MATCHFINDER_H */
|
Loading…
x
Reference in New Issue
Block a user