mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-08 03:39:25 -04:00
Cleanups and matchfinder updates
This commit is contained in:
parent
fed4597943
commit
5f3208e788
@ -78,7 +78,7 @@ install(FILES libdeflate.h DESTINATION "${CMAKE_INSTALL_PREFIX}/include")
|
||||
|
||||
option(BUILD_BENCHMARK "Build benchmark program" OFF)
|
||||
add_executable(benchmark test/benchmark.c)
|
||||
target_link_libraries(benchmark deflate -lz)
|
||||
target_link_libraries(benchmark deflatestatic -lz)
|
||||
|
||||
option(BUILD_GEN_CRC32_TABLE "Build CRC32 table generation program" OFF)
|
||||
add_executable(gen_crc32_table test/gen_crc32_table.c)
|
||||
|
22
libdeflate.h
22
libdeflate.h
@ -1,7 +1,9 @@
|
||||
/*
|
||||
* libdeflate.h
|
||||
*
|
||||
* Public header for the DEFLATE compression library.
|
||||
* Public header for libdeflate.
|
||||
*
|
||||
* This file has no copyright assigned and is placed in the Public Domain.
|
||||
*/
|
||||
|
||||
#ifndef LIBDEFLATE_H
|
||||
@ -26,7 +28,9 @@ struct deflate_compressor;
|
||||
* fastest, 6 = medium/default, 9 = slowest). The return value is a pointer to
|
||||
* the new DEFLATE compressor, or NULL if out of memory.
|
||||
*
|
||||
* Note: the sliding window size is defined at compilation time (default 32768).
|
||||
* Note: for compression, the sliding window size is defined at compilation time
|
||||
* to 32768, the largest size permissible in the DEFLATE format. It cannot be
|
||||
* changed at runtime.
|
||||
*/
|
||||
extern struct deflate_compressor *
|
||||
deflate_alloc_compressor(unsigned int compression_level);
|
||||
@ -44,7 +48,7 @@ deflate_compress(struct deflate_compressor *compressor,
|
||||
void *out, size_t out_nbytes_avail);
|
||||
|
||||
/*
|
||||
* Like deflate_compress(), but store the data in the zlib wrapper format.
|
||||
* Like deflate_compress(), but stores the data in the zlib wrapper format.
|
||||
*/
|
||||
extern size_t
|
||||
zlib_compress(struct deflate_compressor *compressor,
|
||||
@ -52,7 +56,7 @@ zlib_compress(struct deflate_compressor *compressor,
|
||||
void *out, size_t out_nbytes_avail);
|
||||
|
||||
/*
|
||||
* Like deflate_compress(), but store the data in the gzip wrapper format.
|
||||
* Like deflate_compress(), but stores the data in the gzip wrapper format.
|
||||
*/
|
||||
extern size_t
|
||||
gzip_compress(struct deflate_compressor *compressor,
|
||||
@ -61,7 +65,8 @@ gzip_compress(struct deflate_compressor *compressor,
|
||||
|
||||
/*
|
||||
* deflate_free_compressor() frees a DEFLATE compressor that was allocated with
|
||||
* deflate_alloc_compressor().
|
||||
* deflate_alloc_compressor(). If a NULL pointer is passed in, no action is
|
||||
* taken.
|
||||
*/
|
||||
extern void
|
||||
deflate_free_compressor(struct deflate_compressor *compressor);
|
||||
@ -79,7 +84,9 @@ struct deflate_decompressor;
|
||||
*
|
||||
* This function takes no parameters, and the returned decompressor is valid for
|
||||
* decompressing data that was compressed at any compression level and with any
|
||||
* sliding window size.
|
||||
* sliding window size. It can also be used for any wrapper format (raw
|
||||
* DEFLATE, zlib, or gzip); however, the appropriate decompression function must
|
||||
* be called.
|
||||
*/
|
||||
extern struct deflate_decompressor *
|
||||
deflate_alloc_decompressor(void);
|
||||
@ -118,7 +125,8 @@ gzip_decompress(struct deflate_decompressor *decompressor,
|
||||
|
||||
/*
|
||||
* deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
|
||||
* with deflate_alloc_decompressor().
|
||||
* with deflate_alloc_decompressor(). If a NULL pointer is passed in, no action
|
||||
* is taken.
|
||||
*/
|
||||
extern void
|
||||
deflate_free_decompressor(struct deflate_decompressor *decompressor);
|
||||
|
@ -39,7 +39,7 @@
|
||||
#define UNROLL_FACTOR 4
|
||||
|
||||
u32
|
||||
adler32(const u8 *buffer, size_t size)
|
||||
adler32(const void *buffer, size_t size)
|
||||
{
|
||||
u32 s1 = 1;
|
||||
u32 s2 = 0;
|
||||
|
@ -9,4 +9,4 @@
|
||||
#include "types.h"
|
||||
|
||||
extern u32
|
||||
adler32(const u8 *buffer, size_t size);
|
||||
adler32(const void *buffer, size_t size);
|
||||
|
18
src/bitops.h
18
src/bitops.h
@ -11,7 +11,8 @@
|
||||
|
||||
/* Find Last Set bit */
|
||||
|
||||
static inline unsigned fls32(u32 v)
|
||||
static inline unsigned
|
||||
fls32(u32 v)
|
||||
{
|
||||
#ifdef compiler_fls32
|
||||
return compiler_fls32(v);
|
||||
@ -23,7 +24,8 @@ static inline unsigned fls32(u32 v)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned fls64(u64 v)
|
||||
static inline unsigned
|
||||
fls64(u64 v)
|
||||
{
|
||||
#ifdef compiler_fls64
|
||||
return compiler_fls64(v);
|
||||
@ -35,7 +37,8 @@ static inline unsigned fls64(u64 v)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned flsw(machine_word_t v)
|
||||
static inline unsigned
|
||||
flsw(machine_word_t v)
|
||||
{
|
||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||
if (WORDSIZE == 4)
|
||||
@ -46,7 +49,8 @@ static inline unsigned flsw(machine_word_t v)
|
||||
|
||||
/* Find First Set bit */
|
||||
|
||||
static inline unsigned ffs32(u32 v)
|
||||
static inline unsigned
|
||||
ffs32(u32 v)
|
||||
{
|
||||
#ifdef compiler_ffs32
|
||||
return compiler_ffs32(v);
|
||||
@ -58,7 +62,8 @@ static inline unsigned ffs32(u32 v)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned ffs64(u64 v)
|
||||
static inline unsigned
|
||||
ffs64(u64 v)
|
||||
{
|
||||
#ifdef compiler_ffs64
|
||||
return compiler_ffs64(v);
|
||||
@ -70,7 +75,8 @@ static inline unsigned ffs64(u64 v)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned ffsw(machine_word_t v)
|
||||
static inline unsigned
|
||||
ffsw(machine_word_t v)
|
||||
{
|
||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||
if (WORDSIZE == 4)
|
||||
|
@ -1,51 +1,56 @@
|
||||
/*
|
||||
* bt_matchfinder.h
|
||||
*
|
||||
* This is a Binary Tree (bt) based matchfinder.
|
||||
* ----------------------------------------------------------------------------
|
||||
*
|
||||
* This is a Binary Trees (bt) based matchfinder.
|
||||
*
|
||||
* The data structure is a hash table where each hash bucket contains a binary
|
||||
* tree of sequences, referenced by position. The sequences in the binary tree
|
||||
* are ordered such that a left child is lexicographically lesser than its
|
||||
* parent, and a right child is lexicographically greater than its parent.
|
||||
* tree of sequences whose first 3 bytes share the same hash code. Each
|
||||
* sequence is identified by its starting position in the input buffer. Each
|
||||
* binary tree is always sorted such that each left child represents a sequence
|
||||
* lexicographically lesser than its parent and each right child represents a
|
||||
* sequence lexicographically greater than its parent.
|
||||
*
|
||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
||||
* the appropriate binary tree is re-rooted at that sequence (position).
|
||||
* Since the sequences are inserted in order, each binary tree maintains the
|
||||
* invariant that each child node has greater match offset than its parent.
|
||||
* The algorithm processes the input buffer sequentially. At each byte
|
||||
* position, the hash code of the first 3 bytes of the sequence beginning at
|
||||
* that position (the sequence being matched against) is computed. This
|
||||
* identifies the hash bucket to use for that position. Then, a new binary tree
|
||||
* node is created to represent the current sequence. Then, in a single tree
|
||||
* traversal, the hash bucket's binary tree is searched for matches and is
|
||||
* re-rooted at the new node.
|
||||
*
|
||||
* While inserting a sequence, we may search the binary tree for matches with
|
||||
* that sequence. At each step, the length of the match is computed. The
|
||||
* search ends when the sequences get too far away (outside of the sliding
|
||||
* window), or when the binary tree ends (in the code this is the same check as
|
||||
* "too far away"), or when 'max_search_depth' positions have been searched, or
|
||||
* when a match of at least 'nice_len' bytes has been found.
|
||||
* Compared to the simpler algorithm that uses linked lists instead of binary
|
||||
* trees (see hc_matchfinder.h), the binary tree version gains more information
|
||||
* at each node visitation. Ideally, the binary tree version will examine only
|
||||
* 'log(n)' nodes to find the same matches that the linked list version will
|
||||
* find by examining 'n' nodes. In addition, the binary tree version can
|
||||
* examine fewer bytes at each node by taking advantage of the common prefixes
|
||||
* that result from the sort order, whereas the linked list version may have to
|
||||
* examine up to the full length of the match at each node.
|
||||
*
|
||||
* Notes:
|
||||
* However, it is not always best to use the binary tree version. It requires
|
||||
* nearly twice as much memory as the linked list version, and it takes time to
|
||||
* keep the binary trees sorted, even at positions where the compressor does not
|
||||
* need matches. Generally, when doing fast compression on small buffers,
|
||||
* binary trees are the wrong approach. They are best suited for thorough
|
||||
* compression and/or large buffers.
|
||||
*
|
||||
* - Typically, we need to search more nodes to find a given match in a
|
||||
* binary tree versus in a linked list. However, a binary tree has more
|
||||
* overhead than a linked list: it needs to be kept sorted, and the inner
|
||||
* search loop is more complicated. As a result, binary trees are best
|
||||
* suited for compression modes where the potential matches are searched
|
||||
* more thoroughly.
|
||||
*
|
||||
* - Since no attempt is made to keep the binary trees balanced, it's
|
||||
* essential to have the 'max_search_depth' cutoff. Otherwise it could
|
||||
* take quadratic time to run data through the matchfinder.
|
||||
* ----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lz_extend.h"
|
||||
#include "lz_hash3.h"
|
||||
#include "lz_hash.h"
|
||||
#include "matchfinder_common.h"
|
||||
|
||||
#ifndef BT_MATCHFINDER_HASH_ORDER
|
||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
||||
# define BT_MATCHFINDER_HASH_ORDER 14
|
||||
# else
|
||||
# define BT_MATCHFINDER_HASH_ORDER 15
|
||||
# endif
|
||||
#if MATCHFINDER_WINDOW_ORDER < 13
|
||||
# define BT_MATCHFINDER_HASH_ORDER 14
|
||||
#elif MATCHFINDER_WINDOW_ORDER < 15
|
||||
# define BT_MATCHFINDER_HASH_ORDER 15
|
||||
#else
|
||||
# define BT_MATCHFINDER_HASH_ORDER 16
|
||||
#endif
|
||||
|
||||
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
|
||||
@ -77,8 +82,37 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline u32
|
||||
bt_matchfinder_hash_3_bytes(const u8 *in_next)
|
||||
{
|
||||
return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER);
|
||||
}
|
||||
|
||||
static inline pos_t *
|
||||
bt_child(struct bt_matchfinder *mf, pos_t node, int offset)
|
||||
{
|
||||
if (MATCHFINDER_WINDOW_ORDER < sizeof(pos_t) * 8) {
|
||||
/* no cast needed */
|
||||
return &mf->child_tab[(matchfinder_slot_for_match(node) << 1) + offset];
|
||||
} else {
|
||||
return &mf->child_tab[((size_t)matchfinder_slot_for_match(node) << 1) + offset];
|
||||
}
|
||||
}
|
||||
|
||||
static inline pos_t *
|
||||
bt_left_child(struct bt_matchfinder *mf, pos_t node)
|
||||
{
|
||||
return bt_child(mf, node, 0);
|
||||
}
|
||||
|
||||
static inline pos_t *
|
||||
bt_right_child(struct bt_matchfinder *mf, pos_t node)
|
||||
{
|
||||
return bt_child(mf, node, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find matches with the current sequence.
|
||||
* Retrieve a list of matches with the current position.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
@ -87,115 +121,131 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
||||
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process. This is the
|
||||
* pointer to the bytes being matched against.
|
||||
* pointer to the sequence being matched against.
|
||||
* @min_len
|
||||
* Only record matches that are at least this long.
|
||||
* @max_len
|
||||
* Maximum match length to return.
|
||||
* The maximum permissible match length at this position.
|
||||
* @nice_len
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* Must be <= @max_len.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* @prev_hash
|
||||
* TODO
|
||||
* @matches
|
||||
* Space to write the matches that are found.
|
||||
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||
* @next_hash
|
||||
* Pointer to the hash code for the current sequence, which was computed
|
||||
* one position in advance so that the binary tree root could be
|
||||
* prefetched. This is an input/output parameter.
|
||||
* @best_len_ret
|
||||
* The length of the longest match found is written here. (This is
|
||||
* actually redundant with the 'struct lz_match' array, but this is easier
|
||||
* for the compiler to optimize when inlined and the caller immediately
|
||||
* does a check against 'best_len'.)
|
||||
* @lz_matchptr
|
||||
* An array in which this function will record the matches. The recorded
|
||||
* matches will be sorted by strictly increasing length and strictly
|
||||
* increasing offset. The maximum number of matches that may be found is
|
||||
* 'min(nice_len, max_len) - 3 + 1'.
|
||||
*
|
||||
* Returns the number of matches found, which may be anywhere from 0 to
|
||||
* (nice_len - 3 + 1), inclusively. The matches are written to @matches in
|
||||
* order of strictly increasing length and strictly increasing offset. The
|
||||
* minimum match length is assumed to be 3.
|
||||
* The return value is a pointer to the next available slot in the @lz_matchptr
|
||||
* array. (If no matches were found, this will be the same as @lz_matchptr.)
|
||||
*/
|
||||
static inline unsigned
|
||||
static inline struct lz_match *
|
||||
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
||||
const u8 * const in_base,
|
||||
const u8 * const in_next,
|
||||
const unsigned min_len,
|
||||
const unsigned max_len,
|
||||
const unsigned nice_len,
|
||||
const unsigned max_search_depth,
|
||||
unsigned long *prev_hash,
|
||||
struct lz_match * const restrict matches)
|
||||
u32 * restrict next_hash,
|
||||
unsigned * restrict best_len_ret,
|
||||
struct lz_match * restrict lz_matchptr)
|
||||
{
|
||||
struct lz_match *lz_matchptr = matches;
|
||||
unsigned depth_remaining = max_search_depth;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
u32 hash;
|
||||
pos_t cur_node;
|
||||
const u8 *matchptr;
|
||||
unsigned best_len;
|
||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||
unsigned best_lt_len, best_gt_len;
|
||||
unsigned len;
|
||||
pos_t *children;
|
||||
unsigned best_len = min_len - 1;
|
||||
|
||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
|
||||
return 0;
|
||||
if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
|
||||
*best_len_ret = best_len;
|
||||
return lz_matchptr;
|
||||
}
|
||||
|
||||
hash = *prev_hash;
|
||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
||||
prefetch(&mf->hash_tab[*prev_hash]);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
hash = *next_hash;
|
||||
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
|
||||
cur_node = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
prefetch(&mf->hash_tab[*next_hash]);
|
||||
|
||||
best_len = 2;
|
||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
||||
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
|
||||
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
|
||||
best_lt_len = 0;
|
||||
best_gt_len = 0;
|
||||
len = 0;
|
||||
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
|
||||
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||
*best_len_ret = best_len;
|
||||
return lz_matchptr;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next) ||
|
||||
!depth_remaining--)
|
||||
{
|
||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||
return lz_matchptr - matches;
|
||||
}
|
||||
|
||||
matchptr = &in_base[cur_match];
|
||||
len = min(best_lt_len, best_gt_len);
|
||||
|
||||
children = &mf->child_tab[(unsigned long)
|
||||
matchfinder_slot_for_match(cur_match) << 1];
|
||||
matchptr = &in_base[cur_node];
|
||||
|
||||
if (matchptr[len] == in_next[len]) {
|
||||
|
||||
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
||||
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
|
||||
lz_matchptr->length = len;
|
||||
lz_matchptr->offset = in_next - matchptr;
|
||||
lz_matchptr++;
|
||||
|
||||
if (len >= nice_len) {
|
||||
*pending_lt_ptr = children[0];
|
||||
*pending_gt_ptr = children[1];
|
||||
return lz_matchptr - matches;
|
||||
*pending_lt_ptr = *bt_left_child(mf, cur_node);
|
||||
*pending_gt_ptr = *bt_right_child(mf, cur_node);
|
||||
*best_len_ret = best_len;
|
||||
return lz_matchptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matchptr[len] < in_next[len]) {
|
||||
*pending_lt_ptr = cur_match;
|
||||
pending_lt_ptr = &children[1];
|
||||
cur_match = *pending_lt_ptr;
|
||||
*pending_lt_ptr = cur_node;
|
||||
pending_lt_ptr = bt_right_child(mf, cur_node);
|
||||
cur_node = *pending_lt_ptr;
|
||||
best_lt_len = len;
|
||||
if (best_gt_len < len)
|
||||
len = best_gt_len;
|
||||
} else {
|
||||
*pending_gt_ptr = cur_match;
|
||||
pending_gt_ptr = &children[0];
|
||||
cur_match = *pending_gt_ptr;
|
||||
*pending_gt_ptr = cur_node;
|
||||
pending_gt_ptr = bt_left_child(mf, cur_node);
|
||||
cur_node = *pending_gt_ptr;
|
||||
best_gt_len = len;
|
||||
if (best_lt_len < len)
|
||||
len = best_lt_len;
|
||||
}
|
||||
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
|
||||
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||
*best_len_ret = best_len;
|
||||
return lz_matchptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the match-finder, but don't search for matches.
|
||||
* Advance the matchfinder, but don't record any matches.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
* @in_base
|
||||
* Pointer to the next byte in the input buffer to process _at the last
|
||||
* time bc_matchfinder_init() or bc_matchfinder_slide_window() was called_.
|
||||
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process.
|
||||
* @in_end
|
||||
@ -204,8 +254,14 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* @prev_hash
|
||||
* TODO
|
||||
* @next_hash
|
||||
* Pointer to the hash code for the current sequence, which was computed
|
||||
* one position in advance so that the binary tree root could be
|
||||
* prefetched. This is an input/output parameter.
|
||||
*
|
||||
* Note: this is very similar to bt_matchfinder_get_matches() because both
|
||||
* functions must do hashing and tree re-rooting. This version just doesn't
|
||||
* actually record any matches.
|
||||
*/
|
||||
static inline void
|
||||
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
||||
@ -214,66 +270,70 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
||||
const u8 * const in_end,
|
||||
const unsigned nice_len,
|
||||
const unsigned max_search_depth,
|
||||
unsigned long *prev_hash)
|
||||
u32 * restrict next_hash)
|
||||
{
|
||||
unsigned depth_remaining = max_search_depth;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
u32 hash;
|
||||
pos_t cur_node;
|
||||
const u8 *matchptr;
|
||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||
unsigned best_lt_len, best_gt_len;
|
||||
unsigned len;
|
||||
pos_t *children;
|
||||
|
||||
if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
|
||||
if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1))
|
||||
return;
|
||||
|
||||
hash = *prev_hash;
|
||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
||||
prefetch(&mf->hash_tab[*prev_hash]);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
hash = *next_hash;
|
||||
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
|
||||
cur_node = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
prefetch(&mf->hash_tab[*next_hash]);
|
||||
|
||||
depth_remaining = max_search_depth;
|
||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
||||
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
|
||||
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
|
||||
best_lt_len = 0;
|
||||
best_gt_len = 0;
|
||||
len = 0;
|
||||
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
|
||||
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next) ||
|
||||
!depth_remaining--)
|
||||
{
|
||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||
return;
|
||||
}
|
||||
|
||||
matchptr = &in_base[cur_match];
|
||||
len = min(best_lt_len, best_gt_len);
|
||||
|
||||
children = &mf->child_tab[(unsigned long)
|
||||
matchfinder_slot_for_match(cur_match) << 1];
|
||||
matchptr = &in_base[cur_node];
|
||||
|
||||
if (matchptr[len] == in_next[len]) {
|
||||
len = lz_extend(in_next, matchptr, len + 1, nice_len);
|
||||
if (len == nice_len) {
|
||||
*pending_lt_ptr = children[0];
|
||||
*pending_gt_ptr = children[1];
|
||||
*pending_lt_ptr = *bt_left_child(mf, cur_node);
|
||||
*pending_gt_ptr = *bt_right_child(mf, cur_node);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (matchptr[len] < in_next[len]) {
|
||||
*pending_lt_ptr = cur_match;
|
||||
pending_lt_ptr = &children[1];
|
||||
cur_match = *pending_lt_ptr;
|
||||
*pending_lt_ptr = cur_node;
|
||||
pending_lt_ptr = bt_right_child(mf, cur_node);
|
||||
cur_node = *pending_lt_ptr;
|
||||
best_lt_len = len;
|
||||
if (best_gt_len < len)
|
||||
len = best_gt_len;
|
||||
} else {
|
||||
*pending_gt_ptr = cur_match;
|
||||
pending_gt_ptr = &children[0];
|
||||
cur_match = *pending_gt_ptr;
|
||||
*pending_gt_ptr = cur_node;
|
||||
pending_gt_ptr = bt_left_child(mf, cur_node);
|
||||
cur_node = *pending_gt_ptr;
|
||||
best_gt_len = len;
|
||||
if (best_lt_len < len)
|
||||
len = best_lt_len;
|
||||
}
|
||||
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
|
||||
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,7 @@
|
||||
#define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
|
||||
(_a > _b) ? _a : _b; })
|
||||
|
||||
#define swap(a, b) ({ __typeof__(a) _a = a; (a) = (b); (b) = _a; })
|
||||
#define swap(a, b) ({ __typeof__(a) _a = (a); (a) = (b); (b) = _a; })
|
||||
|
||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
|
||||
# define compiler_bswap32 __builtin_bswap32
|
||||
@ -46,7 +46,7 @@
|
||||
# define compiler_bswap16 __builtin_bswap16
|
||||
#endif
|
||||
|
||||
#define compiler_fls32(n) (31 - __builtin_clz(n))
|
||||
#define compiler_fls64(n) (63 - __builtin_clzll(n))
|
||||
#define compiler_ffs32(n) __builtin_ctz(n)
|
||||
#define compiler_ffs64(n) __builtin_ctzll(n)
|
||||
#define compiler_fls32(n) (31 - __builtin_clz(n))
|
||||
#define compiler_fls64(n) (63 - __builtin_clzll(n))
|
||||
#define compiler_ffs32(n) __builtin_ctz(n)
|
||||
#define compiler_ffs64(n) __builtin_ctzll(n)
|
||||
|
@ -9,37 +9,34 @@
|
||||
#ifdef __GNUC__
|
||||
# include "compiler-gcc.h"
|
||||
#else
|
||||
# warning "Unrecognized compiler. Please add a header file for your compiler."
|
||||
# error "Unrecognized compiler. Please add a header file for your compiler."
|
||||
#endif
|
||||
|
||||
#ifndef LIBEXPORT
|
||||
# define LIBEXPORT
|
||||
#endif
|
||||
|
||||
#ifndef BUILD_BUG_ON
|
||||
# define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
# define likely(expr) (expr)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
# define unlikely(expr) (expr)
|
||||
#endif
|
||||
|
||||
#ifndef prefetch
|
||||
# define prefetch(addr)
|
||||
#ifndef _packed_attribute
|
||||
# error "missing required definition of _packed_attribute"
|
||||
#endif
|
||||
|
||||
#ifndef _aligned_attribute
|
||||
# error "missing required definition of _aligned_attribute"
|
||||
#endif
|
||||
|
||||
#ifndef _packed_attribute
|
||||
# error "missing required definition of _packed_attribute"
|
||||
#ifndef likely
|
||||
# define likely(expr) (expr)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
# define unlikely(expr) (expr)
|
||||
#endif
|
||||
|
||||
#ifndef prefetch
|
||||
# define prefetch(addr)
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef CPU_IS_BIG_ENDIAN
|
||||
# error "missing required endianness definition"
|
||||
#endif
|
||||
@ -47,7 +44,6 @@
|
||||
#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
|
||||
|
||||
#ifndef UNALIGNED_ACCESS_SPEED
|
||||
# warning "assuming unaligned accesses are not allowed"
|
||||
# define UNALIGNED_ACCESS_SPEED 0
|
||||
#endif
|
||||
|
||||
@ -58,3 +54,7 @@
|
||||
#if !defined(min) || !defined(max) || !defined(swap)
|
||||
# error "missing required definitions of min(), max(), and swap() macros"
|
||||
#endif
|
||||
|
||||
#ifndef BUILD_BUG_ON
|
||||
# define BUILD_BUG_ON(expr) ((void)sizeof(char[1 - 2*!!(expr)]))
|
||||
#endif
|
||||
|
@ -71,9 +71,9 @@
|
||||
* else
|
||||
* multiple = 0;
|
||||
*
|
||||
* remainder >>= 1;
|
||||
* remainder |= (u32)bit << 31;
|
||||
* remainder ^= multiple;
|
||||
* remainder >>= 1;
|
||||
* remainder |= (u32)bit << 31;
|
||||
* remainder ^= multiple;
|
||||
* }
|
||||
*
|
||||
* return ~remainder;
|
||||
@ -108,7 +108,7 @@
|
||||
* multiple = divisor;
|
||||
* else
|
||||
* multiple = 0;
|
||||
* remainder >>= 1;
|
||||
* remainder >>= 1;
|
||||
* remainder ^= multiple;
|
||||
* }
|
||||
*
|
||||
|
@ -1961,9 +1961,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
||||
struct lz_match *cache_end;
|
||||
const u8 *in_block_begin;
|
||||
const u8 *in_block_end;
|
||||
unsigned num_matches;
|
||||
unsigned best_len;
|
||||
unsigned long prev_hash = 0;
|
||||
u32 next_hash = 0;
|
||||
|
||||
deflate_init_output(&os, out, out_nbytes_avail);
|
||||
deflate_reset_symbol_frequencies(c);
|
||||
@ -1991,6 +1989,9 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
||||
|
||||
/* Find all match possibilities in this block. */
|
||||
do {
|
||||
struct lz_match *matches;
|
||||
unsigned best_len;
|
||||
|
||||
/* Decrease the maximum and nice match lengths if we're
|
||||
* approaching the end of the input buffer. */
|
||||
if (unlikely(max_len > in_end - in_next)) {
|
||||
@ -2028,71 +2029,68 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
||||
* search for matches at almost all positions, so this
|
||||
* advantage of hash chains is negated.
|
||||
*/
|
||||
num_matches =
|
||||
matches = cache_ptr;
|
||||
cache_ptr =
|
||||
bt_matchfinder_get_matches(&c->bt_mf,
|
||||
in_cur_base,
|
||||
in_next,
|
||||
DEFLATE_MIN_MATCH_LEN,
|
||||
max_len,
|
||||
nice_len,
|
||||
c->max_search_depth,
|
||||
&prev_hash,
|
||||
&next_hash,
|
||||
&best_len,
|
||||
cache_ptr);
|
||||
cache_ptr += num_matches;
|
||||
cache_ptr->length = num_matches;
|
||||
cache_ptr->length = cache_ptr - matches;
|
||||
cache_ptr->offset = *in_next;
|
||||
in_next++;
|
||||
cache_ptr++;
|
||||
|
||||
if (num_matches) {
|
||||
best_len = cache_ptr[-2].length;
|
||||
/*
|
||||
* If there was a very long match found, don't cache any
|
||||
* matches for the bytes covered by that match. This
|
||||
* avoids degenerate behavior when compressing highly
|
||||
* redundant data, where the number of matches can be
|
||||
* very large.
|
||||
*
|
||||
* This heuristic doesn't actually hurt the compression
|
||||
* ratio very much. If there's a long match, then the
|
||||
* data must be highly compressible, so it doesn't
|
||||
* matter much what we do.
|
||||
*
|
||||
* We also trigger this same case when approaching the
|
||||
* desired end of the block. This forces the block to
|
||||
* reach a "stopping point" where there are no matches
|
||||
* extending to later positions. (XXX: this behavior is
|
||||
* non-optimal and should be improved.)
|
||||
*/
|
||||
if (best_len >= DEFLATE_MIN_MATCH_LEN &&
|
||||
best_len >= min(nice_len, in_block_end - in_next)) {
|
||||
--best_len;
|
||||
do {
|
||||
if (unlikely(max_len > in_end - in_next)) {
|
||||
max_len = in_end - in_next;
|
||||
nice_len = min(max_len, nice_len);
|
||||
}
|
||||
if (in_next == in_next_slide) {
|
||||
bt_matchfinder_slide_window(&c->bt_mf);
|
||||
in_cur_base = in_next;
|
||||
in_next_slide = in_next + min(in_end - in_next,
|
||||
MATCHFINDER_WINDOW_SIZE);
|
||||
}
|
||||
bt_matchfinder_skip_position(&c->bt_mf,
|
||||
in_cur_base,
|
||||
in_next,
|
||||
in_end,
|
||||
nice_len,
|
||||
c->max_search_depth,
|
||||
&next_hash);
|
||||
|
||||
/*
|
||||
* If there was a very long match found, don't
|
||||
* cache any matches for the bytes covered by
|
||||
* that match. This avoids degenerate behavior
|
||||
* when compressing highly redundant data, where
|
||||
* the number of matches can be very large.
|
||||
*
|
||||
* This heuristic doesn't actually hurt the
|
||||
* compression ratio very much. If there's a
|
||||
* long match, then the data must be highly
|
||||
* compressible, so it doesn't matter much what
|
||||
* we do.
|
||||
*
|
||||
* We also trigger this same case when
|
||||
* approaching the desired end of the block.
|
||||
* This forces the block to reach a "stopping
|
||||
* point" where there are no matches extending
|
||||
* to later positions. (XXX: this behavior is
|
||||
* non-optimal and should be improved.)
|
||||
*/
|
||||
if (best_len >= min(nice_len, in_block_end - in_next)) {
|
||||
--best_len;
|
||||
do {
|
||||
if (unlikely(max_len > in_end - in_next)) {
|
||||
max_len = in_end - in_next;
|
||||
nice_len = min(max_len, nice_len);
|
||||
}
|
||||
if (in_next == in_next_slide) {
|
||||
bt_matchfinder_slide_window(&c->bt_mf);
|
||||
in_cur_base = in_next;
|
||||
in_next_slide = in_next + min(in_end - in_next,
|
||||
MATCHFINDER_WINDOW_SIZE);
|
||||
}
|
||||
bt_matchfinder_skip_position(&c->bt_mf,
|
||||
in_cur_base,
|
||||
in_next,
|
||||
in_end,
|
||||
nice_len,
|
||||
c->max_search_depth,
|
||||
&prev_hash);
|
||||
|
||||
cache_ptr->length = 0;
|
||||
cache_ptr->offset = *in_next;
|
||||
in_next++;
|
||||
cache_ptr++;
|
||||
} while (--best_len);
|
||||
}
|
||||
cache_ptr->length = 0;
|
||||
cache_ptr->offset = *in_next;
|
||||
in_next++;
|
||||
cache_ptr++;
|
||||
} while (--best_len);
|
||||
}
|
||||
} while (in_next < in_block_end);
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* endianness.h
|
||||
*
|
||||
* Inline functions for endianness conversion.
|
||||
* Macros and inline functions for endianness conversion.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
@ -57,7 +57,7 @@ gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
|
||||
out_next += 4;
|
||||
|
||||
/* ISIZE */
|
||||
put_unaligned_u32_le(in_size, out_next);
|
||||
put_unaligned_u32_le((u32)in_size, out_next);
|
||||
out_next += 4;
|
||||
|
||||
return out_next - (u8 *)out;
|
||||
|
@ -6,8 +6,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#define GZIP_MIN_HEADER_SIZE 10
|
||||
#define GZIP_FOOTER_SIZE 8
|
||||
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
|
||||
|
@ -54,20 +54,16 @@ gzip_decompress(struct deflate_decompressor *d,
|
||||
|
||||
/* Original file name (zero terminated) */
|
||||
if (flg & GZIP_FNAME) {
|
||||
while (*in_next != 0 && ++in_next != in_end)
|
||||
while (*in_next++ != 0 && in_next != in_end)
|
||||
;
|
||||
if (in_next != in_end)
|
||||
in_next++;
|
||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* File comment (zero terminated) */
|
||||
if (flg & GZIP_FCOMMENT) {
|
||||
while (*in_next != 0 && ++in_next != in_end)
|
||||
while (*in_next++ != 0 && ++in_next != in_end)
|
||||
;
|
||||
if (in_next != in_end)
|
||||
in_next++;
|
||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
}
|
||||
|
@ -1,37 +1,102 @@
|
||||
/*
|
||||
* hc_matchfinder.h
|
||||
*
|
||||
* This is a Hash Chain (hc) based matchfinder.
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
* Algorithm
|
||||
*
|
||||
* This is a Hash Chains (hc) based matchfinder.
|
||||
*
|
||||
* The data structure is a hash table where each hash bucket contains a linked
|
||||
* list of sequences, referenced by position.
|
||||
* list (or "chain") of sequences whose first 3 bytes share the same hash code.
|
||||
* Each sequence is identified by its starting position in the input buffer.
|
||||
*
|
||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
||||
* that sequence (position) is prepended to the appropriate linked list in the
|
||||
* hash table. Since the sequences are inserted in order, each list is always
|
||||
* sorted by increasing match offset.
|
||||
* The algorithm processes the input buffer sequentially. At each byte
|
||||
* position, the hash code of the first 3 bytes of the sequence beginning at
|
||||
* that position (the sequence being matched against) is computed. This
|
||||
* identifies the hash bucket to use for that position. Then, this hash
|
||||
* bucket's linked list is searched for matches. Then, a new linked list node
|
||||
* is created to represent the current sequence and is prepended to the list.
|
||||
*
|
||||
* At the same time as inserting a sequence, we may search the linked list for
|
||||
* matches with that sequence. At each step, the length of the match is
|
||||
* computed. The search ends when the sequences get too far away (outside of
|
||||
* the sliding window), or when the list ends (in the code this is the same
|
||||
* check as "too far away"), or when 'max_search_depth' positions have been
|
||||
* searched, or when a match of at least 'nice_len' bytes has been found.
|
||||
* This algorithm has several useful properties:
|
||||
*
|
||||
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
|
||||
* sequence occurs prior to the sequence being matched against.
|
||||
*
|
||||
* - The sequences in each linked list are always sorted by decreasing starting
|
||||
* position. Therefore, the closest (smallest offset) matches are found
|
||||
* first, which in many compression formats tend to be the cheapest to encode.
|
||||
*
|
||||
* - Although fast running time is not guaranteed due to the possibility of the
|
||||
* lists getting very long, the worst degenerate behavior can be easily
|
||||
* prevented by capping the number of nodes searched at each position.
|
||||
*
|
||||
* - If the compressor decides not to search for matches at a certain position,
|
||||
* then that position can be quickly inserted without searching the list.
|
||||
*
|
||||
* - The algorithm is adaptable to sliding windows: just store the positions
|
||||
* relative to a "base" value that is updated from time to time, and stop
|
||||
* searching each list when the sequences get too far away.
|
||||
*
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
* Notes on usage
|
||||
*
|
||||
* You must define MATCHFINDER_WINDOW_ORDER before including this header because
|
||||
* that determines which integer type to use for positions. Since 16-bit
|
||||
* integers are faster than 32-bit integers due to reduced memory usage (and
|
||||
* therefore reduced cache pressure), the code only uses 32-bit integers if they
|
||||
* are needed to represent all possible positions.
|
||||
*
|
||||
* In addition, you must allocate the 'struct hc_matchfinder' on a
|
||||
* MATCHFINDER_ALIGNMENT-aligned boundary.
|
||||
*
|
||||
* ----------------------------------------------------------------------------
|
||||
*
|
||||
* Optimizations
|
||||
*
|
||||
* The longest_match() and skip_positions() functions are inlined into the
|
||||
* compressors that use them. This isn't just about saving the overhead of a
|
||||
* function call. These functions are intended to be called from the inner
|
||||
* loops of compressors, where giving the compiler more control over register
|
||||
* allocation is very helpful. There is also significant benefit to be gained
|
||||
* from allowing the CPU to predict branches independently at each call site.
|
||||
* For example, "lazy"-style compressors can be written with two calls to
|
||||
* longest_match(), each of which starts with a different 'best_len' and
|
||||
* therefore has significantly different performance characteristics.
|
||||
*
|
||||
* Although any hash function can be used, a multiplicative hash is fast and
|
||||
* works well.
|
||||
*
|
||||
* On some processors, it is significantly faster to extend matches by whole
|
||||
* words (32 or 64 bits) instead of by individual bytes. For this to be the
|
||||
* case, the processor must implement unaligned memory accesses efficiently and
|
||||
* must have either a fast "find first set bit" instruction or a fast "find last
|
||||
* set bit" instruction, depending on the processor's endianness.
|
||||
*
|
||||
* The code uses one loop for finding the first match and one loop for finding a
|
||||
* longer match. Each of these loops is tuned for its respective task and in
|
||||
* combination are faster than a single generalized loop that handles both
|
||||
* tasks.
|
||||
*
|
||||
* The code also uses a tight inner loop that only compares the last and first
|
||||
* bytes of a potential match. It is only when these bytes match that a full
|
||||
* match extension is attempted.
|
||||
*
|
||||
* ----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lz_extend.h"
|
||||
#include "lz_hash3.h"
|
||||
#include "lz_hash.h"
|
||||
#include "matchfinder_common.h"
|
||||
#include "unaligned.h"
|
||||
|
||||
#ifndef HC_MATCHFINDER_HASH_ORDER
|
||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
||||
# define HC_MATCHFINDER_HASH_ORDER 14
|
||||
# else
|
||||
# define HC_MATCHFINDER_HASH_ORDER 15
|
||||
# endif
|
||||
#if MATCHFINDER_WINDOW_ORDER < 14
|
||||
# define HC_MATCHFINDER_HASH_ORDER 14
|
||||
#else
|
||||
# define HC_MATCHFINDER_HASH_ORDER 15
|
||||
#endif
|
||||
|
||||
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
|
||||
@ -73,17 +138,18 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
||||
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process. This is the
|
||||
* pointer to the bytes being matched against.
|
||||
* pointer to the sequence being matched against.
|
||||
* @best_len
|
||||
* Require a match at least this long.
|
||||
* Require a match longer than this length.
|
||||
* @max_len
|
||||
* Maximum match length to return.
|
||||
* The maximum permissible match length at this position.
|
||||
* @nice_len
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* Must be <= @max_len.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||
* @offset_ret
|
||||
* The match offset is returned here.
|
||||
* If a match is found, its offset is returned in this location.
|
||||
*
|
||||
* Return the length of the match found, or 'best_len' if no match longer than
|
||||
* 'best_len' was found.
|
||||
@ -102,61 +168,57 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||
const u8 *best_matchptr = best_matchptr; /* uninitialized */
|
||||
const u8 *matchptr;
|
||||
unsigned len;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
u32 first_3_bytes;
|
||||
u32 hash;
|
||||
pos_t cur_node;
|
||||
|
||||
/* Insert the current sequence into the appropriate hash chain. */
|
||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
|
||||
/* Insert the current sequence into the appropriate linked list. */
|
||||
if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
|
||||
goto out;
|
||||
first_3_bytes = load_u24_unaligned(in_next);
|
||||
hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
mf->next_tab[in_next - in_base] = cur_match;
|
||||
hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
|
||||
cur_node = mf->hash_tab[hash];
|
||||
mf->next_tab[in_next - in_base] = cur_node;
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
|
||||
if (unlikely(best_len >= max_len))
|
||||
goto out;
|
||||
|
||||
/* Search the appropriate hash chain for matches. */
|
||||
/* Search the appropriate linked list for matches. */
|
||||
|
||||
if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
|
||||
if (!(matchfinder_node_valid(cur_node, in_base, in_next)))
|
||||
goto out;
|
||||
|
||||
if (best_len < 3) {
|
||||
for (;;) {
|
||||
/* No length 3 match found yet.
|
||||
* Check the first 3 bytes. */
|
||||
matchptr = &in_base[cur_match];
|
||||
matchptr = &in_base[cur_node];
|
||||
|
||||
if (load_u24_unaligned(matchptr) == first_3_bytes)
|
||||
break;
|
||||
|
||||
/* Not a match; keep trying. */
|
||||
cur_match = mf->next_tab[
|
||||
matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
/* The first 3 bytes did not match. Keep trying. */
|
||||
cur_node = mf->next_tab[
|
||||
matchfinder_slot_for_match(cur_node)];
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) ||
|
||||
!--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Found a length 3 match. */
|
||||
/* Found a match of length >= 3. Extend it to its full length. */
|
||||
best_matchptr = matchptr;
|
||||
best_len = lz_extend(in_next, best_matchptr, 3, max_len);
|
||||
if (best_len >= nice_len)
|
||||
goto out;
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
for (;;) {
|
||||
matchptr = &in_base[cur_match];
|
||||
matchptr = &in_base[cur_node];
|
||||
|
||||
/* Already found a length 3 match. Try for a longer match;
|
||||
* start by checking the last 2 bytes and the first 4 bytes. */
|
||||
@ -170,17 +232,16 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||
#endif
|
||||
break;
|
||||
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST)
|
||||
len = 4;
|
||||
else
|
||||
len = 0;
|
||||
#if UNALIGNED_ACCESS_IS_FAST
|
||||
len = 4;
|
||||
#else
|
||||
len = 0;
|
||||
#endif
|
||||
len = lz_extend(in_next, matchptr, len, max_len);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
@ -188,10 +249,8 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||
if (best_len >= nice_len)
|
||||
goto out;
|
||||
}
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
@ -200,7 +259,7 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the match-finder, but don't search for matches.
|
||||
* Advance the matchfinder, but don't search for matches.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
@ -212,7 +271,7 @@ out:
|
||||
* @in_end
|
||||
* Pointer to the end of the input buffer.
|
||||
* @count
|
||||
* Number of bytes to skip; must be > 0.
|
||||
* The number of bytes to advance. Must be > 0.
|
||||
*/
|
||||
static inline void
|
||||
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
||||
@ -221,13 +280,13 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
||||
const u8 *in_end,
|
||||
unsigned count)
|
||||
{
|
||||
unsigned hash;
|
||||
u32 hash;
|
||||
|
||||
if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
|
||||
if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
|
||||
return;
|
||||
|
||||
do {
|
||||
hash = lz_hash3(in_next, HC_MATCHFINDER_HASH_ORDER);
|
||||
hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
|
||||
mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
in_next++;
|
||||
|
@ -24,12 +24,12 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr,
|
||||
|
||||
if (likely(max_len - len >= 4 * WORDSIZE)) {
|
||||
|
||||
#define COMPARE_WORD_STEP \
|
||||
v_word = load_word_unaligned(&matchptr[len]) ^ \
|
||||
load_word_unaligned(&strptr[len]); \
|
||||
if (v_word != 0) \
|
||||
goto word_differs; \
|
||||
len += WORDSIZE; \
|
||||
#define COMPARE_WORD_STEP \
|
||||
v_word = load_word_unaligned(&matchptr[len]) ^ \
|
||||
load_word_unaligned(&strptr[len]); \
|
||||
if (v_word != 0) \
|
||||
goto word_differs; \
|
||||
len += WORDSIZE; \
|
||||
|
||||
COMPARE_WORD_STEP
|
||||
COMPARE_WORD_STEP
|
||||
|
41
src/lz_hash.h
Normal file
41
src/lz_hash.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* lz_hash.h
|
||||
*
|
||||
* Hashing for Lempel-Ziv matchfinding.
|
||||
*/
|
||||
|
||||
#ifndef _LZ_HASH_H
|
||||
#define _LZ_HASH_H
|
||||
|
||||
#include "unaligned.h"
|
||||
|
||||
/*
|
||||
* The hash function: given a sequence prefix held in the low-order bits of a
|
||||
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
|
||||
* bits of the product that don't fit in a 32-bit value, but take the
|
||||
* next-highest @num_bits bits of the product as the hash value, as those have
|
||||
* the most randomness.
|
||||
*/
|
||||
static inline u32
|
||||
lz_hash(u32 seq, unsigned num_bits)
|
||||
{
|
||||
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash the 3-byte sequence beginning at @p, producing a hash of length
|
||||
* @num_bits bits. At least LZ_HASH3_REQUIRED_NBYTES bytes of data must be
|
||||
* available at @p; note that this may be more than 3.
|
||||
*/
|
||||
static inline u32
|
||||
lz_hash_3_bytes(const u8 *p, unsigned num_bits)
|
||||
{
|
||||
u32 seq = load_u24_unaligned(p);
|
||||
if (num_bits >= 24)
|
||||
return seq;
|
||||
return lz_hash(seq, num_bits);
|
||||
}
|
||||
|
||||
#define LZ_HASH3_REQUIRED_NBYTES LOAD_U24_REQUIRED_NBYTES
|
||||
|
||||
#endif /* _LZ_HASH_H */
|
@ -1,49 +0,0 @@
|
||||
/*
|
||||
* lz_hash3.h
|
||||
*
|
||||
* 3-byte hashing for Lempel-Ziv matchfinding.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "unaligned.h"
|
||||
|
||||
static inline u32
|
||||
loaded_u32_to_u24(u32 v)
|
||||
{
|
||||
if (CPU_IS_LITTLE_ENDIAN)
|
||||
return v & 0xFFFFFF;
|
||||
else
|
||||
return v >> 8;
|
||||
}
|
||||
|
||||
static inline u32
|
||||
load_u24_unaligned(const u8 *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST)
|
||||
return loaded_u32_to_u24(load_u32_unaligned(p));
|
||||
else
|
||||
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
lz_hash3_u24(u32 str, unsigned num_bits)
|
||||
{
|
||||
return (u32)(str * 0x1E35A7BD) >> (32 - num_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash the next 3-byte sequence in the window, producing a hash of length
|
||||
* 'num_bits' bits. At least LZ_HASH_REQUIRED_NBYTES must be available at 'p';
|
||||
* this might be 4 bytes rather than 3 because an unaligned load is faster on
|
||||
* some architectures.
|
||||
*/
|
||||
static inline u32
|
||||
lz_hash3(const u8 *p, unsigned num_bits)
|
||||
{
|
||||
return lz_hash3_u24(load_u24_unaligned(p), num_bits);
|
||||
}
|
||||
|
||||
/* Number of bytes the hash function actually requires be available, due to the
|
||||
* possibility of an unaligned load. */
|
||||
#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)
|
@ -16,9 +16,9 @@ matchfinder_init_avx2(pos_t *data, size_t size)
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
||||
v = _mm256_set1_epi16((u16)MATCHFINDER_NULL);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
|
||||
v = _mm256_set1_epi32((u32)MATCHFINDER_NULL);
|
||||
else
|
||||
return false;
|
||||
|
||||
|
@ -60,7 +60,7 @@ static inline bool
|
||||
matchfinder_memset_init_okay(void)
|
||||
{
|
||||
/* All bytes must match in order to use memset. */
|
||||
const pos_t v = MATCHFINDER_INITVAL;
|
||||
const pos_t v = MATCHFINDER_NULL;
|
||||
if (sizeof(pos_t) == 2)
|
||||
return (u8)v == (u8)(v >> 8);
|
||||
if (sizeof(pos_t) == 4)
|
||||
@ -93,12 +93,12 @@ matchfinder_init(pos_t *data, size_t num_entries)
|
||||
#endif
|
||||
|
||||
if (matchfinder_memset_init_okay()) {
|
||||
memset(data, (u8)MATCHFINDER_INITVAL, size);
|
||||
memset(data, (u8)MATCHFINDER_NULL, size);
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_entries; i++)
|
||||
data[i] = MATCHFINDER_INITVAL;
|
||||
data[i] = MATCHFINDER_NULL;
|
||||
}
|
||||
|
||||
#if MATCHFINDER_IS_SLIDING
|
||||
|
@ -16,12 +16,12 @@ typedef u32 pos_t;
|
||||
|
||||
/* Not all the bits of the position type are needed, so the sign bit can be
|
||||
* reserved to mean "out of bounds". */
|
||||
#define MATCHFINDER_INITVAL ((pos_t)-1)
|
||||
#define MATCHFINDER_NULL ((pos_t)-1)
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return !(cur_match & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
|
||||
return !(cur_node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
|
||||
}
|
||||
|
||||
#else
|
||||
@ -30,18 +30,18 @@ matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_nex
|
||||
* This prevents the beginning of the buffer from matching anything; however,
|
||||
* this doesn't matter much. */
|
||||
|
||||
#define MATCHFINDER_INITVAL ((pos_t)0)
|
||||
#define MATCHFINDER_NULL ((pos_t)0)
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return cur_match != 0;
|
||||
return cur_node != 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline pos_t
|
||||
matchfinder_slot_for_match(pos_t cur_match)
|
||||
matchfinder_slot_for_match(pos_t cur_node)
|
||||
{
|
||||
return cur_match;
|
||||
return cur_node;
|
||||
}
|
||||
|
@ -13,18 +13,18 @@ typedef s16 pos_t;
|
||||
typedef s32 pos_t;
|
||||
#endif
|
||||
|
||||
#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||
#define MATCHFINDER_NULL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||
|
||||
/* In the sliding window case, positions are stored relative to 'in_base'. */
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return cur_match > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
|
||||
return cur_node > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
|
||||
}
|
||||
|
||||
static inline pos_t
|
||||
matchfinder_slot_for_match(pos_t cur_match)
|
||||
matchfinder_slot_for_match(pos_t cur_node)
|
||||
{
|
||||
return cur_match & (MATCHFINDER_WINDOW_SIZE - 1);
|
||||
return cur_node & (MATCHFINDER_WINDOW_SIZE - 1);
|
||||
}
|
||||
|
@ -16,9 +16,9 @@ matchfinder_init_sse2(pos_t *data, size_t size)
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
||||
v = _mm_set1_epi16((u16)MATCHFINDER_NULL);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm_set1_epi32(MATCHFINDER_INITVAL);
|
||||
v = _mm_set1_epi32((u32)MATCHFINDER_NULL);
|
||||
else
|
||||
return false;
|
||||
|
||||
|
@ -6,9 +6,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* unaligned.h
|
||||
*
|
||||
* Inline functions for unaligned memory access.
|
||||
* Inline functions for unaligned memory accesses.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
@ -214,3 +214,36 @@ put_unaligned_u32_be(u32 v, void *p)
|
||||
p8[3] = (v >> 0) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a 32-bit value that was loaded with the platform's native endianness,
|
||||
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
|
||||
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
|
||||
* order, at the memory location from which the input 32-bit value was loaded.
|
||||
*/
|
||||
static inline u32
|
||||
loaded_u32_to_u24(u32 v)
|
||||
{
|
||||
if (CPU_IS_LITTLE_ENDIAN)
|
||||
return v & 0xFFFFFF;
|
||||
else
|
||||
return v >> 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load the next 3 bytes from the memory location @p into the 24 low-order bits
|
||||
* of a 32-bit value. The order in which the 3 bytes will be arranged as octets
|
||||
* in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
|
||||
* bytes must be available at @p; note that this may be more than 3.
|
||||
*/
|
||||
static inline u32
|
||||
load_u24_unaligned(const u8 *p)
|
||||
{
|
||||
#if UNALIGNED_ACCESS_IS_FAST
|
||||
# define LOAD_U24_REQUIRED_NBYTES 4
|
||||
return loaded_u32_to_u24(load_u32_unaligned(p));
|
||||
#else
|
||||
# define LOAD_U24_REQUIRED_NBYTES 3
|
||||
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
||||
#endif
|
||||
}
|
||||
|
@ -1,11 +1,9 @@
|
||||
/*
|
||||
* benchmark.c - A compression testing and benchmark program.
|
||||
*
|
||||
* The author dedicates this file to the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
* This file has no copyright assigned and is placed in the Public Domain.
|
||||
*/
|
||||
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
#define _GNU_SOURCE
|
||||
|
||||
@ -419,9 +417,9 @@ main(int argc, char **argv)
|
||||
wrapper == NO_WRAPPER ? "None" :
|
||||
wrapper == ZLIB_WRAPPER ? "zlib" : "gzip");
|
||||
printf("\tCompression engine: %s\n",
|
||||
compress_with_libz ? "zlib" : "libdeflate");
|
||||
compress_with_libz ? "libz" : "libdeflate");
|
||||
printf("\tDecompression engine: %s\n",
|
||||
decompress_with_libz ? "zlib" : "libdeflate");
|
||||
decompress_with_libz ? "libz" : "libdeflate");
|
||||
|
||||
ubuf1 = malloc(chunk_size);
|
||||
ubuf2 = malloc(chunk_size);
|
||||
|
Loading…
x
Reference in New Issue
Block a user