mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-10 12:58:30 -04:00
Cleanups and matchfinder updates
This commit is contained in:
parent
fed4597943
commit
5f3208e788
@ -78,7 +78,7 @@ install(FILES libdeflate.h DESTINATION "${CMAKE_INSTALL_PREFIX}/include")
|
|||||||
|
|
||||||
option(BUILD_BENCHMARK "Build benchmark program" OFF)
|
option(BUILD_BENCHMARK "Build benchmark program" OFF)
|
||||||
add_executable(benchmark test/benchmark.c)
|
add_executable(benchmark test/benchmark.c)
|
||||||
target_link_libraries(benchmark deflate -lz)
|
target_link_libraries(benchmark deflatestatic -lz)
|
||||||
|
|
||||||
option(BUILD_GEN_CRC32_TABLE "Build CRC32 table generation program" OFF)
|
option(BUILD_GEN_CRC32_TABLE "Build CRC32 table generation program" OFF)
|
||||||
add_executable(gen_crc32_table test/gen_crc32_table.c)
|
add_executable(gen_crc32_table test/gen_crc32_table.c)
|
||||||
|
22
libdeflate.h
22
libdeflate.h
@ -1,7 +1,9 @@
|
|||||||
/*
|
/*
|
||||||
* libdeflate.h
|
* libdeflate.h
|
||||||
*
|
*
|
||||||
* Public header for the DEFLATE compression library.
|
* Public header for libdeflate.
|
||||||
|
*
|
||||||
|
* This file has no copyright assigned and is placed in the Public Domain.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LIBDEFLATE_H
|
#ifndef LIBDEFLATE_H
|
||||||
@ -26,7 +28,9 @@ struct deflate_compressor;
|
|||||||
* fastest, 6 = medium/default, 9 = slowest). The return value is a pointer to
|
* fastest, 6 = medium/default, 9 = slowest). The return value is a pointer to
|
||||||
* the new DEFLATE compressor, or NULL if out of memory.
|
* the new DEFLATE compressor, or NULL if out of memory.
|
||||||
*
|
*
|
||||||
* Note: the sliding window size is defined at compilation time (default 32768).
|
* Note: for compression, the sliding window size is defined at compilation time
|
||||||
|
* to 32768, the largest size permissible in the DEFLATE format. It cannot be
|
||||||
|
* changed at runtime.
|
||||||
*/
|
*/
|
||||||
extern struct deflate_compressor *
|
extern struct deflate_compressor *
|
||||||
deflate_alloc_compressor(unsigned int compression_level);
|
deflate_alloc_compressor(unsigned int compression_level);
|
||||||
@ -44,7 +48,7 @@ deflate_compress(struct deflate_compressor *compressor,
|
|||||||
void *out, size_t out_nbytes_avail);
|
void *out, size_t out_nbytes_avail);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Like deflate_compress(), but store the data in the zlib wrapper format.
|
* Like deflate_compress(), but stores the data in the zlib wrapper format.
|
||||||
*/
|
*/
|
||||||
extern size_t
|
extern size_t
|
||||||
zlib_compress(struct deflate_compressor *compressor,
|
zlib_compress(struct deflate_compressor *compressor,
|
||||||
@ -52,7 +56,7 @@ zlib_compress(struct deflate_compressor *compressor,
|
|||||||
void *out, size_t out_nbytes_avail);
|
void *out, size_t out_nbytes_avail);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Like deflate_compress(), but store the data in the gzip wrapper format.
|
* Like deflate_compress(), but stores the data in the gzip wrapper format.
|
||||||
*/
|
*/
|
||||||
extern size_t
|
extern size_t
|
||||||
gzip_compress(struct deflate_compressor *compressor,
|
gzip_compress(struct deflate_compressor *compressor,
|
||||||
@ -61,7 +65,8 @@ gzip_compress(struct deflate_compressor *compressor,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* deflate_free_compressor() frees a DEFLATE compressor that was allocated with
|
* deflate_free_compressor() frees a DEFLATE compressor that was allocated with
|
||||||
* deflate_alloc_compressor().
|
* deflate_alloc_compressor(). If a NULL pointer is passed in, no action is
|
||||||
|
* taken.
|
||||||
*/
|
*/
|
||||||
extern void
|
extern void
|
||||||
deflate_free_compressor(struct deflate_compressor *compressor);
|
deflate_free_compressor(struct deflate_compressor *compressor);
|
||||||
@ -79,7 +84,9 @@ struct deflate_decompressor;
|
|||||||
*
|
*
|
||||||
* This function takes no parameters, and the returned decompressor is valid for
|
* This function takes no parameters, and the returned decompressor is valid for
|
||||||
* decompressing data that was compressed at any compression level and with any
|
* decompressing data that was compressed at any compression level and with any
|
||||||
* sliding window size.
|
* sliding window size. It can also be used for any wrapper format (raw
|
||||||
|
* DEFLATE, zlib, or gzip); however, the appropriate decompression function must
|
||||||
|
* be called.
|
||||||
*/
|
*/
|
||||||
extern struct deflate_decompressor *
|
extern struct deflate_decompressor *
|
||||||
deflate_alloc_decompressor(void);
|
deflate_alloc_decompressor(void);
|
||||||
@ -118,7 +125,8 @@ gzip_decompress(struct deflate_decompressor *decompressor,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
|
* deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
|
||||||
* with deflate_alloc_decompressor().
|
* with deflate_alloc_decompressor(). If a NULL pointer is passed in, no action
|
||||||
|
* is taken.
|
||||||
*/
|
*/
|
||||||
extern void
|
extern void
|
||||||
deflate_free_decompressor(struct deflate_decompressor *decompressor);
|
deflate_free_decompressor(struct deflate_decompressor *decompressor);
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
#define UNROLL_FACTOR 4
|
#define UNROLL_FACTOR 4
|
||||||
|
|
||||||
u32
|
u32
|
||||||
adler32(const u8 *buffer, size_t size)
|
adler32(const void *buffer, size_t size)
|
||||||
{
|
{
|
||||||
u32 s1 = 1;
|
u32 s1 = 1;
|
||||||
u32 s2 = 0;
|
u32 s2 = 0;
|
||||||
|
@ -9,4 +9,4 @@
|
|||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
|
||||||
extern u32
|
extern u32
|
||||||
adler32(const u8 *buffer, size_t size);
|
adler32(const void *buffer, size_t size);
|
||||||
|
18
src/bitops.h
18
src/bitops.h
@ -11,7 +11,8 @@
|
|||||||
|
|
||||||
/* Find Last Set bit */
|
/* Find Last Set bit */
|
||||||
|
|
||||||
static inline unsigned fls32(u32 v)
|
static inline unsigned
|
||||||
|
fls32(u32 v)
|
||||||
{
|
{
|
||||||
#ifdef compiler_fls32
|
#ifdef compiler_fls32
|
||||||
return compiler_fls32(v);
|
return compiler_fls32(v);
|
||||||
@ -23,7 +24,8 @@ static inline unsigned fls32(u32 v)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned fls64(u64 v)
|
static inline unsigned
|
||||||
|
fls64(u64 v)
|
||||||
{
|
{
|
||||||
#ifdef compiler_fls64
|
#ifdef compiler_fls64
|
||||||
return compiler_fls64(v);
|
return compiler_fls64(v);
|
||||||
@ -35,7 +37,8 @@ static inline unsigned fls64(u64 v)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned flsw(machine_word_t v)
|
static inline unsigned
|
||||||
|
flsw(machine_word_t v)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||||
if (WORDSIZE == 4)
|
if (WORDSIZE == 4)
|
||||||
@ -46,7 +49,8 @@ static inline unsigned flsw(machine_word_t v)
|
|||||||
|
|
||||||
/* Find First Set bit */
|
/* Find First Set bit */
|
||||||
|
|
||||||
static inline unsigned ffs32(u32 v)
|
static inline unsigned
|
||||||
|
ffs32(u32 v)
|
||||||
{
|
{
|
||||||
#ifdef compiler_ffs32
|
#ifdef compiler_ffs32
|
||||||
return compiler_ffs32(v);
|
return compiler_ffs32(v);
|
||||||
@ -58,7 +62,8 @@ static inline unsigned ffs32(u32 v)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned ffs64(u64 v)
|
static inline unsigned
|
||||||
|
ffs64(u64 v)
|
||||||
{
|
{
|
||||||
#ifdef compiler_ffs64
|
#ifdef compiler_ffs64
|
||||||
return compiler_ffs64(v);
|
return compiler_ffs64(v);
|
||||||
@ -70,7 +75,8 @@ static inline unsigned ffs64(u64 v)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned ffsw(machine_word_t v)
|
static inline unsigned
|
||||||
|
ffsw(machine_word_t v)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||||
if (WORDSIZE == 4)
|
if (WORDSIZE == 4)
|
||||||
|
@ -1,51 +1,56 @@
|
|||||||
/*
|
/*
|
||||||
* bt_matchfinder.h
|
* bt_matchfinder.h
|
||||||
*
|
*
|
||||||
* This is a Binary Tree (bt) based matchfinder.
|
* ----------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* This is a Binary Trees (bt) based matchfinder.
|
||||||
*
|
*
|
||||||
* The data structure is a hash table where each hash bucket contains a binary
|
* The data structure is a hash table where each hash bucket contains a binary
|
||||||
* tree of sequences, referenced by position. The sequences in the binary tree
|
* tree of sequences whose first 3 bytes share the same hash code. Each
|
||||||
* are ordered such that a left child is lexicographically lesser than its
|
* sequence is identified by its starting position in the input buffer. Each
|
||||||
* parent, and a right child is lexicographically greater than its parent.
|
* binary tree is always sorted such that each left child represents a sequence
|
||||||
|
* lexicographically lesser than its parent and each right child represents a
|
||||||
|
* sequence lexicographically greater than its parent.
|
||||||
*
|
*
|
||||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
* The algorithm processes the input buffer sequentially. At each byte
|
||||||
* the the appropriate binary tree is re-rooted at that sequence (position).
|
* position, the hash code of the first 3 bytes of the sequence beginning at
|
||||||
* Since the sequences are inserted in order, each binary tree maintains the
|
* that position (the sequence being matched against) is computed. This
|
||||||
* invariant that each child node has greater match offset than its parent.
|
* identifies the hash bucket to use for that position. Then, a new binary tree
|
||||||
|
* node is created to represent the current sequence. Then, in a single tree
|
||||||
|
* traversal, the hash bucket's binary tree is searched for matches and is
|
||||||
|
* re-rooted at the new node.
|
||||||
*
|
*
|
||||||
* While inserting a sequence, we may search the binary tree for matches with
|
* Compared to the simpler algorithm that uses linked lists instead of binary
|
||||||
* that sequence. At each step, the length of the match is computed. The
|
* trees (see hc_matchfinder.h), the binary tree version gains more information
|
||||||
* search ends when the sequences get too far away (outside of the sliding
|
* at each node visitation. Ideally, the binary tree version will examine only
|
||||||
* window), or when the binary tree ends (in the code this is the same check as
|
* 'log(n)' nodes to find the same matches that the linked list version will
|
||||||
* "too far away"), or when 'max_search_depth' positions have been searched, or
|
* find by examining 'n' nodes. In addition, the binary tree version can
|
||||||
* when a match of at least 'nice_len' bytes has been found.
|
* examine fewer bytes at each node by taking advantage of the common prefixes
|
||||||
|
* that result from the sort order, whereas the linked list version may have to
|
||||||
|
* examine up to the full length of the match at each node.
|
||||||
*
|
*
|
||||||
* Notes:
|
* However, it is not always best to use the binary tree version. It requires
|
||||||
|
* nearly twice as much memory as the linked list version, and it takes time to
|
||||||
|
* keep the binary trees sorted, even at positions where the compressor does not
|
||||||
|
* need matches. Generally, when doing fast compression on small buffers,
|
||||||
|
* binary trees are the wrong approach. They are best suited for thorough
|
||||||
|
* compression and/or large buffers.
|
||||||
*
|
*
|
||||||
* - Typically, we need to search more nodes to find a given match in a
|
* ----------------------------------------------------------------------------
|
||||||
* binary tree versus in a linked list. However, a binary tree has more
|
|
||||||
* overhead than a linked list: it needs to be kept sorted, and the inner
|
|
||||||
* search loop is more complicated. As a result, binary trees are best
|
|
||||||
* suited for compression modes where the potential matches are searched
|
|
||||||
* more thoroughly.
|
|
||||||
*
|
|
||||||
* - Since no attempt is made to keep the binary trees balanced, it's
|
|
||||||
* essential to have the 'max_search_depth' cutoff. Otherwise it could
|
|
||||||
* take quadratic time to run data through the matchfinder.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "lz_extend.h"
|
#include "lz_extend.h"
|
||||||
#include "lz_hash3.h"
|
#include "lz_hash.h"
|
||||||
#include "matchfinder_common.h"
|
#include "matchfinder_common.h"
|
||||||
|
|
||||||
#ifndef BT_MATCHFINDER_HASH_ORDER
|
#if MATCHFINDER_WINDOW_ORDER < 13
|
||||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
|
||||||
# define BT_MATCHFINDER_HASH_ORDER 14
|
# define BT_MATCHFINDER_HASH_ORDER 14
|
||||||
# else
|
#elif MATCHFINDER_WINDOW_ORDER < 15
|
||||||
# define BT_MATCHFINDER_HASH_ORDER 15
|
# define BT_MATCHFINDER_HASH_ORDER 15
|
||||||
# endif
|
#else
|
||||||
|
# define BT_MATCHFINDER_HASH_ORDER 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
|
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
|
||||||
@ -77,8 +82,37 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline u32
|
||||||
|
bt_matchfinder_hash_3_bytes(const u8 *in_next)
|
||||||
|
{
|
||||||
|
return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline pos_t *
|
||||||
|
bt_child(struct bt_matchfinder *mf, pos_t node, int offset)
|
||||||
|
{
|
||||||
|
if (MATCHFINDER_WINDOW_ORDER < sizeof(pos_t) * 8) {
|
||||||
|
/* no cast needed */
|
||||||
|
return &mf->child_tab[(matchfinder_slot_for_match(node) << 1) + offset];
|
||||||
|
} else {
|
||||||
|
return &mf->child_tab[((size_t)matchfinder_slot_for_match(node) << 1) + offset];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline pos_t *
|
||||||
|
bt_left_child(struct bt_matchfinder *mf, pos_t node)
|
||||||
|
{
|
||||||
|
return bt_child(mf, node, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline pos_t *
|
||||||
|
bt_right_child(struct bt_matchfinder *mf, pos_t node)
|
||||||
|
{
|
||||||
|
return bt_child(mf, node, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find matches with the current sequence.
|
* Retrieve a list of matches with the current position.
|
||||||
*
|
*
|
||||||
* @mf
|
* @mf
|
||||||
* The matchfinder structure.
|
* The matchfinder structure.
|
||||||
@ -87,115 +121,131 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
|||||||
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||||
* @in_next
|
* @in_next
|
||||||
* Pointer to the next byte in the input buffer to process. This is the
|
* Pointer to the next byte in the input buffer to process. This is the
|
||||||
* pointer to the bytes being matched against.
|
* pointer to the sequence being matched against.
|
||||||
|
* @min_len
|
||||||
|
* Only record matches that are at least this long.
|
||||||
* @max_len
|
* @max_len
|
||||||
* Maximum match length to return.
|
* The maximum permissible match length at this position.
|
||||||
* @nice_len
|
* @nice_len
|
||||||
* Stop searching if a match of at least this length is found.
|
* Stop searching if a match of at least this length is found.
|
||||||
|
* Must be <= @max_len.
|
||||||
* @max_search_depth
|
* @max_search_depth
|
||||||
* Limit on the number of potential matches to consider.
|
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||||
* @prev_hash
|
* @next_hash
|
||||||
* TODO
|
* Pointer to the hash code for the current sequence, which was computed
|
||||||
* @matches
|
* one position in advance so that the binary tree root could be
|
||||||
* Space to write the matches that are found.
|
* prefetched. This is an input/output parameter.
|
||||||
|
* @best_len_ret
|
||||||
|
* The length of the longest match found is written here. (This is
|
||||||
|
* actually redundant with the 'struct lz_match' array, but this is easier
|
||||||
|
* for the compiler to optimize when inlined and the caller immediately
|
||||||
|
* does a check against 'best_len'.)
|
||||||
|
* @lz_matchptr
|
||||||
|
* An array in which this function will record the matches. The recorded
|
||||||
|
* matches will be sorted by strictly increasing length and strictly
|
||||||
|
* increasing offset. The maximum number of matches that may be found is
|
||||||
|
* 'min(nice_len, max_len) - 3 + 1'.
|
||||||
*
|
*
|
||||||
* Returns the number of matches found, which may be anywhere from 0 to
|
* The return value is a pointer to the next available slot in the @lz_matchptr
|
||||||
* (nice_len - 3 + 1), inclusively. The matches are written to @matches in
|
* array. (If no matches were found, this will be the same as @lz_matchptr.)
|
||||||
* order of strictly increasing length and strictly increasing offset. The
|
|
||||||
* minimum match length is assumed to be 3.
|
|
||||||
*/
|
*/
|
||||||
static inline unsigned
|
static inline struct lz_match *
|
||||||
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
||||||
const u8 * const in_base,
|
const u8 * const in_base,
|
||||||
const u8 * const in_next,
|
const u8 * const in_next,
|
||||||
|
const unsigned min_len,
|
||||||
const unsigned max_len,
|
const unsigned max_len,
|
||||||
const unsigned nice_len,
|
const unsigned nice_len,
|
||||||
const unsigned max_search_depth,
|
const unsigned max_search_depth,
|
||||||
unsigned long *prev_hash,
|
u32 * restrict next_hash,
|
||||||
struct lz_match * const restrict matches)
|
unsigned * restrict best_len_ret,
|
||||||
|
struct lz_match * restrict lz_matchptr)
|
||||||
{
|
{
|
||||||
struct lz_match *lz_matchptr = matches;
|
|
||||||
unsigned depth_remaining = max_search_depth;
|
unsigned depth_remaining = max_search_depth;
|
||||||
unsigned hash;
|
u32 hash;
|
||||||
pos_t cur_match;
|
pos_t cur_node;
|
||||||
const u8 *matchptr;
|
const u8 *matchptr;
|
||||||
unsigned best_len;
|
|
||||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||||
unsigned best_lt_len, best_gt_len;
|
unsigned best_lt_len, best_gt_len;
|
||||||
unsigned len;
|
unsigned len;
|
||||||
pos_t *children;
|
unsigned best_len = min_len - 1;
|
||||||
|
|
||||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
|
if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
|
||||||
return 0;
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
hash = *prev_hash;
|
|
||||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
|
||||||
prefetch(&mf->hash_tab[*prev_hash]);
|
|
||||||
cur_match = mf->hash_tab[hash];
|
|
||||||
mf->hash_tab[hash] = in_next - in_base;
|
|
||||||
|
|
||||||
best_len = 2;
|
|
||||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
|
||||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
|
||||||
best_lt_len = 0;
|
|
||||||
best_gt_len = 0;
|
|
||||||
for (;;) {
|
|
||||||
if (!matchfinder_match_in_window(cur_match,
|
|
||||||
in_base, in_next) ||
|
|
||||||
!depth_remaining--)
|
|
||||||
{
|
|
||||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
|
||||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
|
||||||
return lz_matchptr - matches;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
matchptr = &in_base[cur_match];
|
hash = *next_hash;
|
||||||
len = min(best_lt_len, best_gt_len);
|
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
|
||||||
|
cur_node = mf->hash_tab[hash];
|
||||||
|
mf->hash_tab[hash] = in_next - in_base;
|
||||||
|
prefetch(&mf->hash_tab[*next_hash]);
|
||||||
|
|
||||||
children = &mf->child_tab[(unsigned long)
|
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
|
||||||
matchfinder_slot_for_match(cur_match) << 1];
|
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
|
||||||
|
best_lt_len = 0;
|
||||||
|
best_gt_len = 0;
|
||||||
|
len = 0;
|
||||||
|
|
||||||
|
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
|
||||||
|
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||||
|
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||||
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
if (matchptr[len] == in_next[len]) {
|
if (matchptr[len] == in_next[len]) {
|
||||||
|
|
||||||
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
||||||
|
|
||||||
if (len > best_len) {
|
if (len > best_len) {
|
||||||
best_len = len;
|
best_len = len;
|
||||||
|
|
||||||
lz_matchptr->length = len;
|
lz_matchptr->length = len;
|
||||||
lz_matchptr->offset = in_next - matchptr;
|
lz_matchptr->offset = in_next - matchptr;
|
||||||
lz_matchptr++;
|
lz_matchptr++;
|
||||||
|
|
||||||
if (len >= nice_len) {
|
if (len >= nice_len) {
|
||||||
*pending_lt_ptr = children[0];
|
*pending_lt_ptr = *bt_left_child(mf, cur_node);
|
||||||
*pending_gt_ptr = children[1];
|
*pending_gt_ptr = *bt_right_child(mf, cur_node);
|
||||||
return lz_matchptr - matches;
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matchptr[len] < in_next[len]) {
|
if (matchptr[len] < in_next[len]) {
|
||||||
*pending_lt_ptr = cur_match;
|
*pending_lt_ptr = cur_node;
|
||||||
pending_lt_ptr = &children[1];
|
pending_lt_ptr = bt_right_child(mf, cur_node);
|
||||||
cur_match = *pending_lt_ptr;
|
cur_node = *pending_lt_ptr;
|
||||||
best_lt_len = len;
|
best_lt_len = len;
|
||||||
|
if (best_gt_len < len)
|
||||||
|
len = best_gt_len;
|
||||||
} else {
|
} else {
|
||||||
*pending_gt_ptr = cur_match;
|
*pending_gt_ptr = cur_node;
|
||||||
pending_gt_ptr = &children[0];
|
pending_gt_ptr = bt_left_child(mf, cur_node);
|
||||||
cur_match = *pending_gt_ptr;
|
cur_node = *pending_gt_ptr;
|
||||||
best_gt_len = len;
|
best_gt_len = len;
|
||||||
|
if (best_lt_len < len)
|
||||||
|
len = best_lt_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
|
||||||
|
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||||
|
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||||
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Advance the match-finder, but don't search for matches.
|
* Advance the matchfinder, but don't record any matches.
|
||||||
*
|
*
|
||||||
* @mf
|
* @mf
|
||||||
* The matchfinder structure.
|
* The matchfinder structure.
|
||||||
* @in_base
|
* @in_base
|
||||||
* Pointer to the next byte in the input buffer to process _at the last
|
* Pointer to the next byte in the input buffer to process _at the last
|
||||||
* time bc_matchfinder_init() or bc_matchfinder_slide_window() was called_.
|
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||||
* @in_next
|
* @in_next
|
||||||
* Pointer to the next byte in the input buffer to process.
|
* Pointer to the next byte in the input buffer to process.
|
||||||
* @in_end
|
* @in_end
|
||||||
@ -204,8 +254,14 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
|||||||
* Stop searching if a match of at least this length is found.
|
* Stop searching if a match of at least this length is found.
|
||||||
* @max_search_depth
|
* @max_search_depth
|
||||||
* Limit on the number of potential matches to consider.
|
* Limit on the number of potential matches to consider.
|
||||||
* @prev_hash
|
* @next_hash
|
||||||
* TODO
|
* Pointer to the hash code for the current sequence, which was computed
|
||||||
|
* one position in advance so that the binary tree root could be
|
||||||
|
* prefetched. This is an input/output parameter.
|
||||||
|
*
|
||||||
|
* Note: this is very similar to bt_matchfinder_get_matches() because both
|
||||||
|
* functions must do hashing and tree re-rooting. This version just doesn't
|
||||||
|
* actually record any matches.
|
||||||
*/
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
||||||
@ -214,66 +270,70 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
|||||||
const u8 * const in_end,
|
const u8 * const in_end,
|
||||||
const unsigned nice_len,
|
const unsigned nice_len,
|
||||||
const unsigned max_search_depth,
|
const unsigned max_search_depth,
|
||||||
unsigned long *prev_hash)
|
u32 * restrict next_hash)
|
||||||
{
|
{
|
||||||
unsigned depth_remaining = max_search_depth;
|
unsigned depth_remaining = max_search_depth;
|
||||||
unsigned hash;
|
u32 hash;
|
||||||
pos_t cur_match;
|
pos_t cur_node;
|
||||||
const u8 *matchptr;
|
const u8 *matchptr;
|
||||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||||
unsigned best_lt_len, best_gt_len;
|
unsigned best_lt_len, best_gt_len;
|
||||||
unsigned len;
|
unsigned len;
|
||||||
pos_t *children;
|
|
||||||
|
|
||||||
if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
|
if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
hash = *prev_hash;
|
hash = *next_hash;
|
||||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
|
||||||
prefetch(&mf->hash_tab[*prev_hash]);
|
cur_node = mf->hash_tab[hash];
|
||||||
cur_match = mf->hash_tab[hash];
|
|
||||||
mf->hash_tab[hash] = in_next - in_base;
|
mf->hash_tab[hash] = in_next - in_base;
|
||||||
|
prefetch(&mf->hash_tab[*next_hash]);
|
||||||
|
|
||||||
depth_remaining = max_search_depth;
|
depth_remaining = max_search_depth;
|
||||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
|
||||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
|
||||||
best_lt_len = 0;
|
best_lt_len = 0;
|
||||||
best_gt_len = 0;
|
best_gt_len = 0;
|
||||||
for (;;) {
|
len = 0;
|
||||||
if (!matchfinder_match_in_window(cur_match,
|
|
||||||
in_base, in_next) ||
|
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
|
||||||
!depth_remaining--)
|
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||||
{
|
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
|
||||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
matchptr = &in_base[cur_match];
|
for (;;) {
|
||||||
len = min(best_lt_len, best_gt_len);
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
children = &mf->child_tab[(unsigned long)
|
|
||||||
matchfinder_slot_for_match(cur_match) << 1];
|
|
||||||
|
|
||||||
if (matchptr[len] == in_next[len]) {
|
if (matchptr[len] == in_next[len]) {
|
||||||
len = lz_extend(in_next, matchptr, len + 1, nice_len);
|
len = lz_extend(in_next, matchptr, len + 1, nice_len);
|
||||||
if (len == nice_len) {
|
if (len == nice_len) {
|
||||||
*pending_lt_ptr = children[0];
|
*pending_lt_ptr = *bt_left_child(mf, cur_node);
|
||||||
*pending_gt_ptr = children[1];
|
*pending_gt_ptr = *bt_right_child(mf, cur_node);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matchptr[len] < in_next[len]) {
|
if (matchptr[len] < in_next[len]) {
|
||||||
*pending_lt_ptr = cur_match;
|
*pending_lt_ptr = cur_node;
|
||||||
pending_lt_ptr = &children[1];
|
pending_lt_ptr = bt_right_child(mf, cur_node);
|
||||||
cur_match = *pending_lt_ptr;
|
cur_node = *pending_lt_ptr;
|
||||||
best_lt_len = len;
|
best_lt_len = len;
|
||||||
|
if (best_gt_len < len)
|
||||||
|
len = best_gt_len;
|
||||||
} else {
|
} else {
|
||||||
*pending_gt_ptr = cur_match;
|
*pending_gt_ptr = cur_node;
|
||||||
pending_gt_ptr = &children[0];
|
pending_gt_ptr = bt_left_child(mf, cur_node);
|
||||||
cur_match = *pending_gt_ptr;
|
cur_node = *pending_gt_ptr;
|
||||||
best_gt_len = len;
|
best_gt_len = len;
|
||||||
|
if (best_lt_len < len)
|
||||||
|
len = best_lt_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
|
||||||
|
*pending_lt_ptr = MATCHFINDER_NULL;
|
||||||
|
*pending_gt_ptr = MATCHFINDER_NULL;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@
|
|||||||
#define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
|
#define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
|
||||||
(_a > _b) ? _a : _b; })
|
(_a > _b) ? _a : _b; })
|
||||||
|
|
||||||
#define swap(a, b) ({ __typeof__(a) _a = a; (a) = (b); (b) = _a; })
|
#define swap(a, b) ({ __typeof__(a) _a = (a); (a) = (b); (b) = _a; })
|
||||||
|
|
||||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
|
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
|
||||||
# define compiler_bswap32 __builtin_bswap32
|
# define compiler_bswap32 __builtin_bswap32
|
||||||
|
@ -9,15 +9,19 @@
|
|||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
# include "compiler-gcc.h"
|
# include "compiler-gcc.h"
|
||||||
#else
|
#else
|
||||||
# warning "Unrecognized compiler. Please add a header file for your compiler."
|
# error "Unrecognized compiler. Please add a header file for your compiler."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef LIBEXPORT
|
#ifndef LIBEXPORT
|
||||||
# define LIBEXPORT
|
# define LIBEXPORT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef BUILD_BUG_ON
|
#ifndef _packed_attribute
|
||||||
# define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
|
# error "missing required definition of _packed_attribute"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _aligned_attribute
|
||||||
|
# error "missing required definition of _aligned_attribute"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef likely
|
#ifndef likely
|
||||||
@ -32,13 +36,6 @@
|
|||||||
# define prefetch(addr)
|
# define prefetch(addr)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef _aligned_attribute
|
|
||||||
# error "missing required definition of _aligned_attribute"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef _packed_attribute
|
|
||||||
# error "missing required definition of _packed_attribute"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef CPU_IS_BIG_ENDIAN
|
#ifndef CPU_IS_BIG_ENDIAN
|
||||||
# error "missing required endianness definition"
|
# error "missing required endianness definition"
|
||||||
@ -47,7 +44,6 @@
|
|||||||
#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
|
#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
|
||||||
|
|
||||||
#ifndef UNALIGNED_ACCESS_SPEED
|
#ifndef UNALIGNED_ACCESS_SPEED
|
||||||
# warning "assuming unaligned accesses are not allowed"
|
|
||||||
# define UNALIGNED_ACCESS_SPEED 0
|
# define UNALIGNED_ACCESS_SPEED 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -58,3 +54,7 @@
|
|||||||
#if !defined(min) || !defined(max) || !defined(swap)
|
#if !defined(min) || !defined(max) || !defined(swap)
|
||||||
# error "missing required definitions of min(), max(), and swap() macros"
|
# error "missing required definitions of min(), max(), and swap() macros"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef BUILD_BUG_ON
|
||||||
|
# define BUILD_BUG_ON(expr) ((void)sizeof(char[1 - 2*!!(expr)]))
|
||||||
|
#endif
|
||||||
|
@ -1961,9 +1961,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
struct lz_match *cache_end;
|
struct lz_match *cache_end;
|
||||||
const u8 *in_block_begin;
|
const u8 *in_block_begin;
|
||||||
const u8 *in_block_end;
|
const u8 *in_block_end;
|
||||||
unsigned num_matches;
|
u32 next_hash = 0;
|
||||||
unsigned best_len;
|
|
||||||
unsigned long prev_hash = 0;
|
|
||||||
|
|
||||||
deflate_init_output(&os, out, out_nbytes_avail);
|
deflate_init_output(&os, out, out_nbytes_avail);
|
||||||
deflate_reset_symbol_frequencies(c);
|
deflate_reset_symbol_frequencies(c);
|
||||||
@ -1991,6 +1989,9 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
|
|
||||||
/* Find all match possibilities in this block. */
|
/* Find all match possibilities in this block. */
|
||||||
do {
|
do {
|
||||||
|
struct lz_match *matches;
|
||||||
|
unsigned best_len;
|
||||||
|
|
||||||
/* Decrease the maximum and nice match lengths if we're
|
/* Decrease the maximum and nice match lengths if we're
|
||||||
* approaching the end of the input buffer. */
|
* approaching the end of the input buffer. */
|
||||||
if (unlikely(max_len > in_end - in_next)) {
|
if (unlikely(max_len > in_end - in_next)) {
|
||||||
@ -2028,45 +2029,43 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
* search for matches at almost all positions, so this
|
* search for matches at almost all positions, so this
|
||||||
* advantage of hash chains is negated.
|
* advantage of hash chains is negated.
|
||||||
*/
|
*/
|
||||||
num_matches =
|
matches = cache_ptr;
|
||||||
|
cache_ptr =
|
||||||
bt_matchfinder_get_matches(&c->bt_mf,
|
bt_matchfinder_get_matches(&c->bt_mf,
|
||||||
in_cur_base,
|
in_cur_base,
|
||||||
in_next,
|
in_next,
|
||||||
|
DEFLATE_MIN_MATCH_LEN,
|
||||||
max_len,
|
max_len,
|
||||||
nice_len,
|
nice_len,
|
||||||
c->max_search_depth,
|
c->max_search_depth,
|
||||||
&prev_hash,
|
&next_hash,
|
||||||
|
&best_len,
|
||||||
cache_ptr);
|
cache_ptr);
|
||||||
cache_ptr += num_matches;
|
cache_ptr->length = cache_ptr - matches;
|
||||||
cache_ptr->length = num_matches;
|
|
||||||
cache_ptr->offset = *in_next;
|
cache_ptr->offset = *in_next;
|
||||||
in_next++;
|
in_next++;
|
||||||
cache_ptr++;
|
cache_ptr++;
|
||||||
|
|
||||||
if (num_matches) {
|
|
||||||
best_len = cache_ptr[-2].length;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was a very long match found, don't
|
* If there was a very long match found, don't cache any
|
||||||
* cache any matches for the bytes covered by
|
* matches for the bytes covered by that match. This
|
||||||
* that match. This avoids degenerate behavior
|
* avoids degenerate behavior when compressing highly
|
||||||
* when compressing highly redundant data, where
|
* redundant data, where the number of matches can be
|
||||||
* the number of matches can be very large.
|
* very large.
|
||||||
*
|
*
|
||||||
* This heuristic doesn't actually hurt the
|
* This heuristic doesn't actually hurt the compression
|
||||||
* compression ratio very much. If there's a
|
* ratio very much. If there's a long match, then the
|
||||||
* long match, then the data must be highly
|
* data must be highly compressible, so it doesn't
|
||||||
* compressible, so it doesn't matter much what
|
* matter much what we do.
|
||||||
* we do.
|
|
||||||
*
|
*
|
||||||
* We also trigger this same case when
|
* We also trigger this same case when approaching the
|
||||||
* approaching the desired end of the block.
|
* desired end of the block. This forces the block to
|
||||||
* This forces the block to reach a "stopping
|
* reach a "stopping point" where there are no matches
|
||||||
* point" where there are no matches extending
|
* extending to later positions. (XXX: this behavior is
|
||||||
* to later positions. (XXX: this behavior is
|
|
||||||
* non-optimal and should be improved.)
|
* non-optimal and should be improved.)
|
||||||
*/
|
*/
|
||||||
if (best_len >= min(nice_len, in_block_end - in_next)) {
|
if (best_len >= DEFLATE_MIN_MATCH_LEN &&
|
||||||
|
best_len >= min(nice_len, in_block_end - in_next)) {
|
||||||
--best_len;
|
--best_len;
|
||||||
do {
|
do {
|
||||||
if (unlikely(max_len > in_end - in_next)) {
|
if (unlikely(max_len > in_end - in_next)) {
|
||||||
@ -2085,7 +2084,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
in_end,
|
in_end,
|
||||||
nice_len,
|
nice_len,
|
||||||
c->max_search_depth,
|
c->max_search_depth,
|
||||||
&prev_hash);
|
&next_hash);
|
||||||
|
|
||||||
cache_ptr->length = 0;
|
cache_ptr->length = 0;
|
||||||
cache_ptr->offset = *in_next;
|
cache_ptr->offset = *in_next;
|
||||||
@ -2093,7 +2092,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
cache_ptr++;
|
cache_ptr++;
|
||||||
} while (--best_len);
|
} while (--best_len);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} while (in_next < in_block_end);
|
} while (in_next < in_block_end);
|
||||||
|
|
||||||
/* All the matches for this block have been cached. Now compute
|
/* All the matches for this block have been cached. Now compute
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* endianness.h
|
* endianness.h
|
||||||
*
|
*
|
||||||
* Inline functions for endianness conversion.
|
* Macros and inline functions for endianness conversion.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
@ -57,7 +57,7 @@ gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
|
|||||||
out_next += 4;
|
out_next += 4;
|
||||||
|
|
||||||
/* ISIZE */
|
/* ISIZE */
|
||||||
put_unaligned_u32_le(in_size, out_next);
|
put_unaligned_u32_le((u32)in_size, out_next);
|
||||||
out_next += 4;
|
out_next += 4;
|
||||||
|
|
||||||
return out_next - (u8 *)out;
|
return out_next - (u8 *)out;
|
||||||
|
@ -6,8 +6,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "compiler.h"
|
|
||||||
|
|
||||||
#define GZIP_MIN_HEADER_SIZE 10
|
#define GZIP_MIN_HEADER_SIZE 10
|
||||||
#define GZIP_FOOTER_SIZE 8
|
#define GZIP_FOOTER_SIZE 8
|
||||||
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
|
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
|
||||||
|
@ -54,20 +54,16 @@ gzip_decompress(struct deflate_decompressor *d,
|
|||||||
|
|
||||||
/* Original file name (zero terminated) */
|
/* Original file name (zero terminated) */
|
||||||
if (flg & GZIP_FNAME) {
|
if (flg & GZIP_FNAME) {
|
||||||
while (*in_next != 0 && ++in_next != in_end)
|
while (*in_next++ != 0 && in_next != in_end)
|
||||||
;
|
;
|
||||||
if (in_next != in_end)
|
|
||||||
in_next++;
|
|
||||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* File comment (zero terminated) */
|
/* File comment (zero terminated) */
|
||||||
if (flg & GZIP_FCOMMENT) {
|
if (flg & GZIP_FCOMMENT) {
|
||||||
while (*in_next != 0 && ++in_next != in_end)
|
while (*in_next++ != 0 && ++in_next != in_end)
|
||||||
;
|
;
|
||||||
if (in_next != in_end)
|
|
||||||
in_next++;
|
|
||||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1,37 +1,102 @@
|
|||||||
/*
|
/*
|
||||||
* hc_matchfinder.h
|
* hc_matchfinder.h
|
||||||
*
|
*
|
||||||
* This is a Hash Chain (hc) based matchfinder.
|
* ---------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Algorithm
|
||||||
|
*
|
||||||
|
* This is a Hash Chains (hc) based matchfinder.
|
||||||
*
|
*
|
||||||
* The data structure is a hash table where each hash bucket contains a linked
|
* The data structure is a hash table where each hash bucket contains a linked
|
||||||
* list of sequences, referenced by position.
|
* list (or "chain") of sequences whose first 3 bytes share the same hash code.
|
||||||
|
* Each sequence is identified by its starting position in the input buffer.
|
||||||
*
|
*
|
||||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
* The algorithm processes the input buffer sequentially. At each byte
|
||||||
* that sequence (position) is prepended to the appropriate linked list in the
|
* position, the hash code of the first 3 bytes of the sequence beginning at
|
||||||
* hash table. Since the sequences are inserted in order, each list is always
|
* that position (the sequence being matched against) is computed. This
|
||||||
* sorted by increasing match offset.
|
* identifies the hash bucket to use for that position. Then, this hash
|
||||||
|
* bucket's linked list is searched for matches. Then, a new linked list node
|
||||||
|
* is created to represent the current sequence and is prepended to the list.
|
||||||
*
|
*
|
||||||
* At the same time as inserting a sequence, we may search the linked list for
|
* This algorithm has several useful properties:
|
||||||
* matches with that sequence. At each step, the length of the match is
|
*
|
||||||
* computed. The search ends when the sequences get too far away (outside of
|
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
|
||||||
* the sliding window), or when the list ends (in the code this is the same
|
* sequence occurs prior to the sequence being matched against.
|
||||||
* check as "too far away"), or when 'max_search_depth' positions have been
|
*
|
||||||
* searched, or when a match of at least 'nice_len' bytes has been found.
|
* - The sequences in each linked list are always sorted by decreasing starting
|
||||||
|
* position. Therefore, the closest (smallest offset) matches are found
|
||||||
|
* first, which in many compression formats tend to be the cheapest to encode.
|
||||||
|
*
|
||||||
|
* - Although fast running time is not guaranteed due to the possibility of the
|
||||||
|
* lists getting very long, the worst degenerate behavior can be easily
|
||||||
|
* prevented by capping the number of nodes searched at each position.
|
||||||
|
*
|
||||||
|
* - If the compressor decides not to search for matches at a certain position,
|
||||||
|
* then that position can be quickly inserted without searching the list.
|
||||||
|
*
|
||||||
|
* - The algorithm is adaptable to sliding windows: just store the positions
|
||||||
|
* relative to a "base" value that is updated from time to time, and stop
|
||||||
|
* searching each list when the sequences get too far away.
|
||||||
|
*
|
||||||
|
* ---------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Notes on usage
|
||||||
|
*
|
||||||
|
* You must define MATCHFINDER_WINDOW_ORDER before including this header because
|
||||||
|
* that determines which integer type to use for positions. Since 16-bit
|
||||||
|
* integers are faster than 32-bit integers due to reduced memory usage (and
|
||||||
|
* therefore reduced cache pressure), the code only uses 32-bit integers if they
|
||||||
|
* are needed to represent all possible positions.
|
||||||
|
*
|
||||||
|
* In addition, you must allocate the 'struct hc_matchfinder' on a
|
||||||
|
* MATCHFINDER_ALIGNMENT-aligned boundary.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Optimizations
|
||||||
|
*
|
||||||
|
* The longest_match() and skip_positions() functions are inlined into the
|
||||||
|
* compressors that use them. This isn't just about saving the overhead of a
|
||||||
|
* function call. These functions are intended to be called from the inner
|
||||||
|
* loops of compressors, where giving the compiler more control over register
|
||||||
|
* allocation is very helpful. There is also significant benefit to be gained
|
||||||
|
* from allowing the CPU to predict branches independently at each call site.
|
||||||
|
* For example, "lazy"-style compressors can be written with two calls to
|
||||||
|
* longest_match(), each of which starts with a different 'best_len' and
|
||||||
|
* therefore has significantly different performance characteristics.
|
||||||
|
*
|
||||||
|
* Although any hash function can be used, a multiplicative hash is fast and
|
||||||
|
* works well.
|
||||||
|
*
|
||||||
|
* On some processors, it is significantly faster to extend matches by whole
|
||||||
|
* words (32 or 64 bits) instead of by individual bytes. For this to be the
|
||||||
|
* case, the processor must implement unaligned memory accesses efficiently and
|
||||||
|
* must have either a fast "find first set bit" instruction or a fast "find last
|
||||||
|
* set bit" instruction, depending on the processor's endianness.
|
||||||
|
*
|
||||||
|
* The code uses one loop for finding the first match and one loop for finding a
|
||||||
|
* longer match. Each of these loops is tuned for its respective task and in
|
||||||
|
* combination are faster than a single generalized loop that handles both
|
||||||
|
* tasks.
|
||||||
|
*
|
||||||
|
* The code also uses a tight inner loop that only compares the last and first
|
||||||
|
* bytes of a potential match. It is only when these bytes match that a full
|
||||||
|
* match extension is attempted.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "lz_extend.h"
|
#include "lz_extend.h"
|
||||||
#include "lz_hash3.h"
|
#include "lz_hash.h"
|
||||||
#include "matchfinder_common.h"
|
#include "matchfinder_common.h"
|
||||||
#include "unaligned.h"
|
#include "unaligned.h"
|
||||||
|
|
||||||
#ifndef HC_MATCHFINDER_HASH_ORDER
|
#if MATCHFINDER_WINDOW_ORDER < 14
|
||||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
|
||||||
# define HC_MATCHFINDER_HASH_ORDER 14
|
# define HC_MATCHFINDER_HASH_ORDER 14
|
||||||
# else
|
#else
|
||||||
# define HC_MATCHFINDER_HASH_ORDER 15
|
# define HC_MATCHFINDER_HASH_ORDER 15
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
|
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
|
||||||
@ -73,17 +138,18 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
|||||||
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
|
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
|
||||||
* @in_next
|
* @in_next
|
||||||
* Pointer to the next byte in the input buffer to process. This is the
|
* Pointer to the next byte in the input buffer to process. This is the
|
||||||
* pointer to the bytes being matched against.
|
* pointer to the sequence being matched against.
|
||||||
* @best_len
|
* @best_len
|
||||||
* Require a match at least this long.
|
* Require a match longer than this length.
|
||||||
* @max_len
|
* @max_len
|
||||||
* Maximum match length to return.
|
* The maximum permissible match length at this position.
|
||||||
* @nice_len
|
* @nice_len
|
||||||
* Stop searching if a match of at least this length is found.
|
* Stop searching if a match of at least this length is found.
|
||||||
|
* Must be <= @max_len.
|
||||||
* @max_search_depth
|
* @max_search_depth
|
||||||
* Limit on the number of potential matches to consider.
|
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||||
* @offset_ret
|
* @offset_ret
|
||||||
* The match offset is returned here.
|
* If a match is found, its offset is returned in this location.
|
||||||
*
|
*
|
||||||
* Return the length of the match found, or 'best_len' if no match longer than
|
* Return the length of the match found, or 'best_len' if no match longer than
|
||||||
* 'best_len' was found.
|
* 'best_len' was found.
|
||||||
@ -102,61 +168,57 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
|||||||
const u8 *best_matchptr = best_matchptr; /* uninitialized */
|
const u8 *best_matchptr = best_matchptr; /* uninitialized */
|
||||||
const u8 *matchptr;
|
const u8 *matchptr;
|
||||||
unsigned len;
|
unsigned len;
|
||||||
unsigned hash;
|
|
||||||
pos_t cur_match;
|
|
||||||
u32 first_3_bytes;
|
u32 first_3_bytes;
|
||||||
|
u32 hash;
|
||||||
|
pos_t cur_node;
|
||||||
|
|
||||||
/* Insert the current sequence into the appropriate hash chain. */
|
/* Insert the current sequence into the appropriate linked list. */
|
||||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
|
if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
|
||||||
goto out;
|
goto out;
|
||||||
first_3_bytes = load_u24_unaligned(in_next);
|
first_3_bytes = load_u24_unaligned(in_next);
|
||||||
hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
|
hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
|
||||||
cur_match = mf->hash_tab[hash];
|
cur_node = mf->hash_tab[hash];
|
||||||
mf->next_tab[in_next - in_base] = cur_match;
|
mf->next_tab[in_next - in_base] = cur_node;
|
||||||
mf->hash_tab[hash] = in_next - in_base;
|
mf->hash_tab[hash] = in_next - in_base;
|
||||||
|
|
||||||
if (unlikely(best_len >= max_len))
|
if (unlikely(best_len >= max_len))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* Search the appropriate hash chain for matches. */
|
/* Search the appropriate linked list for matches. */
|
||||||
|
|
||||||
if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
|
if (!(matchfinder_node_valid(cur_node, in_base, in_next)))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (best_len < 3) {
|
if (best_len < 3) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* No length 3 match found yet.
|
/* No length 3 match found yet.
|
||||||
* Check the first 3 bytes. */
|
* Check the first 3 bytes. */
|
||||||
matchptr = &in_base[cur_match];
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
if (load_u24_unaligned(matchptr) == first_3_bytes)
|
if (load_u24_unaligned(matchptr) == first_3_bytes)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Not a match; keep trying. */
|
/* The first 3 bytes did not match. Keep trying. */
|
||||||
cur_match = mf->next_tab[
|
cur_node = mf->next_tab[
|
||||||
matchfinder_slot_for_match(cur_match)];
|
matchfinder_slot_for_match(cur_node)];
|
||||||
if (!matchfinder_match_in_window(cur_match,
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) ||
|
||||||
in_base, in_next))
|
!--depth_remaining)
|
||||||
goto out;
|
|
||||||
if (!--depth_remaining)
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Found a length 3 match. */
|
/* Found a match of length >= 3. Extend it to its full length. */
|
||||||
best_matchptr = matchptr;
|
best_matchptr = matchptr;
|
||||||
best_len = lz_extend(in_next, best_matchptr, 3, max_len);
|
best_len = lz_extend(in_next, best_matchptr, 3, max_len);
|
||||||
if (best_len >= nice_len)
|
if (best_len >= nice_len)
|
||||||
goto out;
|
goto out;
|
||||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||||
goto out;
|
|
||||||
if (!--depth_remaining)
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
matchptr = &in_base[cur_match];
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
/* Already found a length 3 match. Try for a longer match;
|
/* Already found a length 3 match. Try for a longer match;
|
||||||
* start by checking the last 2 bytes and the first 4 bytes. */
|
* start by checking the last 2 bytes and the first 4 bytes. */
|
||||||
@ -170,17 +232,16 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||||
goto out;
|
|
||||||
if (!--depth_remaining)
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UNALIGNED_ACCESS_IS_FAST)
|
#if UNALIGNED_ACCESS_IS_FAST
|
||||||
len = 4;
|
len = 4;
|
||||||
else
|
#else
|
||||||
len = 0;
|
len = 0;
|
||||||
|
#endif
|
||||||
len = lz_extend(in_next, matchptr, len, max_len);
|
len = lz_extend(in_next, matchptr, len, max_len);
|
||||||
if (len > best_len) {
|
if (len > best_len) {
|
||||||
best_len = len;
|
best_len = len;
|
||||||
@ -188,10 +249,8 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
|||||||
if (best_len >= nice_len)
|
if (best_len >= nice_len)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
|
||||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
|
||||||
goto out;
|
|
||||||
if (!--depth_remaining)
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
@ -200,7 +259,7 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Advance the match-finder, but don't search for matches.
|
* Advance the matchfinder, but don't search for matches.
|
||||||
*
|
*
|
||||||
* @mf
|
* @mf
|
||||||
* The matchfinder structure.
|
* The matchfinder structure.
|
||||||
@ -212,7 +271,7 @@ out:
|
|||||||
* @in_end
|
* @in_end
|
||||||
* Pointer to the end of the input buffer.
|
* Pointer to the end of the input buffer.
|
||||||
* @count
|
* @count
|
||||||
* Number of bytes to skip; must be > 0.
|
* The number of bytes to advance. Must be > 0.
|
||||||
*/
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
||||||
@ -221,13 +280,13 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
|||||||
const u8 *in_end,
|
const u8 *in_end,
|
||||||
unsigned count)
|
unsigned count)
|
||||||
{
|
{
|
||||||
unsigned hash;
|
u32 hash;
|
||||||
|
|
||||||
if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
|
if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
hash = lz_hash3(in_next, HC_MATCHFINDER_HASH_ORDER);
|
hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
|
||||||
mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
|
mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
|
||||||
mf->hash_tab[hash] = in_next - in_base;
|
mf->hash_tab[hash] = in_next - in_base;
|
||||||
in_next++;
|
in_next++;
|
||||||
|
41
src/lz_hash.h
Normal file
41
src/lz_hash.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
/*
|
||||||
|
* lz_hash.h
|
||||||
|
*
|
||||||
|
* Hashing for Lempel-Ziv matchfinding.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _LZ_HASH_H
|
||||||
|
#define _LZ_HASH_H
|
||||||
|
|
||||||
|
#include "unaligned.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The hash function: given a sequence prefix held in the low-order bits of a
|
||||||
|
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
|
||||||
|
* bits of the product that don't fit in a 32-bit value, but take the
|
||||||
|
* next-highest @num_bits bits of the product as the hash value, as those have
|
||||||
|
* the most randomness.
|
||||||
|
*/
|
||||||
|
static inline u32
|
||||||
|
lz_hash(u32 seq, unsigned num_bits)
|
||||||
|
{
|
||||||
|
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash the 3-byte sequence beginning at @p, producing a hash of length
|
||||||
|
* @num_bits bits. At least LZ_HASH3_REQUIRED_NBYTES bytes of data must be
|
||||||
|
* available at @p; note that this may be more than 3.
|
||||||
|
*/
|
||||||
|
static inline u32
|
||||||
|
lz_hash_3_bytes(const u8 *p, unsigned num_bits)
|
||||||
|
{
|
||||||
|
u32 seq = load_u24_unaligned(p);
|
||||||
|
if (num_bits >= 24)
|
||||||
|
return seq;
|
||||||
|
return lz_hash(seq, num_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define LZ_HASH3_REQUIRED_NBYTES LOAD_U24_REQUIRED_NBYTES
|
||||||
|
|
||||||
|
#endif /* _LZ_HASH_H */
|
@ -1,49 +0,0 @@
|
|||||||
/*
|
|
||||||
* lz_hash3.h
|
|
||||||
*
|
|
||||||
* 3-byte hashing for Lempel-Ziv matchfinding.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "unaligned.h"
|
|
||||||
|
|
||||||
static inline u32
|
|
||||||
loaded_u32_to_u24(u32 v)
|
|
||||||
{
|
|
||||||
if (CPU_IS_LITTLE_ENDIAN)
|
|
||||||
return v & 0xFFFFFF;
|
|
||||||
else
|
|
||||||
return v >> 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline u32
|
|
||||||
load_u24_unaligned(const u8 *p)
|
|
||||||
{
|
|
||||||
if (UNALIGNED_ACCESS_IS_FAST)
|
|
||||||
return loaded_u32_to_u24(load_u32_unaligned(p));
|
|
||||||
else
|
|
||||||
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline u32
|
|
||||||
lz_hash3_u24(u32 str, unsigned num_bits)
|
|
||||||
{
|
|
||||||
return (u32)(str * 0x1E35A7BD) >> (32 - num_bits);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Hash the next 3-byte sequence in the window, producing a hash of length
|
|
||||||
* 'num_bits' bits. At least LZ_HASH_REQUIRED_NBYTES must be available at 'p';
|
|
||||||
* this might be 4 bytes rather than 3 because an unaligned load is faster on
|
|
||||||
* some architectures.
|
|
||||||
*/
|
|
||||||
static inline u32
|
|
||||||
lz_hash3(const u8 *p, unsigned num_bits)
|
|
||||||
{
|
|
||||||
return lz_hash3_u24(load_u24_unaligned(p), num_bits);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Number of bytes the hash function actually requires be available, due to the
|
|
||||||
* possibility of an unaligned load. */
|
|
||||||
#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)
|
|
@ -16,9 +16,9 @@ matchfinder_init_avx2(pos_t *data, size_t size)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (sizeof(pos_t) == 2)
|
if (sizeof(pos_t) == 2)
|
||||||
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
v = _mm256_set1_epi16((u16)MATCHFINDER_NULL);
|
||||||
else if (sizeof(pos_t) == 4)
|
else if (sizeof(pos_t) == 4)
|
||||||
v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
|
v = _mm256_set1_epi32((u32)MATCHFINDER_NULL);
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ static inline bool
|
|||||||
matchfinder_memset_init_okay(void)
|
matchfinder_memset_init_okay(void)
|
||||||
{
|
{
|
||||||
/* All bytes must match in order to use memset. */
|
/* All bytes must match in order to use memset. */
|
||||||
const pos_t v = MATCHFINDER_INITVAL;
|
const pos_t v = MATCHFINDER_NULL;
|
||||||
if (sizeof(pos_t) == 2)
|
if (sizeof(pos_t) == 2)
|
||||||
return (u8)v == (u8)(v >> 8);
|
return (u8)v == (u8)(v >> 8);
|
||||||
if (sizeof(pos_t) == 4)
|
if (sizeof(pos_t) == 4)
|
||||||
@ -93,12 +93,12 @@ matchfinder_init(pos_t *data, size_t num_entries)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (matchfinder_memset_init_okay()) {
|
if (matchfinder_memset_init_okay()) {
|
||||||
memset(data, (u8)MATCHFINDER_INITVAL, size);
|
memset(data, (u8)MATCHFINDER_NULL, size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < num_entries; i++)
|
for (size_t i = 0; i < num_entries; i++)
|
||||||
data[i] = MATCHFINDER_INITVAL;
|
data[i] = MATCHFINDER_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if MATCHFINDER_IS_SLIDING
|
#if MATCHFINDER_IS_SLIDING
|
||||||
|
@ -16,12 +16,12 @@ typedef u32 pos_t;
|
|||||||
|
|
||||||
/* Not all the bits of the position type are needed, so the sign bit can be
|
/* Not all the bits of the position type are needed, so the sign bit can be
|
||||||
* reserved to mean "out of bounds". */
|
* reserved to mean "out of bounds". */
|
||||||
#define MATCHFINDER_INITVAL ((pos_t)-1)
|
#define MATCHFINDER_NULL ((pos_t)-1)
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||||
{
|
{
|
||||||
return !(cur_match & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
|
return !(cur_node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
@ -30,18 +30,18 @@ matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_nex
|
|||||||
* This prevents the beginning of the buffer from matching anything; however,
|
* This prevents the beginning of the buffer from matching anything; however,
|
||||||
* this doesn't matter much. */
|
* this doesn't matter much. */
|
||||||
|
|
||||||
#define MATCHFINDER_INITVAL ((pos_t)0)
|
#define MATCHFINDER_NULL ((pos_t)0)
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||||
{
|
{
|
||||||
return cur_match != 0;
|
return cur_node != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline pos_t
|
static inline pos_t
|
||||||
matchfinder_slot_for_match(pos_t cur_match)
|
matchfinder_slot_for_match(pos_t cur_node)
|
||||||
{
|
{
|
||||||
return cur_match;
|
return cur_node;
|
||||||
}
|
}
|
||||||
|
@ -13,18 +13,18 @@ typedef s16 pos_t;
|
|||||||
typedef s32 pos_t;
|
typedef s32 pos_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
|
#define MATCHFINDER_NULL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||||
|
|
||||||
/* In the sliding window case, positions are stored relative to 'in_base'. */
|
/* In the sliding window case, positions are stored relative to 'in_base'. */
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
|
||||||
{
|
{
|
||||||
return cur_match > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
|
return cur_node > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline pos_t
|
static inline pos_t
|
||||||
matchfinder_slot_for_match(pos_t cur_match)
|
matchfinder_slot_for_match(pos_t cur_node)
|
||||||
{
|
{
|
||||||
return cur_match & (MATCHFINDER_WINDOW_SIZE - 1);
|
return cur_node & (MATCHFINDER_WINDOW_SIZE - 1);
|
||||||
}
|
}
|
||||||
|
@ -16,9 +16,9 @@ matchfinder_init_sse2(pos_t *data, size_t size)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (sizeof(pos_t) == 2)
|
if (sizeof(pos_t) == 2)
|
||||||
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
v = _mm_set1_epi16((u16)MATCHFINDER_NULL);
|
||||||
else if (sizeof(pos_t) == 4)
|
else if (sizeof(pos_t) == 4)
|
||||||
v = _mm_set1_epi32(MATCHFINDER_INITVAL);
|
v = _mm_set1_epi32((u32)MATCHFINDER_NULL);
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
typedef uint8_t u8;
|
typedef uint8_t u8;
|
||||||
typedef uint16_t u16;
|
typedef uint16_t u16;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* unaligned.h
|
* unaligned.h
|
||||||
*
|
*
|
||||||
* Inline functions for unaligned memory access.
|
* Inline functions for unaligned memory accesses.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
@ -214,3 +214,36 @@ put_unaligned_u32_be(u32 v, void *p)
|
|||||||
p8[3] = (v >> 0) & 0xFF;
|
p8[3] = (v >> 0) & 0xFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a 32-bit value that was loaded with the platform's native endianness,
|
||||||
|
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
|
||||||
|
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
|
||||||
|
* order, at the memory location from which the input 32-bit value was loaded.
|
||||||
|
*/
|
||||||
|
static inline u32
|
||||||
|
loaded_u32_to_u24(u32 v)
|
||||||
|
{
|
||||||
|
if (CPU_IS_LITTLE_ENDIAN)
|
||||||
|
return v & 0xFFFFFF;
|
||||||
|
else
|
||||||
|
return v >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load the next 3 bytes from the memory location @p into the 24 low-order bits
|
||||||
|
* of a 32-bit value. The order in which the 3 bytes will be arranged as octets
|
||||||
|
* in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
|
||||||
|
* bytes must be available at @p; note that this may be more than 3.
|
||||||
|
*/
|
||||||
|
static inline u32
|
||||||
|
load_u24_unaligned(const u8 *p)
|
||||||
|
{
|
||||||
|
#if UNALIGNED_ACCESS_IS_FAST
|
||||||
|
# define LOAD_U24_REQUIRED_NBYTES 4
|
||||||
|
return loaded_u32_to_u24(load_u32_unaligned(p));
|
||||||
|
#else
|
||||||
|
# define LOAD_U24_REQUIRED_NBYTES 3
|
||||||
|
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
@ -1,11 +1,9 @@
|
|||||||
/*
|
/*
|
||||||
* benchmark.c - A compression testing and benchmark program.
|
* benchmark.c - A compression testing and benchmark program.
|
||||||
*
|
*
|
||||||
* The author dedicates this file to the public domain.
|
* This file has no copyright assigned and is placed in the Public Domain.
|
||||||
* You can do whatever you want with this file.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#define _FILE_OFFSET_BITS 64
|
#define _FILE_OFFSET_BITS 64
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
@ -419,9 +417,9 @@ main(int argc, char **argv)
|
|||||||
wrapper == NO_WRAPPER ? "None" :
|
wrapper == NO_WRAPPER ? "None" :
|
||||||
wrapper == ZLIB_WRAPPER ? "zlib" : "gzip");
|
wrapper == ZLIB_WRAPPER ? "zlib" : "gzip");
|
||||||
printf("\tCompression engine: %s\n",
|
printf("\tCompression engine: %s\n",
|
||||||
compress_with_libz ? "zlib" : "libdeflate");
|
compress_with_libz ? "libz" : "libdeflate");
|
||||||
printf("\tDecompression engine: %s\n",
|
printf("\tDecompression engine: %s\n",
|
||||||
decompress_with_libz ? "zlib" : "libdeflate");
|
decompress_with_libz ? "libz" : "libdeflate");
|
||||||
|
|
||||||
ubuf1 = malloc(chunk_size);
|
ubuf1 = malloc(chunk_size);
|
||||||
ubuf2 = malloc(chunk_size);
|
ubuf2 = malloc(chunk_size);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user