Cleanups and matchfinder updates

Eric Biggers 2015-01-22 00:05:01 -06:00
parent fed4597943
commit 5f3208e788
26 changed files with 528 additions and 380 deletions

View File

@ -78,7 +78,7 @@ install(FILES libdeflate.h DESTINATION "${CMAKE_INSTALL_PREFIX}/include")
option(BUILD_BENCHMARK "Build benchmark program" OFF)
add_executable(benchmark test/benchmark.c)
target_link_libraries(benchmark deflate -lz)
target_link_libraries(benchmark deflatestatic -lz)
option(BUILD_GEN_CRC32_TABLE "Build CRC32 table generation program" OFF)
add_executable(gen_crc32_table test/gen_crc32_table.c)

View File

@ -1,7 +1,9 @@
/*
* libdeflate.h
*
* Public header for the DEFLATE compression library.
* Public header for libdeflate.
*
* This file has no copyright assigned and is placed in the Public Domain.
*/
#ifndef LIBDEFLATE_H
@ -26,7 +28,9 @@ struct deflate_compressor;
* fastest, 6 = medium/default, 9 = slowest). The return value is a pointer to
* the new DEFLATE compressor, or NULL if out of memory.
*
* Note: the sliding window size is defined at compilation time (default 32768).
* Note: for compression, the sliding window size is defined at compilation time
* to 32768, the largest size permissible in the DEFLATE format. It cannot be
* changed at runtime.
*/
extern struct deflate_compressor *
deflate_alloc_compressor(unsigned int compression_level);
@ -44,7 +48,7 @@ deflate_compress(struct deflate_compressor *compressor,
void *out, size_t out_nbytes_avail);
/*
* Like deflate_compress(), but store the data in the zlib wrapper format.
* Like deflate_compress(), but stores the data in the zlib wrapper format.
*/
extern size_t
zlib_compress(struct deflate_compressor *compressor,
@ -52,7 +56,7 @@ zlib_compress(struct deflate_compressor *compressor,
void *out, size_t out_nbytes_avail);
/*
* Like deflate_compress(), but store the data in the gzip wrapper format.
* Like deflate_compress(), but stores the data in the gzip wrapper format.
*/
extern size_t
gzip_compress(struct deflate_compressor *compressor,
@ -61,7 +65,8 @@ gzip_compress(struct deflate_compressor *compressor,
/*
* deflate_free_compressor() frees a DEFLATE compressor that was allocated with
* deflate_alloc_compressor().
* deflate_alloc_compressor(). If a NULL pointer is passed in, no action is
* taken.
*/
extern void
deflate_free_compressor(struct deflate_compressor *compressor);
@ -79,7 +84,9 @@ struct deflate_decompressor;
*
* This function takes no parameters, and the returned decompressor is valid for
* decompressing data that was compressed at any compression level and with any
* sliding window size.
* sliding window size. It can also be used for any wrapper format (raw
* DEFLATE, zlib, or gzip); however, the appropriate decompression function must
* be called.
*/
extern struct deflate_decompressor *
deflate_alloc_decompressor(void);
@ -118,7 +125,8 @@ gzip_decompress(struct deflate_decompressor *decompressor,
/*
* deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
* with deflate_alloc_decompressor().
* with deflate_alloc_decompressor(). If a NULL pointer is passed in, no action
* is taken.
*/
extern void
deflate_free_decompressor(struct deflate_decompressor *decompressor);
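
Taken together, the compression half of this API supports a short usage sketch. This is an editor's illustration, not code from the commit: the full deflate_compress() prototype is assumed to be (compressor, in, in_nbytes, out, out_nbytes_avail), matching the fragments above, and a return value of 0 is assumed to mean the output buffer was too small.

#include "libdeflate.h"

/* Compress 'in' at level 6; returns the compressed size, or 0 on
 * allocation failure or insufficient output space. */
static size_t
try_deflate(const void *in, size_t in_nbytes,
            void *out, size_t out_nbytes_avail)
{
        struct deflate_compressor *c = deflate_alloc_compressor(6);
        size_t csize = 0;

        if (c != NULL) {
                csize = deflate_compress(c, in, in_nbytes,
                                         out, out_nbytes_avail);
                deflate_free_compressor(c);  /* NULL-safe, per the docs above */
        }
        return csize;
}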

View File

@ -39,7 +39,7 @@
#define UNROLL_FACTOR 4
u32
adler32(const u8 *buffer, size_t size)
adler32(const void *buffer, size_t size)
{
u32 s1 = 1;
u32 s2 = 0;
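
For reference, the function being tuned here is plain Adler-32 (modulus 65521). An unoptimized version that the unrolled code must agree with looks like the sketch below, using the project's u8/u32 typedefs; the optimized version defers the expensive modulo across many bytes, whereas this reference applies it per byte.

static u32
adler32_reference(const void *buffer, size_t size)
{
        const u8 *p = buffer;
        u32 s1 = 1;
        u32 s2 = 0;

        while (size--) {
                s1 = (s1 + *p++) % 65521;
                s2 = (s2 + s1) % 65521;
        }
        return (s2 << 16) | s1;
}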

View File

@ -9,4 +9,4 @@
#include "types.h"
extern u32
adler32(const u8 *buffer, size_t size);
adler32(const void *buffer, size_t size);

View File

@ -11,7 +11,8 @@
/* Find Last Set bit */
static inline unsigned fls32(u32 v)
static inline unsigned
fls32(u32 v)
{
#ifdef compiler_fls32
return compiler_fls32(v);
@ -23,7 +24,8 @@ static inline unsigned fls32(u32 v)
#endif
}
static inline unsigned fls64(u64 v)
static inline unsigned
fls64(u64 v)
{
#ifdef compiler_fls64
return compiler_fls64(v);
@ -35,7 +37,8 @@ static inline unsigned fls64(u64 v)
#endif
}
static inline unsigned flsw(machine_word_t v)
static inline unsigned
flsw(machine_word_t v)
{
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
if (WORDSIZE == 4)
@ -46,7 +49,8 @@ static inline unsigned flsw(machine_word_t v)
/* Find First Set bit */
static inline unsigned ffs32(u32 v)
static inline unsigned
ffs32(u32 v)
{
#ifdef compiler_ffs32
return compiler_ffs32(v);
@ -58,7 +62,8 @@ static inline unsigned ffs32(u32 v)
#endif
}
static inline unsigned ffs64(u64 v)
static inline unsigned
ffs64(u64 v)
{
#ifdef compiler_ffs64
return compiler_ffs64(v);
@ -70,7 +75,8 @@ static inline unsigned ffs64(u64 v)
#endif
}
static inline unsigned ffsw(machine_word_t v)
static inline unsigned
ffsw(machine_word_t v)
{
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
if (WORDSIZE == 4)
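
The bit-index conventions here are easiest to pin down with concrete values, given the GCC-builtin definitions that appear later in this commit (compiler_fls32(n) == 31 - __builtin_clz(n), compiler_ffs32(n) == __builtin_ctz(n)):

/* Examples of the conventions used above:
 * fls32(0x00000001) == 0,  fls32(0x00000010) == 4,  fls32(0x80000000) == 31
 * ffs32(0x00000001) == 0,  ffs32(0x00000018) == 3   (0x18 == 0b11000)
 * so fls32(v) is the integer log2 of any nonzero v. */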

View File

@ -1,51 +1,56 @@
/*
* bt_matchfinder.h
*
* This is a Binary Tree (bt) based matchfinder.
* ----------------------------------------------------------------------------
*
* This is a Binary Trees (bt) based matchfinder.
*
* The data structure is a hash table where each hash bucket contains a binary
* tree of sequences, referenced by position. The sequences in the binary tree
* are ordered such that a left child is lexicographically lesser than its
* parent, and a right child is lexicographically greater than its parent.
* tree of sequences whose first 3 bytes share the same hash code. Each
* sequence is identified by its starting position in the input buffer. Each
* binary tree is always sorted such that each left child represents a sequence
* lexicographically lesser than its parent and each right child represents a
* sequence lexicographically greater than its parent.
*
* For each sequence (position) in the input, the first 3 bytes are hashed and
* the appropriate binary tree is re-rooted at that sequence (position).
* Since the sequences are inserted in order, each binary tree maintains the
* invariant that each child node has greater match offset than its parent.
* The algorithm processes the input buffer sequentially. At each byte
* position, the hash code of the first 3 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, a new binary tree
* node is created to represent the current sequence. Then, in a single tree
* traversal, the hash bucket's binary tree is searched for matches and is
* re-rooted at the new node.
*
* While inserting a sequence, we may search the binary tree for matches with
* that sequence. At each step, the length of the match is computed. The
* search ends when the sequences get too far away (outside of the sliding
* window), or when the binary tree ends (in the code this is the same check as
* "too far away"), or when 'max_search_depth' positions have been searched, or
* when a match of at least 'nice_len' bytes has been found.
* Compared to the simpler algorithm that uses linked lists instead of binary
* trees (see hc_matchfinder.h), the binary tree version gains more information
* at each node visitation. Ideally, the binary tree version will examine only
* 'log(n)' nodes to find the same matches that the linked list version will
* find by examining 'n' nodes. In addition, the binary tree version can
* examine fewer bytes at each node by taking advantage of the common prefixes
* that result from the sort order, whereas the linked list version may have to
* examine up to the full length of the match at each node.
*
* Notes:
* However, it is not always best to use the binary tree version. It requires
* nearly twice as much memory as the linked list version, and it takes time to
* keep the binary trees sorted, even at positions where the compressor does not
* need matches. Generally, when doing fast compression on small buffers,
* binary trees are the wrong approach. They are best suited for thorough
* compression and/or large buffers.
*
* - Typically, we need to search more nodes to find a given match in a
* binary tree versus in a linked list. However, a binary tree has more
* overhead than a linked list: it needs to be kept sorted, and the inner
* search loop is more complicated. As a result, binary trees are best
* suited for compression modes where the potential matches are searched
* more thoroughly.
*
* - Since no attempt is made to keep the binary trees balanced, it's
* essential to have the 'max_search_depth' cutoff. Otherwise it could
* take quadratic time to run data through the matchfinder.
* ----------------------------------------------------------------------------
*/
#pragma once
#include "lz_extend.h"
#include "lz_hash3.h"
#include "lz_hash.h"
#include "matchfinder_common.h"
#ifndef BT_MATCHFINDER_HASH_ORDER
# if MATCHFINDER_WINDOW_ORDER < 14
# define BT_MATCHFINDER_HASH_ORDER 14
# else
# define BT_MATCHFINDER_HASH_ORDER 15
# endif
#if MATCHFINDER_WINDOW_ORDER < 13
# define BT_MATCHFINDER_HASH_ORDER 14
#elif MATCHFINDER_WINDOW_ORDER < 15
# define BT_MATCHFINDER_HASH_ORDER 15
#else
# define BT_MATCHFINDER_HASH_ORDER 16
#endif
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
@ -77,8 +82,37 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
}
#endif
static inline u32
bt_matchfinder_hash_3_bytes(const u8 *in_next)
{
return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER);
}
static inline pos_t *
bt_child(struct bt_matchfinder *mf, pos_t node, int offset)
{
if (MATCHFINDER_WINDOW_ORDER < sizeof(pos_t) * 8) {
/* no cast needed */
return &mf->child_tab[(matchfinder_slot_for_match(node) << 1) + offset];
} else {
return &mf->child_tab[((size_t)matchfinder_slot_for_match(node) << 1) + offset];
}
}
static inline pos_t *
bt_left_child(struct bt_matchfinder *mf, pos_t node)
{
return bt_child(mf, node, 0);
}
static inline pos_t *
bt_right_child(struct bt_matchfinder *mf, pos_t node)
{
return bt_child(mf, node, 1);
}
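
The search routines below rely on a single-pass "root insertion": the new node becomes the root, while two pending link slots collect the lesser and greater pieces of the old tree as the traversal descends. Here is that technique in isolation, as an editor's sketch on a toy integer BST unrelated to the matchfinder's types:

struct toy_node { int key; struct toy_node *left, *right; };

static struct toy_node *
toy_insert_and_reroot(struct toy_node *root, struct toy_node *new_node)
{
        struct toy_node **pending_lt = &new_node->left;
        struct toy_node **pending_gt = &new_node->right;
        struct toy_node *cur = root;

        while (cur != NULL) {
                if (cur->key < new_node->key) {
                        *pending_lt = cur;        /* cur + its left subtree are lesser */
                        pending_lt = &cur->right; /* next lesser node hangs here */
                        cur = cur->right;         /* right subtree may straddle the key */
                } else {
                        *pending_gt = cur;        /* cur + its right subtree are greater */
                        pending_gt = &cur->left;  /* next greater node hangs here */
                        cur = cur->left;
                }
        }
        *pending_lt = NULL;
        *pending_gt = NULL;
        return new_node;                          /* the new root */
}

In bt_matchfinder_get_matches() below, the comparison is lexicographic on the input sequences, the links live in child_tab, and the same traversal simultaneously reports match lengths.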
/*
* Find matches with the current sequence.
* Retrieve a list of matches with the current position.
*
* @mf
* The matchfinder structure.
@ -87,115 +121,131 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
* @in_next
* Pointer to the next byte in the input buffer to process. This is the
* pointer to the bytes being matched against.
* pointer to the sequence being matched against.
* @min_len
* Only record matches that are at least this long.
* @max_len
* Maximum match length to return.
* The maximum permissible match length at this position.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider.
* @prev_hash
* TODO
* @matches
* Space to write the matches that are found.
* Limit on the number of potential matches to consider. Must be >= 1.
* @next_hash
* Pointer to the hash code for the current sequence, which was computed
* one position in advance so that the binary tree root could be
* prefetched. This is an input/output parameter.
* @best_len_ret
* The length of the longest match found is written here. (This is
* actually redundant with the 'struct lz_match' array, but this is easier
* for the compiler to optimize when inlined and the caller immediately
* does a check against 'best_len'.)
* @lz_matchptr
* An array in which this function will record the matches. The recorded
* matches will be sorted by strictly increasing length and strictly
* increasing offset. The maximum number of matches that may be found is
* 'min(nice_len, max_len) - 3 + 1'.
*
* Returns the number of matches found, which may be anywhere from 0 to
* (nice_len - 3 + 1), inclusively. The matches are written to @matches in
* order of strictly increasing length and strictly increasing offset. The
* minimum match length is assumed to be 3.
* The return value is a pointer to the next available slot in the @lz_matchptr
* array. (If no matches were found, this will be the same as @lz_matchptr.)
*/
static inline unsigned
static inline struct lz_match *
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
const u8 * const in_base,
const u8 * const in_next,
const unsigned min_len,
const unsigned max_len,
const unsigned nice_len,
const unsigned max_search_depth,
unsigned long *prev_hash,
struct lz_match * const restrict matches)
u32 * restrict next_hash,
unsigned * restrict best_len_ret,
struct lz_match * restrict lz_matchptr)
{
struct lz_match *lz_matchptr = matches;
unsigned depth_remaining = max_search_depth;
unsigned hash;
pos_t cur_match;
u32 hash;
pos_t cur_node;
const u8 *matchptr;
unsigned best_len;
pos_t *pending_lt_ptr, *pending_gt_ptr;
unsigned best_lt_len, best_gt_len;
unsigned len;
pos_t *children;
unsigned best_len = min_len - 1;
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
return 0;
if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
*best_len_ret = best_len;
return lz_matchptr;
}
hash = *prev_hash;
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
prefetch(&mf->hash_tab[*prev_hash]);
cur_match = mf->hash_tab[hash];
hash = *next_hash;
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
cur_node = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_base;
prefetch(&mf->hash_tab[*next_hash]);
best_len = 2;
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
best_lt_len = 0;
best_gt_len = 0;
len = 0;
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
*pending_lt_ptr = MATCHFINDER_NULL;
*pending_gt_ptr = MATCHFINDER_NULL;
*best_len_ret = best_len;
return lz_matchptr;
}
for (;;) {
if (!matchfinder_match_in_window(cur_match,
in_base, in_next) ||
!depth_remaining--)
{
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return lz_matchptr - matches;
}
matchptr = &in_base[cur_match];
len = min(best_lt_len, best_gt_len);
children = &mf->child_tab[(unsigned long)
matchfinder_slot_for_match(cur_match) << 1];
matchptr = &in_base[cur_node];
if (matchptr[len] == in_next[len]) {
len = lz_extend(in_next, matchptr, len + 1, max_len);
if (len > best_len) {
best_len = len;
lz_matchptr->length = len;
lz_matchptr->offset = in_next - matchptr;
lz_matchptr++;
if (len >= nice_len) {
*pending_lt_ptr = children[0];
*pending_gt_ptr = children[1];
return lz_matchptr - matches;
*pending_lt_ptr = *bt_left_child(mf, cur_node);
*pending_gt_ptr = *bt_right_child(mf, cur_node);
*best_len_ret = best_len;
return lz_matchptr;
}
}
}
if (matchptr[len] < in_next[len]) {
*pending_lt_ptr = cur_match;
pending_lt_ptr = &children[1];
cur_match = *pending_lt_ptr;
*pending_lt_ptr = cur_node;
pending_lt_ptr = bt_right_child(mf, cur_node);
cur_node = *pending_lt_ptr;
best_lt_len = len;
if (best_gt_len < len)
len = best_gt_len;
} else {
*pending_gt_ptr = cur_match;
pending_gt_ptr = &children[0];
cur_match = *pending_gt_ptr;
*pending_gt_ptr = cur_node;
pending_gt_ptr = bt_left_child(mf, cur_node);
cur_node = *pending_gt_ptr;
best_gt_len = len;
if (best_lt_len < len)
len = best_lt_len;
}
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
*pending_lt_ptr = MATCHFINDER_NULL;
*pending_gt_ptr = MATCHFINDER_NULL;
*best_len_ret = best_len;
return lz_matchptr;
}
}
}
/*
* Advance the match-finder, but don't search for matches.
* Advance the matchfinder, but don't record any matches.
*
* @mf
* The matchfinder structure.
* @in_base
* Pointer to the next byte in the input buffer to process _at the last
* time bc_matchfinder_init() or bc_matchfinder_slide_window() was called_.
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
* @in_next
* Pointer to the next byte in the input buffer to process.
* @in_end
@ -204,8 +254,14 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
* Stop searching if a match of at least this length is found.
* @max_search_depth
* Limit on the number of potential matches to consider.
* @prev_hash
* TODO
* @next_hash
* Pointer to the hash code for the current sequence, which was computed
* one position in advance so that the binary tree root could be
* prefetched. This is an input/output parameter.
*
* Note: this is very similar to bt_matchfinder_get_matches() because both
* functions must do hashing and tree re-rooting. This version just doesn't
* actually record any matches.
*/
static inline void
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
@ -214,66 +270,70 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
const u8 * const in_end,
const unsigned nice_len,
const unsigned max_search_depth,
unsigned long *prev_hash)
u32 * restrict next_hash)
{
unsigned depth_remaining = max_search_depth;
unsigned hash;
pos_t cur_match;
u32 hash;
pos_t cur_node;
const u8 *matchptr;
pos_t *pending_lt_ptr, *pending_gt_ptr;
unsigned best_lt_len, best_gt_len;
unsigned len;
pos_t *children;
if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1))
return;
hash = *prev_hash;
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
prefetch(&mf->hash_tab[*prev_hash]);
cur_match = mf->hash_tab[hash];
hash = *next_hash;
*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
cur_node = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_base;
prefetch(&mf->hash_tab[*next_hash]);
depth_remaining = max_search_depth;
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
pending_lt_ptr = bt_left_child(mf, in_next - in_base);
pending_gt_ptr = bt_right_child(mf, in_next - in_base);
best_lt_len = 0;
best_gt_len = 0;
len = 0;
if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
*pending_lt_ptr = MATCHFINDER_NULL;
*pending_gt_ptr = MATCHFINDER_NULL;
return;
}
for (;;) {
if (!matchfinder_match_in_window(cur_match,
in_base, in_next) ||
!depth_remaining--)
{
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return;
}
matchptr = &in_base[cur_match];
len = min(best_lt_len, best_gt_len);
children = &mf->child_tab[(unsigned long)
matchfinder_slot_for_match(cur_match) << 1];
matchptr = &in_base[cur_node];
if (matchptr[len] == in_next[len]) {
len = lz_extend(in_next, matchptr, len + 1, nice_len);
if (len == nice_len) {
*pending_lt_ptr = children[0];
*pending_gt_ptr = children[1];
*pending_lt_ptr = *bt_left_child(mf, cur_node);
*pending_gt_ptr = *bt_right_child(mf, cur_node);
return;
}
}
if (matchptr[len] < in_next[len]) {
*pending_lt_ptr = cur_match;
pending_lt_ptr = &children[1];
cur_match = *pending_lt_ptr;
*pending_lt_ptr = cur_node;
pending_lt_ptr = bt_right_child(mf, cur_node);
cur_node = *pending_lt_ptr;
best_lt_len = len;
if (best_gt_len < len)
len = best_gt_len;
} else {
*pending_gt_ptr = cur_match;
pending_gt_ptr = &children[0];
cur_match = *pending_gt_ptr;
*pending_gt_ptr = cur_node;
pending_gt_ptr = bt_left_child(mf, cur_node);
cur_node = *pending_gt_ptr;
best_gt_len = len;
if (best_lt_len < len)
len = best_lt_len;
}
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
*pending_lt_ptr = MATCHFINDER_NULL;
*pending_gt_ptr = MATCHFINDER_NULL;
return;
}
}
}

View File

@ -35,7 +35,7 @@
#define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a > _b) ? _a : _b; })
#define swap(a, b) ({ __typeof__(a) _a = a; (a) = (b); (b) = _a; })
#define swap(a, b) ({ __typeof__(a) _a = (a); (a) = (b); (b) = _a; })
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
# define compiler_bswap32 __builtin_bswap32
@ -46,7 +46,7 @@
# define compiler_bswap16 __builtin_bswap16
#endif
#define compiler_fls32(n) (31 - __builtin_clz(n))
#define compiler_fls64(n) (63 - __builtin_clzll(n))
#define compiler_ffs32(n) __builtin_ctz(n)
#define compiler_ffs64(n) __builtin_ctzll(n)
#define compiler_fls32(n) (31 - __builtin_clz(n))
#define compiler_fls64(n) (63 - __builtin_clzll(n))
#define compiler_ffs32(n) __builtin_ctz(n)
#define compiler_ffs64(n) __builtin_ctzll(n)

View File

@ -9,37 +9,34 @@
#ifdef __GNUC__
# include "compiler-gcc.h"
#else
# warning "Unrecognized compiler. Please add a header file for your compiler."
# error "Unrecognized compiler. Please add a header file for your compiler."
#endif
#ifndef LIBEXPORT
# define LIBEXPORT
#endif
#ifndef BUILD_BUG_ON
# define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#endif
#ifndef likely
# define likely(expr) (expr)
#endif
#ifndef unlikely
# define unlikely(expr) (expr)
#endif
#ifndef prefetch
# define prefetch(addr)
#ifndef _packed_attribute
# error "missing required definition of _packed_attribute"
#endif
#ifndef _aligned_attribute
# error "missing required definition of _aligned_attribute"
#endif
#ifndef _packed_attribute
# error "missing required definition of _packed_attribute"
#ifndef likely
# define likely(expr) (expr)
#endif
#ifndef unlikely
# define unlikely(expr) (expr)
#endif
#ifndef prefetch
# define prefetch(addr)
#endif
#ifndef CPU_IS_BIG_ENDIAN
# error "missing required endianness definition"
#endif
@ -47,7 +44,6 @@
#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
#ifndef UNALIGNED_ACCESS_SPEED
# warning "assuming unaligned accesses are not allowed"
# define UNALIGNED_ACCESS_SPEED 0
#endif
@ -58,3 +54,7 @@
#if !defined(min) || !defined(max) || !defined(swap)
# error "missing required definitions of min(), max(), and swap() macros"
#endif
#ifndef BUILD_BUG_ON
# define BUILD_BUG_ON(expr) ((void)sizeof(char[1 - 2*!!(expr)]))
#endif

View File

@ -71,9 +71,9 @@
* else
* multiple = 0;
*
* remainder >>= 1;
* remainder |= (u32)bit << 31;
* remainder ^= multiple;
* remainder >>= 1;
* remainder |= (u32)bit << 31;
* remainder ^= multiple;
* }
*
* return ~remainder;
@ -108,7 +108,7 @@
* multiple = divisor;
* else
* multiple = 0;
* remainder >>= 1;
* remainder >>= 1;
* remainder ^= multiple;
* }
*
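
Reassembled into runnable form, the bit-at-a-time algorithm these comments describe is the classic reflected CRC-32 loop. The following is a hedged reconstruction assuming the standard polynomial 0xEDB88320 and the project's u8/u32 types, shown only to make the comments concrete; the table-driven code in this file computes the same function faster.

static u32
crc32_bitwise(const void *buffer, size_t size)
{
        const u8 *p = buffer;
        u32 remainder = ~(u32)0;

        while (size--) {
                remainder ^= *p++;
                for (int i = 0; i < 8; i++) {
                        u32 multiple = (remainder & 1) ? 0xEDB88320 : 0;
                        remainder >>= 1;
                        remainder ^= multiple;
                }
        }
        return ~remainder;
}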

View File

@ -1961,9 +1961,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
struct lz_match *cache_end;
const u8 *in_block_begin;
const u8 *in_block_end;
unsigned num_matches;
unsigned best_len;
unsigned long prev_hash = 0;
u32 next_hash = 0;
deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c);
@ -1991,6 +1989,9 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
/* Find all match possibilities in this block. */
do {
struct lz_match *matches;
unsigned best_len;
/* Decrease the maximum and nice match lengths if we're
* approaching the end of the input buffer. */
if (unlikely(max_len > in_end - in_next)) {
@ -2028,71 +2029,68 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
* search for matches at almost all positions, so this
* advantage of hash chains is negated.
*/
num_matches =
matches = cache_ptr;
cache_ptr =
bt_matchfinder_get_matches(&c->bt_mf,
in_cur_base,
in_next,
DEFLATE_MIN_MATCH_LEN,
max_len,
nice_len,
c->max_search_depth,
&prev_hash,
&next_hash,
&best_len,
cache_ptr);
cache_ptr += num_matches;
cache_ptr->length = num_matches;
cache_ptr->length = cache_ptr - matches;
cache_ptr->offset = *in_next;
in_next++;
cache_ptr++;
if (num_matches) {
best_len = cache_ptr[-2].length;
/*
* If there was a very long match found, don't cache any
* matches for the bytes covered by that match. This
* avoids degenerate behavior when compressing highly
* redundant data, where the number of matches can be
* very large.
*
* This heuristic doesn't actually hurt the compression
* ratio very much. If there's a long match, then the
* data must be highly compressible, so it doesn't
* matter much what we do.
*
* We also trigger this same case when approaching the
* desired end of the block. This forces the block to
* reach a "stopping point" where there are no matches
* extending to later positions. (XXX: this behavior is
* non-optimal and should be improved.)
*/
if (best_len >= DEFLATE_MIN_MATCH_LEN &&
best_len >= min(nice_len, in_block_end - in_next)) {
--best_len;
do {
if (unlikely(max_len > in_end - in_next)) {
max_len = in_end - in_next;
nice_len = min(max_len, nice_len);
}
if (in_next == in_next_slide) {
bt_matchfinder_slide_window(&c->bt_mf);
in_cur_base = in_next;
in_next_slide = in_next + min(in_end - in_next,
MATCHFINDER_WINDOW_SIZE);
}
bt_matchfinder_skip_position(&c->bt_mf,
in_cur_base,
in_next,
in_end,
nice_len,
c->max_search_depth,
&next_hash);
/*
* If there was a very long match found, don't
* cache any matches for the bytes covered by
* that match. This avoids degenerate behavior
* when compressing highly redundant data, where
* the number of matches can be very large.
*
* This heuristic doesn't actually hurt the
* compression ratio very much. If there's a
* long match, then the data must be highly
* compressible, so it doesn't matter much what
* we do.
*
* We also trigger this same case when
* approaching the desired end of the block.
* This forces the block to reach a "stopping
* point" where there are no matches extending
* to later positions. (XXX: this behavior is
* non-optimal and should be improved.)
*/
if (best_len >= min(nice_len, in_block_end - in_next)) {
--best_len;
do {
if (unlikely(max_len > in_end - in_next)) {
max_len = in_end - in_next;
nice_len = min(max_len, nice_len);
}
if (in_next == in_next_slide) {
bt_matchfinder_slide_window(&c->bt_mf);
in_cur_base = in_next;
in_next_slide = in_next + min(in_end - in_next,
MATCHFINDER_WINDOW_SIZE);
}
bt_matchfinder_skip_position(&c->bt_mf,
in_cur_base,
in_next,
in_end,
nice_len,
c->max_search_depth,
&prev_hash);
cache_ptr->length = 0;
cache_ptr->offset = *in_next;
in_next++;
cache_ptr++;
} while (--best_len);
}
cache_ptr->length = 0;
cache_ptr->offset = *in_next;
in_next++;
cache_ptr++;
} while (--best_len);
}
} while (in_next < in_block_end);
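
The cache written above thus holds, for each input position, zero or more matches followed by a trailer entry whose 'length' is the match count and whose 'offset' carries the literal byte at that position. Storing the count after the matches suggests the cache is meant to be walked backward; a hedged sketch follows, with illustrative names (cache_begin_ptr and cache_end_ptr are assumptions, not the commit's identifiers):

/* Walk the match cache in reverse, one input position at a time. */
struct lz_match *p = cache_end_ptr;         /* one past the last trailer */
while (p > cache_begin_ptr) {
        p--;                                /* trailer for some position */
        unsigned num_matches = p->length;
        u8 literal = (u8)p->offset;         /* the byte at that position */
        struct lz_match *matches = p - num_matches;

        /* ... the cost model sees 'literal' and matches[0..num_matches-1],
         *     sorted by increasing length and increasing offset ... */
        p = matches;                        /* previous position's trailer */
}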

View File

@ -1,7 +1,7 @@
/*
* endianness.h
*
* Inline functions for endianness conversion.
* Macros and inline functions for endianness conversion.
*/
#pragma once

View File

@ -57,7 +57,7 @@ gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
out_next += 4;
/* ISIZE */
put_unaligned_u32_le(in_size, out_next);
put_unaligned_u32_le((u32)in_size, out_next);
out_next += 4;
return out_next - (u8 *)out;

View File

@ -6,8 +6,6 @@
#pragma once
#include "compiler.h"
#define GZIP_MIN_HEADER_SIZE 10
#define GZIP_FOOTER_SIZE 8
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)

View File

@ -54,20 +54,16 @@ gzip_decompress(struct deflate_decompressor *d,
/* Original file name (zero terminated) */
if (flg & GZIP_FNAME) {
while (*in_next != 0 && ++in_next != in_end)
while (*in_next++ != 0 && in_next != in_end)
;
if (in_next != in_end)
in_next++;
if (in_end - in_next < GZIP_FOOTER_SIZE)
return false;
}
/* File comment (zero terminated) */
if (flg & GZIP_FCOMMENT) {
while (*in_next != 0 && ++in_next != in_end)
while (*in_next++ != 0 && in_next != in_end)
;
if (in_next != in_end)
in_next++;
if (in_end - in_next < GZIP_FOOTER_SIZE)
return false;
}

View File

@ -1,37 +1,102 @@
/*
* hc_matchfinder.h
*
* This is a Hash Chain (hc) based matchfinder.
* ---------------------------------------------------------------------------
*
* Algorithm
*
* This is a Hash Chains (hc) based matchfinder.
*
* The data structure is a hash table where each hash bucket contains a linked
* list of sequences, referenced by position.
* list (or "chain") of sequences whose first 3 bytes share the same hash code.
* Each sequence is identified by its starting position in the input buffer.
*
* For each sequence (position) in the input, the first 3 bytes are hashed and
* that sequence (position) is prepended to the appropriate linked list in the
* hash table. Since the sequences are inserted in order, each list is always
* sorted by increasing match offset.
* The algorithm processes the input buffer sequentially. At each byte
* position, the hash code of the first 3 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, this hash
* bucket's linked list is searched for matches. Then, a new linked list node
* is created to represent the current sequence and is prepended to the list.
*
* At the same time as inserting a sequence, we may search the linked list for
* matches with that sequence. At each step, the length of the match is
* computed. The search ends when the sequences get too far away (outside of
* the sliding window), or when the list ends (in the code this is the same
* check as "too far away"), or when 'max_search_depth' positions have been
* searched, or when a match of at least 'nice_len' bytes has been found.
* This algorithm has several useful properties:
*
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
* sequence occurs prior to the sequence being matched against.
*
* - The sequences in each linked list are always sorted by decreasing starting
* position. Therefore, the closest (smallest offset) matches are found
* first, which in many compression formats tend to be the cheapest to encode.
*
* - Although fast running time is not guaranteed due to the possibility of the
* lists getting very long, the worst degenerate behavior can be easily
* prevented by capping the number of nodes searched at each position.
*
* - If the compressor decides not to search for matches at a certain position,
* then that position can be quickly inserted without searching the list.
*
* - The algorithm is adaptable to sliding windows: just store the positions
* relative to a "base" value that is updated from time to time, and stop
* searching each list when the sequences get too far away.
*
* ---------------------------------------------------------------------------
*
* Notes on usage
*
* You must define MATCHFINDER_WINDOW_ORDER before including this header because
* that determines which integer type to use for positions. Since 16-bit
* integers are faster than 32-bit integers due to reduced memory usage (and
* therefore reduced cache pressure), the code only uses 32-bit integers if they
* are needed to represent all possible positions.
*
* In addition, you must allocate the 'struct hc_matchfinder' on a
* MATCHFINDER_ALIGNMENT-aligned boundary.
*
* ----------------------------------------------------------------------------
*
* Optimizations
*
* The longest_match() and skip_positions() functions are inlined into the
* compressors that use them. This isn't just about saving the overhead of a
* function call. These functions are intended to be called from the inner
* loops of compressors, where giving the compiler more control over register
* allocation is very helpful. There is also significant benefit to be gained
* from allowing the CPU to predict branches independently at each call site.
* For example, "lazy"-style compressors can be written with two calls to
* longest_match(), each of which starts with a different 'best_len' and
* therefore has significantly different performance characteristics.
*
* Although any hash function can be used, a multiplicative hash is fast and
* works well.
*
* On some processors, it is significantly faster to extend matches by whole
* words (32 or 64 bits) instead of by individual bytes. For this to be the
* case, the processor must implement unaligned memory accesses efficiently and
* must have either a fast "find first set bit" instruction or a fast "find last
* set bit" instruction, depending on the processor's endianness.
*
* The code uses one loop for finding the first match and one loop for finding a
* longer match. Each of these loops is tuned for its respective task and in
* combination are faster than a single generalized loop that handles both
* tasks.
*
* The code also uses a tight inner loop that only compares the last and first
* bytes of a potential match. It is only when these bytes match that a full
* match extension is attempted.
*
* ----------------------------------------------------------------------------
*/
#pragma once
#include "lz_extend.h"
#include "lz_hash3.h"
#include "lz_hash.h"
#include "matchfinder_common.h"
#include "unaligned.h"
#ifndef HC_MATCHFINDER_HASH_ORDER
# if MATCHFINDER_WINDOW_ORDER < 14
# define HC_MATCHFINDER_HASH_ORDER 14
# else
# define HC_MATCHFINDER_HASH_ORDER 15
# endif
#if MATCHFINDER_WINDOW_ORDER < 14
# define HC_MATCHFINDER_HASH_ORDER 14
#else
# define HC_MATCHFINDER_HASH_ORDER 15
#endif
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
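
Before the heavily optimized version below, it may help to see the hash-chain idea reduced to its core. This is a self-contained editor's toy, not libdeflate's code: non-sliding, byte-at-a-time extension, illustrative sizes. It assumes hash_tab entries are pre-initialized to TOY_NULL and at least 3 bytes are readable at 'pos'.

#include <stddef.h>
#include <stdint.h>

#define TOY_HASH_ORDER 8
#define TOY_NULL       UINT32_MAX

static uint32_t
toy_hash3(const uint8_t *p)               /* same multiplicative-hash idea */
{
        uint32_t seq = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);
        return (seq * 0x1E35A7BDu) >> (32 - TOY_HASH_ORDER);
}

/* Prepend 'pos' to its chain, then walk the chain (newest first) for the
 * longest earlier match, visiting at most 'depth' candidates. */
static unsigned
toy_longest_match(const uint8_t *buf, size_t bufsize, size_t pos,
                  uint32_t hash_tab[1u << TOY_HASH_ORDER], uint32_t *next_tab,
                  unsigned depth, size_t *match_pos_ret)
{
        uint32_t hash = toy_hash3(&buf[pos]);
        uint32_t cur = hash_tab[hash];
        unsigned best_len = 0;

        next_tab[pos] = cur;              /* newest node links to old head */
        hash_tab[hash] = (uint32_t)pos;

        while (cur != TOY_NULL && depth--) {
                unsigned len = 0;

                while (pos + len < bufsize && buf[cur + len] == buf[pos + len])
                        len++;
                if (len > best_len) {
                        best_len = len;
                        *match_pos_ret = cur;
                }
                cur = next_tab[cur];      /* older position, larger offset */
        }
        return best_len;
}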
@ -73,17 +138,18 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
* @in_next
* Pointer to the next byte in the input buffer to process. This is the
* pointer to the bytes being matched against.
* pointer to the sequence being matched against.
* @best_len
* Require a match at least this long.
* Require a match longer than this length.
* @max_len
* Maximum match length to return.
* The maximum permissible match length at this position.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider.
* Limit on the number of potential matches to consider. Must be >= 1.
* @offset_ret
* The match offset is returned here.
* If a match is found, its offset is returned in this location.
*
* Return the length of the match found, or 'best_len' if no match longer than
* 'best_len' was found.
@ -102,61 +168,57 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
const u8 *best_matchptr = best_matchptr; /* uninitialized */
const u8 *matchptr;
unsigned len;
unsigned hash;
pos_t cur_match;
u32 first_3_bytes;
u32 hash;
pos_t cur_node;
/* Insert the current sequence into the appropriate hash chain. */
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
/* Insert the current sequence into the appropriate linked list. */
if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
goto out;
first_3_bytes = load_u24_unaligned(in_next);
hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
cur_match = mf->hash_tab[hash];
mf->next_tab[in_next - in_base] = cur_match;
hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
cur_node = mf->hash_tab[hash];
mf->next_tab[in_next - in_base] = cur_node;
mf->hash_tab[hash] = in_next - in_base;
if (unlikely(best_len >= max_len))
goto out;
/* Search the appropriate hash chain for matches. */
/* Search the appropriate linked list for matches. */
if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
if (!(matchfinder_node_valid(cur_node, in_base, in_next)))
goto out;
if (best_len < 3) {
for (;;) {
/* No length 3 match found yet.
* Check the first 3 bytes. */
matchptr = &in_base[cur_match];
matchptr = &in_base[cur_node];
if (load_u24_unaligned(matchptr) == first_3_bytes)
break;
/* Not a match; keep trying. */
cur_match = mf->next_tab[
matchfinder_slot_for_match(cur_match)];
if (!matchfinder_match_in_window(cur_match,
in_base, in_next))
goto out;
if (!--depth_remaining)
/* The first 3 bytes did not match. Keep trying. */
cur_node = mf->next_tab[
matchfinder_slot_for_match(cur_node)];
if (!matchfinder_node_valid(cur_node, in_base, in_next) ||
!--depth_remaining)
goto out;
}
/* Found a length 3 match. */
/* Found a match of length >= 3. Extend it to its full length. */
best_matchptr = matchptr;
best_len = lz_extend(in_next, best_matchptr, 3, max_len);
if (best_len >= nice_len)
goto out;
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
goto out;
if (!--depth_remaining)
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
goto out;
}
for (;;) {
for (;;) {
matchptr = &in_base[cur_match];
matchptr = &in_base[cur_node];
/* Already found a length 3 match. Try for a longer match;
* start by checking the last 2 bytes and the first 4 bytes. */
@ -170,17 +232,16 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
#endif
break;
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
goto out;
if (!--depth_remaining)
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
goto out;
}
if (UNALIGNED_ACCESS_IS_FAST)
len = 4;
else
len = 0;
#if UNALIGNED_ACCESS_IS_FAST
len = 4;
#else
len = 0;
#endif
len = lz_extend(in_next, matchptr, len, max_len);
if (len > best_len) {
best_len = len;
@ -188,10 +249,8 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
if (best_len >= nice_len)
goto out;
}
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
goto out;
if (!--depth_remaining)
cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
goto out;
}
out:
@ -200,7 +259,7 @@ out:
}
/*
* Advance the match-finder, but don't search for matches.
* Advance the matchfinder, but don't search for matches.
*
* @mf
* The matchfinder structure.
@ -212,7 +271,7 @@ out:
* @in_end
* Pointer to the end of the input buffer.
* @count
* Number of bytes to skip; must be > 0.
* The number of bytes to advance. Must be > 0.
*/
static inline void
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
@ -221,13 +280,13 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
const u8 *in_end,
unsigned count)
{
unsigned hash;
u32 hash;
if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
return;
do {
hash = lz_hash3(in_next, HC_MATCHFINDER_HASH_ORDER);
hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_base;
in_next++;

View File

@ -24,12 +24,12 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr,
if (likely(max_len - len >= 4 * WORDSIZE)) {
#define COMPARE_WORD_STEP \
v_word = load_word_unaligned(&matchptr[len]) ^ \
load_word_unaligned(&strptr[len]); \
if (v_word != 0) \
goto word_differs; \
len += WORDSIZE; \
#define COMPARE_WORD_STEP \
v_word = load_word_unaligned(&matchptr[len]) ^ \
load_word_unaligned(&strptr[len]); \
if (v_word != 0) \
goto word_differs; \
len += WORDSIZE; \
COMPARE_WORD_STEP
COMPARE_WORD_STEP
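
Spelled out, the word-at-a-time idea is: XOR two unaligned words, and if the result is nonzero, the index of its first set bit (on little-endian) identifies the first differing byte. A hedged sketch, assuming little-endian and the helpers declared elsewhere in this tree (load_word_unaligned(), ffsw(), WORDSIZE, machine_word_t):

/* Little-endian sketch of word-wise match extension; a big-endian
 * version would use flsw() on the XOR result instead. */
static inline unsigned
lz_extend_sketch(const u8 *strptr, const u8 *matchptr,
                 unsigned len, unsigned max_len)
{
        machine_word_t v;

        while (len + WORDSIZE <= max_len) {
                v = load_word_unaligned(&matchptr[len]) ^
                    load_word_unaligned(&strptr[len]);
                if (v != 0)
                        return len + (ffsw(v) >> 3); /* bytes before mismatch */
                len += WORDSIZE;
        }
        while (len < max_len && matchptr[len] == strptr[len])
                len++;
        return len;
}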

src/lz_hash.h (new file, 41 lines)
View File

@ -0,0 +1,41 @@
/*
* lz_hash.h
*
* Hashing for Lempel-Ziv matchfinding.
*/
#ifndef _LZ_HASH_H
#define _LZ_HASH_H
#include "unaligned.h"
/*
* The hash function: given a sequence prefix held in the low-order bits of a
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
* bits of the product that don't fit in a 32-bit value, but take the
* next-highest @num_bits bits of the product as the hash value, as those have
* the most randomness.
*/
static inline u32
lz_hash(u32 seq, unsigned num_bits)
{
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
}
/*
* Hash the 3-byte sequence beginning at @p, producing a hash of length
* @num_bits bits. At least LZ_HASH3_REQUIRED_NBYTES bytes of data must be
* available at @p; note that this may be more than 3.
*/
static inline u32
lz_hash_3_bytes(const u8 *p, unsigned num_bits)
{
u32 seq = load_u24_unaligned(p);
if (num_bits >= 24)
return seq;
return lz_hash(seq, num_bits);
}
#define LZ_HASH3_REQUIRED_NBYTES LOAD_U24_REQUIRED_NBYTES
#endif /* _LZ_HASH_H */

View File

@ -1,49 +0,0 @@
/*
* lz_hash3.h
*
* 3-byte hashing for Lempel-Ziv matchfinding.
*/
#pragma once
#include "unaligned.h"
static inline u32
loaded_u32_to_u24(u32 v)
{
if (CPU_IS_LITTLE_ENDIAN)
return v & 0xFFFFFF;
else
return v >> 8;
}
static inline u32
load_u24_unaligned(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return loaded_u32_to_u24(load_u32_unaligned(p));
else
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
}
static inline u32
lz_hash3_u24(u32 str, unsigned num_bits)
{
return (u32)(str * 0x1E35A7BD) >> (32 - num_bits);
}
/*
* Hash the next 3-byte sequence in the window, producing a hash of length
* 'num_bits' bits. At least LZ_HASH_REQUIRED_NBYTES must be available at 'p';
* this might be 4 bytes rather than 3 because an unaligned load is faster on
* some architectures.
*/
static inline u32
lz_hash3(const u8 *p, unsigned num_bits)
{
return lz_hash3_u24(load_u24_unaligned(p), num_bits);
}
/* Number of bytes the hash function actually requires be available, due to the
* possibility of an unaligned load. */
#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)

View File

@ -16,9 +16,9 @@ matchfinder_init_avx2(pos_t *data, size_t size)
return false;
if (sizeof(pos_t) == 2)
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
v = _mm256_set1_epi16((u16)MATCHFINDER_NULL);
else if (sizeof(pos_t) == 4)
v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
v = _mm256_set1_epi32((u32)MATCHFINDER_NULL);
else
return false;

View File

@ -60,7 +60,7 @@ static inline bool
matchfinder_memset_init_okay(void)
{
/* All bytes must match in order to use memset. */
const pos_t v = MATCHFINDER_INITVAL;
const pos_t v = MATCHFINDER_NULL;
if (sizeof(pos_t) == 2)
return (u8)v == (u8)(v >> 8);
if (sizeof(pos_t) == 4)
@ -93,12 +93,12 @@ matchfinder_init(pos_t *data, size_t num_entries)
#endif
if (matchfinder_memset_init_okay()) {
memset(data, (u8)MATCHFINDER_INITVAL, size);
memset(data, (u8)MATCHFINDER_NULL, size);
return;
}
for (size_t i = 0; i < num_entries; i++)
data[i] = MATCHFINDER_INITVAL;
data[i] = MATCHFINDER_NULL;
}
#if MATCHFINDER_IS_SLIDING
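
Concretely, whether the memset path applies depends only on MATCHFINDER_NULL's byte pattern; a worked example, assuming 16-bit pos_t:

/* Non-sliding: MATCHFINDER_NULL == (pos_t)-1 == 0xFFFF
 *   (u8)v == 0xFF and (u8)(v >> 8) == 0xFF -> bytes match, memset(0xFF) OK.
 * Sliding, 32768-byte window: MATCHFINDER_NULL == (pos_t)-32768 == 0x8000
 *   (u8)v == 0x00 but (u8)(v >> 8) == 0x80 -> bytes differ, use the loop. */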

View File

@ -16,12 +16,12 @@ typedef u32 pos_t;
/* Not all the bits of the position type are needed, so the sign bit can be
* reserved to mean "out of bounds". */
#define MATCHFINDER_INITVAL ((pos_t)-1)
#define MATCHFINDER_NULL ((pos_t)-1)
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
{
return !(cur_match & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
return !(cur_node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
}
#else
@ -30,18 +30,18 @@ matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_nex
* This prevents the beginning of the buffer from matching anything; however,
* this doesn't matter much. */
#define MATCHFINDER_INITVAL ((pos_t)0)
#define MATCHFINDER_NULL ((pos_t)0)
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
{
return cur_match != 0;
return cur_node != 0;
}
#endif
static inline pos_t
matchfinder_slot_for_match(pos_t cur_match)
matchfinder_slot_for_match(pos_t cur_node)
{
return cur_match;
return cur_node;
}

View File

@ -13,18 +13,18 @@ typedef s16 pos_t;
typedef s32 pos_t;
#endif
#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
#define MATCHFINDER_NULL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
/* In the sliding window case, positions are stored relative to 'in_base'. */
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
{
return cur_match > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
return cur_node > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
}
static inline pos_t
matchfinder_slot_for_match(pos_t cur_match)
matchfinder_slot_for_match(pos_t cur_node)
{
return cur_match & (MATCHFINDER_WINDOW_SIZE - 1);
return cur_node & (MATCHFINDER_WINDOW_SIZE - 1);
}
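
A toy rendering of the sliding-case validity test may make the arithmetic clearer; an editor's sketch with illustrative values (32768-byte window assumed):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef int32_t toy_pos_t;
#define TOY_WINDOW_SIZE 32768

/* Toy version of matchfinder_node_valid() for the sliding case;
 * 'cur_pos' stands for in_next - in_base. */
static bool
toy_node_valid(toy_pos_t cur_node, ptrdiff_t cur_pos)
{
        return cur_node > (toy_pos_t)(cur_pos - TOY_WINDOW_SIZE);
}

/* At cur_pos == 40000: a node stored at position 8000 satisfies
 * 8000 > 40000 - 32768 == 7232, so it is still inside the window,
 * whereas one at 7000 has slid out.  MATCHFINDER_NULL == -32768 fails
 * the test for every cur_pos >= 0, which makes it a usable null link. */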

View File

@ -16,9 +16,9 @@ matchfinder_init_sse2(pos_t *data, size_t size)
return false;
if (sizeof(pos_t) == 2)
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
v = _mm_set1_epi16((u16)MATCHFINDER_NULL);
else if (sizeof(pos_t) == 4)
v = _mm_set1_epi32(MATCHFINDER_INITVAL);
v = _mm_set1_epi32((u32)MATCHFINDER_NULL);
else
return false;

View File

@ -6,9 +6,9 @@
#pragma once
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
typedef uint8_t u8;
typedef uint16_t u16;

View File

@ -1,7 +1,7 @@
/*
* unaligned.h
*
* Inline functions for unaligned memory access.
* Inline functions for unaligned memory accesses.
*/
#pragma once
@ -214,3 +214,36 @@ put_unaligned_u32_be(u32 v, void *p)
p8[3] = (v >> 0) & 0xFF;
}
}
/*
* Given a 32-bit value that was loaded with the platform's native endianness,
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
* bits contain the first 3 bytes of the memory location from which the input
* 32-bit value was loaded, arranged as octets in a platform-dependent order.
*/
static inline u32
loaded_u32_to_u24(u32 v)
{
if (CPU_IS_LITTLE_ENDIAN)
return v & 0xFFFFFF;
else
return v >> 8;
}
/*
* Load the next 3 bytes from the memory location @p into the 24 low-order bits
* of a 32-bit value. The order in which the 3 bytes will be arranged as octets
* in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
* bytes must be available at @p; note that this may be more than 3.
*/
static inline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
# define LOAD_U24_REQUIRED_NBYTES 4
return loaded_u32_to_u24(load_u32_unaligned(p));
#else
# define LOAD_U24_REQUIRED_NBYTES 3
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
#endif
}
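
A quick worked example of the two paths, assuming a little-endian machine (where they agree; on big-endian the two paths pack the octets differently, which is harmless because the result only feeds a hash function):

/* bytes in memory: 0x11 0x22 0x33 0x44
 * fast path (LE):  load_u32_unaligned() == 0x44332211,
 *                  masked with 0xFFFFFF == 0x332211
 * byte path:       0x11 | 0x22 << 8 | 0x33 << 16 == 0x332211 */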

View File

@ -1,11 +1,9 @@
/*
* benchmark.c - A compression testing and benchmark program.
*
* The author dedicates this file to the public domain.
* You can do whatever you want with this file.
* This file has no copyright assigned and is placed in the Public Domain.
*/
#define _FILE_OFFSET_BITS 64
#define _GNU_SOURCE
@ -419,9 +417,9 @@ main(int argc, char **argv)
wrapper == NO_WRAPPER ? "None" :
wrapper == ZLIB_WRAPPER ? "zlib" : "gzip");
printf("\tCompression engine: %s\n",
compress_with_libz ? "zlib" : "libdeflate");
compress_with_libz ? "libz" : "libdeflate");
printf("\tDecompression engine: %s\n",
decompress_with_libz ? "zlib" : "libdeflate");
decompress_with_libz ? "libz" : "libdeflate");
ubuf1 = malloc(chunk_size);
ubuf2 = malloc(chunk_size);