Cleanups and matchfinder updates

2025-09-10 12:58:30 -04:00 · 2015-01-22 00:05:01 -06:00 · 2015-01-22 00:05:01 -06:00 · 5f3208e788
commit 5f3208e788
parent fed4597943
26 changed files with 528 additions and 380 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -78,7 +78,7 @@ install(FILES libdeflate.h DESTINATION "${CMAKE_INSTALL_PREFIX}/include")
 option(BUILD_BENCHMARK "Build benchmark program" OFF)
 add_executable(benchmark test/benchmark.c)
-target_link_libraries(benchmark deflate -lz)
+target_link_libraries(benchmark deflatestatic -lz)
 option(BUILD_GEN_CRC32_TABLE "Build CRC32 table generation program" OFF)
 add_executable(gen_crc32_table test/gen_crc32_table.c)
--- a/libdeflate.h
+++ b/libdeflate.h
@ -1,7 +1,9 @@
 /*
 * libdeflate.h
 *
- * Public header for the DEFLATE compression library.
+ * Public header for libdeflate.
 *
 * This file has no copyright assigned and is placed in the Public Domain.
 */
 #ifndef LIBDEFLATE_H
@ -26,7 +28,9 @@ struct deflate_compressor;
 * fastest, 6 = medium/default, 9 = slowest).  The return value is a pointer to
 * the new DEFLATE compressor, or NULL if out of memory.
 *
- * Note: the sliding window size is defined at compilation time (default 32768).
+ * Note: for compression, the sliding window size is defined at compilation time
 * to 32768, the largest size permissible in the DEFLATE format.  It cannot be
 * changed at runtime.
 */
 extern struct deflate_compressor *
 deflate_alloc_compressor(unsigned int compression_level);
@ -44,7 +48,7 @@ deflate_compress(struct deflate_compressor *compressor,
 		 void *out, size_t out_nbytes_avail);
 /*
- * Like deflate_compress(), but store the data in the zlib wrapper format.
+ * Like deflate_compress(), but stores the data in the zlib wrapper format.
 */
 extern size_t
 zlib_compress(struct deflate_compressor *compressor,
@ -52,7 +56,7 @@ zlib_compress(struct deflate_compressor *compressor,
 	      void *out, size_t out_nbytes_avail);
 /*
- * Like deflate_compress(), but store the data in the gzip wrapper format.
+ * Like deflate_compress(), but stores the data in the gzip wrapper format.
 */
 extern size_t
 gzip_compress(struct deflate_compressor *compressor,
@ -61,7 +65,8 @@ gzip_compress(struct deflate_compressor *compressor,
 /*
 * deflate_free_compressor() frees a DEFLATE compressor that was allocated with
- * deflate_alloc_compressor().
+ * deflate_alloc_compressor().  If a NULL pointer is passed in, no action is
 * taken.
 */
 extern void
 deflate_free_compressor(struct deflate_compressor *compressor);
@ -79,7 +84,9 @@ struct deflate_decompressor;
 *
 * This function takes no parameters, and the returned decompressor is valid for
 * decompressing data that was compressed at any compression level and with any
- * sliding window size.
+ * sliding window size.  It can also be used for any wrapper format (raw
 * DEFLATE, zlib, or gzip); however, the appropriate decompression function must
 * be called.
 */
 extern struct deflate_decompressor *
 deflate_alloc_decompressor(void);
@ -118,7 +125,8 @@ gzip_decompress(struct deflate_decompressor *decompressor,
 /*
 * deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
- * with deflate_alloc_decompressor().
+ * with deflate_alloc_decompressor().  If a NULL pointer is passed in, no action
 * is taken.
 */
 extern void
 deflate_free_decompressor(struct deflate_decompressor *decompressor);
--- a/src/adler32.c
+++ b/src/adler32.c
@ -39,7 +39,7 @@
 #define UNROLL_FACTOR	4
 u32
-adler32(const u8 *buffer, size_t size)
+adler32(const void *buffer, size_t size)
 {
 	u32 s1 = 1;
 	u32 s2 = 0;
--- a/src/adler32.h
+++ b/src/adler32.h
@ -9,4 +9,4 @@
 #include "types.h"
 extern u32
-adler32(const u8 *buffer, size_t size);
+adler32(const void *buffer, size_t size);
--- a/src/bitops.h
+++ b/src/bitops.h
@ -11,7 +11,8 @@
 /* Find Last Set bit   */
-static inline unsigned fls32(u32 v)
+static inline unsigned
 fls32(u32 v)
 {
 #ifdef compiler_fls32
 	return compiler_fls32(v);
@ -23,7 +24,8 @@ static inline unsigned fls32(u32 v)
 #endif
 }
-static inline unsigned fls64(u64 v)
+static inline unsigned
 fls64(u64 v)
 {
 #ifdef compiler_fls64
 	return compiler_fls64(v);
@ -35,7 +37,8 @@ static inline unsigned fls64(u64 v)
 #endif
 }
-static inline unsigned flsw(machine_word_t v)
+static inline unsigned
 flsw(machine_word_t v)
 {
 	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
 	if (WORDSIZE == 4)
@ -46,7 +49,8 @@ static inline unsigned flsw(machine_word_t v)
 /* Find First Set bit   */
-static inline unsigned ffs32(u32 v)
+static inline unsigned
 ffs32(u32 v)
 {
 #ifdef compiler_ffs32
 	return compiler_ffs32(v);
@ -58,7 +62,8 @@ static inline unsigned ffs32(u32 v)
 #endif
 }
-static inline unsigned ffs64(u64 v)
+static inline unsigned
 ffs64(u64 v)
 {
 #ifdef compiler_ffs64
 	return compiler_ffs64(v);
@ -70,7 +75,8 @@ static inline unsigned ffs64(u64 v)
 #endif
 }
-static inline unsigned ffsw(machine_word_t v)
+static inline unsigned
 ffsw(machine_word_t v)
 {
 	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
 	if (WORDSIZE == 4)
--- a/src/bt_matchfinder.h
+++ b/src/bt_matchfinder.h
@ -1,51 +1,56 @@
 /*
 * bt_matchfinder.h
 *
- * This is a Binary Tree (bt) based matchfinder.
+ * ----------------------------------------------------------------------------
 *
 * This is a Binary Trees (bt) based matchfinder.
 *
 * The data structure is a hash table where each hash bucket contains a binary
- * tree of sequences, referenced by position.  The sequences in the binary tree
+ * tree of sequences whose first 3 bytes share the same hash code.  Each
- * are ordered such that a left child is lexicographically lesser than its
+ * sequence is identified by its starting position in the input buffer.  Each
- * parent, and a right child is lexicographically greater than its parent.
+ * binary tree is always sorted such that each left child represents a sequence
 * lexicographically lesser than its parent and each right child represents a
 * sequence lexicographically greater than its parent.
 *
- * For each sequence (position) in the input, the first 3 bytes are hashed and
+ * The algorithm processes the input buffer sequentially.  At each byte
- * the the appropriate binary tree is re-rooted at that sequence (position).
+ * position, the hash code of the first 3 bytes of the sequence beginning at
- * Since the sequences are inserted in order, each binary tree maintains the
+ * that position (the sequence being matched against) is computed.  This
- * invariant that each child node has greater match offset than its parent.
+ * identifies the hash bucket to use for that position.  Then, a new binary tree
 * node is created to represent the current sequence.  Then, in a single tree
 * traversal, the hash bucket's binary tree is searched for matches and is
 * re-rooted at the new node.
 *
- * While inserting a sequence, we may search the binary tree for matches with
+ * Compared to the simpler algorithm that uses linked lists instead of binary
- * that sequence.  At each step, the length of the match is computed.  The
+ * trees (see hc_matchfinder.h), the binary tree version gains more information
- * search ends when the sequences get too far away (outside of the sliding
+ * at each node visitation.  Ideally, the binary tree version will examine only
- * window), or when the binary tree ends (in the code this is the same check as
+ * 'log(n)' nodes to find the same matches that the linked list version will
- * "too far away"), or when 'max_search_depth' positions have been searched, or
+ * find by examining 'n' nodes.  In addition, the binary tree version can
- * when a match of at least 'nice_len' bytes has been found.
+ * examine fewer bytes at each node by taking advantage of the common prefixes
 * that result from the sort order, whereas the linked list version may have to
 * examine up to the full length of the match at each node.
 *
- * Notes:
+ * However, it is not always best to use the binary tree version.  It requires
 * nearly twice as much memory as the linked list version, and it takes time to
 * keep the binary trees sorted, even at positions where the compressor does not
 * need matches.  Generally, when doing fast compression on small buffers,
 * binary trees are the wrong approach.  They are best suited for thorough
 * compression and/or large buffers.
 *
- *	- Typically, we need to search more nodes to find a given match in a
+ * ----------------------------------------------------------------------------
 *	  binary tree versus in a linked list.  However, a binary tree has more
 *	  overhead than a linked list: it needs to be kept sorted, and the inner
 *	  search loop is more complicated.  As a result, binary trees are best
 *	  suited for compression modes where the potential matches are searched
 *	  more thoroughly.
 *
 *	- Since no attempt is made to keep the binary trees balanced, it's
 *	  essential to have the 'max_search_depth' cutoff.  Otherwise it could
 *	  take quadratic time to run data through the matchfinder.
 */
 #pragma once
 #include "lz_extend.h"
-#include "lz_hash3.h"
+#include "lz_hash.h"
 #include "matchfinder_common.h"
-#ifndef BT_MATCHFINDER_HASH_ORDER
+#if MATCHFINDER_WINDOW_ORDER < 13
 #  if MATCHFINDER_WINDOW_ORDER < 14
 #  define BT_MATCHFINDER_HASH_ORDER 14
-#  else
+#elif MATCHFINDER_WINDOW_ORDER < 15
 #  define BT_MATCHFINDER_HASH_ORDER 15
-#  endif
+#else
 #  define BT_MATCHFINDER_HASH_ORDER 16
 #endif
 #define BT_MATCHFINDER_HASH_LENGTH	(1UL << BT_MATCHFINDER_HASH_ORDER)
@ -77,8 +82,37 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
 }
 #endif
 static inline u32
 bt_matchfinder_hash_3_bytes(const u8 *in_next)
 {
 	return lz_hash_3_bytes(in_next, BT_MATCHFINDER_HASH_ORDER);
 }
 static inline pos_t *
 bt_child(struct bt_matchfinder *mf, pos_t node, int offset)
 {
 	if (MATCHFINDER_WINDOW_ORDER < sizeof(pos_t) * 8) {
 		/* no cast needed */
 		return &mf->child_tab[(matchfinder_slot_for_match(node) << 1) + offset];
 	} else {
 		return &mf->child_tab[((size_t)matchfinder_slot_for_match(node) << 1) + offset];
 	}
 }
 static inline pos_t *
 bt_left_child(struct bt_matchfinder *mf, pos_t node)
 {
 	return bt_child(mf, node, 0);
 }
 static inline pos_t *
 bt_right_child(struct bt_matchfinder *mf, pos_t node)
 {
 	return bt_child(mf, node, 1);
 }
 /*
- * Find matches with the current sequence.
+ * Retrieve a list of matches with the current position.
 *
 * @mf
 *	The matchfinder structure.
@ -87,115 +121,131 @@ bt_matchfinder_slide_window(struct bt_matchfinder *mf)
 *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.  This is the
- *	pointer to the bytes being matched against.
+ *	pointer to the sequence being matched against.
 * @min_len
 *	Only record matches that are at least this long.
 * @max_len
- *	Maximum match length to return.
+ *	The maximum permissible match length at this position.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 *	Must be <= @max_len.
 * @max_search_depth
- *	Limit on the number of potential matches to consider.
+ *	Limit on the number of potential matches to consider.  Must be >= 1.
- * @prev_hash
+ * @next_hash
- *	TODO
+ *	Pointer to the hash code for the current sequence, which was computed
- * @matches
+ *	one position in advance so that the binary tree root could be
- *	Space to write the matches that are found.
+ *	prefetched.  This is an input/output parameter.
 * @best_len_ret
 *	The length of the longest match found is written here.  (This is
 *	actually redundant with the 'struct lz_match' array, but this is easier
 *	for the compiler to optimize when inlined and the caller immediately
 *	does a check against 'best_len'.)
 * @lz_matchptr
 *	An array in which this function will record the matches.  The recorded
 *	matches will be sorted by strictly increasing length and strictly
 *	increasing offset.  The maximum number of matches that may be found is
 *	'min(nice_len, max_len) - 3 + 1'.
 *
- * Returns the number of matches found, which may be anywhere from 0 to
+ * The return value is a pointer to the next available slot in the @lz_matchptr
- * (nice_len - 3 + 1), inclusively.  The matches are written to @matches in
+ * array.  (If no matches were found, this will be the same as @lz_matchptr.)
 * order of strictly increasing length and strictly increasing offset.  The
 * minimum match length is assumed to be 3.
 */
-static inline unsigned
+static inline struct lz_match *
 bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
 			   const u8 * const in_base,
 			   const u8 * const in_next,
 			   const unsigned min_len,
 			   const unsigned max_len,
 			   const unsigned nice_len,
 			   const unsigned max_search_depth,
-			   unsigned long *prev_hash,
+			   u32 * restrict next_hash,
-			   struct lz_match * const restrict matches)
+			   unsigned * restrict best_len_ret,
 			   struct lz_match * restrict lz_matchptr)
 {
 	struct lz_match *lz_matchptr = matches;
 	unsigned depth_remaining = max_search_depth;
-	unsigned hash;
+	u32 hash;
-	pos_t cur_match;
+	pos_t cur_node;
 	const u8 *matchptr;
 	unsigned best_len;
 	pos_t *pending_lt_ptr, *pending_gt_ptr;
 	unsigned best_lt_len, best_gt_len;
 	unsigned len;
-	pos_t *children;
+	unsigned best_len = min_len - 1;
-	if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
+	if (unlikely(max_len < LZ_HASH3_REQUIRED_NBYTES + 1)) {
-		return 0;
+		*best_len_ret = best_len;
-
+		return lz_matchptr;
 	hash = *prev_hash;
 	*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
 	prefetch(&mf->hash_tab[*prev_hash]);
 	cur_match = mf->hash_tab[hash];
 	mf->hash_tab[hash] = in_next - in_base;
 	best_len = 2;
 	pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
 	pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
 	best_lt_len = 0;
 	best_gt_len = 0;
 	for (;;) {
 		if (!matchfinder_match_in_window(cur_match,
 						 in_base, in_next) ||
 		    !depth_remaining--)
 		{
 			*pending_lt_ptr = MATCHFINDER_INITVAL;
 			*pending_gt_ptr = MATCHFINDER_INITVAL;
 			return lz_matchptr - matches;
 	}
-		matchptr = &in_base[cur_match];
+	hash = *next_hash;
-		len = min(best_lt_len, best_gt_len);
+	*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
 	cur_node = mf->hash_tab[hash];
 	mf->hash_tab[hash] = in_next - in_base;
 	prefetch(&mf->hash_tab[*next_hash]);
-		children = &mf->child_tab[(unsigned long)
+	pending_lt_ptr = bt_left_child(mf, in_next - in_base);
-				matchfinder_slot_for_match(cur_match) << 1];
+	pending_gt_ptr = bt_right_child(mf, in_next - in_base);
 	best_lt_len = 0;
 	best_gt_len = 0;
 	len = 0;
 	if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
 		*pending_lt_ptr = MATCHFINDER_NULL;
 		*pending_gt_ptr = MATCHFINDER_NULL;
 		*best_len_ret = best_len;
 		return lz_matchptr;
 	}
 	for (;;) {
 		matchptr = &in_base[cur_node];
 		if (matchptr[len] == in_next[len]) {
 			len = lz_extend(in_next, matchptr, len + 1, max_len);
 			if (len > best_len) {
 				best_len = len;
 				lz_matchptr->length = len;
 				lz_matchptr->offset = in_next - matchptr;
 				lz_matchptr++;
 				if (len >= nice_len) {
-					*pending_lt_ptr = children[0];
+					*pending_lt_ptr = *bt_left_child(mf, cur_node);
-					*pending_gt_ptr = children[1];
+					*pending_gt_ptr = *bt_right_child(mf, cur_node);
-					return lz_matchptr - matches;
+					*best_len_ret = best_len;
 					return lz_matchptr;
 				}
 			}
 		}
 		if (matchptr[len] < in_next[len]) {
-			*pending_lt_ptr = cur_match;
+			*pending_lt_ptr = cur_node;
-			pending_lt_ptr = &children[1];
+			pending_lt_ptr = bt_right_child(mf, cur_node);
-			cur_match = *pending_lt_ptr;
+			cur_node = *pending_lt_ptr;
 			best_lt_len = len;
 			if (best_gt_len < len)
 				len = best_gt_len;
 		} else {
-			*pending_gt_ptr = cur_match;
+			*pending_gt_ptr = cur_node;
-			pending_gt_ptr = &children[0];
+			pending_gt_ptr = bt_left_child(mf, cur_node);
-			cur_match = *pending_gt_ptr;
+			cur_node = *pending_gt_ptr;
 			best_gt_len = len;
 			if (best_lt_len < len)
 				len = best_lt_len;
 		}
 		if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
 			*pending_lt_ptr = MATCHFINDER_NULL;
 			*pending_gt_ptr = MATCHFINDER_NULL;
 			*best_len_ret = best_len;
 			return lz_matchptr;
 		}
 	}
 }
 /*
- * Advance the match-finder, but don't search for matches.
+ * Advance the matchfinder, but don't record any matches.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
- *	time bc_matchfinder_init() or bc_matchfinder_slide_window() was called_.
+ *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.
 * @in_end
@ -204,8 +254,14 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
 *	Stop searching if a match of at least this length is found.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.
- * @prev_hash
+ * @next_hash
- *	TODO
+ *	Pointer to the hash code for the current sequence, which was computed
 *	one position in advance so that the binary tree root could be
 *	prefetched.  This is an input/output parameter.
 *
 * Note: this is very similar to bt_matchfinder_get_matches() because both
 * functions must do hashing and tree re-rooting.  This version just doesn't
 * actually record any matches.
 */
 static inline void
 bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
@ -214,66 +270,70 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
 			     const u8 * const in_end,
 			     const unsigned nice_len,
 			     const unsigned max_search_depth,
-			     unsigned long *prev_hash)
+			     u32 * restrict next_hash)
 {
 	unsigned depth_remaining = max_search_depth;
-	unsigned hash;
+	u32 hash;
-	pos_t cur_match;
+	pos_t cur_node;
 	const u8 *matchptr;
 	pos_t *pending_lt_ptr, *pending_gt_ptr;
 	unsigned best_lt_len, best_gt_len;
 	unsigned len;
 	pos_t *children;
-	if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
+	if (unlikely(in_end - in_next < LZ_HASH3_REQUIRED_NBYTES + 1))
 		return;
-	hash = *prev_hash;
+	hash = *next_hash;
-	*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
+	*next_hash = bt_matchfinder_hash_3_bytes(in_next + 1);
-	prefetch(&mf->hash_tab[*prev_hash]);
+	cur_node = mf->hash_tab[hash];
 	cur_match = mf->hash_tab[hash];
 	mf->hash_tab[hash] = in_next - in_base;
 	prefetch(&mf->hash_tab[*next_hash]);
 	depth_remaining = max_search_depth;
-	pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
+	pending_lt_ptr = bt_left_child(mf, in_next - in_base);
-	pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
+	pending_gt_ptr = bt_right_child(mf, in_next - in_base);
 	best_lt_len = 0;
 	best_gt_len = 0;
-	for (;;) {
+	len = 0;
-		if (!matchfinder_match_in_window(cur_match,
+
-						 in_base, in_next) ||
+	if (!matchfinder_node_valid(cur_node, in_base, in_next)) {
-		    !depth_remaining--)
+		*pending_lt_ptr = MATCHFINDER_NULL;
-		{
+		*pending_gt_ptr = MATCHFINDER_NULL;
 			*pending_lt_ptr = MATCHFINDER_INITVAL;
 			*pending_gt_ptr = MATCHFINDER_INITVAL;
 		return;
 	}
-		matchptr = &in_base[cur_match];
+	for (;;) {
-		len = min(best_lt_len, best_gt_len);
+		matchptr = &in_base[cur_node];
 		children = &mf->child_tab[(unsigned long)
 				matchfinder_slot_for_match(cur_match) << 1];
 		if (matchptr[len] == in_next[len]) {
 			len = lz_extend(in_next, matchptr, len + 1, nice_len);
 			if (len == nice_len) {
-				*pending_lt_ptr = children[0];
+				*pending_lt_ptr = *bt_left_child(mf, cur_node);
-				*pending_gt_ptr = children[1];
+				*pending_gt_ptr = *bt_right_child(mf, cur_node);
 				return;
 			}
 		}
 		if (matchptr[len] < in_next[len]) {
-			*pending_lt_ptr = cur_match;
+			*pending_lt_ptr = cur_node;
-			pending_lt_ptr = &children[1];
+			pending_lt_ptr = bt_right_child(mf, cur_node);
-			cur_match = *pending_lt_ptr;
+			cur_node = *pending_lt_ptr;
 			best_lt_len = len;
 			if (best_gt_len < len)
 				len = best_gt_len;
 		} else {
-			*pending_gt_ptr = cur_match;
+			*pending_gt_ptr = cur_node;
-			pending_gt_ptr = &children[0];
+			pending_gt_ptr = bt_left_child(mf, cur_node);
-			cur_match = *pending_gt_ptr;
+			cur_node = *pending_gt_ptr;
 			best_gt_len = len;
 			if (best_lt_len < len)
 				len = best_lt_len;
 		}
 		if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining) {
 			*pending_lt_ptr = MATCHFINDER_NULL;
 			*pending_gt_ptr = MATCHFINDER_NULL;
 			return;
 		}
 	}
 }
--- a/src/compiler-gcc.h
+++ b/src/compiler-gcc.h
@ -35,7 +35,7 @@
 #define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
 			(_a > _b) ? _a : _b; })
-#define swap(a, b) ({ __typeof__(a) _a = a; (a) = (b); (b) = _a; })
+#define swap(a, b) ({ __typeof__(a) _a = (a); (a) = (b); (b) = _a; })
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
 #  define compiler_bswap32 __builtin_bswap32
--- a/src/compiler.h
+++ b/src/compiler.h
@ -9,15 +9,19 @@
 #ifdef __GNUC__
 #  include "compiler-gcc.h"
 #else
-#  warning "Unrecognized compiler.  Please add a header file for your compiler."
+#  error "Unrecognized compiler.  Please add a header file for your compiler."
 #endif
 #ifndef LIBEXPORT
 #  define LIBEXPORT
 #endif
-#ifndef BUILD_BUG_ON
+#ifndef _packed_attribute
-#  define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#  error "missing required definition of _packed_attribute"
 #endif
 #ifndef _aligned_attribute
 #  error "missing required definition of _aligned_attribute"
 #endif
 #ifndef likely
@ -32,13 +36,6 @@
 #  define prefetch(addr)
 #endif
 #ifndef _aligned_attribute
 #  error "missing required definition of _aligned_attribute"
 #endif
 #ifndef _packed_attribute
 #  error "missing required definition of _packed_attribute"
 #endif
 #ifndef CPU_IS_BIG_ENDIAN
 #  error "missing required endianness definition"
@ -47,7 +44,6 @@
 #define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
 #ifndef UNALIGNED_ACCESS_SPEED
 #  warning "assuming unaligned accesses are not allowed"
 #  define UNALIGNED_ACCESS_SPEED 0
 #endif
@ -58,3 +54,7 @@
 #if !defined(min) || !defined(max) || !defined(swap)
 #  error "missing required definitions of min(), max(), and swap() macros"
 #endif
 #ifndef BUILD_BUG_ON
 #  define BUILD_BUG_ON(expr)	((void)sizeof(char[1 - 2*!!(expr)]))
 #endif
--- a/src/deflate_compress.c
+++ b/src/deflate_compress.c
@ -1961,9 +1961,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
 	struct lz_match *cache_end;
 	const u8 *in_block_begin;
 	const u8 *in_block_end;
-	unsigned num_matches;
+	u32 next_hash = 0;
 	unsigned best_len;
 	unsigned long prev_hash = 0;
 	deflate_init_output(&os, out, out_nbytes_avail);
 	deflate_reset_symbol_frequencies(c);
@ -1991,6 +1989,9 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
 		/* Find all match possibilities in this block.  */
 		do {
 			struct lz_match *matches;
 			unsigned best_len;
 			/* Decrease the maximum and nice match lengths if we're
 			 * approaching the end of the input buffer.  */
 			if (unlikely(max_len > in_end - in_next)) {
@ -2028,45 +2029,43 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
 			 *   search for matches at almost all positions, so this
 			 *   advantage of hash chains is negated.
 			 */
-			num_matches =
+			matches = cache_ptr;
 			cache_ptr =
 				bt_matchfinder_get_matches(&c->bt_mf,
 							   in_cur_base,
 							   in_next,
 							   DEFLATE_MIN_MATCH_LEN,
 							   max_len,
 							   nice_len,
 							   c->max_search_depth,
-							   &prev_hash,
+							   &next_hash,
 							   &best_len,
 							   cache_ptr);
-			cache_ptr += num_matches;
+			cache_ptr->length = cache_ptr - matches;
 			cache_ptr->length = num_matches;
 			cache_ptr->offset = *in_next;
 			in_next++;
 			cache_ptr++;
 			if (num_matches) {
 				best_len = cache_ptr[-2].length;
 			/*
-				 * If there was a very long match found, don't
+			 * If there was a very long match found, don't cache any
-				 * cache any matches for the bytes covered by
+			 * matches for the bytes covered by that match.  This
-				 * that match.  This avoids degenerate behavior
+			 * avoids degenerate behavior when compressing highly
-				 * when compressing highly redundant data, where
+			 * redundant data, where the number of matches can be
-				 * the number of matches can be very large.
+			 * very large.
 			 *
-				 * This heuristic doesn't actually hurt the
+			 * This heuristic doesn't actually hurt the compression
-				 * compression ratio very much.  If there's a
+			 * ratio very much.  If there's a long match, then the
-				 * long match, then the data must be highly
+			 * data must be highly compressible, so it doesn't
-				 * compressible, so it doesn't matter much what
+			 * matter much what we do.
 				 * we do.
 			 *
-				 * We also trigger this same case when
+			 * We also trigger this same case when approaching the
-				 * approaching the desired end of the block.
+			 * desired end of the block.  This forces the block to
-				 * This forces the block to reach a "stopping
+			 * reach a "stopping point" where there are no matches
-				 * point" where there are no matches extending
+			 * extending to later positions.  (XXX: this behavior is
 				 * to later positions.  (XXX: this behavior is
 			 * non-optimal and should be improved.)
 			 */
-				if (best_len >= min(nice_len, in_block_end - in_next)) {
+			if (best_len >= DEFLATE_MIN_MATCH_LEN &&
 			    best_len >= min(nice_len, in_block_end - in_next)) {
 				--best_len;
 				do {
 					if (unlikely(max_len > in_end - in_next)) {
@ -2085,7 +2084,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
 								     in_end,
 								     nice_len,
 								     c->max_search_depth,
-									     &prev_hash);
+								     &next_hash);
 					cache_ptr->length = 0;
 					cache_ptr->offset = *in_next;
@ -2093,7 +2092,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
 					cache_ptr++;
 				} while (--best_len);
 			}
 			}
 		} while (in_next < in_block_end);
 		/* All the matches for this block have been cached.  Now compute
--- a/src/endianness.h
+++ b/src/endianness.h
@ -1,7 +1,7 @@
 /*
 * endianness.h
 *
- * Inline functions for endianness conversion.
+ * Macros and inline functions for endianness conversion.
 */
 #pragma once
--- a/src/gzip_compress.c
+++ b/src/gzip_compress.c
@ -57,7 +57,7 @@ gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
 	out_next += 4;
 	/* ISIZE */
-	put_unaligned_u32_le(in_size, out_next);
+	put_unaligned_u32_le((u32)in_size, out_next);
 	out_next += 4;
 	return out_next - (u8 *)out;
--- a/src/gzip_constants.h
+++ b/src/gzip_constants.h
@ -6,8 +6,6 @@
 #pragma once
 #include "compiler.h"
 #define GZIP_MIN_HEADER_SIZE	10
 #define GZIP_FOOTER_SIZE	8
 #define GZIP_MIN_OVERHEAD	(GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
--- a/src/gzip_decompress.c
+++ b/src/gzip_decompress.c
@ -54,20 +54,16 @@ gzip_decompress(struct deflate_decompressor *d,
 	/* Original file name (zero terminated) */
 	if (flg & GZIP_FNAME) {
-		while (*in_next != 0 && ++in_next != in_end)
+		while (*in_next++ != 0 && in_next != in_end)
 			;
-		if (in_next != in_end)
-			in_next++;
 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 			return false;
 	}
 	/* File comment (zero terminated) */
 	if (flg & GZIP_FCOMMENT) {
-		while (*in_next != 0 && ++in_next != in_end)
+		while (*in_next++ != 0 && in_next != in_end)
 			;
-		if (in_next != in_end)
-			in_next++;
 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 			return false;
 	}
--- a/src/hc_matchfinder.h
+++ b/src/hc_matchfinder.h
@ -1,37 +1,102 @@
 /*
 * hc_matchfinder.h
 *
- * This is a Hash Chain (hc) based matchfinder.
+ * ---------------------------------------------------------------------------
 *
 *				   Algorithm
 *
 * This is a Hash Chains (hc) based matchfinder.
 *
 * The data structure is a hash table where each hash bucket contains a linked
- * list of sequences, referenced by position.
+ * list (or "chain") of sequences whose first 3 bytes share the same hash code.
 * Each sequence is identified by its starting position in the input buffer.
 *
- * For each sequence (position) in the input, the first 3 bytes are hashed and
+ * The algorithm processes the input buffer sequentially.  At each byte
- * that sequence (position) is prepended to the appropriate linked list in the
+ * position, the hash code of the first 3 bytes of the sequence beginning at
- * hash table.  Since the sequences are inserted in order, each list is always
+ * that position (the sequence being matched against) is computed.  This
- * sorted by increasing match offset.
+ * identifies the hash bucket to use for that position.  Then, this hash
 * bucket's linked list is searched for matches.  Then, a new linked list node
 * is created to represent the current sequence and is prepended to the list.
 *
- * At the same time as inserting a sequence, we may search the linked list for
+ * This algorithm has several useful properties:
- * matches with that sequence.  At each step, the length of the match is
+ *
- * computed.  The search ends when the sequences get too far away (outside of
+ * - It only finds true Lempel-Ziv matches; i.e., those where the matching
- * the sliding window), or when the list ends (in the code this is the same
+ *   sequence occurs prior to the sequence being matched against.
- * check as "too far away"), or when 'max_search_depth' positions have been
+ *
- * searched, or when a match of at least 'nice_len' bytes has been found.
+ * - The sequences in each linked list are always sorted by decreasing starting
 *   position.  Therefore, the closest (smallest offset) matches are found
 *   first, which in many compression formats tend to be the cheapest to encode.
 *
 * - Although fast running time is not guaranteed due to the possibility of the
 *   lists getting very long, the worst degenerate behavior can be easily
 *   prevented by capping the number of nodes searched at each position.
 *
 * - If the compressor decides not to search for matches at a certain position,
 *   then that position can be quickly inserted without searching the list.
 *
 * - The algorithm is adaptable to sliding windows: just store the positions
 *   relative to a "base" value that is updated from time to time, and stop
 *   searching each list when the sequences get too far away.
 *
 * ---------------------------------------------------------------------------
 *
 *				Notes on usage
 *
 * You must define MATCHFINDER_WINDOW_ORDER before including this header because
 * that determines which integer type to use for positions.  Since 16-bit
 * integers are faster than 32-bit integers due to reduced memory usage (and
 * therefore reduced cache pressure), the code only uses 32-bit integers if they
 * are needed to represent all possible positions.
 *
 * In addition, you must allocate the 'struct hc_matchfinder' on a
 * MATCHFINDER_ALIGNMENT-aligned boundary.
 *
 * ----------------------------------------------------------------------------
 *
 *				 Optimizations
 *
 * The longest_match() and skip_positions() functions are inlined into the
 * compressors that use them.  This isn't just about saving the overhead of a
 * function call.  These functions are intended to be called from the inner
 * loops of compressors, where giving the compiler more control over register
 * allocation is very helpful.  There is also significant benefit to be gained
 * from allowing the CPU to predict branches independently at each call site.
 * For example, "lazy"-style compressors can be written with two calls to
 * longest_match(), each of which starts with a different 'best_len' and
 * therefore has significantly different performance characteristics.
 *
 * Although any hash function can be used, a multiplicative hash is fast and
 * works well.
 *
 * On some processors, it is significantly faster to extend matches by whole
 * words (32 or 64 bits) instead of by individual bytes.  For this to be the
 * case, the processor must implement unaligned memory accesses efficiently and
 * must have either a fast "find first set bit" instruction or a fast "find last
 * set bit" instruction, depending on the processor's endianness.
 *
 * The code uses one loop for finding the first match and one loop for finding a
 * longer match.  Each of these loops is tuned for its respective task and in
 * combination are faster than a single generalized loop that handles both
 * tasks.
 *
 * The code also uses a tight inner loop that only compares the last and first
 * bytes of a potential match.  It is only when these bytes match that a full
 * match extension is attempted.
 *
 * ----------------------------------------------------------------------------
 */
 #pragma once
 #include "lz_extend.h"
-#include "lz_hash3.h"
+#include "lz_hash.h"
 #include "matchfinder_common.h"
 #include "unaligned.h"
-#ifndef HC_MATCHFINDER_HASH_ORDER
+#if MATCHFINDER_WINDOW_ORDER < 14
 #  if MATCHFINDER_WINDOW_ORDER < 14
 #  define HC_MATCHFINDER_HASH_ORDER 14
-#  else
+#else
 #  define HC_MATCHFINDER_HASH_ORDER 15
 #  endif
 #endif
 #define HC_MATCHFINDER_HASH_LENGTH	(1UL << HC_MATCHFINDER_HASH_ORDER)
@ -73,17 +138,18 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
 *	time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.  This is the
- *	pointer to the bytes being matched against.
+ *	pointer to the sequence being matched against.
 * @best_len
- *	Require a match at least this long.
+ *	Require a match longer than this length.
 * @max_len
- *	Maximum match length to return.
+ *	The maximum permissible match length at this position.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 *	Must be <= @max_len.
 * @max_search_depth
- *	Limit on the number of potential matches to consider.
+ *	Limit on the number of potential matches to consider.  Must be >= 1.
 * @offset_ret
- *	The match offset is returned here.
+ *	If a match is found, its offset is returned in this location.
 *
 * Return the length of the match found, or 'best_len' if no match longer than
 * 'best_len' was found.
@ -102,61 +168,57 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 	const u8 *best_matchptr = best_matchptr; /* uninitialized */
 	const u8 *matchptr;
 	unsigned len;
 	unsigned hash;
 	pos_t cur_match;
 	u32 first_3_bytes;
 	u32 hash;
 	pos_t cur_node;
-	/* Insert the current sequence into the appropriate hash chain.  */
+	/* Insert the current sequence into the appropriate linked list.  */
-	if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
+	if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
 		goto out;
 	first_3_bytes = load_u24_unaligned(in_next);
-	hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
+	hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
-	cur_match = mf->hash_tab[hash];
+	cur_node = mf->hash_tab[hash];
-	mf->next_tab[in_next - in_base] = cur_match;
+	mf->next_tab[in_next - in_base] = cur_node;
 	mf->hash_tab[hash] = in_next - in_base;
 	if (unlikely(best_len >= max_len))
 		goto out;
-	/* Search the appropriate hash chain for matches.  */
+	/* Search the appropriate linked list for matches.  */
-	if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
+	if (!(matchfinder_node_valid(cur_node, in_base, in_next)))
 		goto out;
 	if (best_len < 3) {
 		for (;;) {
 			/* No length 3 match found yet.
 			 * Check the first 3 bytes.  */
-			matchptr = &in_base[cur_match];
+			matchptr = &in_base[cur_node];
 			if (load_u24_unaligned(matchptr) == first_3_bytes)
 				break;
-			/* Not a match; keep trying.  */
+			/* The first 3 bytes did not match.  Keep trying.  */
-			cur_match = mf->next_tab[
+			cur_node = mf->next_tab[
-					matchfinder_slot_for_match(cur_match)];
+					matchfinder_slot_for_match(cur_node)];
-			if (!matchfinder_match_in_window(cur_match,
+			if (!matchfinder_node_valid(cur_node, in_base, in_next) ||
-							 in_base, in_next))
+			    !--depth_remaining)
 				goto out;
 			if (!--depth_remaining)
 				goto out;
 		}
-		/* Found a length 3 match.  */
+		/* Found a match of length >= 3.  Extend it to its full length.  */
 		best_matchptr = matchptr;
 		best_len = lz_extend(in_next, best_matchptr, 3, max_len);
 		if (best_len >= nice_len)
 			goto out;
-		cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
+		cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
-		if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+		if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
 			goto out;
 		if (!--depth_remaining)
 			goto out;
 	}
 	for (;;) {
 		for (;;) {
-			matchptr = &in_base[cur_match];
+			matchptr = &in_base[cur_node];
 			/* Already found a length 3 match.  Try for a longer match;
 			 * start by checking the last 2 bytes and the first 4 bytes.  */
@ -170,17 +232,16 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 		#endif
 				break;
-			cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
+			cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
-			if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+			if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
 				goto out;
 			if (!--depth_remaining)
 				goto out;
 		}
-		if (UNALIGNED_ACCESS_IS_FAST)
+	#if UNALIGNED_ACCESS_IS_FAST
 		len = 4;
-		else
+	#else
 		len = 0;
 	#endif
 		len = lz_extend(in_next, matchptr, len, max_len);
 		if (len > best_len) {
 			best_len = len;
@ -188,10 +249,8 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 			if (best_len >= nice_len)
 				goto out;
 		}
-		cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
+		cur_node = mf->next_tab[matchfinder_slot_for_match(cur_node)];
-		if (!matchfinder_match_in_window(cur_match, in_base, in_next))
+		if (!matchfinder_node_valid(cur_node, in_base, in_next) || !--depth_remaining)
 			goto out;
 		if (!--depth_remaining)
 			goto out;
 	}
 out:
@ -200,7 +259,7 @@ out:
 }
 /*
- * Advance the match-finder, but don't search for matches.
+ * Advance the matchfinder, but don't search for matches.
 *
 * @mf
 *	The matchfinder structure.
@ -212,7 +271,7 @@ out:
 * @in_end
 *	Pointer to the end of the input buffer.
 * @count
- *	Number of bytes to skip; must be > 0.
+ *	The number of bytes to advance.  Must be > 0.
 */
 static inline void
 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
@ -221,13 +280,13 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
 			      const u8 *in_end,
 			      unsigned count)
 {
-	unsigned hash;
+	u32 hash;
-	if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
+	if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
 		return;
 	do {
-		hash = lz_hash3(in_next, HC_MATCHFINDER_HASH_ORDER);
+		hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
 		mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
 		mf->hash_tab[hash] = in_next - in_base;
 		in_next++;
--- a/src/lz_hash.h
+++ b/src/lz_hash.h
@ -0,0 +1,41 @@
 /*
 * lz_hash.h
 *
 * Hashing for Lempel-Ziv matchfinding.
 */
 #ifndef _LZ_HASH_H
 #define _LZ_HASH_H
 #include "unaligned.h"
 /*
 * Multiplicative hash for Lempel-Ziv matchfinding.  @seq carries the sequence
 * prefix in its low-order bits.  It is multiplied by a carefully-chosen large
 * constant and the product is truncated to 32 bits; the top @num_bits bits of
 * that truncated product, which have the most randomness, form the hash value.
 */
 static inline u32
 lz_hash(u32 seq, unsigned num_bits)
 {
 	const u32 product = (u32)(seq * 0x1E35A7BD);
 	return product >> (32 - num_bits);
 }
 /*
 * Compute a @num_bits-bit hash of the 3-byte sequence that starts at @p.  The
 * caller must ensure that at least LZ_HASH3_REQUIRED_NBYTES bytes of data are
 * available at @p; note that this may be more than 3.
 */
 static inline u32
 lz_hash_3_bytes(const u8 *p, unsigned num_bits)
 {
 	const u32 first_3_bytes = load_u24_unaligned(p);
 	/* With 24 or more hash bits, the 24-bit sequence is returned unhashed.  */
 	return (num_bits < 24) ? lz_hash(first_3_bytes, num_bits) : first_3_bytes;
 }
 #define LZ_HASH3_REQUIRED_NBYTES LOAD_U24_REQUIRED_NBYTES
 #endif /* _LZ_HASH_H */
--- a/src/lz_hash3.h
+++ b/src/lz_hash3.h
@ -1,49 +0,0 @@
 /*
 * lz_hash3.h
 *
 * 3-byte hashing for Lempel-Ziv matchfinding.
 */
 #pragma once
 #include "unaligned.h"
 /* Reduce a natively-loaded 32-bit value to 24 bits: mask off the high-order
 * octet on little endian CPUs, otherwise shift out the low-order octet.  */
 static inline u32
 loaded_u32_to_u24(u32 v)
 {
 	return (CPU_IS_LITTLE_ENDIAN) ? (v & 0xFFFFFF) : (v >> 8);
 }
 /* Load 3 bytes from @p into the low-order 24 bits of a 32-bit value, using a
 * single unaligned 32-bit load when UNALIGNED_ACCESS_IS_FAST is set.  */
 static inline u32
 load_u24_unaligned(const u8 *p)
 {
 	if (UNALIGNED_ACCESS_IS_FAST) {
 		const u32 word = load_u32_unaligned(p);
 		return loaded_u32_to_u24(word);
 	}
 	/* Bytewise fallback: p[0] lands in the lowest-order octet.  */
 	return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
 }
 /* Multiply @str by a fixed constant, truncate the product to 32 bits, and
 * return its top @num_bits bits as the hash value.  */
 static inline u32
 lz_hash3_u24(u32 str, unsigned num_bits)
 {
 	const u32 product = (u32)(str * 0x1E35A7BD);
 	return product >> (32 - num_bits);
 }
 /*
 * Compute a 'num_bits'-bit hash of the next 3-byte sequence in the window.
 * At least LZ_HASH_REQUIRED_NBYTES bytes must be available at 'p'; this might
 * be 4 bytes rather than 3 because an unaligned load is faster on some
 * architectures.
 */
 static inline u32
 lz_hash3(const u8 *p, unsigned num_bits)
 {
 	const u32 first_3_bytes = load_u24_unaligned(p);
 	return lz_hash3_u24(first_3_bytes, num_bits);
 }
 /* Number of bytes the hash function actually requires be available, due to the
 * possibility of an unaligned load.  */
 #define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)
--- a/src/matchfinder_avx2.h
+++ b/src/matchfinder_avx2.h
@ -16,9 +16,9 @@ matchfinder_init_avx2(pos_t *data, size_t size)
 		return false;
 	if (sizeof(pos_t) == 2)
-		v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
+		v = _mm256_set1_epi16((u16)MATCHFINDER_NULL);
 	else if (sizeof(pos_t) == 4)
-		v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
+		v = _mm256_set1_epi32((u32)MATCHFINDER_NULL);
 	else
 		return false;
--- a/src/matchfinder_common.h
+++ b/src/matchfinder_common.h
@ -60,7 +60,7 @@ static inline bool
 matchfinder_memset_init_okay(void)
 {
 	/* All bytes must match in order to use memset.  */
-	const pos_t v = MATCHFINDER_INITVAL;
+	const pos_t v = MATCHFINDER_NULL;
 	if (sizeof(pos_t) == 2)
 		return (u8)v == (u8)(v >> 8);
 	if (sizeof(pos_t) == 4)
@ -93,12 +93,12 @@ matchfinder_init(pos_t *data, size_t num_entries)
 #endif
 	if (matchfinder_memset_init_okay()) {
-		memset(data, (u8)MATCHFINDER_INITVAL, size);
+		memset(data, (u8)MATCHFINDER_NULL, size);
 		return;
 	}
 	for (size_t i = 0; i < num_entries; i++)
-		data[i] = MATCHFINDER_INITVAL;
+		data[i] = MATCHFINDER_NULL;
 }
 #if MATCHFINDER_IS_SLIDING
--- a/src/matchfinder_nonsliding.h
+++ b/src/matchfinder_nonsliding.h
@ -16,12 +16,12 @@ typedef u32 pos_t;
 /* Not all the bits of the position type are needed, so the sign bit can be
 * reserved to mean "out of bounds".  */
-#define MATCHFINDER_INITVAL ((pos_t)-1)
+#define MATCHFINDER_NULL ((pos_t)-1)
 static inline bool
-matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
+matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
 {
-	return !(cur_match & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
+	return !(cur_node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
 }
 #else
@ -30,18 +30,18 @@ matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_nex
 * This prevents the beginning of the buffer from matching anything; however,
 * this doesn't matter much.  */
-#define MATCHFINDER_INITVAL ((pos_t)0)
+#define MATCHFINDER_NULL ((pos_t)0)
 static inline bool
-matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
+matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
 {
-	return cur_match != 0;
+	return cur_node != 0;
 }
 #endif
 static inline pos_t
-matchfinder_slot_for_match(pos_t cur_match)
+matchfinder_slot_for_match(pos_t cur_node)
 {
-	return cur_match;
+	return cur_node;
 }
--- a/src/matchfinder_sliding.h
+++ b/src/matchfinder_sliding.h
@ -13,18 +13,18 @@ typedef s16 pos_t;
 typedef s32 pos_t;
 #endif
-#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
+#define MATCHFINDER_NULL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
 /* In the sliding window case, positions are stored relative to 'in_base'.  */
 static inline bool
-matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
+matchfinder_node_valid(pos_t cur_node, const u8 *in_base, const u8 *in_next)
 {
-	return cur_match > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
+	return cur_node > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
 }
 static inline pos_t
-matchfinder_slot_for_match(pos_t cur_match)
+matchfinder_slot_for_match(pos_t cur_node)
 {
-	return cur_match & (MATCHFINDER_WINDOW_SIZE - 1);
+	return cur_node & (MATCHFINDER_WINDOW_SIZE - 1);
 }
--- a/src/matchfinder_sse2.h
+++ b/src/matchfinder_sse2.h
@ -16,9 +16,9 @@ matchfinder_init_sse2(pos_t *data, size_t size)
 		return false;
 	if (sizeof(pos_t) == 2)
-		v = _mm_set1_epi16(MATCHFINDER_INITVAL);
+		v = _mm_set1_epi16((u16)MATCHFINDER_NULL);
 	else if (sizeof(pos_t) == 4)
-		v = _mm_set1_epi32(MATCHFINDER_INITVAL);
+		v = _mm_set1_epi32((u32)MATCHFINDER_NULL);
 	else
 		return false;
--- a/src/types.h
+++ b/src/types.h
@ -6,9 +6,9 @@
 #pragma once
 #include <inttypes.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 typedef uint8_t  u8;
 typedef uint16_t u16;
--- a/src/unaligned.h
+++ b/src/unaligned.h
@ -1,7 +1,7 @@
 /*
 * unaligned.h
 *
- * Inline functions for unaligned memory access.
+ * Inline functions for unaligned memory accesses.
 */
 #pragma once
@ -214,3 +214,36 @@ put_unaligned_u32_be(u32 v, void *p)
 		p8[3] = (v >> 0) & 0xFF;
 	}
 }
 /*
 * Convert a 32-bit value that was loaded with the platform's native endianness
 * into a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 bits
 * hold the first 3 bytes found at the memory location the input was loaded
 * from.  The arrangement of those 3 bytes as octets is platform-dependent.
 */
 static inline u32
 loaded_u32_to_u24(u32 v)
 {
 	/* Little endian: the first 3 bytes are the low-order octets, so mask.
 	 * Otherwise they are the high-order octets, so shift them down.  */
 	return (CPU_IS_LITTLE_ENDIAN) ? (v & 0xFFFFFF) : (v >> 8);
 }
 /*
 * Load the next 3 bytes from the memory location @p into the 24 low-order bits
 * of a 32-bit value.  The order in which the 3 bytes will be arranged as octets
 * in the 24 bits is platform-dependent.  At least LOAD_U24_REQUIRED_NBYTES
 * bytes must be available at @p; note that this may be more than 3.
 */
 static inline u32
 load_u24_unaligned(const u8 *p)
 {
 #if UNALIGNED_ACCESS_IS_FAST
 #  define LOAD_U24_REQUIRED_NBYTES 4
 	return loaded_u32_to_u24(load_u32_unaligned(p));
 #else
 #  define LOAD_U24_REQUIRED_NBYTES 3
 	return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
 #endif
 }
--- a/test/benchmark.c
+++ b/test/benchmark.c
@ -1,11 +1,9 @@
 /*
 * benchmark.c - A compression testing and benchmark program.
 *
- * The author dedicates this file to the public domain.
+ * This file has no copyright assigned and is placed in the Public Domain.
 * You can do whatever you want with this file.
 */
 #define _FILE_OFFSET_BITS 64
 #define _GNU_SOURCE
@ -419,9 +417,9 @@ main(int argc, char **argv)
 	       wrapper == NO_WRAPPER ? "None" :
 	       wrapper == ZLIB_WRAPPER ? "zlib" : "gzip");
 	printf("\tCompression engine: %s\n",
-	       compress_with_libz ? "zlib" : "libdeflate");
+	       compress_with_libz ? "libz" : "libdeflate");
 	printf("\tDecompression engine: %s\n",
-	       decompress_with_libz ? "zlib" : "libdeflate");
+	       decompress_with_libz ? "libz" : "libdeflate");
 	ubuf1 = malloc(chunk_size);
 	ubuf2 = malloc(chunk_size);