deflate_compress: improve match scoring in lazy compressor

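In the lazy compressor, it's usually worthwhile to (quickly) consider
the match offset too, not just the match length.  The match found at
the next position is now preferred when it is at least as long as the
current match and

    4 * (next_len - cur_len) + (bsr32(cur_offset) - bsr32(next_offset)) > 2

so a match of equal length can still replace the current one when the
bsr32() of its offset is at least 3 smaller.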
This commit is contained in:
Eric Biggers 2021-12-31 16:04:49 -06:00
parent 4b7e9029d1
commit 193dedc73f

View File

@ -2072,7 +2072,7 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
/*
* This is the "lazy" DEFLATE compressor. Before choosing a match, it checks to
* see if there's a longer match at the next position. If yes, it outputs a
* see if there's a better match at the next position. If yes, it outputs a
* literal and continues to the next position. If no, it outputs the match.
*/
static size_t
@ -2153,7 +2153,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c,
}
/*
* Try to find a longer match at the next position.
* Try to find a better match at the next position.
*
* Note: since we already have a match at the *current*
* position, we use only half the 'max_search_depth'
@ -2164,7 +2164,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c,
* Note: it's possible to structure the code such that
* there's only one call to longest_match(), which
* handles both the "find the initial match" and "try to
* find a longer match" cases. However, it is faster to
* find a better match" cases. However, it is faster to
* have two call sites, with longest_match() inlined at
* each.
*/
@ -2174,15 +2174,18 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c,
&c->p.g.hc_mf,
&in_cur_base,
in_next++,
cur_len,
cur_len - 1,
max_len,
nice_len,
c->max_search_depth >> 1,
next_hashes,
&next_offset);
if (next_len > cur_len) {
if (next_len >= cur_len &&
4 * (int)(next_len - cur_len) +
((int)bsr32(cur_offset) -
(int)bsr32(next_offset)) > 2) {
/*
* Found a longer match at the next position.
* Found a better match at the next position.
* Output a literal. Then the next match
* becomes the current match.
*/
@ -2193,7 +2196,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c,
goto have_cur_match;
}
/*
* No longer match at the next position. Output the
* No better match at the next position. Output the
* current match.
*/
deflate_choose_match(c, cur_len, cur_offset,