From 193dedc73fb1401be8835f4c37eff9603ea3052c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 31 Dec 2021 16:04:49 -0600 Subject: [PATCH] deflate_compress: improve match scoring in lazy compressor In the lazy compressor, it's usually worthwhile to (quickly) consider the match offset too, not just the match length. --- lib/deflate_compress.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/deflate_compress.c b/lib/deflate_compress.c index fe6b9b5..9b00fcd 100644 --- a/lib/deflate_compress.c +++ b/lib/deflate_compress.c @@ -2072,7 +2072,7 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c, /* * This is the "lazy" DEFLATE compressor. Before choosing a match, it checks to - * see if there's a longer match at the next position. If yes, it outputs a + * see if there's a better match at the next position. If yes, it outputs a * literal and continues to the next position. If no, it outputs the match. */ static size_t @@ -2153,7 +2153,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c, } /* - * Try to find a longer match at the next position. + * Try to find a better match at the next position. * * Note: since we already have a match at the *current* * position, we use only half the 'max_search_depth' @@ -2164,7 +2164,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c, * Note: it's possible to structure the code such that * there's only one call to longest_match(), which * handles both the "find the initial match" and "try to - * find a longer match" cases. However, it is faster to + * find a better match" cases. However, it is faster to * have two call sites, with longest_match() inlined at * each. */ @@ -2174,15 +2174,18 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c, &c->p.g.hc_mf, &in_cur_base, in_next++, - cur_len, + cur_len - 1, max_len, nice_len, c->max_search_depth >> 1, next_hashes, &next_offset); - if (next_len > cur_len) { + if (next_len >= cur_len && + 4 * (int)(next_len - cur_len) + + ((int)bsr32(cur_offset) - + (int)bsr32(next_offset)) > 2) { /* - * Found a longer match at the next position. + * Found a better match at the next position. * Output a literal. Then the next match * becomes the current match. */ @@ -2193,7 +2196,7 @@ deflate_compress_lazy(struct libdeflate_compressor * restrict c, goto have_cur_match; } /* - * No longer match at the next position. Output the + * No better match at the next position. Output the * current match. */ deflate_choose_match(c, cur_len, cur_offset,