From 166084acaa12e0b47cf5c48302bd9dd1d124f442 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sun, 18 Oct 2020 15:50:45 -0700
Subject: [PATCH] lib/deflate_compress: select min_size_to_compress based on
 level

The cutoff for outputting uncompressed data is currently < 16 bytes for
all compression levels.  That isn't ideal: the higher the compression
level, the more it is worth trying to compress very small inputs; and
the lower the compression level, the less it is worth the effort.

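Use the formula 'cutoff = 56 - 4 * level', where inputs shorter than the
cutoff (in bytes) are output uncompressed.  This produces the following
cutoffs: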

        Level  Cutoff
        -----  ------
        0      56
        1      52
        2      48
        3      44
        4      40
        5      36
        6      32
        7      28
        8      24
        9      20
        10     16
        11     12
        12     8

Update https://github.com/ebiggers/libdeflate/issues/67
---
 lib/deflate_compress.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/lib/deflate_compress.c b/lib/deflate_compress.c
index eac3d89..36572f7 100644
--- a/lib/deflate_compress.c
+++ b/lib/deflate_compress.c
@@ -366,6 +366,9 @@ struct libdeflate_compressor {
 	/* The compression level with which this compressor was created.  */
 	unsigned compression_level;
 
+	/* Anything smaller than this we won't bother trying to compress.  */
+	unsigned min_size_to_compress;
+
 	/* Temporary space for Huffman code output  */
 	u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS];
 	u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
@@ -2688,6 +2691,9 @@ libdeflate_alloc_compressor(int compression_level)
 	struct libdeflate_compressor *c;
 	size_t size = offsetof(struct libdeflate_compressor, p);
 
+	if (compression_level < 0 || compression_level > 12)
+		return NULL;
+
 #if SUPPORT_NEAR_OPTIMAL_PARSING
 	if (compression_level >= 8)
 		size += sizeof(c->p.n);
@@ -2702,6 +2708,14 @@ libdeflate_alloc_compressor(int compression_level)
 	if (!c)
 		return NULL;
 
+	c->compression_level = compression_level;
+
+	/*
+	 * The higher the compression level, the more we should bother trying to
+	 * compress very small inputs.
+	 */
+	c->min_size_to_compress = 56 - (compression_level * 4);
+
 	switch (compression_level) {
 	case 0:
 		c->impl = deflate_compress_none;
@@ -2766,7 +2780,7 @@ libdeflate_alloc_compressor(int compression_level)
 		c->nice_match_length = 80;
 		c->p.n.num_optim_passes = 3;
 		break;
-	case 12:
+	default:
 		c->impl = deflate_compress_near_optimal;
 		c->max_search_depth = 100;
 		c->nice_match_length = 133;
@@ -2778,19 +2792,14 @@ libdeflate_alloc_compressor(int compression_level)
 		c->max_search_depth = 150;
 		c->nice_match_length = 200;
 		break;
-	case 9:
+	default:
 		c->impl = deflate_compress_lazy;
 		c->max_search_depth = 200;
 		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
 		break;
 #endif
-	default:
-		libdeflate_aligned_free(c);
-		return NULL;
 	}
 
-	c->compression_level = compression_level;
-
 	deflate_init_offset_slot_fast(c);
 	deflate_init_static_codes(c);
 
@@ -2806,7 +2815,7 @@ libdeflate_deflate_compress(struct libdeflate_compressor *c,
 		return 0;
 
 	/* For extremely small inputs just use a single uncompressed block. */
-	if (unlikely(in_nbytes < 16)) {
+	if (unlikely(in_nbytes < c->min_size_to_compress)) {
 		struct deflate_output_bitstream os;
 		deflate_init_output(&os, out, out_nbytes_avail);
 		if (in_nbytes == 0)