lib/deflate_compress: select min_size_to_compress based on level

Inputs smaller than 16 bytes are currently output as uncompressed data,
regardless of the compression level.  That isn't ideal: the higher the
compression level, the more we should bother trying to compress very
small inputs; and the lower the compression level, the less we should
bother.

Use the formula 'cutoff = 56 - 4 * level', which produces the following
cutoffs (inputs smaller than the cutoff are output uncompressed):

        Level  Cutoff
        -----  ------
        0      56
        1      52
        2      48
        3      44
        4      40
        5      36
        6      32
        7      28
        8      24
        9      20
        10     16
        11     12
        12     8

Update https://github.com/ebiggers/libdeflate/issues/67
This commit is contained in:
Eric Biggers 2020-10-18 15:50:45 -07:00
parent a2d92667b5
commit 166084acaa

View File

@ -366,6 +366,9 @@ struct libdeflate_compressor {
/* The compression level with which this compressor was created. */ /* The compression level with which this compressor was created. */
unsigned compression_level; unsigned compression_level;
/* Anything smaller than this we won't bother trying to compress. */
unsigned min_size_to_compress;
/* Temporary space for Huffman code output */ /* Temporary space for Huffman code output */
u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS]; u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS];
u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
@ -2688,6 +2691,9 @@ libdeflate_alloc_compressor(int compression_level)
struct libdeflate_compressor *c; struct libdeflate_compressor *c;
size_t size = offsetof(struct libdeflate_compressor, p); size_t size = offsetof(struct libdeflate_compressor, p);
if (compression_level < 0 || compression_level > 12)
return NULL;
#if SUPPORT_NEAR_OPTIMAL_PARSING #if SUPPORT_NEAR_OPTIMAL_PARSING
if (compression_level >= 8) if (compression_level >= 8)
size += sizeof(c->p.n); size += sizeof(c->p.n);
@ -2702,6 +2708,14 @@ libdeflate_alloc_compressor(int compression_level)
if (!c) if (!c)
return NULL; return NULL;
c->compression_level = compression_level;
/*
* The higher the compression level, the more we should bother trying to
* compress very small inputs.
*/
c->min_size_to_compress = 56 - (compression_level * 4);
switch (compression_level) { switch (compression_level) {
case 0: case 0:
c->impl = deflate_compress_none; c->impl = deflate_compress_none;
@ -2766,7 +2780,7 @@ libdeflate_alloc_compressor(int compression_level)
c->nice_match_length = 80; c->nice_match_length = 80;
c->p.n.num_optim_passes = 3; c->p.n.num_optim_passes = 3;
break; break;
case 12: default:
c->impl = deflate_compress_near_optimal; c->impl = deflate_compress_near_optimal;
c->max_search_depth = 100; c->max_search_depth = 100;
c->nice_match_length = 133; c->nice_match_length = 133;
@ -2778,19 +2792,14 @@ libdeflate_alloc_compressor(int compression_level)
c->max_search_depth = 150; c->max_search_depth = 150;
c->nice_match_length = 200; c->nice_match_length = 200;
break; break;
case 9: default:
c->impl = deflate_compress_lazy; c->impl = deflate_compress_lazy;
c->max_search_depth = 200; c->max_search_depth = 200;
c->nice_match_length = DEFLATE_MAX_MATCH_LEN; c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
break; break;
#endif #endif
default:
libdeflate_aligned_free(c);
return NULL;
} }
c->compression_level = compression_level;
deflate_init_offset_slot_fast(c); deflate_init_offset_slot_fast(c);
deflate_init_static_codes(c); deflate_init_static_codes(c);
@ -2806,7 +2815,7 @@ libdeflate_deflate_compress(struct libdeflate_compressor *c,
return 0; return 0;
/* For extremely small inputs just use a single uncompressed block. */ /* For extremely small inputs just use a single uncompressed block. */
if (unlikely(in_nbytes < 16)) { if (unlikely(in_nbytes < c->min_size_to_compress)) {
struct deflate_output_bitstream os; struct deflate_output_bitstream os;
deflate_init_output(&os, out, out_nbytes_avail); deflate_init_output(&os, out, out_nbytes_avail);
if (in_nbytes == 0) if (in_nbytes == 0)