deflate_compress: misc cleanups for new code

This commit is contained in:
Eric Biggers 2022-01-02 19:44:10 -06:00
parent 71db68b27f
commit 4dd63ea272

View File

@ -131,6 +131,8 @@
* *
* BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher * BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher
* value isn't very useful since the calculations are approximate anyway. * value isn't very useful since the calculations are approximate anyway.
*
* BIT_COST doesn't apply to deflate_flush_block(), which considers whole bits.
*/ */
#define BIT_COST 16 #define BIT_COST 16
@ -178,7 +180,7 @@ check_buildtime_parameters(void)
{ {
/* /*
* Verify that MIN_BLOCK_LENGTH is being honored, as * Verify that MIN_BLOCK_LENGTH is being honored, as
* libdeflate_compress_bound() depends on it. * libdeflate_deflate_compress_bound() depends on it.
*/ */
STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH); STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH); STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
@ -548,14 +550,13 @@ struct libdeflate_compressor {
* worst-case number of nodes, which occurs when the * worst-case number of nodes, which occurs when the
* final block is of length SOFT_MAX_BLOCK_LENGTH + * final block is of length SOFT_MAX_BLOCK_LENGTH +
* MIN_BLOCK_LENGTH, or when any block is of length * MIN_BLOCK_LENGTH, or when any block is of length
* SOFT_MAX_BLOCK_LENGTH + DEFLATE_MAX_MATCH_LEN * SOFT_MAX_BLOCK_LENGTH - 1 + DEFLATE_MAX_MATCH_LEN.
* - 1. Add one for the end-of-block node. * Add 1 for the end-of-block node.
*/ */
struct deflate_optimum_node optimum_nodes[ struct deflate_optimum_node optimum_nodes[
SOFT_MAX_BLOCK_LENGTH + MAX(SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH,
MAX(MIN_BLOCK_LENGTH, SOFT_MAX_BLOCK_LENGTH - 1 +
DEFLATE_MAX_MATCH_LEN - 1) DEFLATE_MAX_MATCH_LEN) + 1];
+ 1];
/* The current cost model being used. */ /* The current cost model being used. */
struct deflate_costs costs; struct deflate_costs costs;
@ -2183,11 +2184,11 @@ calculate_min_match_len(const u8 *data, size_t data_len,
{ {
u8 used[256] = { 0 }; u8 used[256] = { 0 };
unsigned num_used_literals = 0; unsigned num_used_literals = 0;
int i; size_t i;
/* /*
* For an initial approximation, scan the first 4 KiB of data. * For an initial approximation, scan the first 4 KiB of data. The
* recalculate_min_match_len() will update the min_len later. * caller may use recalculate_min_match_len() to update min_len later.
*/ */
data_len = MIN(data_len, 4096); data_len = MIN(data_len, 4096);
for (i = 0; i < data_len; i++) for (i = 0; i < data_len; i++)
@ -2354,16 +2355,14 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
const u8 * const in_block_begin = in_next; const u8 * const in_block_begin = in_next;
const u8 * const in_max_block_end = choose_max_block_end( const u8 * const in_max_block_end = choose_max_block_end(
in_next, in_end, SOFT_MAX_BLOCK_LENGTH); in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
unsigned min_len;
struct deflate_sequence *seq = c->p.g.sequences; struct deflate_sequence *seq = c->p.g.sequences;
unsigned min_len;
init_block_split_stats(&c->split_stats); init_block_split_stats(&c->split_stats);
deflate_begin_sequences(c, seq); deflate_begin_sequences(c, seq);
min_len = calculate_min_match_len(in_next, min_len = calculate_min_match_len(in_next,
in_max_block_end - in_next, in_max_block_end - in_next,
c->max_search_depth); c->max_search_depth);
do { do {
u32 length; u32 length;
u32 offset; u32 offset;
@ -2440,12 +2439,11 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
in_next, in_end, SOFT_MAX_BLOCK_LENGTH); in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
const u8 *next_recalc_min_len = const u8 *next_recalc_min_len =
in_next + MIN(in_end - in_next, 10000); in_next + MIN(in_end - in_next, 10000);
unsigned min_len = DEFLATE_MIN_MATCH_LEN;
struct deflate_sequence *seq = c->p.g.sequences; struct deflate_sequence *seq = c->p.g.sequences;
unsigned min_len;
init_block_split_stats(&c->split_stats); init_block_split_stats(&c->split_stats);
deflate_begin_sequences(c, seq); deflate_begin_sequences(c, seq);
min_len = calculate_min_match_len(in_next, min_len = calculate_min_match_len(in_next,
in_max_block_end - in_next, in_max_block_end - in_next,
c->max_search_depth); c->max_search_depth);
@ -2876,7 +2874,7 @@ deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
u32 literal_freq = block_length; u32 literal_freq = block_length;
u32 match_freq = 0; u32 match_freq = 0;
u32 cutoff; u32 cutoff;
int i; unsigned i;
/* Calculate the number of distinct literals that exist in the data. */ /* Calculate the number of distinct literals that exist in the data. */
cutoff = literal_freq >> 11; /* Ignore literals used very rarely */ cutoff = literal_freq >> 11; /* Ignore literals used very rarely */
@ -2911,6 +2909,7 @@ deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
else else
i = 0; /* few matches */ i = 0; /* few matches */
STATIC_ASSERT(BIT_COST == 16);
*lit_cost = default_litlen_costs[i].used_lits_to_lit_cost[ *lit_cost = default_litlen_costs[i].used_lits_to_lit_cost[
num_used_literals]; num_used_literals];
*len_sym_cost = default_litlen_costs[i].len_sym_cost; *len_sym_cost = default_litlen_costs[i].len_sym_cost;