Compressor updates

Eric Biggers 2016-06-11 15:33:27 -05:00
parent e3cfa7b5cd
commit f649a4b8db
2 changed files with 167 additions and 148 deletions

View File

@@ -211,8 +211,7 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 		matchptr = &in_base[cur_node];
 		if (matchptr[len] == in_next[len]) {
-			len = lz_extend(in_next, matchptr, len + 1,
-					(record_matches ? max_len : nice_len));
+			len = lz_extend(in_next, matchptr, len + 1, max_len);
 			if (!record_matches || len > best_len) {
 				if (record_matches) {
 					best_len = len;
@@ -325,7 +324,6 @@ static forceinline void
 bt_matchfinder_skip_position(struct bt_matchfinder *mf,
			     const u8 *in_base,
			     ptrdiff_t cur_pos,
-			     u32 max_len,
			     u32 nice_len,
			     u32 max_search_depth,
			     u32 next_hashes[2])
@@ -334,7 +332,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder *mf,
	bt_matchfinder_advance_one_byte(mf,
					in_base,
					cur_pos,
-					max_len,
+					nice_len,
					nice_len,
					max_search_depth,
					next_hashes,
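These three hunks are one refactor: when positions are merely skipped (record_matches == false), matches are never recorded, so extending a candidate match past nice_len is wasted work. Rather than choosing the cap inside bt_matchfinder_advance_one_byte(), the skip path now simply passes nice_len as max_len. As a point of reference, a minimal byte-at-a-time sketch of the lz_extend() contract assumed here (the real implementation is presumably optimized, e.g. word-at-a-time):

	/* Sketch only: extend a match already known to be 'len' bytes long,
	 * stopping at 'max_len'.  When positions are skipped, the result is
	 * never stored, so capping at nice_len instead of max_len loses
	 * nothing observable. */
	static inline u32
	lz_extend(const u8 *in_next, const u8 *matchptr, u32 len, u32 max_len)
	{
		while (len < max_len && matchptr[len] == in_next[len])
			len++;
		return len;
	}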

View File

@@ -51,12 +51,24 @@
 #endif
 
 /*
- * The minimum and maximum block lengths, in bytes of source data, which the
- * parsing algorithms may choose.  Caveat: due to implementation details, the
- * actual maximum will be slightly higher than the number defined below.
+ * The compressor always chooses a block of at least MIN_BLOCK_LENGTH bytes,
+ * except if the last block has to be shorter.
  */
 #define MIN_BLOCK_LENGTH	10000
-#define MAX_BLOCK_LENGTH	300000
+
+/*
+ * The compressor attempts to end blocks after SOFT_MAX_BLOCK_LENGTH bytes, but
+ * the final length might be slightly longer due to matches extending beyond
+ * this limit.
+ */
+#define SOFT_MAX_BLOCK_LENGTH	300000
+
+/*
+ * The number of observed matches or literals that represents sufficient data
+ * to decide whether the current block should be terminated or not.
+ */
+#define NUM_OBSERVATIONS_PER_BLOCK_CHECK	512
 
 #if SUPPORT_NEAR_OPTIMAL_PARSING
 
 /* Constants specific to the near-optimal parsing algorithm */
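Taken together with the array-sizing hunks below, the soft limit implies a hard worst case: DEFLATE's maximum match length is 258 bytes (DEFLATE_MAX_MATCH_LEN), so a match chosen at block offset SOFT_MAX_BLOCK_LENGTH - 1 can push the true block length to at most 300000 - 1 + 258 = 300257 bytes of source data.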
@@ -77,7 +89,7 @@
  * However, fallback behavior (immediately terminating the block) on cache
  * overflow is still required.
  */
-#  define CACHE_LENGTH	(MAX_BLOCK_LENGTH * 5)
+#  define CACHE_LENGTH	(SOFT_MAX_BLOCK_LENGTH * 5)
 
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -85,7 +97,7 @@
  * These are the compressor-side limits on the codeword lengths for each Huffman
  * code.  To make outputting bits slightly faster, some of these limits are
  * lower than the limits defined by the DEFLATE format.  This does not
- * significantly affect the compression ratio, at least for the block sizes we
+ * significantly affect the compression ratio, at least for the block lengths we
  * use.
  */
 #define MAX_LITLEN_CODEWORD_LEN	14
@@ -365,7 +377,7 @@ struct deflate_compressor {
			 * that can ever be chosen for a single block, plus one
			 * for the special entry at the end. */
			struct deflate_sequence sequences[
-				DIV_ROUND_UP(MAX_BLOCK_LENGTH,
+				DIV_ROUND_UP(SOFT_MAX_BLOCK_LENGTH,
					     DEFLATE_MIN_MATCH_LEN) + 1];
		} g; /* (g)reedy */
@@ -411,11 +423,12 @@ struct deflate_compressor {
			 * This array must be large enough to accommodate the
			 * worst-case number of nodes, which occurs if we find a
			 * match of length DEFLATE_MAX_MATCH_LEN at position
-			 * MAX_BLOCK_LENGTH - 1, producing a block of length
-			 * MAX_BLOCK_LENGTH - 1 + DEFLATE_MAX_MATCH_LEN.  Add
-			 * one for the end-of-block node.
+			 * SOFT_MAX_BLOCK_LENGTH - 1, producing a block of
+			 * length SOFT_MAX_BLOCK_LENGTH - 1 +
+			 * DEFLATE_MAX_MATCH_LEN.  Add one for the end-of-block
+			 * node.
			 */
-			struct deflate_optimum_node optimum_nodes[MAX_BLOCK_LENGTH - 1 +
+			struct deflate_optimum_node optimum_nodes[SOFT_MAX_BLOCK_LENGTH - 1 +
								  DEFLATE_MAX_MATCH_LEN + 1];
 
			/* The current cost model being used. */
@@ -1829,10 +1842,10 @@ deflate_finish_sequence(struct deflate_sequence *seq, unsigned litrunlen)
  * For determining whether the frequency distributions are "different enough" to
  * start a new block, the simply heuristic of splitting when the sum of absolute
  * differences exceeds a constant seems to be good enough.  We also add a number
- * proportional to the block size so that the algorithm is more likely to end
- * large blocks than small blocks.  This reflects the general expectation that
- * it will become increasingly beneficial to start a new block as the current
- * blocks grows larger.
+ * proportional to the block length so that the algorithm is more likely to end
+ * long blocks than short blocks.  This reflects the general expectation that it
+ * will become increasingly beneficial to start a new block as the current
+ * block grows longer.
  *
  * Finally, for an approximation, it is not strictly necessary that the exact
  * symbols being used are considered.  With "near-optimal parsing", for example,
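To make the heuristic concrete, a hypothetical sketch (the field names below are illustrative, not necessarily the commit's): observations are bucketed into a small number of literal/match classes, and the test sums the absolute differences between the recent observations and the block's running distribution, scaled to the same sample size:

	/* Illustrative sketch only; assumes num_observations != 0. */
	u32 total_delta = 0;
	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
		u32 expected = stats->observations[i] *
			       stats->num_new_observations /
			       stats->num_observations;
		u32 actual = stats->new_observations[i];
		total_delta += (actual > expected) ? actual - expected
						   : expected - actual;
	}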
@@ -1874,7 +1887,7 @@ observe_match(struct block_split_stats *stats, unsigned length)
 }
 
 static bool
-do_end_block_check(struct block_split_stats *stats, u32 block_size)
+do_end_block_check(struct block_split_stats *stats, u32 block_length)
 {
	int i;
@@ -1893,8 +1906,8 @@ do_end_block_check(struct block_split_stats *stats, u32 block_size)
		}
 
		/* Ready to end the block? */
-		if (total_delta + (block_size >> 12) * stats->num_observations >=
-		    200 * stats->num_observations)
+		if (total_delta + (block_length / 4096) * stats->num_observations >=
+		    NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 * stats->num_observations)
			return true;
	}
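The rewritten threshold is numerically identical to the old one: NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 = 512 * 200 / 512 = 200, and block_length / 4096 is the same value as block_length >> 12 (the compiler emits a shift for an unsigned division by a power of two). The change only ties the magic numbers to the named constant, so the threshold stays correctly scaled if NUM_OBSERVATIONS_PER_BLOCK_CHECK is ever retuned.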
@@ -1912,9 +1925,9 @@ should_end_block(struct block_split_stats *stats,
		 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
 {
	/* Ready to check block split statistics? */
-	if (stats->num_new_observations < 512 ||
+	if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK ||
	    in_next - in_block_begin < MIN_BLOCK_LENGTH ||
-	    in_end - in_next < 16384)
+	    in_end - in_next < MIN_BLOCK_LENGTH)
		return false;
 
	return do_end_block_check(stats, in_next - in_block_begin);
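One small behavioral change hides in the last condition: previously a split was suppressed only within the final 16384 bytes of input; using MIN_BLOCK_LENGTH (10000) instead means a block is never ended unless at least a minimum-length block's worth of input remains.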
@@ -1945,7 +1958,8 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
		/* Starting a new DEFLATE block. */
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;
@@ -2029,7 +2043,8 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
		/* Starting a new DEFLATE block. */
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;
@@ -2155,13 +2170,13 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
 /*
  * Follow the minimum-cost path in the graph of possible match/literal choices
  * for the current block and compute the frequencies of the Huffman symbols that
- * are needed to output those matches and literals.
+ * would be needed to output those matches and literals.
  */
 static void
-deflate_tally_item_list(struct deflate_compressor *c,
-			struct deflate_optimum_node *end_node)
+deflate_tally_item_list(struct deflate_compressor *c, u32 block_length)
 {
	struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
+	struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
	do {
		unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
		unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
@@ -2180,7 +2195,8 @@
 /* Set the current cost model from the codeword lengths specified in @lens. */
 static void
-deflate_set_costs(struct deflate_compressor *c, const struct deflate_lens *lens)
+deflate_set_costs_from_codes(struct deflate_compressor *c,
+			     const struct deflate_lens *lens)
 {
	unsigned i;
@@ -2232,10 +2248,10 @@ deflate_default_offset_slot_cost(unsigned offset_slot)
 }
 
 /*
- * Set default Huffman symbol costs for the first optimization pass.
+ * Set default symbol costs for the first block's first optimization pass.
  *
- * It works well to assume that each Huffman symbol is equally probable.  This
- * results in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
+ * It works well to assume that each symbol is equally probable.  This results
+ * in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
  * COST_SHIFT)) where 'num_syms' is the number of symbols in the corresponding
  * alphabet.  However, we intentionally bias the parse towards matches rather
  * than literals by using a slightly lower default cost for length symbols than
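A sketch of the stated formula (not the commit's code; COST_SHIFT is the compressor's fixed-point shift for costs, and -log2(1.0/num_syms) is just log2(num_syms)):

	#include <math.h>

	static u32
	default_symbol_cost(unsigned num_syms)
	{
		return (u32)(-log2(1.0 / num_syms) * (1 << COST_SHIFT));
	}

For the 288-symbol DEFLATE litlen alphabet this gives about 8.17 * (1 << COST_SHIFT), i.e. roughly 8.17 bits per symbol, before the bias toward matches described above is applied.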
@@ -2297,51 +2313,32 @@ deflate_adjust_costs(struct deflate_compressor *c)
				     deflate_default_offset_slot_cost(i));
 }
 
-static void
-deflate_optimize_and_write_block(struct deflate_compressor *c,
-				 struct deflate_output_bitstream *os,
-				 const u8 * const block_begin,
-				 const u32 block_length,
-				 const struct lz_match * const end_cache_ptr,
-				 const bool is_final_block)
-{
-	struct deflate_optimum_node * const end_node =
-		&c->p.n.optimum_nodes[block_length];
-	unsigned num_passes_remaining = c->p.n.num_optim_passes;
-	u32 i;
-
-	/* Force the block to really end at 'end_node', even if some matches
-	 * extend beyond it. */
-	for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
-					ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
-		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
-
-	do {
-		/*
-		 * Beginning a new optimization pass and finding a new
-		 * minimum-cost path through the graph of possible match/literal
-		 * choices for this block.
-		 *
-		 * We find the minimum cost path from 'c->optimum_nodes[0]',
-		 * which represents the node at the beginning of the block, to
-		 * 'end_node', which represents the node at the end of the
-		 * block.  Edge costs are evaluated using the cost model
-		 * 'c->costs'.
-		 *
-		 * The algorithm works backward, starting at 'end_node' and
-		 * proceeding backwards one position at a time.  At each
-		 * position, the minimum cost to reach 'end_node' is computed
-		 * and the match/literal choice is saved.
-		 */
-		struct deflate_optimum_node *cur_node = end_node;
-		const struct lz_match *cache_ptr = end_cache_ptr;
+/*
+ * Find the minimum-cost path through the graph of possible match/literal
+ * choices for this block.
+ *
+ * We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which
+ * represents the node at the beginning of the block, to
+ * 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of
+ * the block.  Edge costs are evaluated using the cost model 'c->p.n.costs'.
+ *
+ * The algorithm works backwards, starting at the end node and proceeding
+ * backwards one node at a time.  At each node, the minimum cost to reach the
+ * end node is computed and the match/literal choice that begins that path is
+ * saved.
+ */
+static void
+deflate_find_min_cost_path(struct deflate_compressor *c, const u32 block_length,
+			   const struct lz_match *cache_ptr)
+{
+	struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
+	struct deflate_optimum_node *cur_node = end_node;
 
-		cur_node->cost_to_end = 0;
-		do {
-			unsigned num_matches;
-			unsigned literal;
-			u32 best_cost_to_end;
-			u32 best_item;
+	cur_node->cost_to_end = 0;
+	do {
+		unsigned num_matches;
+		unsigned literal;
+		u32 best_cost_to_end;
 
-			cur_node--;
-			cache_ptr--;
+		cur_node--;
+		cache_ptr--;
@@ -2352,7 +2349,7 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
		/* It's always possible to choose a literal. */
		best_cost_to_end = c->p.n.costs.literal[literal] +
				   (cur_node + 1)->cost_to_end;
-		best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
+		cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
 
		/* Also consider matches if there are any. */
		if (num_matches) {
@@ -2365,13 +2362,12 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
			/*
			 * Consider each length from the minimum
-			 * (DEFLATE_MIN_MATCH_LEN) to the length of the
-			 * longest match found at this position.  For
-			 * each length, we consider only the smallest
-			 * offset for which that length is available.
-			 * Although this is not guaranteed to be optimal
-			 * due to the possibility of a larger offset
-			 * costing less than a smaller offset to code,
+			 * (DEFLATE_MIN_MATCH_LEN) to the length of the longest
+			 * match found at this position.  For each length, we
+			 * consider only the smallest offset for which that
+			 * length is available.  Although this is not guaranteed
+			 * to be optimal due to the possibility of a larger
+			 * offset costing less than a smaller offset to code,
			 * this is a very useful heuristic.
			 */
			match = cache_ptr - num_matches;
@@ -2386,31 +2382,61 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
						   (cur_node + len)->cost_to_end;
				if (cost_to_end < best_cost_to_end) {
					best_cost_to_end = cost_to_end;
-					best_item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
+					cur_node->item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
				}
			} while (++len <= match->length);
		} while (++match != cache_ptr);
		cache_ptr -= num_matches;
	}
	cur_node->cost_to_end = best_cost_to_end;
-	cur_node->item = best_item;
 } while (cur_node != &c->p.n.optimum_nodes[0]);
+}
+
+/*
+ * Choose the literal/match sequence to use for the current block.  The basic
+ * algorithm finds a minimum-cost path through the block's graph of
+ * literal/match choices, given a cost model.  However, the cost of each symbol
+ * is unknown until the Huffman codes have been built, but at the same time the
+ * Huffman codes depend on the frequencies of chosen symbols.  Consequently,
+ * multiple passes must be used to try to approximate an optimal solution.  The
+ * first pass uses default costs, mixed with the costs from the previous block
+ * if any.  Later passes use the Huffman codeword lengths from the previous pass
+ * as the costs.
+ */
+static void
+deflate_optimize_block(struct deflate_compressor *c, u32 block_length,
+		       const struct lz_match *cache_ptr, bool is_first_block)
+{
+	unsigned num_passes_remaining = c->p.n.num_optim_passes;
+	u32 i;
 
-	/* Tally Huffman symbol frequencies. */
-	deflate_tally_item_list(c, end_node);
+	/* Force the block to really end at the desired length, even if some
+	 * matches extend beyond it. */
+	for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
+					ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
+		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
+
+	/* Set the initial costs. */
+	if (is_first_block)
+		deflate_set_default_costs(c);
+	else
+		deflate_adjust_costs(c);
+
+	for (;;) {
+		/* Find the minimum cost path for this pass. */
+		deflate_find_min_cost_path(c, block_length, cache_ptr);
 
-	/* If this wasn't the last pass, update the cost model. */
-	if (num_passes_remaining > 1) {
-		deflate_make_huffman_codes(&c->freqs, &c->codes);
-		deflate_set_costs(c, &c->codes.lens);
-		deflate_reset_symbol_frequencies(c);
-	}
-	} while (--num_passes_remaining);
+		/* Compute frequencies of the chosen symbols. */
+		deflate_reset_symbol_frequencies(c);
+		deflate_tally_item_list(c, block_length);
+
+		if (--num_passes_remaining == 0)
+			break;
 
-	/* All optimization passes are done.  Output a block using the
-	 * minimum-cost path computed on the last optimization pass. */
-	deflate_flush_block(c, os, block_begin, block_length,
-			    is_final_block, true);
+		/* At least one optimization pass remains; update the costs. */
+		deflate_make_huffman_codes(&c->freqs, &c->codes);
+		deflate_set_costs_from_codes(c, &c->codes.lens);
+	}
 }
 
 /*
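The net effect of this restructuring: the old deflate_optimize_and_write_block() mixed path-finding, the pass loop, and output; now deflate_find_min_cost_path() is a pure path search, deflate_optimize_block() owns cost initialization and the pass loop, and flushing moves to the caller (see the last hunk below). With num_optim_passes = N, the path search runs N times while the Huffman-derived costs are rebuilt only N - 1 times, since the final pass's choices are the ones emitted.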
@@ -2448,17 +2474,17 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
		struct lz_match *cache_ptr = c->p.n.match_cache;
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		const u8 *next_observation = in_next;
 
		init_block_split_stats(&c->split_stats);
-		deflate_reset_symbol_frequencies(c);
 
		/*
		 * Find matches until we decide to end the block.  We end the
		 * block if any of the following is true:
		 *
-		 * (1) Maximum block size has been reached
+		 * (1) Maximum block length has been reached
		 * (2) Match catch may overflow.
		 * (3) Block split heuristic says to split now.
		 */
@@ -2556,7 +2582,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
				bt_matchfinder_skip_position(&c->p.n.bt_mf,
							     in_cur_base,
							     in_next - in_cur_base,
-							     max_len,
							     nice_len,
							     c->max_search_depth,
							     next_hashes);
@@ -2571,16 +2596,12 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
			 cache_ptr < &c->p.n.match_cache[CACHE_LENGTH] &&
			 !should_end_block(&c->split_stats, in_block_begin, in_next, in_end));
 
-		/* All the matches for this block have been cached.  Now compute
-		 * a near-optimal sequence of literals and matches, and output
-		 * the block. */
-		if (in_block_begin == in)
-			deflate_set_default_costs(c);
-		else
-			deflate_adjust_costs(c);
-		deflate_optimize_and_write_block(c, &os, in_block_begin,
-						 in_next - in_block_begin,
-						 cache_ptr, in_next == in_end);
+		/* All the matches for this block have been cached.  Now choose
+		 * the sequence of items to output and flush the block. */
+		deflate_optimize_block(c, in_next - in_block_begin, cache_ptr,
+				       in_block_begin == in);
+		deflate_flush_block(c, &os, in_block_begin, in_next - in_block_begin,
+				    in_next == in_end, true);
	} while (in_next != in_end);
 
	return deflate_flush_output(&os);