mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-11 21:39:32 -04:00
Compressor updates
This commit is contained in:
parent
e3cfa7b5cd
commit
f649a4b8db
@ -211,8 +211,7 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
|
|||||||
matchptr = &in_base[cur_node];
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
if (matchptr[len] == in_next[len]) {
|
if (matchptr[len] == in_next[len]) {
|
||||||
len = lz_extend(in_next, matchptr, len + 1,
|
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
||||||
(record_matches ? max_len : nice_len));
|
|
||||||
if (!record_matches || len > best_len) {
|
if (!record_matches || len > best_len) {
|
||||||
if (record_matches) {
|
if (record_matches) {
|
||||||
best_len = len;
|
best_len = len;
|
||||||
@ -325,7 +324,6 @@ static forceinline void
|
|||||||
bt_matchfinder_skip_position(struct bt_matchfinder *mf,
|
bt_matchfinder_skip_position(struct bt_matchfinder *mf,
|
||||||
const u8 *in_base,
|
const u8 *in_base,
|
||||||
ptrdiff_t cur_pos,
|
ptrdiff_t cur_pos,
|
||||||
u32 max_len,
|
|
||||||
u32 nice_len,
|
u32 nice_len,
|
||||||
u32 max_search_depth,
|
u32 max_search_depth,
|
||||||
u32 next_hashes[2])
|
u32 next_hashes[2])
|
||||||
@ -334,7 +332,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder *mf,
|
|||||||
bt_matchfinder_advance_one_byte(mf,
|
bt_matchfinder_advance_one_byte(mf,
|
||||||
in_base,
|
in_base,
|
||||||
cur_pos,
|
cur_pos,
|
||||||
max_len,
|
nice_len,
|
||||||
nice_len,
|
nice_len,
|
||||||
max_search_depth,
|
max_search_depth,
|
||||||
next_hashes,
|
next_hashes,
|
||||||
|
@ -51,12 +51,24 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The minimum and maximum block lengths, in bytes of source data, which the
|
* The compressor always chooses a block of at least MIN_BLOCK_LENGTH bytes,
|
||||||
* parsing algorithms may choose. Caveat: due to implementation details, the
|
* except if the last block has to be shorter.
|
||||||
* actual maximum will be slightly higher than the number defined below.
|
|
||||||
*/
|
*/
|
||||||
#define MIN_BLOCK_LENGTH 10000
|
#define MIN_BLOCK_LENGTH 10000
|
||||||
#define MAX_BLOCK_LENGTH 300000
|
|
||||||
|
/*
|
||||||
|
* The compressor attempts to end blocks after SOFT_MAX_BLOCK_LENGTH bytes, but
|
||||||
|
* the final length might be slightly longer due to matches extending beyond
|
||||||
|
* this limit.
|
||||||
|
*/
|
||||||
|
#define SOFT_MAX_BLOCK_LENGTH 300000
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The number of observed matches or literals that represents sufficient data to
|
||||||
|
* decide whether the current block should be terminated or not.
|
||||||
|
*/
|
||||||
|
#define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512
|
||||||
|
|
||||||
|
|
||||||
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
||||||
/* Constants specific to the near-optimal parsing algorithm */
|
/* Constants specific to the near-optimal parsing algorithm */
|
||||||
@ -77,7 +89,7 @@
|
|||||||
* However, fallback behavior (immediately terminating the block) on cache
|
* However, fallback behavior (immediately terminating the block) on cache
|
||||||
* overflow is still required.
|
* overflow is still required.
|
||||||
*/
|
*/
|
||||||
# define CACHE_LENGTH (MAX_BLOCK_LENGTH * 5)
|
# define CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)
|
||||||
|
|
||||||
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
|
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
|
||||||
|
|
||||||
@ -85,7 +97,7 @@
|
|||||||
* These are the compressor-side limits on the codeword lengths for each Huffman
|
* These are the compressor-side limits on the codeword lengths for each Huffman
|
||||||
* code. To make outputting bits slightly faster, some of these limits are
|
* code. To make outputting bits slightly faster, some of these limits are
|
||||||
* lower than the limits defined by the DEFLATE format. This does not
|
* lower than the limits defined by the DEFLATE format. This does not
|
||||||
* significantly affect the compression ratio, at least for the block sizes we
|
* significantly affect the compression ratio, at least for the block lengths we
|
||||||
* use.
|
* use.
|
||||||
*/
|
*/
|
||||||
#define MAX_LITLEN_CODEWORD_LEN 14
|
#define MAX_LITLEN_CODEWORD_LEN 14
|
||||||
@ -365,7 +377,7 @@ struct deflate_compressor {
|
|||||||
* that can ever be chosen for a single block, plus one
|
* that can ever be chosen for a single block, plus one
|
||||||
* for the special entry at the end. */
|
* for the special entry at the end. */
|
||||||
struct deflate_sequence sequences[
|
struct deflate_sequence sequences[
|
||||||
DIV_ROUND_UP(MAX_BLOCK_LENGTH,
|
DIV_ROUND_UP(SOFT_MAX_BLOCK_LENGTH,
|
||||||
DEFLATE_MIN_MATCH_LEN) + 1];
|
DEFLATE_MIN_MATCH_LEN) + 1];
|
||||||
} g; /* (g)reedy */
|
} g; /* (g)reedy */
|
||||||
|
|
||||||
@ -411,11 +423,12 @@ struct deflate_compressor {
|
|||||||
* This array must be large enough to accommodate the
|
* This array must be large enough to accommodate the
|
||||||
* worst-case number of nodes, which occurs if we find a
|
* worst-case number of nodes, which occurs if we find a
|
||||||
* match of length DEFLATE_MAX_MATCH_LEN at position
|
* match of length DEFLATE_MAX_MATCH_LEN at position
|
||||||
* MAX_BLOCK_LENGTH - 1, producing a block of length
|
* SOFT_MAX_BLOCK_LENGTH - 1, producing a block of
|
||||||
* MAX_BLOCK_LENGTH - 1 + DEFLATE_MAX_MATCH_LEN. Add
|
* length SOFT_MAX_BLOCK_LENGTH - 1 +
|
||||||
* one for the end-of-block node.
|
* DEFLATE_MAX_MATCH_LEN. Add one for the end-of-block
|
||||||
|
* node.
|
||||||
*/
|
*/
|
||||||
struct deflate_optimum_node optimum_nodes[MAX_BLOCK_LENGTH - 1 +
|
struct deflate_optimum_node optimum_nodes[SOFT_MAX_BLOCK_LENGTH - 1 +
|
||||||
DEFLATE_MAX_MATCH_LEN + 1];
|
DEFLATE_MAX_MATCH_LEN + 1];
|
||||||
|
|
||||||
/* The current cost model being used. */
|
/* The current cost model being used. */
|
||||||
@ -1829,10 +1842,10 @@ deflate_finish_sequence(struct deflate_sequence *seq, unsigned litrunlen)
|
|||||||
* For determining whether the frequency distributions are "different enough" to
|
* For determining whether the frequency distributions are "different enough" to
|
||||||
* start a new block, the simple heuristic of splitting when the sum of absolute
|
* start a new block, the simple heuristic of splitting when the sum of absolute
|
||||||
* differences exceeds a constant seems to be good enough. We also add a number
|
* differences exceeds a constant seems to be good enough. We also add a number
|
||||||
* proportional to the block size so that the algorithm is more likely to end
|
* proportional to the block length so that the algorithm is more likely to end
|
||||||
* large blocks than small blocks. This reflects the general expectation that
|
* long blocks than short blocks. This reflects the general expectation that it
|
||||||
* it will become increasingly beneficial to start a new block as the current
|
* will become increasingly beneficial to start a new block as the current
|
||||||
* blocks grows larger.
|
* block grows longer.
|
||||||
*
|
*
|
||||||
* Finally, for an approximation, it is not strictly necessary that the exact
|
* Finally, for an approximation, it is not strictly necessary that the exact
|
||||||
* symbols being used are considered. With "near-optimal parsing", for example,
|
* symbols being used are considered. With "near-optimal parsing", for example,
|
||||||
@ -1874,7 +1887,7 @@ observe_match(struct block_split_stats *stats, unsigned length)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
do_end_block_check(struct block_split_stats *stats, u32 block_size)
|
do_end_block_check(struct block_split_stats *stats, u32 block_length)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -1893,8 +1906,8 @@ do_end_block_check(struct block_split_stats *stats, u32 block_size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Ready to end the block? */
|
/* Ready to end the block? */
|
||||||
if (total_delta + (block_size >> 12) * stats->num_observations >=
|
if (total_delta + (block_length / 4096) * stats->num_observations >=
|
||||||
200 * stats->num_observations)
|
NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 * stats->num_observations)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1912,9 +1925,9 @@ should_end_block(struct block_split_stats *stats,
|
|||||||
const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
|
const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
|
||||||
{
|
{
|
||||||
/* Ready to check block split statistics? */
|
/* Ready to check block split statistics? */
|
||||||
if (stats->num_new_observations < 512 ||
|
if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK ||
|
||||||
in_next - in_block_begin < MIN_BLOCK_LENGTH ||
|
in_next - in_block_begin < MIN_BLOCK_LENGTH ||
|
||||||
in_end - in_next < 16384)
|
in_end - in_next < MIN_BLOCK_LENGTH)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return do_end_block_check(stats, in_next - in_block_begin);
|
return do_end_block_check(stats, in_next - in_block_begin);
|
||||||
@ -1945,7 +1958,8 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
|
|||||||
/* Starting a new DEFLATE block. */
|
/* Starting a new DEFLATE block. */
|
||||||
|
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
|
const u8 * const in_max_block_end =
|
||||||
|
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
||||||
u32 litrunlen = 0;
|
u32 litrunlen = 0;
|
||||||
struct deflate_sequence *next_seq = c->p.g.sequences;
|
struct deflate_sequence *next_seq = c->p.g.sequences;
|
||||||
|
|
||||||
@ -2029,7 +2043,8 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
|
|||||||
/* Starting a new DEFLATE block. */
|
/* Starting a new DEFLATE block. */
|
||||||
|
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
|
const u8 * const in_max_block_end =
|
||||||
|
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
||||||
u32 litrunlen = 0;
|
u32 litrunlen = 0;
|
||||||
struct deflate_sequence *next_seq = c->p.g.sequences;
|
struct deflate_sequence *next_seq = c->p.g.sequences;
|
||||||
|
|
||||||
@ -2155,13 +2170,13 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
|
|||||||
/*
|
/*
|
||||||
* Follow the minimum-cost path in the graph of possible match/literal choices
|
* Follow the minimum-cost path in the graph of possible match/literal choices
|
||||||
* for the current block and compute the frequencies of the Huffman symbols that
|
* for the current block and compute the frequencies of the Huffman symbols that
|
||||||
* are needed to output those matches and literals.
|
* would be needed to output those matches and literals.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
deflate_tally_item_list(struct deflate_compressor *c,
|
deflate_tally_item_list(struct deflate_compressor *c, u32 block_length)
|
||||||
struct deflate_optimum_node *end_node)
|
|
||||||
{
|
{
|
||||||
struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
|
struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
|
||||||
|
struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
|
||||||
do {
|
do {
|
||||||
unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
|
unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
|
||||||
unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
|
unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
|
||||||
@ -2180,7 +2195,8 @@ deflate_tally_item_list(struct deflate_compressor *c,
|
|||||||
|
|
||||||
/* Set the current cost model from the codeword lengths specified in @lens. */
|
/* Set the current cost model from the codeword lengths specified in @lens. */
|
||||||
static void
|
static void
|
||||||
deflate_set_costs(struct deflate_compressor *c, const struct deflate_lens *lens)
|
deflate_set_costs_from_codes(struct deflate_compressor *c,
|
||||||
|
const struct deflate_lens *lens)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
@ -2232,10 +2248,10 @@ deflate_default_offset_slot_cost(unsigned offset_slot)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set default Huffman symbol costs for the first optimization pass.
|
* Set default symbol costs for the first block's first optimization pass.
|
||||||
*
|
*
|
||||||
* It works well to assume that each Huffman symbol is equally probable. This
|
* It works well to assume that each symbol is equally probable. This results
|
||||||
* results in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
|
* in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
|
||||||
* COST_SHIFT)) where 'num_syms' is the number of symbols in the corresponding
|
* COST_SHIFT)) where 'num_syms' is the number of symbols in the corresponding
|
||||||
* alphabet. However, we intentionally bias the parse towards matches rather
|
* alphabet. However, we intentionally bias the parse towards matches rather
|
||||||
* than literals by using a slightly lower default cost for length symbols than
|
* than literals by using a slightly lower default cost for length symbols than
|
||||||
@ -2297,51 +2313,32 @@ deflate_adjust_costs(struct deflate_compressor *c)
|
|||||||
deflate_default_offset_slot_cost(i));
|
deflate_default_offset_slot_cost(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
/*
|
||||||
deflate_optimize_and_write_block(struct deflate_compressor *c,
|
* Find the minimum-cost path through the graph of possible match/literal
|
||||||
struct deflate_output_bitstream *os,
|
|
||||||
const u8 * const block_begin,
|
|
||||||
const u32 block_length,
|
|
||||||
const struct lz_match * const end_cache_ptr,
|
|
||||||
const bool is_final_block)
|
|
||||||
{
|
|
||||||
struct deflate_optimum_node * const end_node =
|
|
||||||
&c->p.n.optimum_nodes[block_length];
|
|
||||||
unsigned num_passes_remaining = c->p.n.num_optim_passes;
|
|
||||||
u32 i;
|
|
||||||
|
|
||||||
/* Force the block to really end at 'end_node', even if some matches
|
|
||||||
* extend beyond it. */
|
|
||||||
for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
|
|
||||||
ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
|
|
||||||
c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
|
|
||||||
|
|
||||||
do {
|
|
||||||
/*
|
|
||||||
* Beginning a new optimization pass and finding a new
|
|
||||||
* minimum-cost path through the graph of possible match/literal
|
|
||||||
* choices for this block.
|
* choices for this block.
|
||||||
*
|
*
|
||||||
* We find the minimum cost path from 'c->optimum_nodes[0]',
|
* We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which
|
||||||
* which represents the node at the beginning of the block, to
|
* represents the node at the beginning of the block, to
|
||||||
* 'end_node', which represents the node at the end of the
|
* 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of
|
||||||
* block. Edge costs are evaluated using the cost model
|
* the block. Edge costs are evaluated using the cost model 'c->p.n.costs'.
|
||||||
* 'c->costs'.
|
|
||||||
*
|
*
|
||||||
* The algorithm works backward, starting at 'end_node' and
|
* The algorithm works backwards, starting at the end node and proceeding
|
||||||
* proceeding backwards one position at a time. At each
|
* backwards one node at a time. At each node, the minimum cost to reach the
|
||||||
* position, the minimum cost to reach 'end_node' is computed
|
* end node is computed and the match/literal choice that begins that path is
|
||||||
* and the match/literal choice is saved.
|
* saved.
|
||||||
*/
|
*/
|
||||||
|
static void
|
||||||
|
deflate_find_min_cost_path(struct deflate_compressor *c, const u32 block_length,
|
||||||
|
const struct lz_match *cache_ptr)
|
||||||
|
{
|
||||||
|
struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
|
||||||
struct deflate_optimum_node *cur_node = end_node;
|
struct deflate_optimum_node *cur_node = end_node;
|
||||||
const struct lz_match *cache_ptr = end_cache_ptr;
|
|
||||||
|
|
||||||
cur_node->cost_to_end = 0;
|
cur_node->cost_to_end = 0;
|
||||||
do {
|
do {
|
||||||
unsigned num_matches;
|
unsigned num_matches;
|
||||||
unsigned literal;
|
unsigned literal;
|
||||||
u32 best_cost_to_end;
|
u32 best_cost_to_end;
|
||||||
u32 best_item;
|
|
||||||
|
|
||||||
cur_node--;
|
cur_node--;
|
||||||
cache_ptr--;
|
cache_ptr--;
|
||||||
@ -2352,7 +2349,7 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
|
|||||||
/* It's always possible to choose a literal. */
|
/* It's always possible to choose a literal. */
|
||||||
best_cost_to_end = c->p.n.costs.literal[literal] +
|
best_cost_to_end = c->p.n.costs.literal[literal] +
|
||||||
(cur_node + 1)->cost_to_end;
|
(cur_node + 1)->cost_to_end;
|
||||||
best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
|
cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
|
||||||
|
|
||||||
/* Also consider matches if there are any. */
|
/* Also consider matches if there are any. */
|
||||||
if (num_matches) {
|
if (num_matches) {
|
||||||
@ -2365,13 +2362,12 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Consider each length from the minimum
|
* Consider each length from the minimum
|
||||||
* (DEFLATE_MIN_MATCH_LEN) to the length of the
|
* (DEFLATE_MIN_MATCH_LEN) to the length of the longest
|
||||||
* longest match found at this position. For
|
* match found at this position. For each length, we
|
||||||
* each length, we consider only the smallest
|
* consider only the smallest offset for which that
|
||||||
* offset for which that length is available.
|
* length is available. Although this is not guaranteed
|
||||||
* Although this is not guaranteed to be optimal
|
* to be optimal due to the possibility of a larger
|
||||||
* due to the possibility of a larger offset
|
* offset costing less than a smaller offset to code,
|
||||||
* costing less than a smaller offset to code,
|
|
||||||
* this is a very useful heuristic.
|
* this is a very useful heuristic.
|
||||||
*/
|
*/
|
||||||
match = cache_ptr - num_matches;
|
match = cache_ptr - num_matches;
|
||||||
@ -2386,31 +2382,61 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
|
|||||||
(cur_node + len)->cost_to_end;
|
(cur_node + len)->cost_to_end;
|
||||||
if (cost_to_end < best_cost_to_end) {
|
if (cost_to_end < best_cost_to_end) {
|
||||||
best_cost_to_end = cost_to_end;
|
best_cost_to_end = cost_to_end;
|
||||||
best_item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
|
cur_node->item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
|
||||||
}
|
}
|
||||||
} while (++len <= match->length);
|
} while (++len <= match->length);
|
||||||
} while (++match != cache_ptr);
|
} while (++match != cache_ptr);
|
||||||
cache_ptr -= num_matches;
|
cache_ptr -= num_matches;
|
||||||
}
|
}
|
||||||
cur_node->cost_to_end = best_cost_to_end;
|
cur_node->cost_to_end = best_cost_to_end;
|
||||||
cur_node->item = best_item;
|
|
||||||
} while (cur_node != &c->p.n.optimum_nodes[0]);
|
} while (cur_node != &c->p.n.optimum_nodes[0]);
|
||||||
|
}
|
||||||
|
|
||||||
/* Tally Huffman symbol frequencies. */
|
/*
|
||||||
deflate_tally_item_list(c, end_node);
|
* Choose the literal/match sequence to use for the current block. The basic
|
||||||
|
* algorithm finds a minimum-cost path through the block's graph of
|
||||||
|
* literal/match choices, given a cost model. However, the cost of each symbol
|
||||||
|
* is unknown until the Huffman codes have been built, but at the same time the
|
||||||
|
* Huffman codes depend on the frequencies of chosen symbols. Consequently,
|
||||||
|
* multiple passes must be used to try to approximate an optimal solution. The
|
||||||
|
* first pass uses default costs, mixed with the costs from the previous block
|
||||||
|
* if any. Later passes use the Huffman codeword lengths from the previous pass
|
||||||
|
* as the costs.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
deflate_optimize_block(struct deflate_compressor *c, u32 block_length,
|
||||||
|
const struct lz_match *cache_ptr, bool is_first_block)
|
||||||
|
{
|
||||||
|
unsigned num_passes_remaining = c->p.n.num_optim_passes;
|
||||||
|
u32 i;
|
||||||
|
|
||||||
/* If this wasn't the last pass, update the cost model. */
|
/* Force the block to really end at the desired length, even if some
|
||||||
if (num_passes_remaining > 1) {
|
* matches extend beyond it. */
|
||||||
deflate_make_huffman_codes(&c->freqs, &c->codes);
|
for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
|
||||||
deflate_set_costs(c, &c->codes.lens);
|
ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
|
||||||
|
c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
|
||||||
|
|
||||||
|
/* Set the initial costs. */
|
||||||
|
if (is_first_block)
|
||||||
|
deflate_set_default_costs(c);
|
||||||
|
else
|
||||||
|
deflate_adjust_costs(c);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
/* Find the minimum cost path for this pass. */
|
||||||
|
deflate_find_min_cost_path(c, block_length, cache_ptr);
|
||||||
|
|
||||||
|
/* Compute frequencies of the chosen symbols. */
|
||||||
deflate_reset_symbol_frequencies(c);
|
deflate_reset_symbol_frequencies(c);
|
||||||
}
|
deflate_tally_item_list(c, block_length);
|
||||||
} while (--num_passes_remaining);
|
|
||||||
|
|
||||||
/* All optimization passes are done. Output a block using the
|
if (--num_passes_remaining == 0)
|
||||||
* minimum-cost path computed on the last optimization pass. */
|
break;
|
||||||
deflate_flush_block(c, os, block_begin, block_length,
|
|
||||||
is_final_block, true);
|
/* At least one optimization pass remains; update the costs. */
|
||||||
|
deflate_make_huffman_codes(&c->freqs, &c->codes);
|
||||||
|
deflate_set_costs_from_codes(c, &c->codes.lens);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2448,17 +2474,17 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
|
|
||||||
struct lz_match *cache_ptr = c->p.n.match_cache;
|
struct lz_match *cache_ptr = c->p.n.match_cache;
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
|
const u8 * const in_max_block_end =
|
||||||
|
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
||||||
const u8 *next_observation = in_next;
|
const u8 *next_observation = in_next;
|
||||||
|
|
||||||
init_block_split_stats(&c->split_stats);
|
init_block_split_stats(&c->split_stats);
|
||||||
deflate_reset_symbol_frequencies(c);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find matches until we decide to end the block. We end the
|
* Find matches until we decide to end the block. We end the
|
||||||
* block if any of the following is true:
|
* block if any of the following is true:
|
||||||
*
|
*
|
||||||
* (1) Maximum block size has been reached
|
* (1) Maximum block length has been reached
|
||||||
* (2) Match cache may overflow.
|
* (2) Match cache may overflow.
|
||||||
* (3) Block split heuristic says to split now.
|
* (3) Block split heuristic says to split now.
|
||||||
*/
|
*/
|
||||||
@ -2556,7 +2582,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
bt_matchfinder_skip_position(&c->p.n.bt_mf,
|
bt_matchfinder_skip_position(&c->p.n.bt_mf,
|
||||||
in_cur_base,
|
in_cur_base,
|
||||||
in_next - in_cur_base,
|
in_next - in_cur_base,
|
||||||
max_len,
|
|
||||||
nice_len,
|
nice_len,
|
||||||
c->max_search_depth,
|
c->max_search_depth,
|
||||||
next_hashes);
|
next_hashes);
|
||||||
@ -2571,16 +2596,12 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
|
|||||||
cache_ptr < &c->p.n.match_cache[CACHE_LENGTH] &&
|
cache_ptr < &c->p.n.match_cache[CACHE_LENGTH] &&
|
||||||
!should_end_block(&c->split_stats, in_block_begin, in_next, in_end));
|
!should_end_block(&c->split_stats, in_block_begin, in_next, in_end));
|
||||||
|
|
||||||
/* All the matches for this block have been cached. Now compute
|
/* All the matches for this block have been cached. Now choose
|
||||||
* a near-optimal sequence of literals and matches, and output
|
* the sequence of items to output and flush the block. */
|
||||||
* the block. */
|
deflate_optimize_block(c, in_next - in_block_begin, cache_ptr,
|
||||||
if (in_block_begin == in)
|
in_block_begin == in);
|
||||||
deflate_set_default_costs(c);
|
deflate_flush_block(c, &os, in_block_begin, in_next - in_block_begin,
|
||||||
else
|
in_next == in_end, true);
|
||||||
deflate_adjust_costs(c);
|
|
||||||
deflate_optimize_and_write_block(c, &os, in_block_begin,
|
|
||||||
in_next - in_block_begin,
|
|
||||||
cache_ptr, in_next == in_end);
|
|
||||||
} while (in_next != in_end);
|
} while (in_next != in_end);
|
||||||
|
|
||||||
return deflate_flush_output(&os);
|
return deflate_flush_output(&os);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user