deflate_compress: rewind blocks in near-optimal compressor
The block splitting algorithm works by examining successive chunks of data
and ending the block when a chunk differs significantly from the rest of the
block. Currently, the data chunk where the change is detected is included in
the block, which is suboptimal -- after all, we know that it's different from
the rest of the block. Better results could be achieved by ending the block
just before the chunk.

Implement this in the near-optimal compressor. This slightly improves its
compression ratio.

Note: I also tested an implementation of this for the lazy compressor. It
improves compression ratio too, but it doesn't seem worthwhile there from a
performance and complexity standpoint.
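To make the idea concrete before the diff: below is a hypothetical, self-contained sketch of chunk-based splitting with the "end the block before the differing chunk" rule. Everything in it (the chunk size, the coarse byte-class histogram, the divergence threshold, and all names) is illustrative only; libdeflate's real statistics and heuristics differ.

    #include <stdio.h>
    #include <string.h>

    #define CHUNK_LEN 512 /* illustrative observation interval */
    #define NUM_CLASSES 8 /* coarse byte classes: top 3 bits of each byte */

    /* L1 distance between two class histograms. */
    static unsigned l1_distance(const unsigned a[NUM_CLASSES],
                                const unsigned b[NUM_CLASSES])
    {
        unsigned d = 0;
        int i;

        for (i = 0; i < NUM_CLASSES; i++)
            d += (a[i] > b[i]) ? a[i] - b[i] : b[i] - a[i];
        return d;
    }

    /*
     * Scan 'in' one chunk at a time. When a chunk's histogram strays too
     * far from the block's per-chunk average, end the block at the
     * boundary *before* that chunk, so the differing chunk is excluded.
     */
    static size_t choose_block_end(const unsigned char *in, size_t in_nbytes)
    {
        unsigned block_hist[NUM_CLASSES] = {0};
        size_t num_chunks = 0, pos = 0, i;
        int c;

        while (pos + CHUNK_LEN <= in_nbytes) {
            unsigned chunk_hist[NUM_CLASSES] = {0};

            for (i = 0; i < CHUNK_LEN; i++)
                chunk_hist[in[pos + i] >> 5]++;
            if (num_chunks > 0) {
                unsigned avg[NUM_CLASSES];

                for (c = 0; c < NUM_CLASSES; c++)
                    avg[c] = (unsigned)(block_hist[c] / num_chunks);
                /* Arbitrary threshold; the real one is tuned. */
                if (l1_distance(avg, chunk_hist) > CHUNK_LEN / 2)
                    return pos; /* rewind: exclude this chunk */
            }
            for (c = 0; c < NUM_CLASSES; c++)
                block_hist[c] += chunk_hist[c];
            num_chunks++;
            pos += CHUNK_LEN;
        }
        return in_nbytes;
    }

    int main(void)
    {
        unsigned char buf[4 * CHUNK_LEN];

        memset(buf, 'a', 2 * CHUNK_LEN); /* text-like half... */
        memset(buf + 2 * CHUNK_LEN, 0xff, 2 * CHUNK_LEN); /* ...binary half */
        printf("block ends at %zu\n", choose_block_end(buf, sizeof(buf)));
        return 0; /* prints 1024: the boundary before the change, not 1536 */
    }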
parent 5f4da4b243
commit 55f9f70972
@@ -592,6 +592,7 @@ struct libdeflate_compressor {
			 * greedy parse, gathered during matchfinding. This is
			 * used for setting the initial symbol costs.
			 */
+			u32 new_match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
			u32 match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
 
			unsigned num_optim_passes;
@@ -2164,14 +2165,22 @@ do_end_block_check(struct block_split_stats *stats, u32 block_length)
	return false;
 }
 
+static forceinline bool
+ready_to_check_block(const struct block_split_stats *stats,
+		     const u8 *in_block_begin, const u8 *in_next,
+		     const u8 *in_end)
+{
+	return stats->num_new_observations >= NUM_OBSERVATIONS_PER_BLOCK_CHECK
+	       && in_next - in_block_begin >= MIN_BLOCK_LENGTH
+	       && in_end - in_next >= MIN_BLOCK_LENGTH;
+}
+
 static forceinline bool
 should_end_block(struct block_split_stats *stats,
		 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
 {
-	/* Ready to check block split statistics? */
-	if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK ||
-	    in_next - in_block_begin < MIN_BLOCK_LENGTH ||
-	    in_end - in_next < MIN_BLOCK_LENGTH)
+	/* Ready to try to end the block (again)? */
+	if (!ready_to_check_block(stats, in_block_begin, in_next, in_end))
		return false;
 
	return do_end_block_check(stats, in_next - in_block_begin);
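The extracted helper matters for the last hunk below, where the near-optimal compressor needs "not ready to check yet" (keep going) and "checked, but not worth splitting" (merge stats, remember the position) as separate outcomes. A worked example of just the readiness gate, with assumed constant values (libdeflate's actual constants may differ):

    #include <stdbool.h>
    #include <stdio.h>

    #define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512 /* assumed value */
    #define MIN_BLOCK_LENGTH 10000               /* assumed value */

    static bool ready(unsigned num_new_observations, long block_len,
                      long bytes_left)
    {
        return num_new_observations >= NUM_OBSERVATIONS_PER_BLOCK_CHECK &&
               block_len >= MIN_BLOCK_LENGTH &&
               bytes_left >= MIN_BLOCK_LENGTH;
    }

    int main(void)
    {
        /*
         * The third condition suppresses checks near the end of the input,
         * so the final block can never be cut shorter than MIN_BLOCK_LENGTH.
         */
        printf("%d\n", ready(600, 20000, 4096));  /* 0: too close to the end */
        printf("%d\n", ready(600, 20000, 50000)); /* 1: eligible for a check */
        return 0;
    }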
@@ -2330,11 +2339,12 @@ recalculate_min_match_len(const struct deflate_freqs *freqs,
 }
 
 static forceinline const u8 *
-choose_max_block_end(const u8 *in_next, const u8 *in_end, size_t soft_max_len)
+choose_max_block_end(const u8 *in_block_begin, const u8 *in_end,
+		     size_t soft_max_len)
 {
-	if (in_end - in_next < soft_max_len + MIN_BLOCK_LENGTH)
+	if (in_end - in_block_begin < soft_max_len + MIN_BLOCK_LENGTH)
		return in_end;
-	return in_next + soft_max_len;
+	return in_block_begin + soft_max_len;
 }
 
 /*
@@ -2981,17 +2991,21 @@ static const struct {
  */
 static void
 deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
-				    u32 block_length,
+				    const u8 *block_begin, u32 block_length,
				    u32 *lit_cost, u32 *len_sym_cost)
 {
	unsigned num_used_literals = 0;
	u32 literal_freq = block_length;
	u32 match_freq = 0;
	u32 cutoff;
-	unsigned i;
+	u32 i;
 
	/* Calculate the number of distinct literals that exist in the data. */
+	memset(c->freqs.litlen, 0,
+	       DEFLATE_NUM_LITERALS * sizeof(c->freqs.litlen[0]));
	cutoff = literal_freq >> 11; /* Ignore literals used very rarely. */
+	for (i = 0; i < block_length; i++)
+		c->freqs.litlen[block_begin[i]]++;
	for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
		if (c->freqs.litlen[i] > cutoff)
			num_used_literals++;
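Distilled into standalone form, the literal-counting step that this hunk moves into deflate_choose_default_litlen_costs() looks roughly like the sketch below (assuming DEFLATE's 256 literal symbols; the real function reuses c->freqs.litlen and feeds num_used_literals into the default cost selection):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned count_used_literals(const uint8_t *block_begin,
                                        uint32_t block_length)
    {
        uint32_t freqs[256];
        uint32_t cutoff = block_length >> 11; /* ignore very rare literals */
        unsigned num_used_literals = 0;
        uint32_t i;

        memset(freqs, 0, sizeof(freqs));
        for (i = 0; i < block_length; i++)
            freqs[block_begin[i]]++;
        for (i = 0; i < 256; i++)
            if (freqs[i] > cutoff)
                num_used_literals++;
        return num_used_literals;
    }

    int main(void)
    {
        uint8_t buf[8192];
        uint32_t i;

        for (i = 0; i < sizeof(buf); i++)
            buf[i] = "abcd"[i % 4]; /* only 4 distinct literals */
        printf("%u\n", count_used_literals(buf, sizeof(buf))); /* prints 4 */
        return 0;
    }

The ">> 11" cutoff means a literal must account for more than roughly 1/2048 of the block before it counts as "used".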
@@ -3258,7 +3272,8 @@ deflate_find_min_cost_path(struct libdeflate_compressor *c,
  * as the costs.
  */
 static void
-deflate_optimize_block(struct libdeflate_compressor *c, u32 block_length,
+deflate_optimize_block(struct libdeflate_compressor *c,
+		       const u8 *block_begin, u32 block_length,
		       const struct lz_match *cache_ptr, bool is_first_block,
		       bool is_final_block)
 {
@@ -3275,11 +3290,8 @@ deflate_optimize_block(struct libdeflate_compressor *c, u32 block_length,
		     ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
 
-	/* Make sure the literal/match statistics are up to date. */
-	merge_new_observations(&c->split_stats);
-
	/* Set the initial costs. */
-	deflate_choose_default_litlen_costs(c, block_length,
+	deflate_choose_default_litlen_costs(c, block_begin, block_length,
					    &lit_cost, &len_sym_cost);
	if (is_first_block)
		deflate_set_default_costs(c, lit_cost, len_sym_cost);
@@ -3308,31 +3320,49 @@ deflate_optimize_block(struct libdeflate_compressor *c, u32 block_length,
 }
 
 static void
-deflate_near_optimal_begin_block(struct libdeflate_compressor *c,
-				 bool is_first_block)
+deflate_near_optimal_init_stats(struct libdeflate_compressor *c)
+{
+	init_block_split_stats(&c->split_stats);
+	memset(c->p.n.new_match_len_freqs, 0,
+	       sizeof(c->p.n.new_match_len_freqs));
+	memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
+}
+
+static void
+deflate_near_optimal_merge_stats(struct libdeflate_compressor *c)
+{
+	unsigned i;
+
+	merge_new_observations(&c->split_stats);
+	for (i = 0; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) {
+		c->p.n.match_len_freqs[i] += c->p.n.new_match_len_freqs[i];
+		c->p.n.new_match_len_freqs[i] = 0;
+	}
+}
+
+/*
+ * Save some literal/match statistics from the previous block so that
+ * deflate_adjust_costs() will be able to decide how much the current block
+ * differs from the previous one.
+ */
+static void
+deflate_near_optimal_save_stats(struct libdeflate_compressor *c)
 {
	int i;
 
-	if (!is_first_block) {
-		/*
-		 * Save some literal/match statistics from the previous block so
-		 * that deflate_adjust_costs() will be able to decide how much
-		 * the current block differs from the previous one.
-		 */
-		for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
-			c->p.n.prev_observations[i] =
-				c->split_stats.observations[i];
-		}
-		c->p.n.prev_num_observations = c->split_stats.num_observations;
-	}
-	init_block_split_stats(&c->split_stats);
+	for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
+		c->p.n.prev_observations[i] = c->split_stats.observations[i];
+	c->p.n.prev_num_observations = c->split_stats.num_observations;
+}
 
-	/*
-	 * During matchfinding, we keep track of approximate literal and match
-	 * length frequencies for the purpose of setting the initial costs.
-	 */
-	memset(c->freqs.litlen, 0,
-	       DEFLATE_NUM_LITERALS * sizeof(c->freqs.litlen[0]));
+static void
+deflate_near_optimal_clear_old_stats(struct libdeflate_compressor *c)
+{
+	int i;
+
+	for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
+		c->split_stats.observations[i] = 0;
+	c->split_stats.num_observations = 0;
	memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
 }
 
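The four helpers above form a small protocol around the "new" (not-yet-merged) observations, which is what makes the rewind in the final hunk possible. A toy model of the intended sequencing, with scalar counters standing in for the real per-symbol arrays (names simplified; this is not libdeflate code):

    #include <stdio.h>

    struct toy_stats {
        unsigned merged; /* observations already attributed to this block */
        unsigned fresh;  /* observations gathered since the last check */
        unsigned prev;   /* snapshot used to cost-adjust the next block */
    };

    static void init_stats(struct toy_stats *s)  { s->merged = 0; s->fresh = 0; }
    static void merge_stats(struct toy_stats *s) { s->merged += s->fresh; s->fresh = 0; }
    static void save_stats(struct toy_stats *s)  { s->prev = s->merged; }
    /* Drop the flushed block's share but keep the unmerged observations,
     * which already belong to the data after the rewind point. */
    static void clear_old_stats(struct toy_stats *s) { s->merged = 0; }

    int main(void)
    {
        struct toy_stats s;

        init_stats(&s);
        s.fresh = 40;
        merge_stats(&s); /* block-end check failed: keep extending the block */
        s.fresh = 35;    /* next check detects a divergent chunk */

        /* Rewind path: flush a block covering only the 40 merged obs. */
        save_stats(&s);
        clear_old_stats(&s);
        printf("flushed block: %u obs; next block starts with %u obs\n",
               s.prev, s.fresh); /* prints 40 and 35 */
        return 0;
    }

On the non-rewind path the order is merge, save, init instead, so nothing carries over into the next block.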
@@ -3355,6 +3385,7 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
			      u8 * restrict out, size_t out_nbytes_avail)
 {
	const u8 *in_next = in;
+	const u8 *in_block_begin = in_next;
	const u8 *in_end = in_next + in_nbytes;
	struct deflate_output_bitstream os;
	const u8 *in_cur_base = in_next;
@@ -3362,23 +3393,29 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
		in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE);
	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
	unsigned nice_len = MIN(c->nice_match_length, max_len);
+	struct lz_match *cache_ptr = c->p.n.match_cache;
	u32 next_hashes[2] = {0, 0};
 
	deflate_init_output(&os, out, out_nbytes_avail);
	bt_matchfinder_init(&c->p.n.bt_mf);
+	deflate_near_optimal_init_stats(c);
 
	do {
		/* Starting a new DEFLATE block */
-
-		struct lz_match *cache_ptr = c->p.n.match_cache;
-		const u8 * const in_block_begin = in_next;
		const u8 * const in_max_block_end = choose_max_block_end(
-			in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
+			in_block_begin, in_end, SOFT_MAX_BLOCK_LENGTH);
+		const u8 *prev_end_block_check = NULL;
+		bool change_detected = false;
		const u8 *next_observation = in_next;
		unsigned min_len;
 
-		deflate_near_optimal_begin_block(c, in_block_begin == in);
-		min_len = calculate_min_match_len(in_next,
+		/*
+		 * Use the minimum match length heuristic to improve the
+		 * literal/match statistics gathered during matchfinding.
+		 * However, the actual near-optimal parse won't respect min_len,
+		 * as it can accurately assess the costs of different matches.
+		 */
+		min_len = calculate_min_match_len(in_block_begin,
						  in_max_block_end - in_next,
						  c->max_search_depth);
 
@@ -3390,7 +3427,7 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
		 *	(2) Match catch may overflow.
		 *	(3) Block split heuristic says to split now.
		 */
-		do {
+		for (;;) {
			struct lz_match *matches;
			unsigned best_len;
			size_t remaining = in_end - in_next;
@@ -3436,13 +3473,12 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
				if (cache_ptr > matches)
					best_len = cache_ptr[-1].length;
			}
-			c->freqs.litlen[*in_next]++;
			if (in_next >= next_observation) {
				if (best_len >= min_len) {
					observe_match(&c->split_stats,
						      best_len);
					next_observation = in_next + best_len;
-					c->p.n.match_len_freqs[best_len]++;
+					c->p.n.new_match_len_freqs[best_len]++;
				} else {
					observe_literal(&c->split_stats,
							*in_next);
@@ -3495,24 +3531,101 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
				}
				cache_ptr->length = 0;
				cache_ptr->offset = *in_next;
-				c->freqs.litlen[*in_next]++;
				in_next++;
				cache_ptr++;
			} while (--best_len);
		}
-		} while (in_next < in_max_block_end &&
-			 cache_ptr < &c->p.n.match_cache[MATCH_CACHE_LENGTH] &&
-			 !should_end_block(&c->split_stats,
-					   in_block_begin, in_next, in_end));
+			/* Maximum block length or end of input reached? */
+			if (in_next >= in_max_block_end)
+				break;
+			/* Match cache overflowed? */
+			if (cache_ptr >=
+			    &c->p.n.match_cache[MATCH_CACHE_LENGTH])
+				break;
+			/* Not ready to try to end the block (again)? */
+			if (!ready_to_check_block(&c->split_stats,
+						  in_block_begin, in_next,
+						  in_end))
+				continue;
+			/* Check if it would be worthwhile to end the block. */
+			if (do_end_block_check(&c->split_stats,
+					       in_next - in_block_begin)) {
+				change_detected = true;
+				break;
+			}
+			/* Ending the block doesn't seem worthwhile here. */
+			deflate_near_optimal_merge_stats(c);
+			prev_end_block_check = in_next;
+		}
		/*
		 * All the matches for this block have been cached. Now choose
-		 * the sequence of items to output and flush the block.
+		 * the precise end of the block and the sequence of items to
+		 * output to represent it, then flush the block.
		 */
-		deflate_optimize_block(c, in_next - in_block_begin, cache_ptr,
-				       in_block_begin == in, in_next == in_end);
-		deflate_flush_block(c, &os, in_block_begin,
-				    in_next - in_block_begin,
-				    NULL, in_next == in_end);
+		if (change_detected && prev_end_block_check != NULL) {
+			/*
+			 * The block is being ended because a recent chunk of
+			 * data differs from the rest of the block. We could
+			 * end the block at 'in_next' like the greedy and lazy
+			 * compressors do, but that's not ideal since it would
+			 * include the differing chunk in the block. The
+			 * near-optimal compressor has time to do a better job.
+			 * Therefore, we rewind to just before the chunk, and
+			 * output a block that only goes up to there.
+			 *
+			 * We then set things up to correctly start the next
+			 * block, considering that some work has already been
+			 * done on it (some matches found and stats gathered).
+			 */
+			struct lz_match *orig_cache_ptr = cache_ptr;
+			const u8 *in_block_end = prev_end_block_check;
+			u32 block_length = in_block_end - in_block_begin;
+			bool is_first = (in_block_begin == in);
+			bool is_final = false;
+			u32 num_bytes_to_rewind = in_next - in_block_end;
+			size_t cache_len_rewound;
+
+			/* Rewind the match cache. */
+			do {
+				cache_ptr--;
+				cache_ptr -= cache_ptr->length;
+			} while (--num_bytes_to_rewind);
+			cache_len_rewound = orig_cache_ptr - cache_ptr;
+
+			deflate_optimize_block(c, in_block_begin, block_length,
+					       cache_ptr, is_first, is_final);
+			deflate_flush_block(c, &os, in_block_begin,
+					    block_length, NULL, is_final);
+			memmove(c->p.n.match_cache, cache_ptr,
+				cache_len_rewound * sizeof(*cache_ptr));
+			cache_ptr = &c->p.n.match_cache[cache_len_rewound];
+			deflate_near_optimal_save_stats(c);
+			/*
+			 * Clear the stats for the just-flushed block, leaving
+			 * just the stats for the beginning of the next block.
+			 */
+			deflate_near_optimal_clear_old_stats(c);
+			in_block_begin = in_block_end;
+		} else {
+			/*
+			 * The block is being ended for a reason other than a
+			 * differing data chunk being detected. Don't rewind at
+			 * all; just end the block at the current position.
+			 */
+			u32 block_length = in_next - in_block_begin;
+			bool is_first = (in_block_begin == in);
+			bool is_final = (in_next == in_end);
+
+			deflate_near_optimal_merge_stats(c);
+			deflate_optimize_block(c, in_block_begin, block_length,
+					       cache_ptr, is_first, is_final);
+			deflate_flush_block(c, &os, in_block_begin,
+					    block_length, NULL, is_final);
+			cache_ptr = &c->p.n.match_cache[0];
+			deflate_near_optimal_save_stats(c);
+			deflate_near_optimal_init_stats(c);
+			in_block_begin = in_next;
+		}
	} while (in_next != in_end);
 
	return deflate_flush_output(&os);
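The rewind loop in this last hunk relies on the match cache's layout, which can be inferred from the diff: for each input byte, zero or more match entries are followed by one trailer entry whose length field holds the number of matches cached for that position (zero for skipped bytes, as in the cache_ptr->length = 0 lines above) and whose offset field holds the literal byte. A self-contained model of walking that layout backwards (field widths assumed):

    #include <stdint.h>
    #include <stdio.h>

    struct lz_match { uint16_t length; uint16_t offset; };

    int main(void)
    {
        /* Cache for 3 positions: pos 0 has 2 matches, pos 1 none, pos 2 one. */
        struct lz_match cache[] = {
            { 3, 100 }, { 4, 200 }, { 2, 'a' }, /* pos 0: matches + trailer */
            { 0, 'b' },                         /* pos 1: trailer only */
            { 5, 300 }, { 1, 'c' },             /* pos 2: match + trailer */
        };
        struct lz_match *cache_ptr = cache + 6; /* one past the last trailer */
        unsigned num_bytes_to_rewind = 2;       /* drop pos 2 and pos 1 */

        /* The rewind loop from the diff. */
        do {
            cache_ptr--;                    /* step onto a trailer entry */
            cache_ptr -= cache_ptr->length; /* skip that position's matches */
        } while (--num_bytes_to_rewind);

        printf("entries kept: %d\n", (int)(cache_ptr - cache)); /* prints 3 */
        return 0;
    }

Each iteration undoes exactly one input byte, so after num_bytes_to_rewind iterations cache_ptr points at the first entry belonging to the next block, which the diff then memmove()s to the front of the cache.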