mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-08-03 09:46:04 -04:00
deflate_compress: adjust block splitting conditions
For fastest, greedy, lazy, and lazy2: save memory by reducing the length of the sequence store, and forcing a split if it is filled. For fastest: increase the max block length, but use a relatively short sequence store that will cause shorter blocks to be used often. For all: allow the final block to exceed the soft maximum length if it avoids having to create a block below the minimum length.
This commit is contained in:
parent
7c60c4cdaf
commit
71db68b27f
@ -51,10 +51,11 @@
|
|||||||
#define SUPPORT_NEAR_OPTIMAL_PARSING 1
|
#define SUPPORT_NEAR_OPTIMAL_PARSING 1
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the minimum block length, in uncompressed bytes, which the compressor
|
* This is the minimum block length that the compressor will use, in
|
||||||
* will use. This should be a value below which using shorter blocks is very
|
* uncompressed bytes. It is also the amount by which the final block is
|
||||||
* unlikely to be worthwhile, due to the per-block overhead. This parameter
|
* allowed to grow past the soft maximum length in order to avoid using a very
|
||||||
* doesn't apply to the final block, which can be arbitrarily short.
|
* short block at the end. This should be a value below which using shorter
|
||||||
|
* blocks is unlikely to be worthwhile, due to the per-block overhead.
|
||||||
*
|
*
|
||||||
* Defining a fixed minimum block length is needed in order to guarantee a
|
* Defining a fixed minimum block length is needed in order to guarantee a
|
||||||
* reasonable upper bound on the compressed size. It's also needed because our
|
* reasonable upper bound on the compressed size. It's also needed because our
|
||||||
@ -63,23 +64,46 @@
|
|||||||
#define MIN_BLOCK_LENGTH 10000
|
#define MIN_BLOCK_LENGTH 10000
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the soft maximum block length, in uncompressed bytes, which the
|
* For the greedy, lazy, lazy2, and near-optimal compressors: This is the soft
|
||||||
* compressor will use. This is a "soft" maximum, meaning that the compressor
|
* maximum block length, in uncompressed bytes. The compressor will try to end
|
||||||
* will try to end blocks at this length, but it may go slightly past it if
|
* blocks at this length, but it may go slightly past it if there is a match
|
||||||
* there is a match that straddles this limit. This parameter doesn't apply to
|
* that straddles this limit or if the input data ends soon after this limit.
|
||||||
* uncompressed blocks, which the DEFLATE format limits to 65535 bytes.
|
* This parameter doesn't apply to uncompressed blocks, which the DEFLATE format
|
||||||
|
* limits to 65535 bytes.
|
||||||
*
|
*
|
||||||
* This should be a value above which it is very likely that splitting the block
|
* This should be a value above which it is very likely that splitting the block
|
||||||
* would produce a better compression ratio. Increasing/decreasing this
|
* would produce a better compression ratio. For the near-optimal compressor,
|
||||||
* parameter will increase/decrease per-compressor memory usage linearly.
|
* increasing/decreasing this parameter will increase/decrease per-compressor
|
||||||
|
* memory usage linearly.
|
||||||
*/
|
*/
|
||||||
#define SOFT_MAX_BLOCK_LENGTH 300000
|
#define SOFT_MAX_BLOCK_LENGTH 300000
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block length, in uncompressed bytes, used by deflate_compress_fastest().
|
* For the greedy, lazy, and lazy2 compressors: this is the length of the
|
||||||
* deflate_compress_fastest() doesn't use the other block length settings.
|
* sequence store, which is an array where the compressor temporarily stores
|
||||||
|
* matches that it's going to use in the current block. This value is 1 more
|
||||||
|
* than the number of matches that can be used in a block. If the sequence
|
||||||
|
* store fills up, then the compressor will be forced to end the block early.
|
||||||
|
* This value should be large enough so that this rarely happens, due to the
|
||||||
|
* block being ended normally before then. Increasing/decreasing this value
|
||||||
|
* will increase/decrease per-compressor memory usage linearly.
|
||||||
*/
|
*/
|
||||||
#define FAST_BLOCK_LENGTH MIN(32768, SOFT_MAX_BLOCK_LENGTH)
|
#define SEQ_STORE_LENGTH 50000
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For deflate_compress_fastest(): This is the soft maximum block length.
|
||||||
|
* deflate_compress_fastest() doesn't use the regular block splitting algorithm;
|
||||||
|
* it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or
|
||||||
|
* FAST_SEQ_STORE_LENGTH - 1 matches. Therefore, this value should be lower
|
||||||
|
* than the regular SOFT_MAX_BLOCK_LENGTH.
|
||||||
|
*/
|
||||||
|
#define FAST_SOFT_MAX_BLOCK_LENGTH 65535
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For deflate_compress_fastest(): this is the length of the sequence store.
|
||||||
|
* This is like SEQ_STORE_LENGTH, but this should be a lower value.
|
||||||
|
*/
|
||||||
|
#define FAST_SEQ_STORE_LENGTH 8192
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These are the maximum codeword lengths, in bits, the compressor will use for
|
* These are the maximum codeword lengths, in bits, the compressor will use for
|
||||||
@ -97,13 +121,13 @@
|
|||||||
/* Parameters specific to the near-optimal parsing algorithm */
|
/* Parameters specific to the near-optimal parsing algorithm */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BIT_COST is a scaling factor that allows the compressor to consider
|
* BIT_COST is a scaling factor that allows the near-optimal compressor to
|
||||||
* fractional bit costs when deciding which literal/match sequence to use. This
|
* consider fractional bit costs when deciding which literal/match sequence to
|
||||||
* is useful when the true symbol costs are unknown. For example, if the
|
* use. This is useful when the true symbol costs are unknown. For example, if
|
||||||
* compressor thinks that a symbol has 6.5 bits of entropy, it can set its cost
|
* the compressor thinks that a symbol has 6.5 bits of entropy, it can set its
|
||||||
* to 6.5 bits rather than have to use 6 or 7 bits. Although in the end each
|
* cost to 6.5 bits rather than have to use 6 or 7 bits. Although in the end
|
||||||
* symbol will use a whole number of bits due to the Huffman coding, considering
|
* each symbol will use a whole number of bits due to the Huffman coding,
|
||||||
* fractional bits can be helpful due to the limited information.
|
* considering fractional bits can be helpful due to the limited information.
|
||||||
*
|
*
|
||||||
* BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher
|
* BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher
|
||||||
* value isn't very useful since the calculations are approximate anyway.
|
* value isn't very useful since the calculations are approximate anyway.
|
||||||
@ -122,12 +146,9 @@
|
|||||||
#define OFFSET_NOSTAT_BITS 10
|
#define OFFSET_NOSTAT_BITS 10
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is (approximately) the maximum number of matches that the compressor
|
* This is (slightly less than) the maximum number of matches that the
|
||||||
* will cache per block. If the match cache becomes full, then the compressor
|
* near-optimal compressor will cache per block. This behaves similarly to
|
||||||
* will be forced to end the block early. This value should be large enough so
|
* SEQ_STORE_LENGTH for the other compressors.
|
||||||
* that this rarely happens, due to the block being ended normally before the
|
|
||||||
* cache fills up. Increasing/decreasing this parameter will increase/decrease
|
|
||||||
* per-compressor memory usage linearly.
|
|
||||||
*/
|
*/
|
||||||
#define MATCH_CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)
|
#define MATCH_CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)
|
||||||
|
|
||||||
@ -152,6 +173,47 @@
|
|||||||
(DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1)
|
(DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
check_buildtime_parameters(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Verify that MIN_BLOCK_LENGTH is being honored, as
|
||||||
|
* libdeflate_compress_bound() depends on it.
|
||||||
|
*/
|
||||||
|
STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
|
||||||
|
STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(SEQ_STORE_LENGTH - 1) * DEFLATE_MIN_MATCH_LEN >=
|
||||||
|
MIN_BLOCK_LENGTH);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(FAST_SEQ_STORE_LENGTH - 1) * HT_MATCHFINDER_MIN_MATCH_LEN >=
|
||||||
|
MIN_BLOCK_LENGTH);
|
||||||
|
|
||||||
|
/* Verify that the sequence stores aren't uselessly large. */
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(SEQ_STORE_LENGTH - 1) * DEFLATE_MIN_MATCH_LEN <=
|
||||||
|
SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(FAST_SEQ_STORE_LENGTH - 1) * HT_MATCHFINDER_MIN_MATCH_LEN <=
|
||||||
|
FAST_SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
|
||||||
|
|
||||||
|
/* Verify that the maximum codeword lengths are valid. */
|
||||||
|
STATIC_ASSERT(
|
||||||
|
MAX_LITLEN_CODEWORD_LEN <= DEFLATE_MAX_LITLEN_CODEWORD_LEN);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
MAX_OFFSET_CODEWORD_LEN <= DEFLATE_MAX_OFFSET_CODEWORD_LEN);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(1U << MAX_LITLEN_CODEWORD_LEN) >= DEFLATE_NUM_LITLEN_SYMS);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(1U << MAX_OFFSET_CODEWORD_LEN) >= DEFLATE_NUM_OFFSET_SYMS);
|
||||||
|
STATIC_ASSERT(
|
||||||
|
(1U << MAX_PRE_CODEWORD_LEN) >= DEFLATE_NUM_PRECODE_SYMS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
/* Table: length slot => length slot base value */
|
/* Table: length slot => length slot base value */
|
||||||
static const unsigned deflate_length_slot_base[] = {
|
static const unsigned deflate_length_slot_base[] = {
|
||||||
3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ,
|
3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ,
|
||||||
@ -424,14 +486,12 @@ struct libdeflate_compressor {
|
|||||||
/* Hash chains matchfinder */
|
/* Hash chains matchfinder */
|
||||||
struct hc_matchfinder hc_mf;
|
struct hc_matchfinder hc_mf;
|
||||||
|
|
||||||
/* The matches and literals that the parser has chosen
|
/*
|
||||||
* for the current block. The required length of this
|
* The matches and literals that the parser has chosen
|
||||||
* array is limited by the maximum number of matches
|
* for the current block.
|
||||||
* that can ever be chosen for a single block, plus one
|
*/
|
||||||
* for the special entry at the end. */
|
struct deflate_sequence sequences[SEQ_STORE_LENGTH];
|
||||||
struct deflate_sequence sequences[
|
|
||||||
DIV_ROUND_UP(SOFT_MAX_BLOCK_LENGTH,
|
|
||||||
DEFLATE_MIN_MATCH_LEN) + 1];
|
|
||||||
} g; /* (g)reedy */
|
} g; /* (g)reedy */
|
||||||
|
|
||||||
/* Data for fastest parsing */
|
/* Data for fastest parsing */
|
||||||
@ -440,8 +500,8 @@ struct libdeflate_compressor {
|
|||||||
struct ht_matchfinder ht_mf;
|
struct ht_matchfinder ht_mf;
|
||||||
|
|
||||||
struct deflate_sequence sequences[
|
struct deflate_sequence sequences[
|
||||||
DIV_ROUND_UP(FAST_BLOCK_LENGTH,
|
FAST_SEQ_STORE_LENGTH];
|
||||||
HT_MATCHFINDER_MIN_MATCH_LEN) + 1];
|
|
||||||
} f; /* (f)astest */
|
} f; /* (f)astest */
|
||||||
|
|
||||||
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
||||||
@ -485,15 +545,17 @@ struct libdeflate_compressor {
|
|||||||
* minimum-cost path algorithm.
|
* minimum-cost path algorithm.
|
||||||
*
|
*
|
||||||
* This array must be large enough to accommodate the
|
* This array must be large enough to accommodate the
|
||||||
* worst-case number of nodes, which occurs if we find a
|
* worst-case number of nodes, which occurs when the
|
||||||
* match of length DEFLATE_MAX_MATCH_LEN at position
|
* final block is of length SOFT_MAX_BLOCK_LENGTH +
|
||||||
* SOFT_MAX_BLOCK_LENGTH - 1, producing a block of
|
* MIN_BLOCK_LENGTH, or when any block is of length
|
||||||
* length SOFT_MAX_BLOCK_LENGTH - 1 +
|
* SOFT_MAX_BLOCK_LENGTH + DEFLATE_MAX_MATCH_LEN
|
||||||
* DEFLATE_MAX_MATCH_LEN. Add one for the end-of-block
|
* - 1. Add one for the end-of-block node.
|
||||||
* node.
|
|
||||||
*/
|
*/
|
||||||
struct deflate_optimum_node optimum_nodes[SOFT_MAX_BLOCK_LENGTH - 1 +
|
struct deflate_optimum_node optimum_nodes[
|
||||||
DEFLATE_MAX_MATCH_LEN + 1];
|
SOFT_MAX_BLOCK_LENGTH +
|
||||||
|
MAX(MIN_BLOCK_LENGTH,
|
||||||
|
DEFLATE_MAX_MATCH_LEN - 1)
|
||||||
|
+ 1];
|
||||||
|
|
||||||
/* The current cost model being used. */
|
/* The current cost model being used. */
|
||||||
struct deflate_costs costs;
|
struct deflate_costs costs;
|
||||||
@ -2160,6 +2222,14 @@ recalculate_min_match_len(const struct deflate_freqs *freqs,
|
|||||||
return choose_min_match_len(num_used_literals, max_search_depth);
|
return choose_min_match_len(num_used_literals, max_search_depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static forceinline const u8 *
|
||||||
|
choose_max_block_end(const u8 *in_next, const u8 *in_end, size_t soft_max_len)
|
||||||
|
{
|
||||||
|
if (in_end - in_next < soft_max_len + MIN_BLOCK_LENGTH)
|
||||||
|
return in_end;
|
||||||
|
return in_next + soft_max_len;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the level 0 "compressor". It always outputs uncompressed blocks.
|
* This is the level 0 "compressor". It always outputs uncompressed blocks.
|
||||||
*/
|
*/
|
||||||
@ -2203,8 +2273,8 @@ deflate_compress_fastest(struct libdeflate_compressor * restrict c,
|
|||||||
/* Starting a new DEFLATE block. */
|
/* Starting a new DEFLATE block. */
|
||||||
|
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end =
|
const u8 * const in_max_block_end = choose_max_block_end(
|
||||||
in_next + MIN(in_end - in_next, FAST_BLOCK_LENGTH);
|
in_next, in_end, FAST_SOFT_MAX_BLOCK_LENGTH);
|
||||||
struct deflate_sequence *seq = c->p.f.sequences;
|
struct deflate_sequence *seq = c->p.f.sequences;
|
||||||
|
|
||||||
deflate_begin_sequences(c, seq);
|
deflate_begin_sequences(c, seq);
|
||||||
@ -2248,7 +2318,8 @@ deflate_compress_fastest(struct libdeflate_compressor * restrict c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check if it's time to output another block. */
|
/* Check if it's time to output another block. */
|
||||||
} while (in_next < in_max_block_end);
|
} while (in_next < in_max_block_end &&
|
||||||
|
seq < &c->p.f.sequences[ARRAY_LEN(c->p.f.sequences)]);
|
||||||
|
|
||||||
deflate_flush_block(c, &os, in_block_begin,
|
deflate_flush_block(c, &os, in_block_begin,
|
||||||
in_next - in_block_begin,
|
in_next - in_block_begin,
|
||||||
@ -2281,8 +2352,8 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
|
|||||||
/* Starting a new DEFLATE block. */
|
/* Starting a new DEFLATE block. */
|
||||||
|
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end =
|
const u8 * const in_max_block_end = choose_max_block_end(
|
||||||
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
|
||||||
unsigned min_len;
|
unsigned min_len;
|
||||||
struct deflate_sequence *seq = c->p.g.sequences;
|
struct deflate_sequence *seq = c->p.g.sequences;
|
||||||
|
|
||||||
@ -2332,6 +2403,7 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
|
|||||||
|
|
||||||
/* Check if it's time to output another block. */
|
/* Check if it's time to output another block. */
|
||||||
} while (in_next < in_max_block_end &&
|
} while (in_next < in_max_block_end &&
|
||||||
|
seq < &c->p.g.sequences[ARRAY_LEN(c->p.g.sequences)] &&
|
||||||
!should_end_block(&c->split_stats,
|
!should_end_block(&c->split_stats,
|
||||||
in_block_begin, in_next, in_end));
|
in_block_begin, in_next, in_end));
|
||||||
|
|
||||||
@ -2364,8 +2436,8 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
|
|||||||
/* Starting a new DEFLATE block. */
|
/* Starting a new DEFLATE block. */
|
||||||
|
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end =
|
const u8 * const in_max_block_end = choose_max_block_end(
|
||||||
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
|
||||||
const u8 *next_recalc_min_len =
|
const u8 *next_recalc_min_len =
|
||||||
in_next + MIN(in_end - in_next, 10000);
|
in_next + MIN(in_end - in_next, 10000);
|
||||||
unsigned min_len = DEFLATE_MIN_MATCH_LEN;
|
unsigned min_len = DEFLATE_MIN_MATCH_LEN;
|
||||||
@ -2544,6 +2616,7 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
|
|||||||
}
|
}
|
||||||
/* Check if it's time to output another block. */
|
/* Check if it's time to output another block. */
|
||||||
} while (in_next < in_max_block_end &&
|
} while (in_next < in_max_block_end &&
|
||||||
|
seq < &c->p.g.sequences[ARRAY_LEN(c->p.g.sequences)] &&
|
||||||
!should_end_block(&c->split_stats,
|
!should_end_block(&c->split_stats,
|
||||||
in_block_begin, in_next, in_end));
|
in_block_begin, in_next, in_end));
|
||||||
|
|
||||||
@ -3178,8 +3251,8 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
|
|||||||
|
|
||||||
struct lz_match *cache_ptr = c->p.n.match_cache;
|
struct lz_match *cache_ptr = c->p.n.match_cache;
|
||||||
const u8 * const in_block_begin = in_next;
|
const u8 * const in_block_begin = in_next;
|
||||||
const u8 * const in_max_block_end =
|
const u8 * const in_max_block_end = choose_max_block_end(
|
||||||
in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
|
in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
|
||||||
const u8 *next_observation = in_next;
|
const u8 *next_observation = in_next;
|
||||||
|
|
||||||
deflate_near_optimal_begin_block(c, in_block_begin == in);
|
deflate_near_optimal_begin_block(c, in_block_begin == in);
|
||||||
@ -3347,6 +3420,8 @@ libdeflate_alloc_compressor(int compression_level)
|
|||||||
struct libdeflate_compressor *c;
|
struct libdeflate_compressor *c;
|
||||||
size_t size = offsetof(struct libdeflate_compressor, p);
|
size_t size = offsetof(struct libdeflate_compressor, p);
|
||||||
|
|
||||||
|
check_buildtime_parameters();
|
||||||
|
|
||||||
if (compression_level < 0 || compression_level > 12)
|
if (compression_level < 0 || compression_level > 12)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user