Choose cheapest block type in compressor

This commit is contained in:
Eric Biggers 2016-05-21 10:34:00 -05:00
parent ecdcfc600b
commit 92e6c1ff01

View File

@@ -140,6 +140,11 @@ static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = {
27, 27, 28,
};
/* The order in which precode codeword lengths are stored */
/* This fixed permutation is dictated by the DEFLATE format (RFC 1951,
 * section 3.2.7): the lengths most likely to be nonzero come first, so
 * trailing zero entries can be omitted from the output stream. */
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
/* Codewords for the DEFLATE Huffman codes. */
struct deflate_codewords {
u32 litlen[DEFLATE_NUM_LITLEN_SYMS];
@ -295,7 +300,7 @@ struct deflate_compressor {
/* Dynamic Huffman codes for the current block */
struct deflate_codes codes;
/* Static Huffman codes set just before first use */
/* Static Huffman codes */
struct deflate_codes static_codes;
/* A table for fast lookups of offset slot by match offset.
@ -326,11 +331,15 @@ struct deflate_compressor {
/* The compression level with which this compressor was created. */
unsigned compression_level;
/* Temporary arrays for Huffman code output */
/* Temporary space for Huffman code output */
u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS];
u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
u32 precode_codewords[DEFLATE_NUM_PRECODE_SYMS];
unsigned precode_items[DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS];
unsigned num_litlen_syms;
unsigned num_offset_syms;
unsigned num_explicit_lens;
unsigned num_precode_items;
union {
/* Data for greedy or lazy parsing */
@ -460,6 +469,14 @@ deflate_flush_bits(struct deflate_output_bitstream *os)
}
}
/*
 * Align the bitstream on a byte boundary: pad the pending bit count up to
 * the next multiple of 8 (the padding bits are zero since they were never
 * set), then flush the completed bytes to the output.
 */
static forceinline void
deflate_align_bitstream(struct deflate_output_bitstream *os)
{
	unsigned pad_bits = -os->bitcount & 7;

	os->bitcount += pad_bits;
	deflate_flush_bits(os);
}
/*
* Flush any remaining bits to the output buffer if needed. Return the total
* number of bytes written to the output buffer, or 0 if an overflow occurred.
@ -1135,6 +1152,20 @@ deflate_init_static_codes(struct deflate_compressor *c)
deflate_make_huffman_codes(&c->freqs, &c->static_codes);
}
/* Return the offset slot for the specified match offset. */
static forceinline unsigned
deflate_get_offset_slot(struct deflate_compressor *c, unsigned offset)
{
#if USE_FULL_OFFSET_SLOT_FAST
	/* Full table: one entry per possible offset. */
	return c->offset_slot_fast[offset];
#else
	/* Compact table: offsets <= 256 are looked up directly; larger
	 * offsets are looked up in groups of 128 in the table's upper part. */
	unsigned idx = offset - 1;

	return (offset <= 256) ? c->offset_slot_fast[idx]
			       : c->offset_slot_fast[256 + (idx >> 7)];
#endif
}
/* Write the header fields common to all DEFLATE block types. */
static void
deflate_write_block_header(struct deflate_output_bitstream *os,
@ -1157,6 +1188,9 @@ deflate_compute_precode_items(const u8 lens[restrict],
unsigned extra_bits;
u8 len;
memset(precode_freqs, 0,
DEFLATE_NUM_PRECODE_SYMS * sizeof(precode_freqs[0]));
itemptr = precode_items;
run_start = 0;
do {
@ -1219,67 +1253,102 @@ deflate_compute_precode_items(const u8 lens[restrict],
}
/*
* Output a list of Huffman codeword lengths in compressed form.
*
* The codeword lengths are compressed using a separate Huffman code, the
* "precode", which contains a symbol for each possible codeword length in the
* larger code as well as several special symbols to represent repeated codeword
* lengths (a form of run-length encoding). The precode is itself constructed
* in canonical form, and its codeword lengths are represented literally in 19
* 3-bit fields that immediately precede the compressed codeword lengths of the
* larger code.
* Huffman codeword lengths for dynamic Huffman blocks are compressed using a
* separate Huffman code, the "precode", which contains a symbol for each
* possible codeword length in the larger code as well as several special
* symbols to represent repeated codeword lengths (a form of run-length
* encoding). The precode is itself constructed in canonical form, and its
* codeword lengths are represented literally in 19 3-bit fields that
* immediately precede the compressed codeword lengths of the larger code.
*/
static void
deflate_write_compressed_lens(struct deflate_compressor *c,
struct deflate_output_bitstream *os,
const u8 lens[], unsigned num_lens)
{
unsigned num_precode_items;
unsigned precode_item;
unsigned precode_sym;
unsigned num_explicit_lens;
unsigned i;
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
for (i = 0; i < DEFLATE_NUM_PRECODE_SYMS; i++)
c->precode_freqs[i] = 0;
/* Precompute the information needed to output Huffman codes. */
static void
deflate_precompute_huffman_header(struct deflate_compressor *c)
{
/* Compute how many litlen and offset symbols are needed. */
for (c->num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
c->num_litlen_syms > 257;
c->num_litlen_syms--)
if (c->codes.lens.litlen[c->num_litlen_syms - 1] != 0)
break;
for (c->num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
c->num_offset_syms > 1;
c->num_offset_syms--)
if (c->codes.lens.offset[c->num_offset_syms - 1] != 0)
break;
/* If we're not using the full set of literal/length codeword lengths,
* then temporarily move the offset codeword lengths over so that the
* literal/length and offset codeword lengths are contiguous. */
STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
DEFLATE_NUM_LITLEN_SYMS);
if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
memmove(&c->codes.lens.all[c->num_litlen_syms],
&c->codes.lens.all[DEFLATE_NUM_LITLEN_SYMS],
c->num_offset_syms * sizeof(c->codes.lens.all[0]));
}
/* Compute the "items" (RLE / literal tokens and extra bits) with which
* the codeword lengths in the larger code will be output. */
num_precode_items = deflate_compute_precode_items(lens,
num_lens,
c->precode_freqs,
c->precode_items);
* the codeword lengths in the larger code will be output. */
c->num_precode_items =
deflate_compute_precode_items(c->codes.lens.all,
c->num_litlen_syms +
c->num_offset_syms,
c->precode_freqs,
c->precode_items);
/* Build the precode. */
/* Build the precode. */
STATIC_ASSERT(MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN);
deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS,
MAX_PRE_CODEWORD_LEN,
c->precode_freqs, c->precode_lens,
c->precode_codewords);
/* Count how many precode lengths we actually need to output. */
for (num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
num_explicit_lens > 4;
num_explicit_lens--)
if (c->precode_lens[deflate_precode_lens_permutation[num_explicit_lens - 1]] != 0)
/* Count how many precode lengths we actually need to output. */
for (c->num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
c->num_explicit_lens > 4;
c->num_explicit_lens--)
if (c->precode_lens[deflate_precode_lens_permutation[
c->num_explicit_lens - 1]] != 0)
break;
deflate_add_bits(os, num_explicit_lens - 4, 4);
/* Restore the offset codeword lengths if needed. */
if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
memmove(&c->codes.lens.all[DEFLATE_NUM_LITLEN_SYMS],
&c->codes.lens.all[c->num_litlen_syms],
c->num_offset_syms * sizeof(c->codes.lens.all[0]));
}
}
/* Output the Huffman codes. */
static void
deflate_write_huffman_header(struct deflate_compressor *c,
struct deflate_output_bitstream *os)
{
unsigned i;
deflate_add_bits(os, c->num_litlen_syms - 257, 5);
deflate_add_bits(os, c->num_offset_syms - 1, 5);
deflate_add_bits(os, c->num_explicit_lens - 4, 4);
deflate_flush_bits(os);
/* Output the lengths of the codewords in the precode. */
for (i = 0; i < num_explicit_lens; i++) {
deflate_add_bits(os, c->precode_lens[deflate_precode_lens_permutation[i]], 3);
for (i = 0; i < c->num_explicit_lens; i++) {
deflate_add_bits(os, c->precode_lens[
deflate_precode_lens_permutation[i]], 3);
deflate_flush_bits(os);
}
/* Output the encoded lengths of the codewords in the larger code. */
for (i = 0; i < num_precode_items; i++) {
precode_item = c->precode_items[i];
precode_sym = precode_item & 0x1F;
for (i = 0; i < c->num_precode_items; i++) {
unsigned precode_item = c->precode_items[i];
unsigned precode_sym = precode_item & 0x1F;
deflate_add_bits(os, c->precode_codewords[precode_sym],
c->precode_lens[precode_sym]);
if (precode_sym >= 16) {
@ -1295,64 +1364,11 @@ deflate_write_compressed_lens(struct deflate_compressor *c,
}
}
/*
 * Output the specified Huffman codes.
 * This is used for dynamic Huffman blocks.
 *
 * Writes the HLIT and HDIST header fields, then the compressed codeword
 * lengths of the literal/length and offset codes via the precode.
 */
static void
deflate_write_huffman_codes(struct deflate_compressor *c,
struct deflate_output_bitstream *os)
{
unsigned num_litlen_syms;
unsigned num_offset_syms;
/* We only need to output up to the highest-valued symbol actually used. */
/* Litlen count never goes below 257 (literals + end-of-block are always
 * present); offset count never goes below 1. */
for (num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
num_litlen_syms > 257;
num_litlen_syms--)
if (c->codes.lens.litlen[num_litlen_syms - 1] != 0)
break;
for (num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
num_offset_syms > 1;
num_offset_syms--)
if (c->codes.lens.offset[num_offset_syms - 1] != 0)
break;
/* 5-bit HLIT and HDIST fields, biased by 257 and 1 respectively as the
 * DEFLATE format requires. */
deflate_add_bits(os, num_litlen_syms - 257, 5);
deflate_add_bits(os, num_offset_syms - 1, 5);
deflate_flush_bits(os);
/* If we're not outputting the full set of literal/length codeword
 * lengths, temporarily move the offset codeword lengths over so that
 * the literal/length and offset codeword lengths are contiguous. */
/* Relies on the offset lengths immediately following the litlen
 * lengths in the 'all' array, verified by this assertion. */
STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
DEFLATE_NUM_LITLEN_SYMS);
if (num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS)
memmove(&c->codes.lens.all[num_litlen_syms],
&c->codes.lens.all[DEFLATE_NUM_LITLEN_SYMS],
num_offset_syms * sizeof(c->codes.lens.all[0]));
/* Output the codeword lengths. */
deflate_write_compressed_lens(c, os, c->codes.lens.all,
num_litlen_syms + num_offset_syms);
/* Restore the offset codeword lengths if needed. */
/* (Undoes the memmove above so c->codes.lens stays usable.) */
if (num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS)
memmove(&c->codes.lens.all[DEFLATE_NUM_LITLEN_SYMS],
&c->codes.lens.all[num_litlen_syms],
num_offset_syms * sizeof(c->codes.lens.all[0]));
}
static void
deflate_write_sequences(struct deflate_output_bitstream * restrict os,
const u8 * restrict in_next,
const struct deflate_codes * restrict codes,
const struct deflate_sequence sequences[restrict],
const struct deflate_codes * restrict codes)
const u8 * restrict in_next)
{
const struct deflate_sequence *seq = sequences;
@ -1471,6 +1487,71 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
}
}
/*
 * Follow the minimum-cost path in the graph of possible match/literal choices
 * for the current block and write out the matches/literals using the specified
 * Huffman codes.
 *
 * Note: this is slightly duplicated with deflate_write_sequences(), the reason
 * being that we don't want to waste time translating between intermediate
 * match/literal representations.
 *
 * @block_length is the number of input bytes covered by c->optimum; the walk
 * advances by each item's length until exactly that many bytes are consumed.
 */
static void
deflate_write_item_list(struct deflate_output_bitstream *os,
const struct deflate_codes *codes,
struct deflate_compressor *c,
u32 block_length)
{
struct deflate_optimum_node *cur_node = c->optimum;
struct deflate_optimum_node * const end_node = cur_node + block_length;
do {
/* Each node's item packs (length, offset); length == 1 means a
 * literal whose value is stored in the offset field. */
unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
unsigned litlen_symbol;
unsigned length_slot;
unsigned offset_slot;
if (length == 1) {
/* Literal */
litlen_symbol = offset;
deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
codes->lens.litlen[litlen_symbol]);
deflate_flush_bits(os);
} else {
/* Match length */
/* Length symbols start at 257; extra bits encode the
 * remainder within the slot. */
length_slot = deflate_length_slot[length];
litlen_symbol = 257 + length_slot;
deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
codes->lens.litlen[litlen_symbol]);
deflate_add_bits(os, length - deflate_length_slot_base[length_slot],
deflate_extra_length_bits[length_slot]);
/* Flush early only if the bit buffer can't hold the
 * worst case of all four components at once —
 * NOTE(review): CAN_BUFFER semantics inferred from
 * usage; confirm against its definition. */
if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_LENGTH_BITS +
MAX_OFFSET_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_OFFSET_BITS))
deflate_flush_bits(os);
/* Match offset */
offset_slot = deflate_get_offset_slot(c, offset);
deflate_add_bits(os, codes->codewords.offset[offset_slot],
codes->lens.offset[offset_slot]);
if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_OFFSET_BITS))
deflate_flush_bits(os);
deflate_add_bits(os, offset - deflate_offset_slot_base[offset_slot],
deflate_extra_offset_bits[offset_slot]);
deflate_flush_bits(os);
}
/* Advance by the number of input bytes this item covered. */
cur_node += length;
} while (cur_node != end_node);
}
/* Output the end-of-block symbol. */
static void
deflate_write_end_of_block(struct deflate_output_bitstream *os,
@ -1481,56 +1562,153 @@ deflate_write_end_of_block(struct deflate_output_bitstream *os,
deflate_flush_bits(os);
}
static void
deflate_write_block(struct deflate_compressor * restrict c,
struct deflate_output_bitstream * restrict os,
const u8 * restrict block_begin, u32 block_length,
bool is_final_block)
deflate_write_uncompressed_block(struct deflate_output_bitstream *os,
const u8 *data, u16 len,
bool is_final_block)
{
struct deflate_codes *codes;
deflate_write_block_header(os, is_final_block,
DEFLATE_BLOCKTYPE_UNCOMPRESSED);
deflate_align_bitstream(os);
/* Note: we don't currently output any uncompressed blocks. */
/* Account for end-of-block symbol */
c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
if (block_length >= 1000) {
/* Use custom ("dynamic") Huffman codes. */
deflate_write_block_header(os, is_final_block,
DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN);
deflate_make_huffman_codes(&c->freqs, &c->codes);
deflate_write_huffman_codes(c, os);
codes = &c->codes;
} else {
/* This is a very short block. Just use the static codes. */
deflate_write_block_header(os, is_final_block,
DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
codes = &c->static_codes;
if (codes->codewords.litlen[0] == 0xFFFFFFFF)
deflate_init_static_codes(c);
if (4 + (u32)len >= os->end - os->next) {
os->next = os->end;
return;
}
deflate_write_sequences(os, block_begin, c->sequences, codes);
deflate_write_end_of_block(os, codes);
/* Reset symbol frequencies if this wasn't the final block. */
if (!is_final_block)
deflate_reset_symbol_frequencies(c);
put_unaligned_le16(len, os->next);
os->next += 2;
put_unaligned_le16(~len, os->next);
os->next += 2;
memcpy(os->next, data, len);
os->next += len;
}
/* Return the offset slot for the specified match offset. */
static forceinline unsigned
deflate_get_offset_slot(struct deflate_compressor *c, unsigned offset)
static void
deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os,
const u8 *data, u32 data_length,
bool is_final_block)
{
#if USE_FULL_OFFSET_SLOT_FAST
return c->offset_slot_fast[offset];
#else
if (offset <= 256)
return c->offset_slot_fast[offset - 1];
else
return c->offset_slot_fast[256 + ((offset - 1) >> 7)];
#endif
do {
u16 len = MIN(data_length, UINT16_MAX);
deflate_write_uncompressed_block(os, data, len,
is_final_block && len == data_length);
data += len;
data_length -= len;
} while (data_length != 0);
}
/*
 * Choose the best type of block to use (dynamic Huffman, static Huffman, or
 * uncompressed), then output it.
 *
 * The choice is made by computing, in bits, the exact cost of each block type
 * from the symbol frequencies tallied for this block, and picking the
 * cheapest. @use_item_list selects the near-optimal parser's item-list
 * writer; otherwise the sequence writer is used.
 */
static void
deflate_flush_block(struct deflate_compressor * restrict c,
struct deflate_output_bitstream * restrict os,
const u8 * restrict block_begin, u32 block_length,
bool is_final_block, bool use_item_list)
{
/* Number of extra bits carried by each precode symbol; only the
 * run-length symbols 16, 17, 18 have extra bits (2, 3, 7). */
static const u8 deflate_extra_precode_bits[DEFLATE_NUM_PRECODE_SYMS] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7,
};
/* Costs are measured in bits */
u32 dynamic_cost = 0;
u32 static_cost = 0;
u32 uncompressed_cost = 0;
struct deflate_codes *codes;
int block_type;
unsigned sym;
/* Tally the end-of-block symbol. */
c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
/* Build dynamic Huffman codes. */
deflate_make_huffman_codes(&c->freqs, &c->codes);
/* Account for the cost of sending dynamic Huffman codes. */
/* 5 + 5 + 4 bits for the HLIT, HDIST, and HCLEN header fields, plus
 * 3 bits per explicitly transmitted precode length, plus the precode-
 * encoded codeword lengths themselves. */
deflate_precompute_huffman_header(c);
dynamic_cost += 5 + 5 + 4 + (3 * c->num_explicit_lens);
for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
u32 extra = deflate_extra_precode_bits[sym];
dynamic_cost += c->precode_freqs[sym] *
(extra + c->precode_lens[sym]);
}
/* Account for the cost of encoding literals. */
/* Static code: literals 0-143 use 8-bit codewords, 144-255 use 9-bit
 * codewords (fixed by the DEFLATE format). */
for (sym = 0; sym < 256; sym++) {
dynamic_cost += c->freqs.litlen[sym] *
c->codes.lens.litlen[sym];
}
for (sym = 0; sym < 144; sym++)
static_cost += c->freqs.litlen[sym] * 8;
for (; sym < 256; sym++)
static_cost += c->freqs.litlen[sym] * 9;
/* Account for the cost of encoding the end-of-block symbol. */
/* (Symbol 256; it is 7 bits in the static code.) */
dynamic_cost += c->codes.lens.litlen[256];
static_cost += 7;
/* Account for the cost of encoding lengths. */
for (sym = 257; sym < DEFLATE_NUM_LITLEN_SYMS; sym++) {
u32 extra = deflate_extra_length_bits[sym - 257];
dynamic_cost += c->freqs.litlen[sym] *
(extra + c->codes.lens.litlen[sym]);
static_cost += c->freqs.litlen[sym] *
(extra + c->static_codes.lens.litlen[sym]);
}
/* Account for the cost of encoding offsets. */
/* All offset symbols are 5 bits in the static code. */
for (sym = 0; sym < DEFLATE_NUM_OFFSET_SYMS; sym++) {
u32 extra = deflate_extra_offset_bits[sym];
dynamic_cost += c->freqs.offset[sym] *
(extra + c->codes.lens.offset[sym]);
static_cost += c->freqs.offset[sym] * (extra + 5);
}
/* Compute the cost of using uncompressed blocks. */
/* 3 header bits plus padding to a byte boundary, 32 bits of LEN/NLEN,
 * 40 bits (5 bytes) of overhead for each additional 65535-byte block
 * needed, and 8 bits per data byte. */
uncompressed_cost += (-(os->bitcount + 3) & 7) + 32 +
(40 * (DIV_ROUND_UP(block_length,
UINT16_MAX) - 1)) +
(8 * block_length);
/* Choose the cheapest block type. */
/* Ties are broken in favor of static, then uncompressed (the strict
 * '<' comparisons). */
if (dynamic_cost < MIN(static_cost, uncompressed_cost)) {
block_type = DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN;
codes = &c->codes;
} else if (static_cost < uncompressed_cost) {
block_type = DEFLATE_BLOCKTYPE_STATIC_HUFFMAN;
codes = &c->static_codes;
} else {
block_type = DEFLATE_BLOCKTYPE_UNCOMPRESSED;
}
/* Now actually output the block. */
if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
/* Note: the length being flushed may exceed the maximum length
 * of an uncompressed block (65535 bytes). Therefore, more than
 * one uncompressed block might be needed. */
deflate_write_uncompressed_blocks(os, block_begin, block_length,
is_final_block);
} else {
/* Output the block header. */
deflate_write_block_header(os, is_final_block, block_type);
/* Output the Huffman codes (dynamic Huffman blocks only). */
if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN)
deflate_write_huffman_header(c, os);
/* Output the literals, matches, and end-of-block symbol. */
if (use_item_list) {
deflate_write_item_list(os, codes, c, block_length);
} else {
deflate_write_sequences(os, codes, c->sequences,
block_begin);
}
deflate_write_end_of_block(os, codes);
}
}
static forceinline void
@ -1713,7 +1891,6 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
u32 next_hashes[2] = {0, 0};
deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c);
hc_matchfinder_init(&c->hc_mf);
do {
@ -1726,6 +1903,7 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
struct block_split_stats split_stats;
init_block_split_stats(&split_stats);
deflate_reset_symbol_frequencies(c);
do {
u32 length;
@ -1771,8 +1949,9 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
!should_end_block(&split_stats, in_block_begin, in_next, in_end));
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, in_block_begin,
in_next - in_block_begin, in_next == in_end);
deflate_flush_block(c, &os, in_block_begin,
in_next - in_block_begin,
in_next == in_end, false);
} while (in_next != in_end);
return deflate_flush_output(&os);
@ -1797,7 +1976,6 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
u32 next_hashes[2] = {0, 0};
deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c);
hc_matchfinder_init(&c->hc_mf);
do {
@ -1810,6 +1988,7 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
struct block_split_stats split_stats;
init_block_split_stats(&split_stats);
deflate_reset_symbol_frequencies(c);
do {
unsigned cur_len;
@ -1917,9 +2096,9 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
!should_end_block(&split_stats, in_block_begin, in_next, in_end));
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, in_block_begin,
in_next - in_block_begin, in_next == in_end);
deflate_flush_block(c, &os, in_block_begin,
in_next - in_block_begin,
in_next == in_end, false);
} while (in_next != in_end);
return deflate_flush_output(&os);
@ -1953,70 +2132,6 @@ deflate_tally_item_list(struct deflate_compressor *c,
} while (cur_node != end_node);
}
/*
 * Follow the minimum-cost path in the graph of possible match/literal choices
 * for the current block and write out the matches/literals using the specified
 * Huffman codes.
 *
 * Note: this is slightly duplicated with deflate_write_sequences(), the reason
 * being that we don't want to waste time translating between intermediate
 * match/literal representations.
 *
 * @end_node marks one past the last node of c->optimum belonging to this
 * block; the walk advances by each item's length until it is reached.
 */
static void
deflate_write_item_list(struct deflate_output_bitstream *os,
const struct deflate_codes *codes,
struct deflate_compressor *c,
struct deflate_optimum_node * const end_node)
{
struct deflate_optimum_node *cur_node = c->optimum;
do {
/* Each node's item packs (length, offset); length == 1 means a
 * literal whose value is stored in the offset field. */
unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
unsigned litlen_symbol;
unsigned length_slot;
unsigned offset_slot;
if (length == 1) {
/* Literal */
litlen_symbol = offset;
deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
codes->lens.litlen[litlen_symbol]);
deflate_flush_bits(os);
} else {
/* Match length */
/* Length symbols start at 257; extra bits encode the
 * remainder within the slot. */
length_slot = deflate_length_slot[length];
litlen_symbol = 257 + length_slot;
deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
codes->lens.litlen[litlen_symbol]);
deflate_add_bits(os, length - deflate_length_slot_base[length_slot],
deflate_extra_length_bits[length_slot]);
/* Flush early only if the bit buffer can't hold the
 * worst case of all four components at once —
 * NOTE(review): CAN_BUFFER semantics inferred from
 * usage; confirm against its definition. */
if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_LENGTH_BITS +
MAX_OFFSET_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_OFFSET_BITS))
deflate_flush_bits(os);
/* Match offset */
offset_slot = deflate_get_offset_slot(c, offset);
deflate_add_bits(os, codes->codewords.offset[offset_slot],
codes->lens.offset[offset_slot]);
if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_OFFSET_BITS))
deflate_flush_bits(os);
deflate_add_bits(os, offset - deflate_offset_slot_base[offset_slot],
deflate_extra_offset_bits[offset_slot]);
deflate_flush_bits(os);
}
/* Advance by the number of input bytes this item covered. */
cur_node += length;
} while (cur_node != end_node);
}
/* Set the current cost model from the codeword lengths specified in @lens. */
static void
deflate_set_costs(struct deflate_compressor *c, const struct deflate_lens * lens)
@ -2137,11 +2252,12 @@ deflate_adjust_costs(struct deflate_compressor *c)
static void
deflate_optimize_and_write_block(struct deflate_compressor *c,
struct deflate_output_bitstream *os,
const unsigned block_len,
struct lz_match *end_cache_ptr,
const u8 * const block_begin,
const u32 block_length,
struct lz_match * const end_cache_ptr,
const bool is_final_block)
{
struct deflate_optimum_node *end_node = c->optimum + block_len;
struct deflate_optimum_node * const end_node = c->optimum + block_length;
unsigned num_passes_remaining = c->num_optim_passes;
/* Force the block to really end at 'end_node', even if some matches
@ -2242,13 +2358,8 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
/* All optimization passes are done. Output a block using the
* minimum-cost path computed on the last optimization pass. */
c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
deflate_make_huffman_codes(&c->freqs, &c->codes);
deflate_reset_symbol_frequencies(c);
deflate_write_block_header(os, is_final_block, DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN);
deflate_write_huffman_codes(c, os);
deflate_write_item_list(os, &c->codes, c, end_node);
deflate_write_end_of_block(os, &c->codes);
deflate_flush_block(c, os, block_begin, block_length,
is_final_block, true);
}
/*
@ -2279,7 +2390,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
u32 next_hashes[2] = {0, 0};
deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c);
bt_matchfinder_init(&c->bt_mf);
do {
@ -2293,6 +2403,7 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
const u8 *next_observation = in_next;
init_block_split_stats(&split_stats);
deflate_reset_symbol_frequencies(c);
/*
* Find matches until we decide to end the block. We end the
@ -2418,7 +2529,8 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
deflate_set_default_costs(c);
else
deflate_adjust_costs(c);
deflate_optimize_and_write_block(c, &os, in_next - in_block_begin,
deflate_optimize_and_write_block(c, &os, in_block_begin,
in_next - in_block_begin,
cache_ptr, in_next == in_end);
} while (in_next != in_end);
@ -2565,7 +2677,7 @@ deflate_alloc_compressor(unsigned int compression_level)
}
deflate_init_offset_slot_fast(c);
c->static_codes.codewords.litlen[0] = 0xFFFFFFFF;
deflate_init_static_codes(c);
return c;
}
@ -2577,15 +2689,15 @@ deflate_compress(struct deflate_compressor *c,
{
if (unlikely(out_nbytes_avail < MIN_OUTPUT_SIZE))
return 0;
if (unlikely(in_nbytes == 0)) {
/* Empty input; output a single empty block. */
/* For extremely small inputs just use a single uncompressed block. */
if (unlikely(in_nbytes < 16)) {
struct deflate_output_bitstream os;
deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c);
deflate_finish_sequence(c->sequences, 0);
deflate_write_block(c, &os, in, 0, true);
deflate_write_uncompressed_block(&os, in, in_nbytes, true);
return deflate_flush_output(&os);
}
return (*c->impl)(c, in, in_nbytes, out, out_nbytes_avail);
}
@ -2601,14 +2713,15 @@ deflate_get_compression_level(struct deflate_compressor *c)
return c->compression_level;
}
/* Return an upper bound on the compressed size for compressing @in_nbytes bytes
* of data. This function needs some work to be more accurate. */
LIBEXPORT size_t
deflate_compress_bound(struct deflate_compressor *c, size_t in_nbytes)
{
size_t max_num_blocks = DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH);
if (max_num_blocks == 0)
max_num_blocks++;
return MIN_OUTPUT_SIZE + DIV_ROUND_UP(in_nbytes * 9, 8) +
max_num_blocks * 200;
/*
* The worst case is all uncompressed blocks where one block has length
* <= MIN_BLOCK_LENGTH and the others have length MIN_BLOCK_LENGTH.
* Each uncompressed block has 5 bytes of overhead: 1 for BFINAL, BTYPE,
* and alignment to a byte boundary; 2 for LEN; and 2 for NLEN.
*/
size_t max_num_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1);
return MAX((5 * max_num_blocks) + in_nbytes, MIN_OUTPUT_SIZE);
}