mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-11 13:32:14 -04:00
decompress: handle Huffman codes with one used symbol
This commit is contained in:
parent
4325101bb9
commit
446e2268b3
@ -565,10 +565,10 @@ build_decode_table(u32 decode_table[],
|
|||||||
len_counts[lens[sym]]++;
|
len_counts[lens[sym]]++;
|
||||||
|
|
||||||
/* It is already guaranteed that all lengths are <= max_codeword_len,
|
/* It is already guaranteed that all lengths are <= max_codeword_len,
|
||||||
* but it cannot be assumed they form a valid prefix code. A codeword
|
* but it cannot be assumed they form a complete prefix code. A
|
||||||
* of length n should require a proportion of the codespace equaling
|
* codeword of length n should require a proportion of the codespace
|
||||||
* (1/2)^n. The code is valid if and only if, by this measure, the
|
* equaling (1/2)^n. The code is complete if and only if, by this
|
||||||
* codespace is exactly filled by the lengths. */
|
* measure, the codespace is exactly filled by the lengths. */
|
||||||
remainder = 1;
|
remainder = 1;
|
||||||
for (len = 1; len <= max_codeword_len; len++) {
|
for (len = 1; len <= max_codeword_len; len++) {
|
||||||
remainder <<= 1;
|
remainder <<= 1;
|
||||||
@ -582,27 +582,34 @@ build_decode_table(u32 decode_table[],
|
|||||||
|
|
||||||
if (unlikely(remainder != 0)) {
|
if (unlikely(remainder != 0)) {
|
||||||
/* The lengths do not fill the codespace; that is, they form an
|
/* The lengths do not fill the codespace; that is, they form an
|
||||||
* incomplete set. */
|
* incomplete code. */
|
||||||
if (remainder == (1U << max_codeword_len)) {
|
|
||||||
/* The code is completely empty. By definition, no
|
/* Initialize the table entries to default values. When
|
||||||
* symbols can be decoded with an empty code.
|
* decompressing a well-formed stream, these default values will
|
||||||
* Consequently, we technically don't even need to fill
|
* never be used. But since a malformed stream might contain
|
||||||
* in the decode table. However, to avoid accessing
|
* any bits at all, these entries need to be set anyway. */
|
||||||
* uninitialized memory if the algorithm nevertheless
|
u32 entry = make_decode_table_entry(decode_results[0], 1);
|
||||||
* attempts to decode symbols using such a code, we fill
|
for (unsigned i = 0; i < (1U << table_bits); i++)
|
||||||
* the decode table with default values. */
|
decode_table[i] = entry;
|
||||||
for (unsigned i = 0; i < (1U << table_bits); i++) {
|
|
||||||
decode_table[i] =
|
/* A completely empty code is permitted. */
|
||||||
make_decode_table_entry(
|
if (remainder == (1U << max_codeword_len))
|
||||||
decode_results[0], 1);
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
return false;
|
/* The code is nonempty and incomplete. Proceed only if there
|
||||||
|
* is a single used symbol and its codeword has length 1. The
|
||||||
|
* DEFLATE RFC is somewhat unclear regarding this case. What
|
||||||
|
* zlib's decompressor does is permit this case for
|
||||||
|
* literal/length and offset codes and assume the codeword is 0
|
||||||
|
* rather than 1. We do the same except we allow this case for
|
||||||
|
* precodes too. */
|
||||||
|
if (remainder != (1U << (max_codeword_len - 1)) ||
|
||||||
|
len_counts[1] != 1)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sort the symbols primarily by length and secondarily by symbol value.
|
/* Sort the symbols primarily by increasing codeword length and
|
||||||
*/
|
* secondarily by increasing symbol value. */
|
||||||
|
|
||||||
/* Initialize 'offsets' so that offsets[len] is the number of codewords
|
/* Initialize 'offsets' so that offsets[len] is the number of codewords
|
||||||
* shorter than 'len' bits, including length 0. */
|
* shorter than 'len' bits, including length 0. */
|
||||||
@ -617,17 +624,16 @@ build_decode_table(u32 decode_table[],
|
|||||||
/* Generate the decode table entries. Since we process codewords from
|
/* Generate the decode table entries. Since we process codewords from
|
||||||
* shortest to longest, the main portion of the decode table is filled
|
* shortest to longest, the main portion of the decode table is filled
|
||||||
* first; then the subtables are filled. Note that it's already been
|
* first; then the subtables are filled. Note that it's already been
|
||||||
* verified that the codewords form a valid (complete) prefix code. */
|
* verified that the code is nonempty and not over-subscribed. */
|
||||||
|
|
||||||
/* Start with the index of the first used symbol. */
|
/* Start with the smallest codeword length and the smallest-valued
|
||||||
|
* symbol which has that codeword length. */
|
||||||
sym_idx = offsets[0];
|
sym_idx = offsets[0];
|
||||||
|
|
||||||
/* Start with the smallest used codeword length. */
|
|
||||||
codeword_len = 1;
|
codeword_len = 1;
|
||||||
while (len_counts[codeword_len] == 0)
|
while (len_counts[codeword_len] == 0)
|
||||||
codeword_len++;
|
codeword_len++;
|
||||||
|
|
||||||
for (;;) { /* For used each symbol and its codeword... */
|
for (;;) { /* For each used symbol and its codeword... */
|
||||||
unsigned sym;
|
unsigned sym;
|
||||||
u32 entry;
|
u32 entry;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user