mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-08 11:50:00 -04:00
Support multi-member gzip files
This commit is contained in:
parent
3d96a83ef9
commit
5a9d25a892
@ -37,7 +37,7 @@ static enum libdeflate_result ATTRIBUTES
|
||||
FUNCNAME(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
u8 *out_next = out;
|
||||
u8 * const out_end = out_next + out_nbytes_avail;
|
||||
@ -394,6 +394,14 @@ block_done:
|
||||
|
||||
/* That was the last block. */
|
||||
|
||||
/* Discard any readahead bits and check for excessive overread */
|
||||
ALIGN_INPUT();
|
||||
|
||||
/* Optionally return the actual number of bytes read */
|
||||
if (actual_in_nbytes_ret)
|
||||
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||
|
||||
/* Optionally return the actual number of bytes written */
|
||||
if (actual_out_nbytes_ret) {
|
||||
*actual_out_nbytes_ret = out_next - (u8 *)out;
|
||||
} else {
|
||||
|
@ -204,18 +204,20 @@ typedef machine_word_t bitbuf_t;
|
||||
/*
|
||||
* Fill the bitbuffer variable, reading one byte at a time.
|
||||
*
|
||||
* Note: if we would overrun the input buffer, we just don't read anything,
|
||||
* leaving the bits as 0 but marking them as filled. This makes the
|
||||
* implementation simpler because this removes the need to distinguish between
|
||||
* "real" overruns and overruns that occur because of our own lookahead during
|
||||
* Huffman decoding. The disadvantage is that a "real" overrun can go
|
||||
* undetected, and libdeflate_deflate_decompress() may return a success status
|
||||
* rather than the expected failure status if one occurs. However, this is
|
||||
* irrelevant because even if this specific case were to be handled "correctly",
|
||||
* one could easily come up with a different case where the compressed data
|
||||
* would be corrupted in such a way that fully retains its validity. Users
|
||||
* should run a checksum against the uncompressed data if they wish to detect
|
||||
* corruptions.
|
||||
* If we would overread the input buffer, we just don't read anything, leaving
|
||||
* the bits zeroed but marking them filled. This simplifies the decompressor
|
||||
* because it removes the need to distinguish between real overreads and
|
||||
* overreads that occur only because of the decompressor's own lookahead.
|
||||
*
|
||||
* The disadvantage is that real overreads are not detected immediately.
|
||||
* However, this is safe because the decompressor is still guaranteed to make
|
||||
* forward progress when presented with never-ending 0 bits. In an existing block
|
||||
* output will continue to be generated, whereas new blocks can only be uncompressed
|
||||
* (since the type code for uncompressed blocks is 0), for which we check for
|
||||
* previous overread. But even if we didn't check, uncompressed blocks would
|
||||
* fail to validate because LEN would not equal ~NLEN. So the decompressor will
|
||||
* eventually either detect that the output buffer is full, or detect invalid
|
||||
* input, or finish the final block.
|
||||
*/
|
||||
#define FILL_BITS_BYTEWISE() \
|
||||
do { \
|
||||
@ -277,17 +279,19 @@ if (!HAVE_BITS(n)) { \
|
||||
#define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
|
||||
|
||||
/*
|
||||
* Align the input to the next byte boundary, discarding any remaining bits in
|
||||
* the current byte.
|
||||
* Verify that the input buffer hasn't been overread, then align the input to
|
||||
* the next byte boundary, discarding any remaining bits in the current byte.
|
||||
*
|
||||
* Note that if the bitbuffer variable currently contains more than 8 bits, then
|
||||
* Note that if the bitbuffer variable currently contains more than 7 bits, then
|
||||
* we must rewind 'in_next', effectively putting those bits back. Only the bits
|
||||
* in what would be the "current" byte if we were reading one byte at a time can
|
||||
* be actually discarded.
|
||||
*/
|
||||
#define ALIGN_INPUT() \
|
||||
do { \
|
||||
in_next -= (bitsleft >> 3) - MIN(overrun_count, bitsleft >> 3); \
|
||||
SAFETY_CHECK(overrun_count <= (bitsleft >> 3)); \
|
||||
in_next -= (bitsleft >> 3) - overrun_count; \
|
||||
overrun_count = 0; \
|
||||
bitbuf = 0; \
|
||||
bitsleft = 0; \
|
||||
} while(0)
|
||||
@ -824,13 +828,13 @@ static enum libdeflate_result
|
||||
dispatch(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
|
||||
|
||||
typedef enum libdeflate_result (*decompress_func_t)
|
||||
(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
|
||||
|
||||
static decompress_func_t decompress_impl = dispatch;
|
||||
|
||||
@ -838,7 +842,7 @@ static enum libdeflate_result
|
||||
dispatch(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
decompress_func_t f = deflate_decompress_default;
|
||||
#if X86_CPU_FEATURES_ENABLED
|
||||
@ -847,7 +851,7 @@ dispatch(struct libdeflate_decompressor * restrict d,
|
||||
#endif
|
||||
decompress_impl = f;
|
||||
return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
|
||||
actual_out_nbytes_ret);
|
||||
actual_in_nbytes_ret, actual_out_nbytes_ret);
|
||||
}
|
||||
#endif /* DISPATCH_ENABLED */
|
||||
|
||||
@ -860,20 +864,33 @@ dispatch(struct libdeflate_decompressor * restrict d,
|
||||
* calling the appropriate implementation depending on the CPU features at
|
||||
* runtime.
|
||||
*/
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
#if DISPATCH_ENABLED
|
||||
return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
|
||||
actual_in_nbytes_ret, actual_out_nbytes_ret);
|
||||
#else
|
||||
return deflate_decompress_default(d, in, in_nbytes,
|
||||
out, out_nbytes_avail,
|
||||
actual_in_nbytes_ret,
|
||||
actual_out_nbytes_ret);
|
||||
#endif
|
||||
}
|
||||
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
#if DISPATCH_ENABLED
|
||||
return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
|
||||
actual_out_nbytes_ret);
|
||||
#else
|
||||
return deflate_decompress_default(d, in, in_nbytes, out,
|
||||
out_nbytes_avail,
|
||||
actual_out_nbytes_ret);
|
||||
#endif
|
||||
return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
|
||||
out, out_nbytes_avail,
|
||||
NULL, actual_out_nbytes_ret);
|
||||
}
|
||||
|
||||
LIBDEFLATEAPI struct libdeflate_decompressor *
|
||||
|
@ -33,14 +33,16 @@
|
||||
#include "libdeflate.h"
|
||||
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
const u8 *in_next = in;
|
||||
const u8 * const in_end = in_next + in_nbytes;
|
||||
u8 flg;
|
||||
size_t actual_in_nbytes;
|
||||
size_t actual_out_nbytes;
|
||||
enum libdeflate_result result;
|
||||
|
||||
@ -102,9 +104,10 @@ libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||
}
|
||||
|
||||
/* Compressed data */
|
||||
result = libdeflate_deflate_decompress(d, in_next,
|
||||
result = libdeflate_deflate_decompress_ex(d, in_next,
|
||||
in_end - GZIP_FOOTER_SIZE - in_next,
|
||||
out, out_nbytes_avail,
|
||||
&actual_in_nbytes,
|
||||
actual_out_nbytes_ret);
|
||||
if (result != LIBDEFLATE_SUCCESS)
|
||||
return result;
|
||||
@ -114,7 +117,7 @@ libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||
else
|
||||
actual_out_nbytes = out_nbytes_avail;
|
||||
|
||||
in_next = in_end - GZIP_FOOTER_SIZE;
|
||||
in_next += actual_in_nbytes;
|
||||
|
||||
/* CRC32 */
|
||||
if (libdeflate_crc32(0, out, actual_out_nbytes) !=
|
||||
@ -125,6 +128,21 @@ libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||
/* ISIZE */
|
||||
if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
|
||||
return LIBDEFLATE_BAD_DATA;
|
||||
in_next += 4;
|
||||
|
||||
if (actual_in_nbytes_ret)
|
||||
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||
|
||||
return LIBDEFLATE_SUCCESS;
|
||||
}
|
||||
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
|
||||
out, out_nbytes_avail,
|
||||
NULL, actual_out_nbytes_ret);
|
||||
}
|
||||
|
44
libdeflate.h
44
libdeflate.h
@ -182,14 +182,17 @@ enum libdeflate_result {
|
||||
};
|
||||
|
||||
/*
|
||||
* libdeflate_deflate_decompress() decompresses 'in_nbytes' bytes of
|
||||
* raw DEFLATE-compressed data at 'in' and writes the uncompressed data to
|
||||
* 'out', which is a buffer of at least 'out_nbytes_avail' bytes. If
|
||||
* decompression was successful, then 0 (LIBDEFLATE_SUCCESS) is returned;
|
||||
* otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If
|
||||
* libdeflate_deflate_decompress() decompresses the DEFLATE-compressed stream
|
||||
* from the buffer 'in' with compressed size up to 'in_nbytes' bytes. The
|
||||
* uncompressed data is written to 'out', a buffer with size 'out_nbytes_avail'
|
||||
* bytes. If decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned.
|
||||
* Otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If
|
||||
* a nonzero result code is returned, then the contents of the output buffer are
|
||||
* undefined.
|
||||
*
|
||||
* Decompression stops at the end of the DEFLATE stream (as indicated by the
|
||||
* BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes.
|
||||
*
|
||||
* libdeflate_deflate_decompress() can be used in cases where the actual
|
||||
* uncompressed size is known (recommended) or unknown (not recommended):
|
||||
*
|
||||
@ -216,6 +219,19 @@ libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
|
||||
/*
|
||||
* Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret'
|
||||
* argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL,
|
||||
* then the actual compressed size of the DEFLATE stream (aligned to the next
|
||||
* byte boundary) is written to *actual_in_nbytes_ret.
|
||||
*/
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
|
||||
/*
|
||||
* Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format
|
||||
* instead of raw DEFLATE.
|
||||
@ -229,6 +245,10 @@ libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
|
||||
/*
|
||||
* Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format
|
||||
* instead of raw DEFLATE.
|
||||
*
|
||||
* If multiple gzip-compressed members are concatenated, then only the first
|
||||
* will be decompressed. Use libdeflate_gzip_decompress_ex() if you need
|
||||
* multi-member support.
|
||||
*/
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
|
||||
@ -236,6 +256,20 @@ libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
|
||||
/*
|
||||
* Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret'
|
||||
* argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
|
||||
* succeeds (indicating that the first gzip-compressed member in the input
|
||||
* buffer was decompressed), then the actual number of input bytes consumed is
|
||||
* written to *actual_in_nbytes_ret.
|
||||
*/
|
||||
LIBDEFLATEAPI enum libdeflate_result
|
||||
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret,
|
||||
size_t *actual_out_nbytes_ret);
|
||||
|
||||
/*
|
||||
* libdeflate_free_decompressor() frees a decompressor that was allocated with
|
||||
* libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action
|
||||
|
@ -189,8 +189,10 @@ do_decompress(struct libdeflate_decompressor *decompressor,
|
||||
size_t compressed_size = in->mmap_size;
|
||||
void *uncompressed_data = NULL;
|
||||
size_t uncompressed_size;
|
||||
size_t actual_in_nbytes;
|
||||
size_t actual_out_nbytes;
|
||||
enum libdeflate_result result;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (compressed_size < sizeof(u32)) {
|
||||
msg("%"TS": not in gzip format", in->name);
|
||||
@ -200,34 +202,61 @@ do_decompress(struct libdeflate_decompressor *decompressor,
|
||||
|
||||
uncompressed_size = load_u32_gzip(&compressed_data[compressed_size - 4]);
|
||||
|
||||
uncompressed_data = xmalloc(uncompressed_size);
|
||||
if (uncompressed_data == NULL) {
|
||||
msg("%"TS": file is probably too large to be processed by this "
|
||||
"program", in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
do {
|
||||
if (uncompressed_data == NULL) {
|
||||
uncompressed_data = xmalloc(uncompressed_size);
|
||||
if (uncompressed_data == NULL) {
|
||||
msg("%"TS": file is probably too large to be "
|
||||
"processed by this program", in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
result = libdeflate_gzip_decompress(decompressor,
|
||||
compressed_data,
|
||||
compressed_size,
|
||||
uncompressed_data,
|
||||
uncompressed_size, NULL);
|
||||
result = libdeflate_gzip_decompress_ex(decompressor,
|
||||
compressed_data,
|
||||
compressed_size,
|
||||
uncompressed_data,
|
||||
uncompressed_size,
|
||||
&actual_in_nbytes,
|
||||
&actual_out_nbytes);
|
||||
|
||||
if (result == LIBDEFLATE_INSUFFICIENT_SPACE) {
|
||||
msg("%"TS": file corrupt or too large to be processed by this "
|
||||
"program", in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
if (result == LIBDEFLATE_INSUFFICIENT_SPACE) {
|
||||
if (uncompressed_size * 2 <= uncompressed_size) {
|
||||
msg("%"TS": file corrupt or too large to be "
|
||||
"processed by this program", in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
uncompressed_size *= 2;
|
||||
free(uncompressed_data);
|
||||
uncompressed_data = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result != LIBDEFLATE_SUCCESS) {
|
||||
msg("%"TS": file corrupt or not in gzip format", in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
if (result != LIBDEFLATE_SUCCESS) {
|
||||
msg("%"TS": file corrupt or not in gzip format",
|
||||
in->name);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = full_write(out, uncompressed_data, uncompressed_size);
|
||||
if (actual_in_nbytes == 0 ||
|
||||
actual_in_nbytes > compressed_size ||
|
||||
actual_out_nbytes > uncompressed_size) {
|
||||
msg("Bug in libdeflate_gzip_decompress_ex()!");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = full_write(out, uncompressed_data, actual_out_nbytes);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
compressed_data += actual_in_nbytes;
|
||||
compressed_size -= actual_in_nbytes;
|
||||
|
||||
} while (compressed_size != 0);
|
||||
out:
|
||||
free(uncompressed_data);
|
||||
return ret;
|
||||
|
@ -392,6 +392,23 @@ gunzip file.gz
|
||||
[ "$(stat -c '%a;%x;%y' file)" = "$orig_stat" ]
|
||||
|
||||
|
||||
begin_test 'Decompressing multi-member gzip file'
|
||||
cat file file > orig
|
||||
gzip -c file > file.gz
|
||||
gzip -c file >> file.gz
|
||||
gunzip -f file.gz
|
||||
cmp file orig
|
||||
|
||||
|
||||
begin_test 'Decompressing multi-member gzip file (final member smaller)'
|
||||
echo 'hello world' > 2
|
||||
cat file 2 > orig
|
||||
gzip -c file > file.gz
|
||||
gzip -c 2 >> file.gz
|
||||
gunzip -f file.gz
|
||||
cmp file orig
|
||||
|
||||
|
||||
begin_test 'Help option'
|
||||
gzip -h 2>&1 | grep -q 'Usage'
|
||||
gunzip -h 2>&1 | grep -q 'Usage'
|
||||
|
Loading…
x
Reference in New Issue
Block a user