Support level 0, "no compression"

Some users may require a valid DEFLATE, zlib, or gzip stream but know
ahead of time that particular inputs are not compressible.  zlib
supports "level 0" for this use case.  Support this in libdeflate too.

Resolves https://github.com/ebiggers/libdeflate/issues/86
This commit is contained in:
Eric Biggers 2020-10-10 21:54:14 -07:00
parent e98ddd6612
commit 4c92394eaa
7 changed files with 150 additions and 47 deletions

View File

@ -1676,7 +1676,7 @@ deflate_write_uncompressed_block(struct deflate_output_bitstream *os,
static void static void
deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os, deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os,
const u8 *data, u32 data_length, const u8 *data, size_t data_length,
bool is_final_block) bool is_final_block)
{ {
do { do {
@ -1956,6 +1956,23 @@ should_end_block(struct block_split_stats *stats,
/******************************************************************************/ /******************************************************************************/
/*
 * Level 0 "compressor": performs no compression and emits only uncompressed
 * blocks.
 *
 * The compressor object 'c' is not consulted.  All 'in_nbytes' bytes of 'in'
 * are handed to deflate_write_uncompressed_blocks() with the final-block flag
 * set, and the return value is whatever deflate_flush_output() reports for
 * the output bitstream that was set up over 'out' / 'out_nbytes_avail'.
 */
static size_t
deflate_compress_none(struct libdeflate_compressor * restrict c,
		      const u8 * restrict in, size_t in_nbytes,
		      u8 * restrict out, size_t out_nbytes_avail)
{
	struct deflate_output_bitstream bitstream;

	deflate_init_output(&bitstream, out, out_nbytes_avail);

	/* Single pass: the whole input goes out as stored data. */
	deflate_write_uncompressed_blocks(&bitstream, in, in_nbytes, true);

	return deflate_flush_output(&bitstream);
}
/* /*
* This is the "greedy" DEFLATE compressor. It always chooses the longest match. * This is the "greedy" DEFLATE compressor. It always chooses the longest match.
*/ */
@ -2669,20 +2686,26 @@ LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI
libdeflate_alloc_compressor(int compression_level) libdeflate_alloc_compressor(int compression_level)
{ {
struct libdeflate_compressor *c; struct libdeflate_compressor *c;
size_t size; size_t size = offsetof(struct libdeflate_compressor, p);
#if SUPPORT_NEAR_OPTIMAL_PARSING #if SUPPORT_NEAR_OPTIMAL_PARSING
if (compression_level >= 8) if (compression_level >= 8)
size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.n); size += sizeof(c->p.n);
else else if (compression_level >= 1)
size += sizeof(c->p.g);
#else
if (compression_level >= 1)
size += sizeof(c->p.g);
#endif #endif
size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.g);
c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size); c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size);
if (!c) if (!c)
return NULL; return NULL;
switch (compression_level) { switch (compression_level) {
case 0:
c->impl = deflate_compress_none;
break;
case 1: case 1:
c->impl = deflate_compress_greedy; c->impl = deflate_compress_greedy;
c->max_search_depth = 2; c->max_search_depth = 2;

View File

@ -61,9 +61,12 @@ struct libdeflate_compressor;
* libdeflate_alloc_compressor() allocates a new compressor that supports * libdeflate_alloc_compressor() allocates a new compressor that supports
* DEFLATE, zlib, and gzip compression. 'compression_level' is the compression * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
* level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
* medium/default, 9 = slow, 12 = slowest). The return value is a pointer to * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
* the new compressor, or NULL if out of memory or if the compression level is * "no compression", specifically "create a valid stream, but only emit
* invalid (i.e. outside the range [1, 12]). * uncompressed blocks" (this will expand the data slightly).
*
* The return value is a pointer to the new compressor, or NULL if out of memory
* or if the compression level is invalid (i.e. outside the range [0, 12]).
* *
* Note: for compression, the sliding window size is defined at compilation time * Note: for compression, the sliding window size is defined at compilation time
* to 32768, the largest size permissible in the DEFLATE format. It cannot be * to 32768, the largest size permissible in the DEFLATE format. It cannot be

View File

@ -27,7 +27,7 @@
#include "test_util.h" #include "test_util.h"
static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::C:D:ghs:VYZz"); static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz");
enum wrapper { enum wrapper {
NO_WRAPPER, NO_WRAPPER,
@ -52,6 +52,7 @@ struct engine {
const tchar *name; const tchar *name;
bool (*init_compressor)(struct compressor *); bool (*init_compressor)(struct compressor *);
size_t (*compress_bound)(struct compressor *, size_t);
size_t (*compress)(struct compressor *, const void *, size_t, size_t (*compress)(struct compressor *, const void *, size_t,
void *, size_t); void *, size_t);
void (*destroy_compressor)(struct compressor *); void (*destroy_compressor)(struct compressor *);
@ -71,6 +72,19 @@ libdeflate_engine_init_compressor(struct compressor *c)
return c->private != NULL; return c->private != NULL;
} }
/*
 * libdeflate engine: query the compress-bound function that matches the
 * wrapper format selected in 'c', passing the engine's private compressor
 * handle and the input size through unchanged.
 */
static size_t
libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes)
{
	if (c->wrapper == ZLIB_WRAPPER)
		return libdeflate_zlib_compress_bound(c->private, in_nbytes);

	if (c->wrapper == GZIP_WRAPPER)
		return libdeflate_gzip_compress_bound(c->private, in_nbytes);

	/* Any other wrapper value is treated as raw DEFLATE. */
	return libdeflate_deflate_compress_bound(c->private, in_nbytes);
}
static size_t static size_t
libdeflate_engine_compress(struct compressor *c, const void *in, libdeflate_engine_compress(struct compressor *c, const void *in,
size_t in_nbytes, void *out, size_t out_nbytes_avail) size_t in_nbytes, void *out, size_t out_nbytes_avail)
@ -128,6 +142,7 @@ static const struct engine libdeflate_engine = {
.name = T("libdeflate"), .name = T("libdeflate"),
.init_compressor = libdeflate_engine_init_compressor, .init_compressor = libdeflate_engine_init_compressor,
.compress_bound = libdeflate_engine_compress_bound,
.compress = libdeflate_engine_compress, .compress = libdeflate_engine_compress,
.destroy_compressor = libdeflate_engine_destroy_compressor, .destroy_compressor = libdeflate_engine_destroy_compressor,
@ -184,6 +199,12 @@ libz_engine_init_compressor(struct compressor *c)
return true; return true;
} }
/*
 * libz engine: forward the bound query to zlib's deflateBound(), using the
 * engine's private handle in 'c->private' as the stream argument.
 */
static size_t
libz_engine_compress_bound(struct compressor *c, size_t in_nbytes)
{
	const size_t bound = deflateBound(c->private, in_nbytes);

	return bound;
}
static size_t static size_t
libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail) void *out, size_t out_nbytes_avail)
@ -265,6 +286,7 @@ static const struct engine libz_engine = {
.name = T("libz"), .name = T("libz"),
.init_compressor = libz_engine_init_compressor, .init_compressor = libz_engine_init_compressor,
.compress_bound = libz_engine_compress_bound,
.compress = libz_engine_compress, .compress = libz_engine_compress,
.destroy_compressor = libz_engine_destroy_compressor, .destroy_compressor = libz_engine_destroy_compressor,
@ -305,6 +327,12 @@ compressor_init(struct compressor *c, int level, enum wrapper wrapper,
return engine->init_compressor(c); return engine->init_compressor(c);
} }
static size_t
compress_bound(struct compressor *c, size_t in_nbytes)
{
return c->engine->compress_bound(c, in_nbytes);
}
static size_t static size_t
do_compress(struct compressor *c, const void *in, size_t in_nbytes, do_compress(struct compressor *c, const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail) void *out, size_t out_nbytes_avail)
@ -315,7 +343,8 @@ do_compress(struct compressor *c, const void *in, size_t in_nbytes,
static void static void
compressor_destroy(struct compressor *c) compressor_destroy(struct compressor *c)
{ {
c->engine->destroy_compressor(c); if (c->engine != NULL)
c->engine->destroy_compressor(c);
} }
static bool static bool
@ -337,7 +366,8 @@ do_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
static void static void
decompressor_destroy(struct decompressor *d) decompressor_destroy(struct decompressor *d)
{ {
d->engine->destroy_decompressor(d); if (d->engine != NULL)
d->engine->destroy_decompressor(d);
} }
/******************************************************************************/ /******************************************************************************/
@ -364,11 +394,13 @@ show_usage(FILE *fp)
"Benchmark DEFLATE compression and decompression on the specified FILEs.\n" "Benchmark DEFLATE compression and decompression on the specified FILEs.\n"
"\n" "\n"
"Options:\n" "Options:\n"
" -0 no compression\n"
" -1 fastest (worst) compression\n" " -1 fastest (worst) compression\n"
" -6 medium compression (default)\n" " -6 medium compression (default)\n"
" -12 slowest (best) compression\n" " -12 slowest (best) compression\n"
" -C ENGINE compression engine\n" " -C ENGINE compression engine\n"
" -D ENGINE decompression engine\n" " -D ENGINE decompression engine\n"
" -e allow chunks to be expanded (implied by -0)\n"
" -g use gzip wrapper\n" " -g use gzip wrapper\n"
" -h print this help\n" " -h print this help\n"
" -s SIZE chunk size\n" " -s SIZE chunk size\n"
@ -398,6 +430,7 @@ show_version(void)
static int static int
do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
void *decompressed_buf, u32 chunk_size, void *decompressed_buf, u32 chunk_size,
bool allow_expansion, size_t compressed_buf_size,
struct compressor *compressor, struct compressor *compressor,
struct decompressor *decompressor) struct decompressor *decompressor)
{ {
@ -409,19 +442,31 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
while ((ret = xread(in, original_buf, chunk_size)) > 0) { while ((ret = xread(in, original_buf, chunk_size)) > 0) {
u32 original_size = ret; u32 original_size = ret;
size_t out_nbytes_avail;
u32 compressed_size; u32 compressed_size;
u64 start_time; u64 start_time;
bool ok; bool ok;
total_uncompressed_size += original_size; total_uncompressed_size += original_size;
if (allow_expansion) {
out_nbytes_avail = compress_bound(compressor,
original_size);
if (out_nbytes_avail > compressed_buf_size) {
msg("%"TS": bug in compress_bound()", in->name);
return -1;
}
} else {
out_nbytes_avail = original_size - 1;
}
/* Compress the chunk of data. */ /* Compress the chunk of data. */
start_time = timer_ticks(); start_time = timer_ticks();
compressed_size = do_compress(compressor, compressed_size = do_compress(compressor,
original_buf, original_buf,
original_size, original_size,
compressed_buf, compressed_buf,
original_size - 1); out_nbytes_avail);
total_compress_time += timer_ticks() - start_time; total_compress_time += timer_ticks() - start_time;
if (compressed_size) { if (compressed_size) {
@ -451,7 +496,14 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
total_compressed_size += compressed_size; total_compressed_size += compressed_size;
} else { } else {
/* Compression did not make the chunk smaller. */ /*
* The chunk would have compressed to more than
* out_nbytes_avail bytes.
*/
if (allow_expansion) {
msg("%"TS": bug in compress_bound()", in->name);
return -1;
}
total_compressed_size += original_size; total_compressed_size += original_size;
} }
} }
@ -493,11 +545,13 @@ tmain(int argc, tchar *argv[])
enum wrapper wrapper = NO_WRAPPER; enum wrapper wrapper = NO_WRAPPER;
const struct engine *compress_engine = &DEFAULT_ENGINE; const struct engine *compress_engine = &DEFAULT_ENGINE;
const struct engine *decompress_engine = &DEFAULT_ENGINE; const struct engine *decompress_engine = &DEFAULT_ENGINE;
bool allow_expansion = false;
struct compressor compressor = { 0 };
struct decompressor decompressor = { 0 };
size_t compressed_buf_size;
void *original_buf = NULL; void *original_buf = NULL;
void *compressed_buf = NULL; void *compressed_buf = NULL;
void *decompressed_buf = NULL; void *decompressed_buf = NULL;
struct compressor compressor;
struct decompressor decompressor;
tchar *default_file_list[] = { NULL }; tchar *default_file_list[] = { NULL };
int opt_char; int opt_char;
int i; int i;
@ -507,6 +561,7 @@ tmain(int argc, tchar *argv[])
while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
switch (opt_char) { switch (opt_char) {
case '0':
case '1': case '1':
case '2': case '2':
case '3': case '3':
@ -517,7 +572,7 @@ tmain(int argc, tchar *argv[])
case '8': case '8':
case '9': case '9':
level = parse_compression_level(opt_char, toptarg); level = parse_compression_level(opt_char, toptarg);
if (level == 0) if (level < 0)
return 1; return 1;
break; break;
case 'C': case 'C':
@ -536,6 +591,9 @@ tmain(int argc, tchar *argv[])
return 1; return 1;
} }
break; break;
case 'e':
allow_expansion = true;
break;
case 'g': case 'g':
wrapper = GZIP_WRAPPER; wrapper = GZIP_WRAPPER;
break; break;
@ -570,20 +628,28 @@ tmain(int argc, tchar *argv[])
argc -= toptind; argc -= toptind;
argv += toptind; argv += toptind;
if (level == 0)
allow_expansion = true;
ret = -1;
if (!compressor_init(&compressor, level, wrapper, compress_engine))
goto out;
if (!decompressor_init(&decompressor, wrapper, decompress_engine))
goto out;
if (allow_expansion)
compressed_buf_size = compress_bound(&compressor, chunk_size);
else
compressed_buf_size = chunk_size - 1;
original_buf = xmalloc(chunk_size); original_buf = xmalloc(chunk_size);
compressed_buf = xmalloc(chunk_size - 1); compressed_buf = xmalloc(compressed_buf_size);
decompressed_buf = xmalloc(chunk_size); decompressed_buf = xmalloc(chunk_size);
ret = -1; ret = -1;
if (original_buf == NULL || compressed_buf == NULL || if (original_buf == NULL || compressed_buf == NULL ||
decompressed_buf == NULL) decompressed_buf == NULL)
goto out0; goto out;
if (!compressor_init(&compressor, level, wrapper, compress_engine))
goto out0;
if (!decompressor_init(&decompressor, wrapper, decompress_engine))
goto out1;
if (argc == 0) { if (argc == 0) {
argv = default_file_list; argv = default_file_list;
@ -608,25 +674,24 @@ tmain(int argc, tchar *argv[])
ret = xopen_for_read(argv[i], true, &in); ret = xopen_for_read(argv[i], true, &in);
if (ret != 0) if (ret != 0)
goto out2; goto out;
printf("Processing %"TS"...\n", in.name); printf("Processing %"TS"...\n", in.name);
ret = do_benchmark(&in, original_buf, compressed_buf, ret = do_benchmark(&in, original_buf, compressed_buf,
decompressed_buf, chunk_size, &compressor, decompressed_buf, chunk_size,
&decompressor); allow_expansion, compressed_buf_size,
&compressor, &decompressor);
xclose(&in); xclose(&in);
if (ret != 0) if (ret != 0)
goto out2; goto out;
} }
ret = 0; ret = 0;
out2: out:
decompressor_destroy(&decompressor);
out1:
compressor_destroy(&compressor);
out0:
free(decompressed_buf); free(decompressed_buf);
free(compressed_buf); free(compressed_buf);
free(original_buf); free(original_buf);
decompressor_destroy(&decompressor);
compressor_destroy(&compressor);
return -ret; return -ret;
} }

View File

@ -550,7 +550,7 @@ tmain(int argc, tchar *argv[])
case '9': case '9':
options.compression_level = options.compression_level =
parse_compression_level(opt_char, toptarg); parse_compression_level(opt_char, toptarg);
if (options.compression_level == 0) if (options.compression_level < 0)
return 1; return 1;
break; break;
case 'c': case 'c':

View File

@ -433,27 +433,39 @@ xclose(struct file_stream *strm)
/* /*
* Parse the compression level given on the command line, returning the * Parse the compression level given on the command line, returning the
* compression level on success or 0 on error * compression level on success or -1 on error
*/ */
int int
parse_compression_level(tchar opt_char, const tchar *arg) parse_compression_level(tchar opt_char, const tchar *arg)
{ {
unsigned long level = opt_char - '0'; int level;
const tchar *p;
if (arg == NULL) if (arg == NULL)
arg = T(""); arg = T("");
for (p = arg; *p >= '0' && *p <= '9'; p++) if (opt_char < '0' || opt_char > '9')
level = (level * 10) + (*p - '0'); goto invalid;
level = opt_char - '0';
if (level < 1 || level > 12 || *p != '\0') { if (arg[0] != '\0') {
msg("Invalid compression level: \"%"TC"%"TS"\". " if (arg[0] < '0' || arg[0] > '9')
"Must be an integer in the range [1, 12].", opt_char, arg); goto invalid;
return 0; if (arg[1] != '\0') /* Levels are at most 2 digits */
goto invalid;
if (level == 0) /* Don't allow arguments like "-01" */
goto invalid;
level = (level * 10) + (arg[0] - '0');
} }
if (level < 0 || level > 12)
goto invalid;
return level; return level;
invalid:
msg("Invalid compression level: \"%"TC"%"TS"\". "
"Must be an integer in the range [0, 12].", opt_char, arg);
return -1;
} }
/* Allocate a new DEFLATE compressor */ /* Allocate a new DEFLATE compressor */

View File

@ -43,7 +43,7 @@ tmain(int argc, tchar *argv[])
ASSERT(malloc_count == 0); ASSERT(malloc_count == 0);
ASSERT(free_count == 0); ASSERT(free_count == 0);
for (level = 1; level <= 12; level++) { for (level = 0; level <= 12; level++) {
malloc_count = free_count = 0; malloc_count = free_count = 0;
c = libdeflate_alloc_compressor(level); c = libdeflate_alloc_compressor(level);
ASSERT(c != NULL); ASSERT(c != NULL);
@ -67,7 +67,7 @@ tmain(int argc, tchar *argv[])
libdeflate_set_memory_allocator(do_fail_malloc, do_free); libdeflate_set_memory_allocator(do_fail_malloc, do_free);
for (level = 1; level <= 12; level++) { for (level = 0; level <= 12; level++) {
malloc_count = free_count = 0; malloc_count = free_count = 0;
c = libdeflate_alloc_compressor(level); c = libdeflate_alloc_compressor(level);
ASSERT(c == NULL); ASSERT(c == NULL);

View File

@ -18,12 +18,12 @@ for format in '' '-g' '-z'; do
run_cmd ./benchmark $format $ref_impl $SMOKEDATA run_cmd ./benchmark $format $ref_impl $SMOKEDATA
done done
done done
for level in 1 3 7 9; do for level in 0 1 3 7 9; do
for ref_impl in '' '-Y'; do for ref_impl in '' '-Y'; do
run_cmd ./benchmark -$level $ref_impl $SMOKEDATA run_cmd ./benchmark -$level $ref_impl $SMOKEDATA
done done
done done
for level in 1 3 7 9 12; do for level in 0 1 3 7 9 12; do
for ref_impl in '' '-Z'; do for ref_impl in '' '-Z'; do
run_cmd ./benchmark -$level $ref_impl $SMOKEDATA run_cmd ./benchmark -$level $ref_impl $SMOKEDATA
done done