Support level 0, "no compression"

Some users may require a valid DEFLATE, zlib, or gzip stream but know
ahead of time that particular inputs are not compressible.  zlib
supports "level 0" for this use case.  Support this in libdeflate too.

Resolves https://github.com/ebiggers/libdeflate/issues/86
This commit is contained in:
Eric Biggers 2020-10-10 21:54:14 -07:00
parent e98ddd6612
commit 4c92394eaa
7 changed files with 150 additions and 47 deletions

View File

@ -1676,7 +1676,7 @@ deflate_write_uncompressed_block(struct deflate_output_bitstream *os,
static void
deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os,
const u8 *data, u32 data_length,
const u8 *data, size_t data_length,
bool is_final_block)
{
do {
@ -1956,6 +1956,23 @@ should_end_block(struct block_split_stats *stats,
/******************************************************************************/
/*
 * Level 0 "compressor": performs no actual compression.  The whole input is
 * handed to deflate_write_uncompressed_blocks() with the final-block flag set,
 * so the output consists solely of uncompressed blocks.
 *
 * The return value is whatever deflate_flush_output() reports for the output
 * bitstream.  'c' is unused; it is present only so that the signature matches
 * the other compressor implementations assignable to c->impl.
 */
static size_t
deflate_compress_none(struct libdeflate_compressor * restrict c,
		      const u8 * restrict in, size_t in_nbytes,
		      u8 * restrict out, size_t out_nbytes_avail)
{
	struct deflate_output_bitstream os;
	size_t out_nbytes;

	deflate_init_output(&os, out, out_nbytes_avail);

	/* One call covers the entire input; 'true' marks the last block final. */
	deflate_write_uncompressed_blocks(&os, in, in_nbytes, true);

	out_nbytes = deflate_flush_output(&os);
	return out_nbytes;
}
/*
* This is the "greedy" DEFLATE compressor. It always chooses the longest match.
*/
@ -2669,20 +2686,26 @@ LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI
libdeflate_alloc_compressor(int compression_level)
{
struct libdeflate_compressor *c;
size_t size;
size_t size = offsetof(struct libdeflate_compressor, p);
#if SUPPORT_NEAR_OPTIMAL_PARSING
if (compression_level >= 8)
size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.n);
else
size += sizeof(c->p.n);
else if (compression_level >= 1)
size += sizeof(c->p.g);
#else
if (compression_level >= 1)
size += sizeof(c->p.g);
#endif
size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.g);
c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size);
if (!c)
return NULL;
switch (compression_level) {
case 0:
c->impl = deflate_compress_none;
break;
case 1:
c->impl = deflate_compress_greedy;
c->max_search_depth = 2;

View File

@ -61,9 +61,12 @@ struct libdeflate_compressor;
* libdeflate_alloc_compressor() allocates a new compressor that supports
* DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
* level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
* medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
* "no compression", specifically "create a valid stream, but only emit
* uncompressed blocks" (this will expand the data slightly).
*
* The return value is a pointer to the new compressor, or NULL if out of memory
* or if the compression level is invalid (i.e. outside the range [0, 12]).
*
* Note: for compression, the sliding window size is defined at compilation time
* to 32768, the largest size permissible in the DEFLATE format. It cannot be

View File

@ -27,7 +27,7 @@
#include "test_util.h"
/*
 * Option string passed to tgetopt().  Each digit is an option taking an
 * optional argument, so that a two-digit level such as "-12" is seen as option
 * '1' with argument "2".  '0' selects "no compression" and 'e' allows chunks
 * to be expanded.
 */
static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz");
enum wrapper {
NO_WRAPPER,
@ -52,6 +52,7 @@ struct engine {
const tchar *name;
bool (*init_compressor)(struct compressor *);
size_t (*compress_bound)(struct compressor *, size_t);
size_t (*compress)(struct compressor *, const void *, size_t,
void *, size_t);
void (*destroy_compressor)(struct compressor *);
@ -71,6 +72,19 @@ libdeflate_engine_init_compressor(struct compressor *c)
return c->private != NULL;
}
/*
 * compress_bound hook of the libdeflate engine.
 *
 * Picks the libdeflate bound function that corresponds to the wrapper format
 * configured in 'c' and returns its result for an input of 'in_nbytes' bytes.
 */
static size_t
libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes)
{
	if (c->wrapper == ZLIB_WRAPPER)
		return libdeflate_zlib_compress_bound(c->private, in_nbytes);

	if (c->wrapper == GZIP_WRAPPER)
		return libdeflate_gzip_compress_bound(c->private, in_nbytes);

	/* Any other wrapper value (e.g. NO_WRAPPER) uses the DEFLATE bound. */
	return libdeflate_deflate_compress_bound(c->private, in_nbytes);
}
static size_t
libdeflate_engine_compress(struct compressor *c, const void *in,
size_t in_nbytes, void *out, size_t out_nbytes_avail)
@ -128,6 +142,7 @@ static const struct engine libdeflate_engine = {
.name = T("libdeflate"),
.init_compressor = libdeflate_engine_init_compressor,
.compress_bound = libdeflate_engine_compress_bound,
.compress = libdeflate_engine_compress,
.destroy_compressor = libdeflate_engine_destroy_compressor,
@ -184,6 +199,12 @@ libz_engine_init_compressor(struct compressor *c)
return true;
}
/*
 * compress_bound hook of the libz engine: asks zlib's deflateBound() for the
 * bound on 'in_nbytes' bytes of input, using the engine state in c->private.
 */
static size_t
libz_engine_compress_bound(struct compressor *c, size_t in_nbytes)
{
	size_t bound = deflateBound(c->private, in_nbytes);

	return bound;
}
static size_t
libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail)
@ -265,6 +286,7 @@ static const struct engine libz_engine = {
.name = T("libz"),
.init_compressor = libz_engine_init_compressor,
.compress_bound = libz_engine_compress_bound,
.compress = libz_engine_compress,
.destroy_compressor = libz_engine_destroy_compressor,
@ -305,6 +327,12 @@ compressor_init(struct compressor *c, int level, enum wrapper wrapper,
return engine->init_compressor(c);
}
static size_t
compress_bound(struct compressor *c, size_t in_nbytes)
{
return c->engine->compress_bound(c, in_nbytes);
}
static size_t
do_compress(struct compressor *c, const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail)
@ -315,7 +343,8 @@ do_compress(struct compressor *c, const void *in, size_t in_nbytes,
/*
 * Release the engine-specific state of a compressor via the engine's
 * destroy_compressor hook.
 *
 * This is a no-op when c->engine is NULL, i.e. when the structure was
 * zero-initialized and no engine was ever attached to it.  That allows a
 * single cleanup path to call this unconditionally.  The hook is invoked at
 * most once per call.
 */
static void
compressor_destroy(struct compressor *c)
{
	if (c->engine != NULL)
		c->engine->destroy_compressor(c);
}
static bool
@ -337,7 +366,8 @@ do_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
/*
 * Release the engine-specific state of a decompressor via the engine's
 * destroy_decompressor hook.
 *
 * This is a no-op when d->engine is NULL, i.e. when the structure was
 * zero-initialized and no engine was ever attached to it.  That allows a
 * single cleanup path to call this unconditionally.  The hook is invoked at
 * most once per call.
 */
static void
decompressor_destroy(struct decompressor *d)
{
	if (d->engine != NULL)
		d->engine->destroy_decompressor(d);
}
/******************************************************************************/
@ -364,11 +394,13 @@ show_usage(FILE *fp)
"Benchmark DEFLATE compression and decompression on the specified FILEs.\n"
"\n"
"Options:\n"
" -0 no compression\n"
" -1 fastest (worst) compression\n"
" -6 medium compression (default)\n"
" -12 slowest (best) compression\n"
" -C ENGINE compression engine\n"
" -D ENGINE decompression engine\n"
" -e allow chunks to be expanded (implied by -0)\n"
" -g use gzip wrapper\n"
" -h print this help\n"
" -s SIZE chunk size\n"
@ -398,6 +430,7 @@ show_version(void)
static int
do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
void *decompressed_buf, u32 chunk_size,
bool allow_expansion, size_t compressed_buf_size,
struct compressor *compressor,
struct decompressor *decompressor)
{
@ -409,19 +442,31 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
while ((ret = xread(in, original_buf, chunk_size)) > 0) {
u32 original_size = ret;
size_t out_nbytes_avail;
u32 compressed_size;
u64 start_time;
bool ok;
total_uncompressed_size += original_size;
if (allow_expansion) {
out_nbytes_avail = compress_bound(compressor,
original_size);
if (out_nbytes_avail > compressed_buf_size) {
msg("%"TS": bug in compress_bound()", in->name);
return -1;
}
} else {
out_nbytes_avail = original_size - 1;
}
/* Compress the chunk of data. */
start_time = timer_ticks();
compressed_size = do_compress(compressor,
original_buf,
original_size,
compressed_buf,
original_size - 1);
out_nbytes_avail);
total_compress_time += timer_ticks() - start_time;
if (compressed_size) {
@ -451,7 +496,14 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
total_compressed_size += compressed_size;
} else {
/* Compression did not make the chunk smaller. */
/*
* The chunk would have compressed to more than
* out_nbytes_avail bytes.
*/
if (allow_expansion) {
msg("%"TS": bug in compress_bound()", in->name);
return -1;
}
total_compressed_size += original_size;
}
}
@ -493,11 +545,13 @@ tmain(int argc, tchar *argv[])
enum wrapper wrapper = NO_WRAPPER;
const struct engine *compress_engine = &DEFAULT_ENGINE;
const struct engine *decompress_engine = &DEFAULT_ENGINE;
bool allow_expansion = false;
struct compressor compressor = { 0 };
struct decompressor decompressor = { 0 };
size_t compressed_buf_size;
void *original_buf = NULL;
void *compressed_buf = NULL;
void *decompressed_buf = NULL;
struct compressor compressor;
struct decompressor decompressor;
tchar *default_file_list[] = { NULL };
int opt_char;
int i;
@ -507,6 +561,7 @@ tmain(int argc, tchar *argv[])
while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
switch (opt_char) {
case '0':
case '1':
case '2':
case '3':
@ -517,7 +572,7 @@ tmain(int argc, tchar *argv[])
case '8':
case '9':
level = parse_compression_level(opt_char, toptarg);
if (level == 0)
if (level < 0)
return 1;
break;
case 'C':
@ -536,6 +591,9 @@ tmain(int argc, tchar *argv[])
return 1;
}
break;
case 'e':
allow_expansion = true;
break;
case 'g':
wrapper = GZIP_WRAPPER;
break;
@ -570,20 +628,28 @@ tmain(int argc, tchar *argv[])
argc -= toptind;
argv += toptind;
if (level == 0)
allow_expansion = true;
ret = -1;
if (!compressor_init(&compressor, level, wrapper, compress_engine))
goto out;
if (!decompressor_init(&decompressor, wrapper, decompress_engine))
goto out;
if (allow_expansion)
compressed_buf_size = compress_bound(&compressor, chunk_size);
else
compressed_buf_size = chunk_size - 1;
original_buf = xmalloc(chunk_size);
compressed_buf = xmalloc(chunk_size - 1);
compressed_buf = xmalloc(compressed_buf_size);
decompressed_buf = xmalloc(chunk_size);
ret = -1;
if (original_buf == NULL || compressed_buf == NULL ||
decompressed_buf == NULL)
goto out0;
if (!compressor_init(&compressor, level, wrapper, compress_engine))
goto out0;
if (!decompressor_init(&decompressor, wrapper, decompress_engine))
goto out1;
goto out;
if (argc == 0) {
argv = default_file_list;
@ -608,25 +674,24 @@ tmain(int argc, tchar *argv[])
ret = xopen_for_read(argv[i], true, &in);
if (ret != 0)
goto out2;
goto out;
printf("Processing %"TS"...\n", in.name);
ret = do_benchmark(&in, original_buf, compressed_buf,
decompressed_buf, chunk_size, &compressor,
&decompressor);
decompressed_buf, chunk_size,
allow_expansion, compressed_buf_size,
&compressor, &decompressor);
xclose(&in);
if (ret != 0)
goto out2;
goto out;
}
ret = 0;
out2:
decompressor_destroy(&decompressor);
out1:
compressor_destroy(&compressor);
out0:
out:
free(decompressed_buf);
free(compressed_buf);
free(original_buf);
decompressor_destroy(&decompressor);
compressor_destroy(&compressor);
return -ret;
}

View File

@ -550,7 +550,7 @@ tmain(int argc, tchar *argv[])
case '9':
options.compression_level =
parse_compression_level(opt_char, toptarg);
if (options.compression_level == 0)
if (options.compression_level < 0)
return 1;
break;
case 'c':

View File

@ -433,27 +433,39 @@ xclose(struct file_stream *strm)
/*
 * Parse the compression level given on the command line.
 *
 * 'opt_char' is the option character itself, which is also the first digit of
 * the level.  'arg' is the optional remainder of the option (NULL or "" if
 * there is none); it may supply at most one further digit.
 *
 * Returns the compression level, an integer in the range [0, 12], on success.
 * On error, prints a message and returns -1.  0 cannot serve as the error
 * value because it is itself a valid level.
 */
int
parse_compression_level(tchar opt_char, const tchar *arg)
{
	int level;

	if (arg == NULL)
		arg = T("");

	if (opt_char < '0' || opt_char > '9')
		goto invalid;
	level = opt_char - '0';

	if (arg[0] != '\0') {
		if (arg[0] < '0' || arg[0] > '9')
			goto invalid;
		if (arg[1] != '\0')	/* Levels are at most 2 digits */
			goto invalid;
		if (level == 0)		/* Don't allow arguments like "-01" */
			goto invalid;
		level = (level * 10) + (arg[0] - '0');
	}

	if (level < 0 || level > 12)
		goto invalid;

	return level;

invalid:
	msg("Invalid compression level: \"%"TC"%"TS"\". "
	    "Must be an integer in the range [0, 12].", opt_char, arg);
	return -1;
}
/* Allocate a new DEFLATE compressor */

View File

@ -43,7 +43,7 @@ tmain(int argc, tchar *argv[])
ASSERT(malloc_count == 0);
ASSERT(free_count == 0);
for (level = 1; level <= 12; level++) {
for (level = 0; level <= 12; level++) {
malloc_count = free_count = 0;
c = libdeflate_alloc_compressor(level);
ASSERT(c != NULL);
@ -67,7 +67,7 @@ tmain(int argc, tchar *argv[])
libdeflate_set_memory_allocator(do_fail_malloc, do_free);
for (level = 1; level <= 12; level++) {
for (level = 0; level <= 12; level++) {
malloc_count = free_count = 0;
c = libdeflate_alloc_compressor(level);
ASSERT(c == NULL);

View File

@ -18,12 +18,12 @@ for format in '' '-g' '-z'; do
run_cmd ./benchmark $format $ref_impl $SMOKEDATA
done
done
for level in 1 3 7 9; do
for level in 0 1 3 7 9; do
for ref_impl in '' '-Y'; do
run_cmd ./benchmark -$level $ref_impl $SMOKEDATA
done
done
for level in 1 3 7 9 12; do
for level in 0 1 3 7 9 12; do
for ref_impl in '' '-Z'; do
run_cmd ./benchmark -$level $ref_impl $SMOKEDATA
done