From 4c92394eaa8769e1d832c7719d4fbe7a94ac3c42 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 10 Oct 2020 21:54:14 -0700 Subject: [PATCH] Support level 0, "no compression" Some users may require a valid DEFLATE, zlib, or gzip stream but know ahead of time that particular inputs are not compressible. zlib supports "level 0" for this use case. Support this in libdeflate too. Resolves https://github.com/ebiggers/libdeflate/issues/86 --- lib/deflate_compress.c | 33 ++++++++-- libdeflate.h | 9 ++- programs/benchmark.c | 115 ++++++++++++++++++++++++++-------- programs/gzip.c | 2 +- programs/prog_util.c | 30 ++++++--- programs/test_custom_malloc.c | 4 +- tools/exec_tests.sh | 4 +- 7 files changed, 150 insertions(+), 47 deletions(-) diff --git a/lib/deflate_compress.c b/lib/deflate_compress.c index 2902bf0..eac3d89 100644 --- a/lib/deflate_compress.c +++ b/lib/deflate_compress.c @@ -1676,7 +1676,7 @@ deflate_write_uncompressed_block(struct deflate_output_bitstream *os, static void deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os, - const u8 *data, u32 data_length, + const u8 *data, size_t data_length, bool is_final_block) { do { @@ -1956,6 +1956,23 @@ should_end_block(struct block_split_stats *stats, /******************************************************************************/ +/* + * This is the level 0 "compressor". It always outputs uncompressed blocks. + */ +static size_t +deflate_compress_none(struct libdeflate_compressor * restrict c, + const u8 * restrict in, size_t in_nbytes, + u8 * restrict out, size_t out_nbytes_avail) +{ + struct deflate_output_bitstream os; + + deflate_init_output(&os, out, out_nbytes_avail); + + deflate_write_uncompressed_blocks(&os, in, in_nbytes, true); + + return deflate_flush_output(&os); +} + /* * This is the "greedy" DEFLATE compressor. It always chooses the longest match. */ @@ -2669,20 +2686,26 @@ LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI libdeflate_alloc_compressor(int compression_level) { struct libdeflate_compressor *c; - size_t size; + size_t size = offsetof(struct libdeflate_compressor, p); #if SUPPORT_NEAR_OPTIMAL_PARSING if (compression_level >= 8) - size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.n); - else + size += sizeof(c->p.n); + else if (compression_level >= 1) + size += sizeof(c->p.g); +#else + if (compression_level >= 1) + size += sizeof(c->p.g); #endif - size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.g); c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size); if (!c) return NULL; switch (compression_level) { + case 0: + c->impl = deflate_compress_none; + break; case 1: c->impl = deflate_compress_greedy; c->max_search_depth = 2; diff --git a/libdeflate.h b/libdeflate.h index 8626640..3e4d1d7 100644 --- a/libdeflate.h +++ b/libdeflate.h @@ -61,9 +61,12 @@ struct libdeflate_compressor; * libdeflate_alloc_compressor() allocates a new compressor that supports * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = - * medium/default, 9 = slow, 12 = slowest). The return value is a pointer to - * the new compressor, or NULL if out of memory or if the compression level is - * invalid (i.e. outside the range [1, 12]). + * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means + * "no compression", specifically "create a valid stream, but only emit + * uncompressed blocks" (this will expand the data slightly). + * + * The return value is a pointer to the new compressor, or NULL if out of memory + * or if the compression level is invalid (i.e. outside the range [0, 12]). * * Note: for compression, the sliding window size is defined at compilation time * to 32768, the largest size permissible in the DEFLATE format. It cannot be diff --git a/programs/benchmark.c b/programs/benchmark.c index d0629bb..fce7a6d 100644 --- a/programs/benchmark.c +++ b/programs/benchmark.c @@ -27,7 +27,7 @@ #include "test_util.h" -static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::C:D:ghs:VYZz"); +static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz"); enum wrapper { NO_WRAPPER, @@ -52,6 +52,7 @@ struct engine { const tchar *name; bool (*init_compressor)(struct compressor *); + size_t (*compress_bound)(struct compressor *, size_t); size_t (*compress)(struct compressor *, const void *, size_t, void *, size_t); void (*destroy_compressor)(struct compressor *); @@ -71,6 +72,19 @@ libdeflate_engine_init_compressor(struct compressor *c) return c->private != NULL; } +static size_t +libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + switch (c->wrapper) { + case ZLIB_WRAPPER: + return libdeflate_zlib_compress_bound(c->private, in_nbytes); + case GZIP_WRAPPER: + return libdeflate_gzip_compress_bound(c->private, in_nbytes); + default: + return libdeflate_deflate_compress_bound(c->private, in_nbytes); + } +} + static size_t libdeflate_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -128,6 +142,7 @@ static const struct engine libdeflate_engine = { .name = T("libdeflate"), .init_compressor = libdeflate_engine_init_compressor, + .compress_bound = libdeflate_engine_compress_bound, .compress = libdeflate_engine_compress, .destroy_compressor = libdeflate_engine_destroy_compressor, @@ -184,6 +199,12 @@ libz_engine_init_compressor(struct compressor *c) return true; } +static size_t +libz_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + return deflateBound(c->private, in_nbytes); +} + static size_t libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -265,6 +286,7 @@ static const struct engine libz_engine = { .name = T("libz"), .init_compressor = libz_engine_init_compressor, + .compress_bound = libz_engine_compress_bound, .compress = libz_engine_compress, .destroy_compressor = libz_engine_destroy_compressor, @@ -305,6 +327,12 @@ compressor_init(struct compressor *c, int level, enum wrapper wrapper, return engine->init_compressor(c); } +static size_t +compress_bound(struct compressor *c, size_t in_nbytes) +{ + return c->engine->compress_bound(c, in_nbytes); +} + static size_t do_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -315,7 +343,8 @@ do_compress(struct compressor *c, const void *in, size_t in_nbytes, static void compressor_destroy(struct compressor *c) { - c->engine->destroy_compressor(c); + if (c->engine != NULL) + c->engine->destroy_compressor(c); } static bool @@ -337,7 +366,8 @@ do_decompress(struct decompressor *d, const void *in, size_t in_nbytes, static void decompressor_destroy(struct decompressor *d) { - d->engine->destroy_decompressor(d); + if (d->engine != NULL) + d->engine->destroy_decompressor(d); } /******************************************************************************/ @@ -364,11 +394,13 @@ show_usage(FILE *fp) "Benchmark DEFLATE compression and decompression on the specified FILEs.\n" "\n" "Options:\n" +" -0 no compression\n" " -1 fastest (worst) compression\n" " -6 medium compression (default)\n" " -12 slowest (best) compression\n" " -C ENGINE compression engine\n" " -D ENGINE decompression engine\n" +" -e allow chunks to be expanded (implied by -0)\n" " -g use gzip wrapper\n" " -h print this help\n" " -s SIZE chunk size\n" @@ -398,6 +430,7 @@ show_version(void) static int do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, void *decompressed_buf, u32 chunk_size, + bool allow_expansion, size_t compressed_buf_size, struct compressor *compressor, struct decompressor *decompressor) { @@ -409,19 +442,31 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, while ((ret = xread(in, original_buf, chunk_size)) > 0) { u32 original_size = ret; + size_t out_nbytes_avail; u32 compressed_size; u64 start_time; bool ok; total_uncompressed_size += original_size; + if (allow_expansion) { + out_nbytes_avail = compress_bound(compressor, + original_size); + if (out_nbytes_avail > compressed_buf_size) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } + } else { + out_nbytes_avail = original_size - 1; + } + /* Compress the chunk of data. */ start_time = timer_ticks(); compressed_size = do_compress(compressor, original_buf, original_size, compressed_buf, - original_size - 1); + out_nbytes_avail); total_compress_time += timer_ticks() - start_time; if (compressed_size) { @@ -451,7 +496,14 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, total_compressed_size += compressed_size; } else { - /* Compression did not make the chunk smaller. */ + /* + * The chunk would have compressed to more than + * out_nbytes_avail bytes. + */ + if (allow_expansion) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } total_compressed_size += original_size; } } @@ -493,11 +545,13 @@ tmain(int argc, tchar *argv[]) enum wrapper wrapper = NO_WRAPPER; const struct engine *compress_engine = &DEFAULT_ENGINE; const struct engine *decompress_engine = &DEFAULT_ENGINE; + bool allow_expansion = false; + struct compressor compressor = { 0 }; + struct decompressor decompressor = { 0 }; + size_t compressed_buf_size; void *original_buf = NULL; void *compressed_buf = NULL; void *decompressed_buf = NULL; - struct compressor compressor; - struct decompressor decompressor; tchar *default_file_list[] = { NULL }; int opt_char; int i; @@ -507,6 +561,7 @@ tmain(int argc, tchar *argv[]) while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { switch (opt_char) { + case '0': case '1': case '2': case '3': @@ -517,7 +572,7 @@ tmain(int argc, tchar *argv[]) case '8': case '9': level = parse_compression_level(opt_char, toptarg); - if (level == 0) + if (level < 0) return 1; break; case 'C': @@ -536,6 +591,9 @@ tmain(int argc, tchar *argv[]) return 1; } break; + case 'e': + allow_expansion = true; + break; case 'g': wrapper = GZIP_WRAPPER; break; @@ -570,20 +628,28 @@ tmain(int argc, tchar *argv[]) argc -= toptind; argv += toptind; + if (level == 0) + allow_expansion = true; + + ret = -1; + if (!compressor_init(&compressor, level, wrapper, compress_engine)) + goto out; + if (!decompressor_init(&decompressor, wrapper, decompress_engine)) + goto out; + + if (allow_expansion) + compressed_buf_size = compress_bound(&compressor, chunk_size); + else + compressed_buf_size = chunk_size - 1; + original_buf = xmalloc(chunk_size); - compressed_buf = xmalloc(chunk_size - 1); + compressed_buf = xmalloc(compressed_buf_size); decompressed_buf = xmalloc(chunk_size); ret = -1; if (original_buf == NULL || compressed_buf == NULL || decompressed_buf == NULL) - goto out0; - - if (!compressor_init(&compressor, level, wrapper, compress_engine)) - goto out0; - - if (!decompressor_init(&decompressor, wrapper, decompress_engine)) - goto out1; + goto out; if (argc == 0) { argv = default_file_list; @@ -608,25 +674,24 @@ tmain(int argc, tchar *argv[]) ret = xopen_for_read(argv[i], true, &in); if (ret != 0) - goto out2; + goto out; printf("Processing %"TS"...\n", in.name); ret = do_benchmark(&in, original_buf, compressed_buf, - decompressed_buf, chunk_size, &compressor, - &decompressor); + decompressed_buf, chunk_size, + allow_expansion, compressed_buf_size, + &compressor, &decompressor); xclose(&in); if (ret != 0) - goto out2; + goto out; } ret = 0; -out2: - decompressor_destroy(&decompressor); -out1: - compressor_destroy(&compressor); -out0: +out: free(decompressed_buf); free(compressed_buf); free(original_buf); + decompressor_destroy(&decompressor); + compressor_destroy(&compressor); return -ret; } diff --git a/programs/gzip.c b/programs/gzip.c index 546f969..30f1ad6 100644 --- a/programs/gzip.c +++ b/programs/gzip.c @@ -550,7 +550,7 @@ tmain(int argc, tchar *argv[]) case '9': options.compression_level = parse_compression_level(opt_char, toptarg); - if (options.compression_level == 0) + if (options.compression_level < 0) return 1; break; case 'c': diff --git a/programs/prog_util.c b/programs/prog_util.c index 49a7af4..f736dab 100644 --- a/programs/prog_util.c +++ b/programs/prog_util.c @@ -433,27 +433,39 @@ xclose(struct file_stream *strm) /* * Parse the compression level given on the command line, returning the - * compression level on success or 0 on error + * compression level on success or -1 on error */ int parse_compression_level(tchar opt_char, const tchar *arg) { - unsigned long level = opt_char - '0'; - const tchar *p; + int level; if (arg == NULL) arg = T(""); - for (p = arg; *p >= '0' && *p <= '9'; p++) - level = (level * 10) + (*p - '0'); + if (opt_char < '0' || opt_char > '9') + goto invalid; + level = opt_char - '0'; - if (level < 1 || level > 12 || *p != '\0') { - msg("Invalid compression level: \"%"TC"%"TS"\". " - "Must be an integer in the range [1, 12].", opt_char, arg); - return 0; + if (arg[0] != '\0') { + if (arg[0] < '0' || arg[0] > '9') + goto invalid; + if (arg[1] != '\0') /* Levels are at most 2 digits */ + goto invalid; + if (level == 0) /* Don't allow arguments like "-01" */ + goto invalid; + level = (level * 10) + (arg[0] - '0'); } + if (level < 0 || level > 12) + goto invalid; + return level; + +invalid: + msg("Invalid compression level: \"%"TC"%"TS"\". " + "Must be an integer in the range [0, 12].", opt_char, arg); + return -1; } /* Allocate a new DEFLATE compressor */ diff --git a/programs/test_custom_malloc.c b/programs/test_custom_malloc.c index 7e1eced..2bbb7f0 100644 --- a/programs/test_custom_malloc.c +++ b/programs/test_custom_malloc.c @@ -43,7 +43,7 @@ tmain(int argc, tchar *argv[]) ASSERT(malloc_count == 0); ASSERT(free_count == 0); - for (level = 1; level <= 12; level++) { + for (level = 0; level <= 12; level++) { malloc_count = free_count = 0; c = libdeflate_alloc_compressor(level); ASSERT(c != NULL); @@ -67,7 +67,7 @@ tmain(int argc, tchar *argv[]) libdeflate_set_memory_allocator(do_fail_malloc, do_free); - for (level = 1; level <= 12; level++) { + for (level = 0; level <= 12; level++) { malloc_count = free_count = 0; c = libdeflate_alloc_compressor(level); ASSERT(c == NULL); diff --git a/tools/exec_tests.sh b/tools/exec_tests.sh index c7acb3f..e4deacb 100644 --- a/tools/exec_tests.sh +++ b/tools/exec_tests.sh @@ -18,12 +18,12 @@ for format in '' '-g' '-z'; do run_cmd ./benchmark $format $ref_impl $SMOKEDATA done done -for level in 1 3 7 9; do +for level in 0 1 3 7 9; do for ref_impl in '' '-Y'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done done -for level in 1 3 7 9 12; do +for level in 0 1 3 7 9 12; do for ref_impl in '' '-Z'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done