diff --git a/lib/deflate_compress.c b/lib/deflate_compress.c index 2902bf0..eac3d89 100644 --- a/lib/deflate_compress.c +++ b/lib/deflate_compress.c @@ -1676,7 +1676,7 @@ deflate_write_uncompressed_block(struct deflate_output_bitstream *os, static void deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os, - const u8 *data, u32 data_length, + const u8 *data, size_t data_length, bool is_final_block) { do { @@ -1956,6 +1956,23 @@ should_end_block(struct block_split_stats *stats, /******************************************************************************/ +/* + * This is the level 0 "compressor". It always outputs uncompressed blocks. + */ +static size_t +deflate_compress_none(struct libdeflate_compressor * restrict c, + const u8 * restrict in, size_t in_nbytes, + u8 * restrict out, size_t out_nbytes_avail) +{ + struct deflate_output_bitstream os; + + deflate_init_output(&os, out, out_nbytes_avail); + + deflate_write_uncompressed_blocks(&os, in, in_nbytes, true); + + return deflate_flush_output(&os); +} + /* * This is the "greedy" DEFLATE compressor. It always chooses the longest match. */ @@ -2669,20 +2686,26 @@ LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI libdeflate_alloc_compressor(int compression_level) { struct libdeflate_compressor *c; - size_t size; + size_t size = offsetof(struct libdeflate_compressor, p); #if SUPPORT_NEAR_OPTIMAL_PARSING if (compression_level >= 8) - size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.n); - else + size += sizeof(c->p.n); + else if (compression_level >= 1) + size += sizeof(c->p.g); +#else + if (compression_level >= 1) + size += sizeof(c->p.g); #endif - size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.g); c = libdeflate_aligned_malloc(MATCHFINDER_ALIGNMENT, size); if (!c) return NULL; switch (compression_level) { + case 0: + c->impl = deflate_compress_none; + break; case 1: c->impl = deflate_compress_greedy; c->max_search_depth = 2; diff --git a/libdeflate.h b/libdeflate.h index 8626640..3e4d1d7 100644 --- a/libdeflate.h +++ b/libdeflate.h @@ -61,9 +61,12 @@ struct libdeflate_compressor; * libdeflate_alloc_compressor() allocates a new compressor that supports * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = - * medium/default, 9 = slow, 12 = slowest). The return value is a pointer to - * the new compressor, or NULL if out of memory or if the compression level is - * invalid (i.e. outside the range [1, 12]). + * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means + * "no compression", specifically "create a valid stream, but only emit + * uncompressed blocks" (this will expand the data slightly). + * + * The return value is a pointer to the new compressor, or NULL if out of memory + * or if the compression level is invalid (i.e. outside the range [0, 12]). * * Note: for compression, the sliding window size is defined at compilation time * to 32768, the largest size permissible in the DEFLATE format. It cannot be diff --git a/programs/benchmark.c b/programs/benchmark.c index d0629bb..fce7a6d 100644 --- a/programs/benchmark.c +++ b/programs/benchmark.c @@ -27,7 +27,7 @@ #include "test_util.h" -static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::C:D:ghs:VYZz"); +static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz"); enum wrapper { NO_WRAPPER, @@ -52,6 +52,7 @@ struct engine { const tchar *name; bool (*init_compressor)(struct compressor *); + size_t (*compress_bound)(struct compressor *, size_t); size_t (*compress)(struct compressor *, const void *, size_t, void *, size_t); void (*destroy_compressor)(struct compressor *); @@ -71,6 +72,19 @@ libdeflate_engine_init_compressor(struct compressor *c) return c->private != NULL; } +static size_t +libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + switch (c->wrapper) { + case ZLIB_WRAPPER: + return libdeflate_zlib_compress_bound(c->private, in_nbytes); + case GZIP_WRAPPER: + return libdeflate_gzip_compress_bound(c->private, in_nbytes); + default: + return libdeflate_deflate_compress_bound(c->private, in_nbytes); + } +} + static size_t libdeflate_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -128,6 +142,7 @@ static const struct engine libdeflate_engine = { .name = T("libdeflate"), .init_compressor = libdeflate_engine_init_compressor, + .compress_bound = libdeflate_engine_compress_bound, .compress = libdeflate_engine_compress, .destroy_compressor = libdeflate_engine_destroy_compressor, @@ -184,6 +199,12 @@ libz_engine_init_compressor(struct compressor *c) return true; } +static size_t +libz_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + return deflateBound(c->private, in_nbytes); +} + static size_t libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -265,6 +286,7 @@ static const struct engine libz_engine = { .name = T("libz"), .init_compressor = libz_engine_init_compressor, + .compress_bound = libz_engine_compress_bound, .compress = libz_engine_compress, .destroy_compressor = libz_engine_destroy_compressor, @@ -305,6 +327,12 @@ compressor_init(struct compressor *c, int level, enum wrapper wrapper, return engine->init_compressor(c); } +static size_t +compress_bound(struct compressor *c, size_t in_nbytes) +{ + return c->engine->compress_bound(c, in_nbytes); +} + static size_t do_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) @@ -315,7 +343,8 @@ do_compress(struct compressor *c, const void *in, size_t in_nbytes, static void compressor_destroy(struct compressor *c) { - c->engine->destroy_compressor(c); + if (c->engine != NULL) + c->engine->destroy_compressor(c); } static bool @@ -337,7 +366,8 @@ do_decompress(struct decompressor *d, const void *in, size_t in_nbytes, static void decompressor_destroy(struct decompressor *d) { - d->engine->destroy_decompressor(d); + if (d->engine != NULL) + d->engine->destroy_decompressor(d); } /******************************************************************************/ @@ -364,11 +394,13 @@ show_usage(FILE *fp) "Benchmark DEFLATE compression and decompression on the specified FILEs.\n" "\n" "Options:\n" +" -0 no compression\n" " -1 fastest (worst) compression\n" " -6 medium compression (default)\n" " -12 slowest (best) compression\n" " -C ENGINE compression engine\n" " -D ENGINE decompression engine\n" +" -e allow chunks to be expanded (implied by -0)\n" " -g use gzip wrapper\n" " -h print this help\n" " -s SIZE chunk size\n" @@ -398,6 +430,7 @@ show_version(void) static int do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, void *decompressed_buf, u32 chunk_size, + bool allow_expansion, size_t compressed_buf_size, struct compressor *compressor, struct decompressor *decompressor) { @@ -409,19 +442,31 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, while ((ret = xread(in, original_buf, chunk_size)) > 0) { u32 original_size = ret; + size_t out_nbytes_avail; u32 compressed_size; u64 start_time; bool ok; total_uncompressed_size += original_size; + if (allow_expansion) { + out_nbytes_avail = compress_bound(compressor, + original_size); + if (out_nbytes_avail > compressed_buf_size) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } + } else { + out_nbytes_avail = original_size - 1; + } + /* Compress the chunk of data. */ start_time = timer_ticks(); compressed_size = do_compress(compressor, original_buf, original_size, compressed_buf, - original_size - 1); + out_nbytes_avail); total_compress_time += timer_ticks() - start_time; if (compressed_size) { @@ -451,7 +496,14 @@ do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, total_compressed_size += compressed_size; } else { - /* Compression did not make the chunk smaller. */ + /* + * The chunk would have compressed to more than + * out_nbytes_avail bytes. + */ + if (allow_expansion) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } total_compressed_size += original_size; } } @@ -493,11 +545,13 @@ tmain(int argc, tchar *argv[]) enum wrapper wrapper = NO_WRAPPER; const struct engine *compress_engine = &DEFAULT_ENGINE; const struct engine *decompress_engine = &DEFAULT_ENGINE; + bool allow_expansion = false; + struct compressor compressor = { 0 }; + struct decompressor decompressor = { 0 }; + size_t compressed_buf_size; void *original_buf = NULL; void *compressed_buf = NULL; void *decompressed_buf = NULL; - struct compressor compressor; - struct decompressor decompressor; tchar *default_file_list[] = { NULL }; int opt_char; int i; @@ -507,6 +561,7 @@ tmain(int argc, tchar *argv[]) while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { switch (opt_char) { + case '0': case '1': case '2': case '3': @@ -517,7 +572,7 @@ tmain(int argc, tchar *argv[]) case '8': case '9': level = parse_compression_level(opt_char, toptarg); - if (level == 0) + if (level < 0) return 1; break; case 'C': @@ -536,6 +591,9 @@ tmain(int argc, tchar *argv[]) return 1; } break; + case 'e': + allow_expansion = true; + break; case 'g': wrapper = GZIP_WRAPPER; break; @@ -570,20 +628,28 @@ tmain(int argc, tchar *argv[]) argc -= toptind; argv += toptind; + if (level == 0) + allow_expansion = true; + + ret = -1; + if (!compressor_init(&compressor, level, wrapper, compress_engine)) + goto out; + if (!decompressor_init(&decompressor, wrapper, decompress_engine)) + goto out; + + if (allow_expansion) + compressed_buf_size = compress_bound(&compressor, chunk_size); + else + compressed_buf_size = chunk_size - 1; + original_buf = xmalloc(chunk_size); - compressed_buf = xmalloc(chunk_size - 1); + compressed_buf = xmalloc(compressed_buf_size); decompressed_buf = xmalloc(chunk_size); ret = -1; if (original_buf == NULL || compressed_buf == NULL || decompressed_buf == NULL) - goto out0; - - if (!compressor_init(&compressor, level, wrapper, compress_engine)) - goto out0; - - if (!decompressor_init(&decompressor, wrapper, decompress_engine)) - goto out1; + goto out; if (argc == 0) { argv = default_file_list; @@ -608,25 +674,24 @@ tmain(int argc, tchar *argv[]) ret = xopen_for_read(argv[i], true, &in); if (ret != 0) - goto out2; + goto out; printf("Processing %"TS"...\n", in.name); ret = do_benchmark(&in, original_buf, compressed_buf, - decompressed_buf, chunk_size, &compressor, - &decompressor); + decompressed_buf, chunk_size, + allow_expansion, compressed_buf_size, + &compressor, &decompressor); xclose(&in); if (ret != 0) - goto out2; + goto out; } ret = 0; -out2: - decompressor_destroy(&decompressor); -out1: - compressor_destroy(&compressor); -out0: +out: free(decompressed_buf); free(compressed_buf); free(original_buf); + decompressor_destroy(&decompressor); + compressor_destroy(&compressor); return -ret; } diff --git a/programs/gzip.c b/programs/gzip.c index 546f969..30f1ad6 100644 --- a/programs/gzip.c +++ b/programs/gzip.c @@ -550,7 +550,7 @@ tmain(int argc, tchar *argv[]) case '9': options.compression_level = parse_compression_level(opt_char, toptarg); - if (options.compression_level == 0) + if (options.compression_level < 0) return 1; break; case 'c': diff --git a/programs/prog_util.c b/programs/prog_util.c index 49a7af4..f736dab 100644 --- a/programs/prog_util.c +++ b/programs/prog_util.c @@ -433,27 +433,39 @@ xclose(struct file_stream *strm) /* * Parse the compression level given on the command line, returning the - * compression level on success or 0 on error + * compression level on success or -1 on error */ int parse_compression_level(tchar opt_char, const tchar *arg) { - unsigned long level = opt_char - '0'; - const tchar *p; + int level; if (arg == NULL) arg = T(""); - for (p = arg; *p >= '0' && *p <= '9'; p++) - level = (level * 10) + (*p - '0'); + if (opt_char < '0' || opt_char > '9') + goto invalid; + level = opt_char - '0'; - if (level < 1 || level > 12 || *p != '\0') { - msg("Invalid compression level: \"%"TC"%"TS"\". " - "Must be an integer in the range [1, 12].", opt_char, arg); - return 0; + if (arg[0] != '\0') { + if (arg[0] < '0' || arg[0] > '9') + goto invalid; + if (arg[1] != '\0') /* Levels are at most 2 digits */ + goto invalid; + if (level == 0) /* Don't allow arguments like "-01" */ + goto invalid; + level = (level * 10) + (arg[0] - '0'); } + if (level < 0 || level > 12) + goto invalid; + return level; + +invalid: + msg("Invalid compression level: \"%"TC"%"TS"\". " + "Must be an integer in the range [0, 12].", opt_char, arg); + return -1; } /* Allocate a new DEFLATE compressor */ diff --git a/programs/test_custom_malloc.c b/programs/test_custom_malloc.c index 7e1eced..2bbb7f0 100644 --- a/programs/test_custom_malloc.c +++ b/programs/test_custom_malloc.c @@ -43,7 +43,7 @@ tmain(int argc, tchar *argv[]) ASSERT(malloc_count == 0); ASSERT(free_count == 0); - for (level = 1; level <= 12; level++) { + for (level = 0; level <= 12; level++) { malloc_count = free_count = 0; c = libdeflate_alloc_compressor(level); ASSERT(c != NULL); @@ -67,7 +67,7 @@ tmain(int argc, tchar *argv[]) libdeflate_set_memory_allocator(do_fail_malloc, do_free); - for (level = 1; level <= 12; level++) { + for (level = 0; level <= 12; level++) { malloc_count = free_count = 0; c = libdeflate_alloc_compressor(level); ASSERT(c == NULL); diff --git a/tools/exec_tests.sh b/tools/exec_tests.sh index c7acb3f..e4deacb 100644 --- a/tools/exec_tests.sh +++ b/tools/exec_tests.sh @@ -18,12 +18,12 @@ for format in '' '-g' '-z'; do run_cmd ./benchmark $format $ref_impl $SMOKEDATA done done -for level in 1 3 7 9; do +for level in 0 1 3 7 9; do for ref_impl in '' '-Y'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done done -for level in 1 3 7 9 12; do +for level in 0 1 3 7 9 12; do for ref_impl in '' '-Z'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done