diff --git a/libdeflate.h b/libdeflate.h
index 1b95beb..7db75b7 100644
--- a/libdeflate.h
+++ b/libdeflate.h
@@ -44,6 +44,28 @@ deflate_compress(struct deflate_compressor *compressor,
 		 const void *in, size_t in_nbytes,
 		 void *out, size_t out_nbytes_avail);
 
+/*
+ * deflate_compress_bound() returns a worst-case upper bound on the number of
+ * bytes of compressed data that may be produced by compressing any buffer of
+ * length less than or equal to 'in_nbytes' using deflate_compress() with the
+ * specified compressor. Mathematically, this bound will necessarily be a
+ * number greater than or equal to 'in_nbytes'. It may be an overestimate of
+ * the true upper bound. The return value is guaranteed to be the same for all
+ * invocations with the same compressor and same 'in_nbytes'.
+ *
+ * Note that this function is not necessary in many applications. With
+ * block-based compression, it is usually preferable to separately store the
+ * uncompressed size of each block and to store any blocks that did not compress
+ * to less than their original size uncompressed. In that scenario, there is no
+ * need to know the worst-case compressed size, since the maximum number of
+ * bytes of compressed data that may be used would always be one less than the
+ * input length. You can just pass a buffer of that size to deflate_compress()
+ * and store the data uncompressed if deflate_compress() returns 0, indicating
+ * that the compressed data did not fit into the provided output buffer.
+ */
+extern size_t
+deflate_compress_bound(struct deflate_compressor *compressor, size_t in_nbytes);
+
 /*
  * Like deflate_compress(), but stores the data in the zlib wrapper format.
  */
@@ -52,6 +74,13 @@ zlib_compress(struct deflate_compressor *compressor,
 	      const void *in, size_t in_nbytes,
 	      void *out, size_t out_nbytes_avail);
 
+/*
+ * Like deflate_compress_bound(), but assumes the data will be compressed with
+ * zlib_compress() rather than with deflate_compress().
+ */
+extern size_t
+zlib_compress_bound(struct deflate_compressor *compressor, size_t in_nbytes);
+
 /*
  * Like deflate_compress(), but stores the data in the gzip wrapper format.
  */
@@ -60,6 +89,13 @@ gzip_compress(struct deflate_compressor *compressor,
 	      const void *in, size_t in_nbytes,
 	      void *out, size_t out_nbytes_avail);
 
+/*
+ * Like deflate_compress_bound(), but assumes the data will be compressed with
+ * gzip_compress() rather than with deflate_compress().
+ */
+extern size_t
+gzip_compress_bound(struct deflate_compressor *compressor, size_t in_nbytes);
+
 /*
  * deflate_free_compressor() frees a DEFLATE compressor that was allocated with
  * deflate_alloc_compressor(). If a NULL pointer is passed in, no action is
diff --git a/src/deflate_compress.c b/src/deflate_compress.c
index 57a3613..0fdcb9b 100644
--- a/src/deflate_compress.c
+++ b/src/deflate_compress.c
@@ -2512,8 +2512,17 @@ deflate_compress(struct deflate_compressor *c,
 		 const void *in, size_t in_nbytes,
 		 void *out, size_t out_nbytes_avail)
 {
-	if (in_nbytes < 16 || out_nbytes_avail < MIN_OUTPUT_SIZE)
+	if (unlikely(out_nbytes_avail < MIN_OUTPUT_SIZE))
 		return 0;
+	if (unlikely(in_nbytes == 0)) {
+		/* Empty input; output a single empty block. */
+		struct deflate_output_bitstream os;
+		deflate_init_output(&os, out, out_nbytes_avail);
+		deflate_reset_symbol_frequencies(c);
+		deflate_finish_sequence(c->sequences, 0);
+		deflate_write_block(c, &os, in, 0, true);
+		return deflate_flush_output(&os);
+	}
 	return (*c->impl)(c, in, in_nbytes, out, out_nbytes_avail);
 }
 
@@ -2528,3 +2537,24 @@ deflate_get_compression_level(struct deflate_compressor *c)
 {
 	return c->compression_level;
 }
+
+/*
+ * Return an upper bound on the compressed size for compressing @in_nbytes bytes
+ * of data: MIN_OUTPUT_SIZE, plus 9 bits per input byte rounded up to whole
+ * bytes, plus a 200-byte allowance for each of the
+ * ceil(in_nbytes / MAX_ITEMS_PER_BLOCK) blocks (at least one block is counted).
+ *
+ * The 9/8 term is computed as in_nbytes + (in_nbytes + 7) / 8, which equals
+ * (in_nbytes * 9 + 7) / 8 exactly but has no multiplication that could wrap
+ * size_t for large inputs. This function needs some work to be more accurate.
+ */
+LIBEXPORT size_t
+deflate_compress_bound(struct deflate_compressor *c, size_t in_nbytes)
+{
+	size_t max_num_blocks =
+		(in_nbytes + MAX_ITEMS_PER_BLOCK - 1) / MAX_ITEMS_PER_BLOCK;
+	if (max_num_blocks == 0)
+		max_num_blocks++;
+	return MIN_OUTPUT_SIZE + in_nbytes + (in_nbytes + 7) / 8 +
+		max_num_blocks * 200;
+}
diff --git a/src/gzip_compress.c b/src/gzip_compress.c
index 8129718..cb60448 100644
--- a/src/gzip_compress.c
+++ b/src/gzip_compress.c
@@ -66,3 +66,11 @@ gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
 
 	return out_next - (u8 *)out;
 }
+
+/* Return an upper bound on the size of gzip_compress() output: the value of
+ * deflate_compress_bound() for the same input plus GZIP_MIN_OVERHEAD. */
+LIBEXPORT size_t
+gzip_compress_bound(struct deflate_compressor *c, size_t in_nbytes)
+{
+	return GZIP_MIN_OVERHEAD + deflate_compress_bound(c, in_nbytes);
+}
diff --git a/src/zlib_compress.c b/src/zlib_compress.c
index 1a3f039..79765cf 100644
--- a/src/zlib_compress.c
+++ b/src/zlib_compress.c
@@ -58,3 +58,11 @@ zlib_compress(struct deflate_compressor *c, const void *in, size_t in_size,
 
 	return out_next - (u8 *)out;
 }
+
+/* Return an upper bound on the size of zlib_compress() output: the value of
+ * deflate_compress_bound() for the same input plus ZLIB_MIN_OVERHEAD. */
+LIBEXPORT size_t
+zlib_compress_bound(struct deflate_compressor *c, size_t in_nbytes)
+{
+	return ZLIB_MIN_OVERHEAD + deflate_compress_bound(c, in_nbytes);
+}