Make decompression routines optionally provide the actual uncompressed size

2025-09-13 06:15:51 -04:00 · 2016-01-23 00:58:20 -06:00 · 2016-01-23 00:58:20 -06:00 · 7be1effacc
commit 7be1effacc
parent e731f4b510
7 changed files with 91 additions and 55 deletions
--- a/README.md
+++ b/README.md
@ -11,7 +11,7 @@ The supported formats are:
 - gzip (a.k.a. DEFLATE with a gzip wrapper)
 libdeflate is heavily optimized.  It is significantly faster than the zlib
-software library, both for compression and decompression, and especially on x86
+library, both for compression and decompression, and especially on x86
 processors.  In addition, libdeflate provides optional high compression modes
 that provide a better compression ratio than zlib's "level 9".
@ -28,7 +28,7 @@ There are various options which can be set on the `make` command line; see the
 Makefile for details.  As an example, you can run `make SUPPORT_COMPRESSION=no`
 to build a decompression-only library.
-There is no `make install` yet; just copy the file(s) you want.
+There is no `make install` yet; just copy the file(s) to wherever you want them.
 It's possible to build a Windows binary using MinGW, using a command like this:
@ -50,6 +50,12 @@ certain use cases such as transparent filesystem compression.  But if your
 application compresses large files as a single compressed stream, similarly to
 the `gzip` program, then libdeflate isn't for you.
 Note that with chunk-based compression, you generally should have the
 uncompressed size of each chunk stored outside of the compressed data itself.
 This enables you to allocate an output buffer of the correct size without
 guessing.  However, libdeflate's decompression routines do optionally provide
 the actual number of output bytes in case you need it.
 DEFLATE vs. zlib vs. gzip
 =========================
@ -87,10 +93,10 @@ compressed.  Different algorithms and different amounts of computation time will
 result in different compression ratios, while remaining equally compatible with
 the decompressor.
-For this reason, the commonly used zlib software library provides nine
+For this reason, the commonly used zlib library provides nine compression
-compression levels.  Level 1 is the fastest but provides the worst compression;
+levels.  Level 1 is the fastest but provides the worst compression; level 9
-level 9 provides the best compression but is the slowest.  It defaults to level
+provides the best compression but is the slowest.  It defaults to level 6.
-6.  libdeflate uses this same design but is designed to improve on both zlib's
+libdeflate uses this same design but is designed to improve on both zlib's
 performance *and* compression ratio at every compression level.  In addition,
 libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
 minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
@ -102,8 +108,8 @@ different levels to see which works best for your application.
 Motivation
 ==========
-Despite DEFLATE's widespread use mainly through the zlib software library, in
+Despite DEFLATE's widespread use mainly through the zlib library, in the
-the compression community this format from the early 1990s is often considered
+compression community this format from the early 1990s is often considered
 obsolete.  And in a few significant ways, it is.
 So why implement DEFLATE at all, instead of focusing entirely on
@ -119,8 +125,8 @@ optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
 In addition, comparing compressors fairly is difficult because the performance
 of a compressor depends heavily on optimizations which are not intrinsic to the
-compression format itself.  In this respect, the zlib software library sometimes
+compression format itself.  In this respect, the zlib library sometimes compares
-compares poorly to certain newer code because zlib is not well optimized for
+poorly to certain newer code because zlib is not well optimized for modern
-modern processors.  libdeflate addresses this by providing an optimized DEFLATE
+processors.  libdeflate addresses this by providing an optimized DEFLATE
 implementation which can be used for benchmarking purposes.  And, of course,
 real applications can use it as well.
--- a/libdeflate.h
+++ b/libdeflate.h
@ -90,19 +90,20 @@ deflate_alloc_decompressor(void);
 /*
 * deflate_decompress() decompresses 'in_nbytes' bytes of DEFLATE-compressed
- * data at 'in' and writes the uncompressed data, which had original size
+ * data at 'in' and writes the uncompressed data to 'out', which is a buffer of
- * 'out_nbytes', to 'out'.  The return value is true if decompression was
+ * at least 'out_nbytes_avail' bytes.  If decompression was successful, then
- * successful, or false if the compressed data was invalid.
+ * %true is returned; otherwise, the compressed data must have been invalid and
- *
+ * %false is returned.  In addition, on success, if 'actual_out_nbytes_ret' is
- * Note that the uncompressed size must be known *exactly* and passed as
+ * not NULL, then the actual uncompressed size is written to
- * 'out_nbytes'.  This is because this API is designed for block-based
+ * *actual_out_nbytes_ret.  Or, if 'actual_out_nbytes_ret' is NULL, then the
- * compression where the uncompressed size should have already been stored
+ * uncompressed size must be exactly equal to 'out_nbytes_avail'; otherwise
- * elsewhere.
+ * decompression fails and %false is returned.
 */
 extern bool
 deflate_decompress(struct deflate_decompressor *decompressor,
 		   const void *in, size_t in_nbytes,
-		   void *out, size_t out_nbytes);
+		   void *out, size_t out_nbytes_avail,
 		   size_t *actual_out_nbytes_ret);
 /*
 * Like deflate_decompress(), but assumes the zlib wrapper format instead of raw
@ -111,7 +112,8 @@ deflate_decompress(struct deflate_decompressor *decompressor,
 extern bool
 zlib_decompress(struct deflate_decompressor *decompressor,
 		const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes);
+		void *out, size_t out_nbytes_avail,
 		size_t *actual_out_nbytes_ret);
 /*
 * Like deflate_decompress(), but assumes the gzip wrapper format instead of raw
@ -120,7 +122,8 @@ zlib_decompress(struct deflate_decompressor *decompressor,
 extern bool
 gzip_decompress(struct deflate_decompressor *decompressor,
 		const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes);
+		void *out, size_t out_nbytes_avail,
 		size_t *actual_out_nbytes_ret);
 /*
 * deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
--- a/src/decompress_impl.h
+++ b/src/decompress_impl.h
@ -9,10 +9,11 @@
 static bool ATTRIBUTES
 FUNCNAME(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes)
+	 void * restrict out, size_t out_nbytes_avail,
 	 size_t *actual_out_nbytes_ret)
 {
 	u8 *out_next = out;
-	u8 * const out_end = out_next + out_nbytes;
+	u8 * const out_end = out_next + out_nbytes_avail;
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	bitbuf_t bitbuf = 0;
@ -358,7 +359,11 @@ block_done:
 	if (!is_final_block)
 		goto next_block;
-	/* That was the last block.  Return %true if we got all the output we
+	/* That was the last block.  */
-	 * expected, otherwise %false.  */
+
-	return (out_next == out_end);
+	if (!actual_out_nbytes_ret)
 		return out_next == out_end;
 	*actual_out_nbytes_ret = out_next - (u8 *)out;
 	return true;
 }
--- a/src/deflate_decompress.c
+++ b/src/deflate_decompress.c
@ -817,18 +817,21 @@ copy_word_unaligned(const void *src, void *dst)
 static bool
 dispatch(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes);
+	 void * restrict out, size_t out_nbytes_avail,
 	 size_t *actual_out_nbytes_ret);
 typedef bool (*decompress_func_t)(struct deflate_decompressor * restrict d,
 				  const void * restrict in, size_t in_nbytes,
-				  void * restrict out, size_t out_nbytes);
+				  void * restrict out, size_t out_nbytes_avail,
 				  size_t *actual_out_nbytes_ret);
 static decompress_func_t decompress_impl = dispatch;
 static bool
 dispatch(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes)
+	 void * restrict out, size_t out_nbytes_avail,
 	 size_t *actual_out_nbytes_ret)
 {
 	decompress_func_t f = deflate_decompress_default;
 #if X86_CPU_FEATURES_ENABLED
@ -836,32 +839,33 @@ dispatch(struct deflate_decompressor * restrict d,
 		f = deflate_decompress_bmi2;
 #endif
 	decompress_impl = f;
-	return (*f)(d, in, in_nbytes, out, out_nbytes);
+	return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
 		    actual_out_nbytes_ret);
 }
 #endif /* DISPATCH_ENABLED */
 /*
- * This is the main DEFLATE decompression routine.  It decompresses 'in_nbytes'
+ * This is the main DEFLATE decompression routine.  See libdeflate.h for the
- * bytes of compressed data from the buffer 'in' and writes the uncompressed
+ * documentation.
- * data to the buffer 'out'.  The caller must know the exact length of the
- * uncompressed data and pass it as 'out_nbytes'.  The return value is %true if
- * and only if decompression was successful.  A return value of %false indicates
- * that either the compressed data is invalid or it does not decompress to
- * exactly 'out_nbytes' bytes of uncompressed data.
 *
- * The real code is in decompress_impl.h.  The part here just handles calling
+ * Note that the real code is in decompress_impl.h.  The part here just handles
- * the appropriate implementation depending on the CPU features at runtime.
+ * calling the appropriate implementation depending on the CPU features at
 * runtime.
 */
 LIBEXPORT bool
 deflate_decompress(struct deflate_decompressor * restrict d,
 		   const void * restrict in, size_t in_nbytes,
-		   void * restrict out, size_t out_nbytes)
+		   void * restrict out, size_t out_nbytes_avail,
 		   size_t *actual_out_nbytes_ret)
 {
 #if DISPATCH_ENABLED
-	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes);
+	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
 				  actual_out_nbytes_ret);
 #else
-	return deflate_decompress_default(d, in, in_nbytes, out, out_nbytes);
+	return deflate_decompress_default(d, in, in_nbytes, out,
 					  out_nbytes_avail,
 					  actual_out_nbytes_ret);
 #endif
 }
--- a/src/gzip_decompress.c
+++ b/src/gzip_decompress.c
@ -16,11 +16,14 @@
 LIBEXPORT bool
 gzip_decompress(struct deflate_decompressor *d,
-		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
+		const void *in, size_t in_nbytes,
 		void *out, size_t out_nbytes_avail,
 		size_t *actual_out_nbytes_ret)
 {
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	u8 flg;
 	size_t actual_out_nbytes;
 	if (in_nbytes < GZIP_MIN_OVERHEAD)
 		return false;
@ -81,18 +84,23 @@ gzip_decompress(struct deflate_decompressor *d,
 	/* Compressed data  */
 	if (!deflate_decompress(d, in_next, in_end - GZIP_FOOTER_SIZE - in_next,
-				out, out_nbytes))
+				out, out_nbytes_avail, actual_out_nbytes_ret))
 		return false;
 	if (actual_out_nbytes_ret)
 		actual_out_nbytes = *actual_out_nbytes_ret;
 	else
 		actual_out_nbytes = out_nbytes_avail;
 	in_next = in_end - GZIP_FOOTER_SIZE;
 	/* CRC32 */
-	if (crc32_gzip(out, out_nbytes) != get_unaligned_le32(in_next))
+	if (crc32_gzip(out, actual_out_nbytes) != get_unaligned_le32(in_next))
 		return false;
 	in_next += 4;
 	/* ISIZE */
-	if ((u32)out_nbytes != get_unaligned_le32(in_next))
+	if (actual_out_nbytes != get_unaligned_le32(in_next))
 		return false;
 	return true;
--- a/src/zlib_decompress.c
+++ b/src/zlib_decompress.c
@ -16,11 +16,14 @@
 LIBEXPORT bool
 zlib_decompress(struct deflate_decompressor *d,
-		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
+		const void *in, size_t in_nbytes,
 		void *out, size_t out_nbytes_avail,
 		size_t *actual_out_nbytes_ret)
 {
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	u16 hdr;
 	size_t actual_out_nbytes;
 	if (in_nbytes < ZLIB_MIN_OVERHEAD)
 		return false;
@ -47,13 +50,18 @@ zlib_decompress(struct deflate_decompressor *d,
 	/* Compressed data  */
 	if (!deflate_decompress(d, in_next, in_end - ZLIB_FOOTER_SIZE - in_next,
-				out, out_nbytes))
+				out, out_nbytes_avail, actual_out_nbytes_ret))
 		return false;
 	if (actual_out_nbytes_ret)
 		actual_out_nbytes = *actual_out_nbytes_ret;
 	else
 		actual_out_nbytes = out_nbytes_avail;
 	in_next = in_end - ZLIB_FOOTER_SIZE;
 	/* ADLER32  */
-	if (adler32(out, out_nbytes) != get_unaligned_be32(in_next))
+	if (adler32(out, actual_out_nbytes) != get_unaligned_be32(in_next))
 		return false;
 	return true;
--- a/tools/benchmark.c
+++ b/tools/benchmark.c
@ -181,13 +181,14 @@ compressor_destroy(struct compressor *c)
 struct decompressor {
 	void *private;
-	bool (*decompress)(void *, const void *, size_t, void *, size_t);
+	bool (*decompress)(void *, const void *, size_t, void *, size_t, size_t *);
 	void (*free_private)(void *);
 };
 static bool
 libz_decompress(void *private, const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes)
+		void *out, size_t out_nbytes_avail,
 		size_t *actual_out_nbytes_ret)
 {
 	z_stream *z = private;
@ -196,7 +197,7 @@ libz_decompress(void *private, const void *in, size_t in_nbytes,
 	z->next_in = (void *)in;
 	z->avail_in = in_nbytes;
 	z->next_out = out;
-	z->avail_out = out_nbytes;
+	z->avail_out = out_nbytes_avail;
 	return (inflate(z, Z_FINISH) == Z_STREAM_END && z->avail_out == 0);
 }
@ -250,9 +251,10 @@ decompressor_init(struct decompressor *d, enum wrapper wrapper, bool use_libz)
 static bool
 do_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
-	      void *out, size_t out_nbytes)
+	      void *out, size_t out_nbytes_avail)
 {
-	return (*d->decompress)(d->private, in, in_nbytes, out, out_nbytes);
+	return (*d->decompress)(d->private, in, in_nbytes,
 				out, out_nbytes_avail, NULL);
 }
 static void