From 7be1effaccde8d648d337959a03c28a6fb194d8f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sat, 23 Jan 2016 00:58:20 -0600
Subject: [PATCH] Make decompression routines optionally provide the actual
 uncompressed size

---
 README.md                | 28 +++++++++++++++++-----------
 libdeflate.h             | 25 ++++++++++++++-----------
 src/decompress_impl.h    | 15 ++++++++++-----
 src/deflate_decompress.c | 36 ++++++++++++++++++++----------------
 src/gzip_decompress.c    | 16 ++++++++++++----
 src/zlib_decompress.c    | 14 +++++++++++---
 tools/benchmark.c        | 12 +++++++-----
 7 files changed, 91 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 977f438..90fd2c4 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ The supported formats are:
 - gzip (a.k.a. DEFLATE with a gzip wrapper)
 
 libdeflate is heavily optimized.  It is significantly faster than the zlib
-software library, both for compression and decompression, and especially on x86
+library, both for compression and decompression, and especially on x86
 processors.  In addition, libdeflate provides optional high compression modes
 that provide a better compression ratio than the zlib's "level 9".
 
@@ -28,7 +28,7 @@ There are various options which can be set on the `make` command line; see the
 Makefile for details.  As an example, you can run `make SUPPORT_COMPRESSION=no`
 to build a decompression-only library.
 
-There is no `make install` yet; just copy the file(s) you want.
+There is no `make install` yet; just copy the file(s) to wherever you want them.
 
 It's possible to build a Windows binary using MinGW, using a command like this:
 
@@ -50,6 +50,12 @@ certain use cases such as transparent filesystem compression.  But if your
 application compresses large files as a single compressed stream, similarly to
 the `gzip` program, then libdeflate isn't for you.
 
+Note that with chunk-based compression, you generally should have the
+uncompressed size of each chunk stored outside of the compressed data itself.
+This enables you to allocate an output buffer of the correct size without
+guessing.  However, libdeflate's decompression routines do optionally provide
+the actual number of output bytes in case you need it.
+
 DEFLATE vs. zlib vs. gzip
 =========================
 
@@ -87,10 +93,10 @@ compressed.  Different algorithms and different amounts of computation time will
 result in different compression ratios, while remaining equally compatible with
 the decompressor.
 
-For this reason, the commonly used zlib software library provides nine
-compression levels.  Level 1 is the fastest but provides the worst compression;
-level 9 provides the best compression but is the slowest.  It defaults to level
-6.  libdeflate uses this same design but is designed to improve on both zlib's
+For this reason, the commonly used zlib library provides nine compression
+levels.  Level 1 is the fastest but provides the worst compression; level 9
+provides the best compression but is the slowest.  It defaults to level 6.
+libdeflate uses this same design but is designed to improve on both zlib's
 performance *and* compression ratio at every compression level.  In addition,
 libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
 minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
@@ -102,8 +108,8 @@ different levels to see which works best for your application.
 Motivation
 ==========
 
-Despite DEFLATE's widespread use mainly through the zlib software library, in
-the compression community this format from the early 1990s is often considered
+Despite DEFLATE's widespread use mainly through the zlib library, in the
+compression community this format from the early 1990s is often considered
 obsolete.  And in a few significant ways, it is.
 
 So why implement DEFLATE at all, instead of focusing entirely on
@@ -119,8 +125,8 @@ optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
 
 In addition, comparing compressors fairly is difficult because the performance
 of a compressor depends heavily on optimizations which are not intrinsic to the
-compression format itself.  In this respect, the zlib software library sometimes
-compares poorly to certain newer code because zlib is not well optimized for
-modern processors.  libdeflate addresses this by providing an optimized DEFLATE
+compression format itself.  In this respect, the zlib library sometimes compares
+poorly to certain newer code because zlib is not well optimized for modern
+processors.  libdeflate addresses this by providing an optimized DEFLATE
 implementation which can be used for benchmarking purposes.  And, of course,
 real applications can use it as well.
diff --git a/libdeflate.h b/libdeflate.h
index bae28ad..1b95beb 100644
--- a/libdeflate.h
+++ b/libdeflate.h
@@ -90,19 +90,20 @@ deflate_alloc_decompressor(void);
 
 /*
  * deflate_decompress() decompresses 'in_nbytes' bytes of DEFLATE-compressed
- * data at 'in' and writes the uncompressed data, which had original size
- * 'out_nbytes', to 'out'.  The return value is true if decompression was
- * successful, or false if the compressed data was invalid.
- *
- * Note that the uncompressed size must be known *exactly* and passed as
- * 'out_nbytes'.  This is because this API is designed for block-based
- * compression where the uncompressed size should have already been stored
- * elsewhere.
+ * data at 'in' and writes the uncompressed data to 'out', which is a buffer of
+ * at least 'out_nbytes_avail' bytes.  If decompression was successful, then
+ * %true is returned; otherwise, the compressed data must have been invalid and
+ * %false is returned.  In addition, on success, if 'actual_out_nbytes_ret' is
+ * not NULL, then the actual uncompressed size is written to
+ * *actual_out_nbytes_ret.  Or, if 'actual_out_nbytes_ret' is NULL, then the
+ * uncompressed size must be exactly equal to 'out_nbytes_avail'; otherwise
+ * decompression fails and %false is returned.
  */
 extern bool
 deflate_decompress(struct deflate_decompressor *decompressor,
 		   const void *in, size_t in_nbytes,
-		   void *out, size_t out_nbytes);
+		   void *out, size_t out_nbytes_avail,
+		   size_t *actual_out_nbytes_ret);
 
 /*
  * Like deflate_decompress(), but assumes the zlib wrapper format instead of raw
@@ -111,7 +112,8 @@ deflate_decompress(struct deflate_decompressor *decompressor,
 extern bool
 zlib_decompress(struct deflate_decompressor *decompressor,
 		const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes);
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_out_nbytes_ret);
 
 /*
  * Like deflate_decompress(), but assumes the gzip wrapper format instead of raw
@@ -120,7 +122,8 @@ zlib_decompress(struct deflate_decompressor *decompressor,
 extern bool
 gzip_decompress(struct deflate_decompressor *decompressor,
 		const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes);
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_out_nbytes_ret);
 
 /*
  * deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
diff --git a/src/decompress_impl.h b/src/decompress_impl.h
index ad51d2b..d32c668 100644
--- a/src/decompress_impl.h
+++ b/src/decompress_impl.h
@@ -9,10 +9,11 @@
 static bool ATTRIBUTES
 FUNCNAME(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes)
+	 void * restrict out, size_t out_nbytes_avail,
+	 size_t *actual_out_nbytes_ret)
 {
 	u8 *out_next = out;
-	u8 * const out_end = out_next + out_nbytes;
+	u8 * const out_end = out_next + out_nbytes_avail;
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	bitbuf_t bitbuf = 0;
@@ -358,7 +359,11 @@ block_done:
 	if (!is_final_block)
 		goto next_block;
 
-	/* That was the last block.  Return %true if we got all the output we
-	 * expected, otherwise %false.  */
-	return (out_next == out_end);
+	/* That was the last block.  */
+
+	if (!actual_out_nbytes_ret)
+		return out_next == out_end;
+
+	*actual_out_nbytes_ret = out_next - (u8 *)out;
+	return true;
 }
diff --git a/src/deflate_decompress.c b/src/deflate_decompress.c
index ee3451f..610d5a1 100644
--- a/src/deflate_decompress.c
+++ b/src/deflate_decompress.c
@@ -817,18 +817,21 @@ copy_word_unaligned(const void *src, void *dst)
 static bool
 dispatch(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes);
+	 void * restrict out, size_t out_nbytes_avail,
+	 size_t *actual_out_nbytes_ret);
 
 typedef bool (*decompress_func_t)(struct deflate_decompressor * restrict d,
 				  const void * restrict in, size_t in_nbytes,
-				  void * restrict out, size_t out_nbytes);
+				  void * restrict out, size_t out_nbytes_avail,
+				  size_t *actual_out_nbytes_ret);
 
 static decompress_func_t decompress_impl = dispatch;
 
 static bool
 dispatch(struct deflate_decompressor * restrict d,
 	 const void * restrict in, size_t in_nbytes,
-	 void * restrict out, size_t out_nbytes)
+	 void * restrict out, size_t out_nbytes_avail,
+	 size_t *actual_out_nbytes_ret)
 {
 	decompress_func_t f = deflate_decompress_default;
 #if X86_CPU_FEATURES_ENABLED
@@ -836,32 +839,33 @@ dispatch(struct deflate_decompressor * restrict d,
 		f = deflate_decompress_bmi2;
 #endif
 	decompress_impl = f;
-	return (*f)(d, in, in_nbytes, out, out_nbytes);
+	return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
+		    actual_out_nbytes_ret);
 }
 #endif /* DISPATCH_ENABLED */
 
 
 /*
- * This is the main DEFLATE decompression routine.  It decompresses 'in_nbytes'
- * bytes of compressed data from the buffer 'in' and writes the uncompressed
- * data to the buffer 'out'.  The caller must know the exact length of the
- * uncompressed data and pass it as 'out_nbytes'.  The return value is %true if
- * and only if decompression was successful.  A return value of %false indicates
- * that either the compressed data is invalid or it does not decompress to
- * exactly 'out_nbytes' bytes of uncompressed data.
+ * This is the main DEFLATE decompression routine.  See libdeflate.h for the
+ * documentation.
  *
- * The real code is in decompress_impl.h.  The part here just handles calling
- * the appropriate implementation depending on the CPU features at runtime.
+ * Note that the real code is in decompress_impl.h.  The part here just handles
+ * calling the appropriate implementation depending on the CPU features at
+ * runtime.
  */
 LIBEXPORT bool
 deflate_decompress(struct deflate_decompressor * restrict d,
 		   const void * restrict in, size_t in_nbytes,
-		   void * restrict out, size_t out_nbytes)
+		   void * restrict out, size_t out_nbytes_avail,
+		   size_t *actual_out_nbytes_ret)
 {
 #if DISPATCH_ENABLED
-	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes);
+	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
+				  actual_out_nbytes_ret);
 #else
-	return deflate_decompress_default(d, in, in_nbytes, out, out_nbytes);
+	return deflate_decompress_default(d, in, in_nbytes, out,
+					  out_nbytes_avail,
+					  actual_out_nbytes_ret);
 #endif
 }
 
diff --git a/src/gzip_decompress.c b/src/gzip_decompress.c
index b4c4d05..a567246 100644
--- a/src/gzip_decompress.c
+++ b/src/gzip_decompress.c
@@ -16,11 +16,14 @@
 
 LIBEXPORT bool
 gzip_decompress(struct deflate_decompressor *d,
-		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_out_nbytes_ret)
 {
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	u8 flg;
+	size_t actual_out_nbytes;
 
 	if (in_nbytes < GZIP_MIN_OVERHEAD)
 		return false;
@@ -81,18 +84,23 @@ gzip_decompress(struct deflate_decompressor *d,
 
 	/* Compressed data  */
 	if (!deflate_decompress(d, in_next, in_end - GZIP_FOOTER_SIZE - in_next,
-				out, out_nbytes))
+				out, out_nbytes_avail, actual_out_nbytes_ret))
 		return false;
 
+	if (actual_out_nbytes_ret)
+		actual_out_nbytes = *actual_out_nbytes_ret;
+	else
+		actual_out_nbytes = out_nbytes_avail;
+
 	in_next = in_end - GZIP_FOOTER_SIZE;
 
 	/* CRC32 */
-	if (crc32_gzip(out, out_nbytes) != get_unaligned_le32(in_next))
+	if (crc32_gzip(out, actual_out_nbytes) != get_unaligned_le32(in_next))
 		return false;
 	in_next += 4;
 
 	/* ISIZE */
-	if ((u32)out_nbytes != get_unaligned_le32(in_next))
+	if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
 		return false;
 
 	return true;
diff --git a/src/zlib_decompress.c b/src/zlib_decompress.c
index 820a92a..accc1a2 100644
--- a/src/zlib_decompress.c
+++ b/src/zlib_decompress.c
@@ -16,11 +16,14 @@
 
 LIBEXPORT bool
 zlib_decompress(struct deflate_decompressor *d,
-		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_out_nbytes_ret)
 {
 	const u8 *in_next = in;
 	const u8 * const in_end = in_next + in_nbytes;
 	u16 hdr;
+	size_t actual_out_nbytes;
 
 	if (in_nbytes < ZLIB_MIN_OVERHEAD)
 		return false;
@@ -47,13 +50,18 @@ zlib_decompress(struct deflate_decompressor *d,
 
 	/* Compressed data  */
 	if (!deflate_decompress(d, in_next, in_end - ZLIB_FOOTER_SIZE - in_next,
-				out, out_nbytes))
+				out, out_nbytes_avail, actual_out_nbytes_ret))
 		return false;
 
+	if (actual_out_nbytes_ret)
+		actual_out_nbytes = *actual_out_nbytes_ret;
+	else
+		actual_out_nbytes = out_nbytes_avail;
+
 	in_next = in_end - ZLIB_FOOTER_SIZE;
 
 	/* ADLER32  */
-	if (adler32(out, out_nbytes) != get_unaligned_be32(in_next))
+	if (adler32(out, actual_out_nbytes) != get_unaligned_be32(in_next))
 		return false;
 
 	return true;
diff --git a/tools/benchmark.c b/tools/benchmark.c
index 955645e..fba6843 100644
--- a/tools/benchmark.c
+++ b/tools/benchmark.c
@@ -181,13 +181,14 @@ compressor_destroy(struct compressor *c)
 
 struct decompressor {
 	void *private;
-	bool (*decompress)(void *, const void *, size_t, void *, size_t);
+	bool (*decompress)(void *, const void *, size_t, void *, size_t, size_t *);
 	void (*free_private)(void *);
 };
 
 static bool
 libz_decompress(void *private, const void *in, size_t in_nbytes,
-		void *out, size_t out_nbytes)
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_out_nbytes_ret)
 {
 	z_stream *z = private;
 
@@ -196,7 +197,7 @@ libz_decompress(void *private, const void *in, size_t in_nbytes,
 	z->next_in = (void *)in;
 	z->avail_in = in_nbytes;
 	z->next_out = out;
-	z->avail_out = out_nbytes;
+	z->avail_out = out_nbytes_avail;
 
 	return (inflate(z, Z_FINISH) == Z_STREAM_END && z->avail_out == 0);
 }
@@ -250,9 +251,10 @@ decompressor_init(struct decompressor *d, enum wrapper wrapper, bool use_libz)
 
 static bool
 do_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
-	      void *out, size_t out_nbytes)
+	      void *out, size_t out_nbytes_avail)
 {
-	return (*d->decompress)(d->private, in, in_nbytes, out, out_nbytes);
+	return (*d->decompress)(d->private, in, in_nbytes,
+				out, out_nbytes_avail, NULL);
 }
 
 static void