lib/deflate_decompress: store decode results pre-shifted

This slightly speeds up decode table building, since now the decode results don't need to be shifted at runtime when building the tables.
2025-09-12 13:58:35 -04:00 · 2018-12-25 14:16:38 -06:00 · 2018-12-25 14:16:38 -06:00 · 1c3609da7b
commit 1c3609da7b
parent eed4829c16
1 changed files with 20 additions and 18 deletions
--- a/lib/deflate_decompress.c
+++ b/lib/deflate_decompress.c
@ -364,19 +364,28 @@ do {									\
 /* Shift to extract the decode result from a decode table entry.  */
 #define HUFFDEC_RESULT_SHIFT		8

+/* Shift a decode result into its position in the decode table entry.  */
+#define HUFFDEC_RESULT_ENTRY(result)	((u32)(result) << HUFFDEC_RESULT_SHIFT)
+
 /* The decode result for each precode symbol.  There is no special optimization
 * for the precode; the decode result is simply the symbol value.  */
 static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
-	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+#define ENTRY(presym)	HUFFDEC_RESULT_ENTRY(presym)
+	ENTRY(0)   , ENTRY(1)   , ENTRY(2)   , ENTRY(3)   ,
+	ENTRY(4)   , ENTRY(5)   , ENTRY(6)   , ENTRY(7)   ,
+	ENTRY(8)   , ENTRY(9)   , ENTRY(10)  , ENTRY(11)  ,
+	ENTRY(12)  , ENTRY(13)  , ENTRY(14)  , ENTRY(15)  ,
+	ENTRY(16)  , ENTRY(17)  , ENTRY(18)  ,
+#undef ENTRY
 };

 /* The decode result for each litlen symbol.  For literals, this is the literal
 * value itself and the HUFFDEC_LITERAL flag.  For lengths, this is the length
 * base and the number of extra length bits.  */
 static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
-#define ENTRY(literal)	((HUFFDEC_LITERAL >> HUFFDEC_RESULT_SHIFT) | (literal))

 	/* Literals  */
+#define ENTRY(literal)	(HUFFDEC_LITERAL | HUFFDEC_RESULT_ENTRY(literal))
 	ENTRY(0)   , ENTRY(1)   , ENTRY(2)   , ENTRY(3)   ,
 	ENTRY(4)   , ENTRY(5)   , ENTRY(6)   , ENTRY(7)   ,
 	ENTRY(8)   , ENTRY(9)   , ENTRY(10)  , ENTRY(11)  ,
@ -447,8 +456,8 @@ static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
 #define HUFFDEC_LENGTH_BASE_SHIFT	8
 #define HUFFDEC_END_OF_BLOCK_LENGTH	0

-#define ENTRY(length_base, num_extra_bits) \
-	(((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))
+#define ENTRY(length_base, num_extra_bits)	HUFFDEC_RESULT_ENTRY(	\
+	((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))

 	/* End of block  */
 	ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),
@ -472,8 +481,9 @@ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
 #define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
 #define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)

-#define ENTRY(offset_base, num_extra_bits) \
-	((offset_base) | ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT))
+#define ENTRY(offset_base, num_extra_bits)	HUFFDEC_RESULT_ENTRY(	\
+		((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) | \
+		(offset_base))
 	ENTRY(1     , 0)  , ENTRY(2     , 0)  , ENTRY(3     , 0)  , ENTRY(4     , 0)  ,
 	ENTRY(5     , 1)  , ENTRY(7     , 1)  , ENTRY(9     , 2)  , ENTRY(13    , 2) ,
 	ENTRY(17    , 3)  , ENTRY(25    , 3)  , ENTRY(33    , 4)  , ENTRY(49    , 4)  ,
@ -485,13 +495,6 @@ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
 #undef ENTRY
 };

-/* Construct a decode table entry from a decode result and codeword length.  */
-static forceinline u32
-make_decode_table_entry(u32 result, u32 length)
-{
-	return (result << HUFFDEC_RESULT_SHIFT) | length;
-}
-
 /*
 * Build a table for fast decoding of symbols from a Huffman code.  As input,
 * this function takes the codeword length of each symbol which may be used in
@ -596,7 +599,7 @@ build_decode_table(u32 decode_table[],
 		 * decompressing a well-formed stream, these default values will
 		 * never be used.  But since a malformed stream might contain
 		 * any bits at all, these entries need to be set anyway.  */
-		u32 entry = make_decode_table_entry(decode_results[0], 1);
+		u32 entry = decode_results[0] | 1;
 		for (sym = 0; sym < (1U << table_bits); sym++)
 			decode_table[sym] = entry;

@ -679,8 +682,8 @@ build_decode_table(u32 decode_table[],
 			 * number of entries it contains).  */
 			decode_table[cur_codeword_prefix] =
 				HUFFDEC_SUBTABLE_POINTER |
-				make_decode_table_entry(cur_table_start,
-							cur_table_bits);
+				HUFFDEC_RESULT_ENTRY(cur_table_start) |
+				cur_table_bits;

 			/* Now that we're filling a subtable, we need to drop
 			 * the first 'table_bits' bits of the codewords.  */
@ -690,8 +693,7 @@ build_decode_table(u32 decode_table[],
 		/* Create the decode table entry, which packs the decode result
 		 * and the codeword length (minus 'table_bits' for subtables)
 		 * together.  */
-		entry = make_decode_table_entry(decode_results[sym],
-						codeword_len - num_dropped_bits);
+		entry = decode_results[sym] | (codeword_len - num_dropped_bits);

 		/* Fill in as many copies of the decode table entry as are
 		 * needed.  The number of entries to fill is a power of 2 and