Compressor updates

Eric Biggers 2016-06-11 15:33:27 -05:00
parent e3cfa7b5cd
commit f649a4b8db
2 changed files with 167 additions and 148 deletions

View File

@@ -211,8 +211,7 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 		matchptr = &in_base[cur_node];
 		if (matchptr[len] == in_next[len]) {
-			len = lz_extend(in_next, matchptr, len + 1,
-					(record_matches ? max_len : nice_len));
+			len = lz_extend(in_next, matchptr, len + 1, max_len);
 			if (!record_matches || len > best_len) {
 				if (record_matches) {
 					best_len = len;
@@ -325,7 +324,6 @@ static forceinline void
 bt_matchfinder_skip_position(struct bt_matchfinder *mf,
			     const u8 *in_base,
			     ptrdiff_t cur_pos,
-			     u32 max_len,
			     u32 nice_len,
			     u32 max_search_depth,
			     u32 next_hashes[2])
@@ -334,7 +332,7 @@ bt_matchfinder_skip_position(struct bt_matchfinder *mf,
	bt_matchfinder_advance_one_byte(mf,
					in_base,
					cur_pos,
-					max_len,
+					nice_len,
					nice_len,
					max_search_depth,
					next_hashes,
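These three hunks are one refactor: when positions are merely skipped (record_matches == false), matches are never recorded, so extending a candidate match past nice_len is wasted work. Rather than choosing the cap inside bt_matchfinder_advance_one_byte(), the skip path now simply passes nice_len as max_len. As a point of reference, a minimal byte-at-a-time sketch of the lz_extend() contract assumed here (the real implementation is presumably optimized, e.g. word-at-a-time):

	/* Sketch only: extend a match already known to be 'len' bytes long,
	 * stopping at 'max_len'.  When positions are skipped, the result is
	 * never stored, so capping at nice_len instead of max_len loses
	 * nothing observable. */
	static inline u32
	lz_extend(const u8 *in_next, const u8 *matchptr, u32 len, u32 max_len)
	{
		while (len < max_len && matchptr[len] == in_next[len])
			len++;
		return len;
	}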

View File

@@ -51,12 +51,24 @@
 #endif
 
 /*
- * The minimum and maximum block lengths, in bytes of source data, which the
- * parsing algorithms may choose.  Caveat: due to implementation details, the
- * actual maximum will be slightly higher than the number defined below.
+ * The compressor always chooses a block of at least MIN_BLOCK_LENGTH bytes,
+ * except if the last block has to be shorter.
  */
 #define MIN_BLOCK_LENGTH	10000
-#define MAX_BLOCK_LENGTH	300000
+
+/*
+ * The compressor attempts to end blocks after SOFT_MAX_BLOCK_LENGTH bytes, but
+ * the final length might be slightly longer due to matches extending beyond
+ * this limit.
+ */
+#define SOFT_MAX_BLOCK_LENGTH	300000
+
+/*
+ * The number of observed matches or literals that represents sufficient data
+ * to decide whether the current block should be terminated or not.
+ */
+#define NUM_OBSERVATIONS_PER_BLOCK_CHECK	512
 
 #if SUPPORT_NEAR_OPTIMAL_PARSING
 
 /* Constants specific to the near-optimal parsing algorithm */
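Taken together with the array-sizing hunks below, the soft limit implies a hard worst case: DEFLATE's maximum match length is 258 bytes (DEFLATE_MAX_MATCH_LEN), so a match chosen at block offset SOFT_MAX_BLOCK_LENGTH - 1 can push the true block length to at most 300000 - 1 + 258 = 300257 bytes of source data.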
@@ -77,7 +89,7 @@
  * However, fallback behavior (immediately terminating the block) on cache
  * overflow is still required.
  */
-#  define CACHE_LENGTH	(MAX_BLOCK_LENGTH * 5)
+#  define CACHE_LENGTH	(SOFT_MAX_BLOCK_LENGTH * 5)
 
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -85,7 +97,7 @@
  * These are the compressor-side limits on the codeword lengths for each Huffman
  * code.  To make outputting bits slightly faster, some of these limits are
  * lower than the limits defined by the DEFLATE format.  This does not
- * significantly affect the compression ratio, at least for the block sizes we
+ * significantly affect the compression ratio, at least for the block lengths we
  * use.
  */
 #define MAX_LITLEN_CODEWORD_LEN	14
@@ -365,7 +377,7 @@ struct deflate_compressor {
			 * that can ever be chosen for a single block, plus one
			 * for the special entry at the end. */
			struct deflate_sequence sequences[
-				DIV_ROUND_UP(MAX_BLOCK_LENGTH,
+				DIV_ROUND_UP(SOFT_MAX_BLOCK_LENGTH,
					     DEFLATE_MIN_MATCH_LEN) + 1];
		} g; /* (g)reedy */
@@ -411,11 +423,12 @@ struct deflate_compressor {
			 * This array must be large enough to accommodate the
			 * worst-case number of nodes, which occurs if we find a
			 * match of length DEFLATE_MAX_MATCH_LEN at position
-			 * MAX_BLOCK_LENGTH - 1, producing a block of length
-			 * MAX_BLOCK_LENGTH - 1 + DEFLATE_MAX_MATCH_LEN.  Add
-			 * one for the end-of-block node.
+			 * SOFT_MAX_BLOCK_LENGTH - 1, producing a block of
+			 * length SOFT_MAX_BLOCK_LENGTH - 1 +
+			 * DEFLATE_MAX_MATCH_LEN.  Add one for the end-of-block
+			 * node.
			 */
-			struct deflate_optimum_node optimum_nodes[MAX_BLOCK_LENGTH - 1 +
+			struct deflate_optimum_node optimum_nodes[SOFT_MAX_BLOCK_LENGTH - 1 +
								  DEFLATE_MAX_MATCH_LEN + 1];
 
			/* The current cost model being used. */
@@ -1829,10 +1842,10 @@ deflate_finish_sequence(struct deflate_sequence *seq, unsigned litrunlen)
  * For determining whether the frequency distributions are "different enough" to
  * start a new block, the simply heuristic of splitting when the sum of absolute
  * differences exceeds a constant seems to be good enough.  We also add a number
- * proportional to the block size so that the algorithm is more likely to end
- * large blocks than small blocks.  This reflects the general expectation that
- * it will become increasingly beneficial to start a new block as the current
- * blocks grows larger.
+ * proportional to the block length so that the algorithm is more likely to end
+ * long blocks than short blocks.  This reflects the general expectation that it
+ * will become increasingly beneficial to start a new block as the current
+ * block grows longer.
  *
  * Finally, for an approximation, it is not strictly necessary that the exact
  * symbols being used are considered.  With "near-optimal parsing", for example,
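To make the heuristic concrete, a hypothetical sketch (the field names below are illustrative, not necessarily the commit's): observations are bucketed into a small number of literal/match classes, and the test sums the absolute differences between the recent observations and the block's running distribution, scaled to the same sample size:

	/* Illustrative sketch only; assumes num_observations != 0. */
	u32 total_delta = 0;
	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
		u32 expected = stats->observations[i] *
			       stats->num_new_observations /
			       stats->num_observations;
		u32 actual = stats->new_observations[i];
		total_delta += (actual > expected) ? actual - expected
						   : expected - actual;
	}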
@@ -1874,7 +1887,7 @@ observe_match(struct block_split_stats *stats, unsigned length)
 }
 
 static bool
-do_end_block_check(struct block_split_stats *stats, u32 block_size)
+do_end_block_check(struct block_split_stats *stats, u32 block_length)
 {
	int i;
@@ -1893,8 +1906,8 @@ do_end_block_check(struct block_split_stats *stats, u32 block_size)
		}
 
		/* Ready to end the block? */
-		if (total_delta + (block_size >> 12) * stats->num_observations >=
-		    200 * stats->num_observations)
+		if (total_delta + (block_length / 4096) * stats->num_observations >=
+		    NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 * stats->num_observations)
			return true;
	}
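The rewritten threshold is numerically identical to the old one: NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 = 512 * 200 / 512 = 200, and block_length / 4096 is the same value as block_length >> 12 (the compiler emits a shift for an unsigned division by a power of two). The change only ties the magic numbers to the named constant, so the threshold stays correctly scaled if NUM_OBSERVATIONS_PER_BLOCK_CHECK is ever retuned.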
@@ -1912,9 +1925,9 @@ should_end_block(struct block_split_stats *stats,
		 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
 {
	/* Ready to check block split statistics? */
-	if (stats->num_new_observations < 512 ||
+	if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK ||
	    in_next - in_block_begin < MIN_BLOCK_LENGTH ||
-	    in_end - in_next < 16384)
+	    in_end - in_next < MIN_BLOCK_LENGTH)
		return false;
 
	return do_end_block_check(stats, in_next - in_block_begin);
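One small behavioral change hides in the last condition: previously a split was suppressed only within the final 16384 bytes of input; using MIN_BLOCK_LENGTH (10000) instead means a block is never ended unless at least a minimum-length block's worth of input remains.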
@@ -1945,7 +1958,8 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
		/* Starting a new DEFLATE block. */
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;
@@ -2029,7 +2043,8 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
		/* Starting a new DEFLATE block. */
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;
@@ -2155,13 +2170,13 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
 /*
  * Follow the minimum-cost path in the graph of possible match/literal choices
  * for the current block and compute the frequencies of the Huffman symbols that
- * are needed to output those matches and literals.
+ * would be needed to output those matches and literals.
  */
 static void
-deflate_tally_item_list(struct deflate_compressor *c,
-			struct deflate_optimum_node *end_node)
+deflate_tally_item_list(struct deflate_compressor *c, u32 block_length)
 {
	struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
+	struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
	do {
		unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
		unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
@@ -2180,7 +2195,8 @@
 /* Set the current cost model from the codeword lengths specified in @lens. */
 static void
-deflate_set_costs(struct deflate_compressor *c, const struct deflate_lens *lens)
+deflate_set_costs_from_codes(struct deflate_compressor *c,
+			     const struct deflate_lens *lens)
 {
	unsigned i;
@@ -2232,10 +2248,10 @@ deflate_default_offset_slot_cost(unsigned offset_slot)
 }
 
 /*
- * Set default Huffman symbol costs for the first optimization pass.
+ * Set default symbol costs for the first block's first optimization pass.
  *
- * It works well to assume that each Huffman symbol is equally probable.  This
- * results in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
+ * It works well to assume that each symbol is equally probable.  This results
+ * in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 <<
  * COST_SHIFT)) where 'num_syms' is the number of symbols in the corresponding
  * alphabet.  However, we intentionally bias the parse towards matches rather
  * than literals by using a slightly lower default cost for length symbols than
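A sketch of the stated formula (not the commit's code; COST_SHIFT is the compressor's fixed-point shift for costs, and -log2(1.0/num_syms) is just log2(num_syms)):

	#include <math.h>

	static u32
	default_symbol_cost(unsigned num_syms)
	{
		return (u32)(-log2(1.0 / num_syms) * (1 << COST_SHIFT));
	}

For the 288-symbol DEFLATE litlen alphabet this gives about 8.17 * (1 << COST_SHIFT), i.e. roughly 8.17 bits per symbol, before the bias toward matches described above is applied.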
@@ -2297,51 +2313,32 @@ deflate_adjust_costs(struct deflate_compressor *c)
				     deflate_default_offset_slot_cost(i));
 }
 
-static void
-deflate_optimize_and_write_block(struct deflate_compressor *c,
-				 struct deflate_output_bitstream *os,
-				 const u8 * const block_begin,
-				 const u32 block_length,
-				 const struct lz_match * const end_cache_ptr,
-				 const bool is_final_block)
-{
-	struct deflate_optimum_node * const end_node =
-		&c->p.n.optimum_nodes[block_length];
-	unsigned num_passes_remaining = c->p.n.num_optim_passes;
-	u32 i;
-
-	/* Force the block to really end at 'end_node', even if some matches
-	 * extend beyond it. */
-	for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
-					ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
-		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
-
-	do {
-		/*
-		 * Beginning a new optimization pass and finding a new
-		 * minimum-cost path through the graph of possible match/literal
-		 * choices for this block.
-		 *
-		 * We find the minimum cost path from 'c->optimum_nodes[0]',
-		 * which represents the node at the beginning of the block, to
-		 * 'end_node', which represents the node at the end of the
-		 * block.  Edge costs are evaluated using the cost model
-		 * 'c->costs'.
-		 *
-		 * The algorithm works backward, starting at 'end_node' and
-		 * proceeding backwards one position at a time.  At each
-		 * position, the minimum cost to reach 'end_node' is computed
-		 * and the match/literal choice is saved.
-		 */
-		struct deflate_optimum_node *cur_node = end_node;
-		const struct lz_match *cache_ptr = end_cache_ptr;
+/*
+ * Find the minimum-cost path through the graph of possible match/literal
+ * choices for this block.
+ *
+ * We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which
+ * represents the node at the beginning of the block, to
+ * 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of
+ * the block.  Edge costs are evaluated using the cost model 'c->p.n.costs'.
+ *
+ * The algorithm works backwards, starting at the end node and proceeding
+ * backwards one node at a time.  At each node, the minimum cost to reach the
+ * end node is computed and the match/literal choice that begins that path is
+ * saved.
+ */
+static void
+deflate_find_min_cost_path(struct deflate_compressor *c, const u32 block_length,
+			   const struct lz_match *cache_ptr)
+{
+	struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length];
+	struct deflate_optimum_node *cur_node = end_node;
 
-		cur_node->cost_to_end = 0;
-		do {
-			unsigned num_matches;
-			unsigned literal;
-			u32 best_cost_to_end;
-			u32 best_item;
+	cur_node->cost_to_end = 0;
+	do {
+		unsigned num_matches;
+		unsigned literal;
+		u32 best_cost_to_end;
 
-			cur_node--;
-			cache_ptr--;
+		cur_node--;
+		cache_ptr--;
@@ -2352,7 +2349,7 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
		/* It's always possible to choose a literal. */
		best_cost_to_end = c->p.n.costs.literal[literal] +
				   (cur_node + 1)->cost_to_end;
-		best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
+		cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
 
		/* Also consider matches if there are any. */
		if (num_matches) {
@@ -2365,13 +2362,12 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
			/*
			 * Consider each length from the minimum
-			 * (DEFLATE_MIN_MATCH_LEN) to the length of the
-			 * longest match found at this position.  For
-			 * each length, we consider only the smallest
-			 * offset for which that length is available.
-			 * Although this is not guaranteed to be optimal
-			 * due to the possibility of a larger offset
-			 * costing less than a smaller offset to code,
+			 * (DEFLATE_MIN_MATCH_LEN) to the length of the longest
+			 * match found at this position.  For each length, we
+			 * consider only the smallest offset for which that
+			 * length is available.  Although this is not guaranteed
+			 * to be optimal due to the possibility of a larger
+			 * offset costing less than a smaller offset to code,
			 * this is a very useful heuristic.
			 */
			match = cache_ptr - num_matches;
@@ -2386,31 +2382,61 @@ deflate_optimize_and_write_block(struct deflate_compressor *c,
						   (cur_node + len)->cost_to_end;
				if (cost_to_end < best_cost_to_end) {
					best_cost_to_end = cost_to_end;
-					best_item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
+					cur_node->item = ((u32)offset << OPTIMUM_OFFSET_SHIFT) | len;
				}
			} while (++len <= match->length);
		} while (++match != cache_ptr);
		cache_ptr -= num_matches;
	}
	cur_node->cost_to_end = best_cost_to_end;
-	cur_node->item = best_item;
 } while (cur_node != &c->p.n.optimum_nodes[0]);
+}
+
+/*
+ * Choose the literal/match sequence to use for the current block.  The basic
+ * algorithm finds a minimum-cost path through the block's graph of
+ * literal/match choices, given a cost model.  However, the cost of each symbol
+ * is unknown until the Huffman codes have been built, but at the same time the
+ * Huffman codes depend on the frequencies of chosen symbols.  Consequently,
+ * multiple passes must be used to try to approximate an optimal solution.  The
+ * first pass uses default costs, mixed with the costs from the previous block
+ * if any.  Later passes use the Huffman codeword lengths from the previous pass
+ * as the costs.
+ */
+static void
+deflate_optimize_block(struct deflate_compressor *c, u32 block_length,
+		       const struct lz_match *cache_ptr, bool is_first_block)
+{
+	unsigned num_passes_remaining = c->p.n.num_optim_passes;
+	u32 i;
 
-	/* Tally Huffman symbol frequencies. */
-	deflate_tally_item_list(c, end_node);
+	/* Force the block to really end at the desired length, even if some
+	 * matches extend beyond it. */
+	for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
+					ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
+		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
+
+	/* Set the initial costs. */
+	if (is_first_block)
+		deflate_set_default_costs(c);
+	else
+		deflate_adjust_costs(c);
+
+	for (;;) {
+		/* Find the minimum cost path for this pass. */
+		deflate_find_min_cost_path(c, block_length, cache_ptr);
 
-	/* If this wasn't the last pass, update the cost model. */
-	if (num_passes_remaining > 1) {
-		deflate_make_huffman_codes(&c->freqs, &c->codes);
-		deflate_set_costs(c, &c->codes.lens);
-		deflate_reset_symbol_frequencies(c);
-	}
-	} while (--num_passes_remaining);
+		/* Compute frequencies of the chosen symbols. */
+		deflate_reset_symbol_frequencies(c);
+		deflate_tally_item_list(c, block_length);
+
+		if (--num_passes_remaining == 0)
+			break;
 
-	/* All optimization passes are done.  Output a block using the
-	 * minimum-cost path computed on the last optimization pass. */
-	deflate_flush_block(c, os, block_begin, block_length,
-			    is_final_block, true);
+		/* At least one optimization pass remains; update the costs. */
+		deflate_make_huffman_codes(&c->freqs, &c->codes);
+		deflate_set_costs_from_codes(c, &c->codes.lens);
+	}
 }
 
 /*
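The net effect of this restructuring: the old deflate_optimize_and_write_block() mixed path-finding, the pass loop, and output; now deflate_find_min_cost_path() is a pure path search, deflate_optimize_block() owns cost initialization and the pass loop, and flushing moves to the caller (see the last hunk below). With num_optim_passes = N, the path search runs N times while the Huffman-derived costs are rebuilt only N - 1 times, since the final pass's choices are the ones emitted.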
@@ -2448,17 +2474,17 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
		struct lz_match *cache_ptr = c->p.n.match_cache;
		const u8 * const in_block_begin = in_next;
-		const u8 * const in_max_block_end = in_next + MIN(in_end - in_next, MAX_BLOCK_LENGTH);
+		const u8 * const in_max_block_end =
+			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		const u8 *next_observation = in_next;
 
		init_block_split_stats(&c->split_stats);
-		deflate_reset_symbol_frequencies(c);
 
		/*
		 * Find matches until we decide to end the block.  We end the
		 * block if any of the following is true:
		 *
-		 * (1) Maximum block size has been reached
+		 * (1) Maximum block length has been reached
		 * (2) Match catch may overflow.
		 * (3) Block split heuristic says to split now.
		 */
@@ -2556,7 +2582,6 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
				bt_matchfinder_skip_position(&c->p.n.bt_mf,
							     in_cur_base,
							     in_next - in_cur_base,
-							     max_len,
							     nice_len,
							     c->max_search_depth,
							     next_hashes);
@@ -2571,16 +2596,12 @@ deflate_compress_near_optimal(struct deflate_compressor * restrict c,
			 cache_ptr < &c->p.n.match_cache[CACHE_LENGTH] &&
			 !should_end_block(&c->split_stats, in_block_begin, in_next, in_end));
 
-		/* All the matches for this block have been cached.  Now compute
-		 * a near-optimal sequence of literals and matches, and output
-		 * the block. */
-		if (in_block_begin == in)
-			deflate_set_default_costs(c);
-		else
-			deflate_adjust_costs(c);
-		deflate_optimize_and_write_block(c, &os, in_block_begin,
-						 in_next - in_block_begin,
-						 cache_ptr, in_next == in_end);
+		/* All the matches for this block have been cached.  Now choose
+		 * the sequence of items to output and flush the block. */
+		deflate_optimize_block(c, in_next - in_block_begin, cache_ptr,
+				       in_block_begin == in);
+		deflate_flush_block(c, &os, in_block_begin, in_next - in_block_begin,
+				    in_next == in_end, true);
	} while (in_next != in_end);
 
	return deflate_flush_output(&os);