Slide window within hc_matchfinder functions

This commit is contained in:
Eric Biggers 2016-05-21 10:33:59 -05:00
parent 00bf9daff9
commit c3f68e9ba7
2 changed files with 119 additions and 159 deletions

View File

@ -1491,7 +1491,7 @@ deflate_write_end_of_block(struct deflate_output_bitstream *os,
static void static void
deflate_write_block(struct deflate_compressor * restrict c, deflate_write_block(struct deflate_compressor * restrict c,
struct deflate_output_bitstream * restrict os, struct deflate_output_bitstream * restrict os,
const u8 * restrict block_begin, u32 items_remaining, const u8 * restrict block_begin, s32 items_remaining,
bool is_final_block) bool is_final_block)
{ {
struct deflate_codes *codes; struct deflate_codes *codes;
@ -1588,41 +1588,37 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
const u8 *in_next = in; const u8 *in_next = in;
const u8 *in_end = in_next + in_nbytes; const u8 *in_end = in_next + in_nbytes;
struct deflate_output_bitstream os; struct deflate_output_bitstream os;
const u8 *block_begin = in_next; const u8 *in_cur_base = in_next;
struct deflate_sequence *next_seq = c->sequences; unsigned max_len = DEFLATE_MAX_MATCH_LEN;
u32 litrunlen = 0; unsigned nice_len = MIN(c->nice_match_length, max_len);
u32 items_remaining = MAX_ITEMS_PER_BLOCK;
u32 next_hashes[2] = {0, 0}; u32 next_hashes[2] = {0, 0};
deflate_init_output(&os, out, out_nbytes_avail); deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c); deflate_reset_symbol_frequencies(c);
hc_matchfinder_init(&c->hc_mf);
/* The outer loop repeats every WINDOW_SIZE bytes and handles the
* sliding window. */
do { do {
const u8 *in_cur_base; /* Starting a new DEFLATE block. */
const u8 *in_cur_end;
if (in == in_next) const u8 * const in_block_begin = in_next;
hc_matchfinder_init(&c->hc_mf); u32 litrunlen = 0;
else struct deflate_sequence *next_seq = c->sequences;
hc_matchfinder_slide_window(&c->hc_mf); s32 items_remaining = MAX_ITEMS_PER_BLOCK;
in_cur_base = in_next;
in_cur_end = in_next + MIN(in_end - in_next,
MATCHFINDER_WINDOW_SIZE);
do { do {
unsigned max_len; u32 length;
unsigned nice_len; u32 offset;
unsigned length;
unsigned offset;
max_len = MIN(in_cur_end - in_next, DEFLATE_MAX_MATCH_LEN); /* Decrease the maximum and nice match lengths if we're
nice_len = MIN(max_len, c->nice_match_length); * approaching the end of the input buffer. */
if (unlikely(max_len > in_end - in_next)) {
max_len = in_end - in_next;
nice_len = MIN(nice_len, max_len);
}
length = hc_matchfinder_longest_match(&c->hc_mf, length = hc_matchfinder_longest_match(&c->hc_mf,
in_cur_base, &in_cur_base,
in_next - in_cur_base, in_next,
DEFLATE_MIN_MATCH_LEN - 1, DEFLATE_MIN_MATCH_LEN - 1,
max_len, max_len,
nice_len, nice_len,
@ -1635,9 +1631,9 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
deflate_choose_match(c, length, offset, deflate_choose_match(c, length, offset,
&litrunlen, &next_seq); &litrunlen, &next_seq);
in_next = hc_matchfinder_skip_positions(&c->hc_mf, in_next = hc_matchfinder_skip_positions(&c->hc_mf,
in_cur_base, &in_cur_base,
in_next + 1 - in_cur_base, in_next + 1,
in_end - in_cur_base, in_end,
length - 1, length - 1,
next_hashes); next_hashes);
} else { } else {
@ -1646,28 +1642,12 @@ deflate_compress_greedy(struct deflate_compressor * restrict c,
} }
/* Check if it's time to output another block. */ /* Check if it's time to output another block. */
if (--items_remaining == 0) { } while (in_next != in_end && --items_remaining > 0);
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, block_begin,
items_remaining,
in_next == in_end);
block_begin = in_next;
next_seq = c->sequences;
litrunlen = 0;
items_remaining = MAX_ITEMS_PER_BLOCK;
}
} while (in_next != in_cur_end);
} while (in_next != in_end);
/* Output the last block. */
if (items_remaining != MAX_ITEMS_PER_BLOCK) {
deflate_finish_sequence(next_seq, litrunlen); deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, block_begin, deflate_write_block(c, &os, in_block_begin,
items_remaining, true); items_remaining, in_next == in_end);
} } while (in_next != in_end);
return deflate_flush_output(&os); return deflate_flush_output(&os);
} }
@ -1685,48 +1665,38 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
const u8 *in_next = in; const u8 *in_next = in;
const u8 *in_end = in_next + in_nbytes; const u8 *in_end = in_next + in_nbytes;
struct deflate_output_bitstream os; struct deflate_output_bitstream os;
const u8 *block_begin = in_next; const u8 *in_cur_base = in_next;
struct deflate_sequence *next_seq = c->sequences; unsigned max_len = DEFLATE_MAX_MATCH_LEN;
u32 litrunlen = 0; unsigned nice_len = MIN(c->nice_match_length, max_len);
u32 items_remaining = MAX_ITEMS_PER_BLOCK;
u32 next_hashes[2] = {0, 0}; u32 next_hashes[2] = {0, 0};
deflate_init_output(&os, out, out_nbytes_avail); deflate_init_output(&os, out, out_nbytes_avail);
deflate_reset_symbol_frequencies(c); deflate_reset_symbol_frequencies(c);
hc_matchfinder_init(&c->hc_mf);
/* The outer loop repeats every WINDOW_SIZE bytes and handles the
* sliding window. */
do { do {
const u8 *in_cur_base; /* Starting a new DEFLATE block. */
const u8 *in_cur_end;
unsigned max_len;
unsigned nice_len;
if (in == in_next) const u8 * const in_block_begin = in_next;
hc_matchfinder_init(&c->hc_mf); u32 litrunlen = 0;
else struct deflate_sequence *next_seq = c->sequences;
hc_matchfinder_slide_window(&c->hc_mf); s32 items_remaining = MAX_ITEMS_PER_BLOCK;
in_cur_base = in_next;
in_cur_end = in_next + MIN(in_end - in_next,
MATCHFINDER_WINDOW_SIZE);
max_len = DEFLATE_MAX_MATCH_LEN;
nice_len = MIN(c->nice_match_length, max_len);
do { do {
unsigned cur_len; unsigned cur_len;
unsigned cur_offset; unsigned cur_offset;
unsigned next_len; unsigned next_len;
unsigned next_offset; unsigned next_offset;
if (unlikely(in_cur_end - in_next < DEFLATE_MAX_MATCH_LEN)) { if (unlikely(in_end - in_next < DEFLATE_MAX_MATCH_LEN)) {
max_len = in_cur_end - in_next; max_len = in_end - in_next;
nice_len = MIN(max_len, nice_len); nice_len = MIN(nice_len, max_len);
} }
/* Find the longest match at the current position. */ /* Find the longest match at the current position. */
cur_len = hc_matchfinder_longest_match(&c->hc_mf, cur_len = hc_matchfinder_longest_match(&c->hc_mf,
in_cur_base, &in_cur_base,
in_next - in_cur_base, in_next,
DEFLATE_MIN_MATCH_LEN - 1, DEFLATE_MIN_MATCH_LEN - 1,
max_len, max_len,
nice_len, nice_len,
@ -1738,7 +1708,7 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
if (cur_len < DEFLATE_MIN_MATCH_LEN) { if (cur_len < DEFLATE_MIN_MATCH_LEN) {
/* No match found. Choose a literal. */ /* No match found. Choose a literal. */
deflate_choose_literal(c, *(in_next - 1), &litrunlen); deflate_choose_literal(c, *(in_next - 1), &litrunlen);
goto check_block_and_continue; continue;
} }
have_cur_match: have_cur_match:
@ -1750,12 +1720,12 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
deflate_choose_match(c, cur_len, cur_offset, deflate_choose_match(c, cur_len, cur_offset,
&litrunlen, &next_seq); &litrunlen, &next_seq);
in_next = hc_matchfinder_skip_positions(&c->hc_mf, in_next = hc_matchfinder_skip_positions(&c->hc_mf,
in_cur_base, &in_cur_base,
in_next - in_cur_base, in_next,
in_end - in_cur_base, in_end,
cur_len - 1, cur_len - 1,
next_hashes); next_hashes);
goto check_block_and_continue; continue;
} }
/* /*
@ -1774,13 +1744,13 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
* have two call sites, with longest_match() inlined at * have two call sites, with longest_match() inlined at
* each. * each.
*/ */
if (unlikely(in_cur_end - in_next < DEFLATE_MAX_MATCH_LEN)) { if (unlikely(in_end - in_next < DEFLATE_MAX_MATCH_LEN)) {
max_len = in_cur_end - in_next; max_len = in_end - in_next;
nice_len = MIN(max_len, nice_len); nice_len = MIN(nice_len, max_len);
} }
next_len = hc_matchfinder_longest_match(&c->hc_mf, next_len = hc_matchfinder_longest_match(&c->hc_mf,
in_cur_base, &in_cur_base,
in_next - in_cur_base, in_next,
cur_len, cur_len,
max_len, max_len,
nice_len, nice_len,
@ -1794,57 +1764,32 @@ deflate_compress_lazy(struct deflate_compressor * restrict c,
* Output a literal. Then the next match * Output a literal. Then the next match
* becomes the current match. */ * becomes the current match. */
deflate_choose_literal(c, *(in_next - 2), &litrunlen); deflate_choose_literal(c, *(in_next - 2), &litrunlen);
if (--items_remaining == 0) { items_remaining--;
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, block_begin,
items_remaining,
in_next == in_end);
block_begin = in_next - 1;
next_seq = c->sequences;
litrunlen = 0;
items_remaining = MAX_ITEMS_PER_BLOCK;
}
cur_len = next_len; cur_len = next_len;
cur_offset = next_offset; cur_offset = next_offset;
goto have_cur_match; goto have_cur_match;
} else {
/* No longer match at the next position.
* Output the current match. */
deflate_choose_match(c, cur_len, cur_offset,
&litrunlen, &next_seq);
in_next = hc_matchfinder_skip_positions(&c->hc_mf,
in_cur_base,
in_next - in_cur_base,
in_end - in_cur_base,
cur_len - 2,
next_hashes);
goto check_block_and_continue;
} }
check_block_and_continue: /* No longer match at the next position.
* Output the current match. */
deflate_choose_match(c, cur_len, cur_offset,
&litrunlen, &next_seq);
in_next = hc_matchfinder_skip_positions(&c->hc_mf,
&in_cur_base,
in_next,
in_end,
cur_len - 2,
next_hashes);
/* Check if it's time to output another block. */ /* Check if it's time to output another block. */
if (--items_remaining == 0) { } while (in_next != in_end && --items_remaining > 0);
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, block_begin,
items_remaining,
in_next == in_end);
block_begin = in_next; deflate_finish_sequence(next_seq, litrunlen);
next_seq = c->sequences; deflate_write_block(c, &os, in_block_begin,
litrunlen = 0; items_remaining, in_next == in_end);
items_remaining = MAX_ITEMS_PER_BLOCK;
}
} while (in_next != in_cur_end);
} while (in_next != in_end); } while (in_next != in_end);
/* Output the last block. */
if (items_remaining != MAX_ITEMS_PER_BLOCK) {
deflate_finish_sequence(next_seq, litrunlen);
deflate_write_block(c, &os, block_begin, items_remaining, true);
}
return deflate_flush_output(&os); return deflate_flush_output(&os);
} }

View File

@ -138,9 +138,10 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
* *
* @mf * @mf
* The matchfinder structure. * The matchfinder structure.
* @in_base * @in_base_p
* Pointer to the next byte in the input buffer to process _at the last * Location of a pointer which points to the place in the input data the
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_. * matchfinder currently stores positions relative to. This may be updated
* by this function.
* @cur_pos * @in_next
* The current position in the input buffer relative to @in_base (the * Pointer to the current position in the input buffer (the
* position of the sequence being matched against). * position of the sequence being matched against).
@ -165,16 +166,23 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
*/ */
static forceinline u32 static forceinline u32
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
const u8 * const restrict in_base, const u8 ** const restrict in_base_p,
const ptrdiff_t cur_pos, const u8 * const restrict in_next,
u32 best_len, u32 best_len,
const u32 max_len, const u32 max_len,
const u32 nice_len, const u32 nice_len,
const u32 max_search_depth, const u32 max_search_depth,
u32 next_hashes[restrict 2], u32 * const restrict next_hashes,
u32 * const restrict offset_ret) u32 * const restrict offset_ret)
{ {
const u8 *in_next = in_base + cur_pos; u32 cur_pos = in_next - *in_base_p;
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
hc_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
const u8 * const in_base = *in_base_p;
u32 depth_remaining = max_search_depth; u32 depth_remaining = max_search_depth;
const u8 *best_matchptr = in_next; const u8 *best_matchptr = in_next;
const mf_pos_t cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; const mf_pos_t cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
@ -317,9 +325,10 @@ out:
* *
* @mf * @mf
* The matchfinder structure. * The matchfinder structure.
* @in_base * @in_base_p
* Pointer to the next byte in the input buffer to process _at the last * Location of a pointer which points to the place in the input data the
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_. * matchfinder currently stores positions relative to. This may be updated
* by this function.
* @cur_pos * @in_next
* The current position in the input buffer relative to @in_base. * Pointer to the current position in the input buffer.
* @end_pos * @in_end
@ -335,38 +344,44 @@ out:
*/ */
static forceinline const u8 * static forceinline const u8 *
hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf, hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf,
const u8 * const restrict in_base, const u8 ** const restrict in_base_p,
const ptrdiff_t cur_pos, const u8 *in_next,
const ptrdiff_t end_pos, const u8 * const in_end,
const u32 count, const u32 count,
u32 next_hashes[restrict 2]) u32 * const restrict next_hashes)
{ {
const u8 *in_next = in_base + cur_pos; u32 cur_pos;
const u8 * const stop_ptr = in_next + count; u32 hash3, hash4;
u32 next_seq3, next_seq4;
u32 remaining = count;
if (likely(count + 5 <= end_pos - cur_pos)) { if (unlikely(count + 5 > in_end - in_next))
u32 hash3, hash4; return &in_next[count];
u32 next_seq3, next_seq4;
hash3 = next_hashes[0]; cur_pos = in_next - *in_base_p;
hash4 = next_hashes[1]; hash3 = next_hashes[0];
do { hash4 = next_hashes[1];
mf->hash3_tab[hash3] = in_next - in_base; do {
mf->next_tab[in_next - in_base] = mf->hash4_tab[hash4]; if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
mf->hash4_tab[hash4] = in_next - in_base; hc_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
mf->hash3_tab[hash3] = cur_pos;
mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
mf->hash4_tab[hash4] = cur_pos;
next_seq4 = load_u32_unaligned(++in_next); next_seq4 = load_u32_unaligned(++in_next);
next_seq3 = loaded_u32_to_u24(next_seq4); next_seq3 = loaded_u32_to_u24(next_seq4);
hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
cur_pos++;
} while (--remaining);
} while (in_next != stop_ptr); prefetchw(&mf->hash3_tab[hash3]);
prefetchw(&mf->hash4_tab[hash4]);
next_hashes[0] = hash3;
next_hashes[1] = hash4;
prefetchw(&mf->hash3_tab[hash3]); return in_next;
prefetchw(&mf->hash4_tab[hash4]);
next_hashes[0] = hash3;
next_hashes[1] = hash4;
}
return stop_ptr;
} }