mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-08-04 02:06:31 -04:00

Further improve the way the near-optimal parser estimates symbol costs:
- When setting a block's initial costs, weigh the default costs and the previous block's costs differently, depending on how different the current block seems to be from the previous block.
- When determining the "default" costs, take into account how many literals appear in the block and how frequent matches seem to be.
- Increase BIT_COST from 8 to 16, to increase precision in calculations.
45 lines
1.2 KiB
Python
Executable File
45 lines
1.2 KiB
Python
Executable File
#!/usr/bin/env python3
#
# This script computes the default litlen symbol costs for the near-optimal
# parser.
#
# It prints the C table `default_litlen_costs[]` to standard output: one
# entry per assumed match probability in MATCH_PROBS, each holding a literal
# cost for every possible count of used literals plus a single length-symbol
# cost.

from math import log2

BIT_COST = 16  # Must match BIT_COST in deflate_compress.c
NUM_LEN_SLOTS = 29
MATCH_PROBS = [0.25, 0.50, 0.75]

# used_lits_to_lit_cost[] is indexed by the number of used literals, 0..256.
NUM_LIT_COUNTS = 257
# Number of cost values emitted per row of the generated initializer.
COSTS_PER_ROW = 8


def symbol_cost(prob: float) -> int:
    """Return the cost of a symbol that occurs with probability *prob*.

    The cost is -log2(prob) scaled by BIT_COST and truncated to an integer.
    """
    return int(-log2(prob) * BIT_COST)


def lit_cost(match_prob: float, num_used_literals: int) -> int:
    """Return the cost of one literal symbol.

    The non-match probability (1 - match_prob) is split evenly among the
    used literals.  A count of 0 is treated as 1 so that the division is
    always defined; entries 0 and 1 of the table are therefore equal.
    """
    return symbol_cost((1 - match_prob) / max(num_used_literals, 1))


def len_sym_cost(match_prob: float) -> int:
    """Return the cost of one length symbol.

    The match probability is split evenly among NUM_LEN_SLOTS length slots.
    """
    return symbol_cost(match_prob / NUM_LEN_SLOTS)


def format_table() -> str:
    """Return the full C source text of `default_litlen_costs[]`.

    The returned string has no trailing newline; print() supplies it.
    """
    # NOTE(review): the struct member lines are tab-indented here to match
    # the rest of the emitted table -- confirm against the checked-in C code.
    lines = [
        'static const struct {',
        f'\tu8 used_lits_to_lit_cost[{NUM_LIT_COUNTS}];',
        '\tu8 len_sym_cost;',
        '} default_litlen_costs[] = {',
    ]
    # The first entry opens with a bare brace; every later entry's opener
    # also closes the entry before it.
    opener = '\t{'
    for match_prob in MATCH_PROBS:
        lines.append(f'{opener} /* match_prob = {match_prob} */')
        lines.append('\t\t.used_lits_to_lit_cost = {')
        costs = [lit_cost(match_prob, n) for n in range(NUM_LIT_COUNTS)]
        for start in range(0, len(costs), COSTS_PER_ROW):
            row = costs[start:start + COSTS_PER_ROW]
            lines.append('\t\t\t' + ', '.join(map(str, row)) + ',')
        lines.append('\t\t},')
        lines.append(f'\t\t.len_sym_cost = {len_sym_cost(match_prob)},')
        opener = '\t}, {'
    lines.append('\t},')
    lines.append('};')
    return '\n'.join(lines)


def main() -> None:
    """Print the generated table to standard output."""
    print(format_table())


if __name__ == '__main__':
    main()