mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
Update to latest version of libfsst
This commit is contained in:
parent
21a6d688b4
commit
bd09004889
@ -105,50 +105,58 @@ SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[]
|
|||||||
if (rnd128(i) > sampleFrac) continue;
|
if (rnd128(i) > sampleFrac) continue;
|
||||||
}
|
}
|
||||||
if (cur < end) {
|
if (cur < end) {
|
||||||
u16 pos2 = 255, pos1 = st->findLongestSymbol(cur, end);
|
u8* start = cur;
|
||||||
cur += st->symbols[pos1].length();
|
u16 code2 = 255, code1 = st->findLongestSymbol(cur, end);
|
||||||
gain += (int) (st->symbols[pos1].length()-(1+isEscapeCode(pos1)));
|
cur += st->symbols[code1].length();
|
||||||
|
gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1)));
|
||||||
while (true) {
|
while (true) {
|
||||||
u8* old = cur;
|
// count single symbol (i.e. an option is not extending it)
|
||||||
counters.count1Inc(pos1);
|
counters.count1Inc(code1);
|
||||||
|
|
||||||
|
// as an alternative, consider just using the next byte..
|
||||||
|
if (st->symbols[code1].length() != 1) // .. but do not count single byte symbols doubly
|
||||||
|
counters.count1Inc(*start);
|
||||||
|
|
||||||
if (cur==end) {
|
if (cur==end) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// count single symbol (i.e. an option is not extending it)
|
|
||||||
if (st->symbols[pos1].length() != 1)
|
// now match a new symbol
|
||||||
counters.count1Inc(*cur);
|
start = cur;
|
||||||
if (cur<end-7) {
|
if (cur<end-7) {
|
||||||
u64 word = fsst_unaligned_load(cur);
|
u64 word = fsst_unaligned_load(cur);
|
||||||
size_t pos = word & 0xFFFFFF;
|
size_t code = word & 0xFFFFFF;
|
||||||
size_t idx = FSST_HASH(pos)&(st->hashTabSize-1);
|
size_t idx = FSST_HASH(code)&(st->hashTabSize-1);
|
||||||
Symbol s = st->hashTab[idx];
|
Symbol s = st->hashTab[idx];
|
||||||
pos2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
||||||
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
||||||
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
||||||
pos2 = s.code();
|
code2 = s.code();
|
||||||
cur += s.length();
|
cur += s.length();
|
||||||
} else if (pos2 >= FSST_CODE_BASE) {
|
} else if (code2 >= FSST_CODE_BASE) {
|
||||||
cur += 2;
|
cur += 2;
|
||||||
} else {
|
} else {
|
||||||
pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
code2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
||||||
cur += 1;
|
cur += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(cur<end);
|
code2 = st->findLongestSymbol(cur, end);
|
||||||
pos2 = st->findLongestSymbol(cur, end);
|
cur += st->symbols[code2].length();
|
||||||
cur += st->symbols[pos2].length();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute compressed output size
|
// compute compressed output size
|
||||||
gain += ((int) (cur-old))-(1+isEscapeCode(pos2));
|
gain += ((int) (cur-start))-(1+isEscapeCode(code2));
|
||||||
|
|
||||||
// now count the subsequent two symbols we encode as an extension possibility
|
// now count the subsequent two symbols we encode as an extension possibility
|
||||||
if (sampleFrac < 128) { // no need to count pairs in final round
|
if (sampleFrac < 128) { // no need to count pairs in final round
|
||||||
counters.count2Inc(pos1, pos2);
|
// consider the symbol that is the concatenation of the two last symbols
|
||||||
if ((cur-old) > 1) // do not count escaped bytes doubly
|
counters.count2Inc(code1, code2);
|
||||||
counters.count2Inc(pos1, *old);
|
|
||||||
|
// as an alternative, consider just extending with the next byte..
|
||||||
|
if ((cur-start) > 1) // ..but do not count single byte extensions doubly
|
||||||
|
counters.count2Inc(code1, *start);
|
||||||
}
|
}
|
||||||
pos1 = pos2;
|
code1 = code2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user