mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-15 23:35:42 -04:00
Simplify and speed up nilsimsa computation
This commit is contained in:
parent
b4becd5d48
commit
f9a1db48ee
@ -27,7 +27,7 @@ namespace dwarfs {
|
||||
namespace {
|
||||
|
||||
// Nilsimsa transition table
|
||||
constexpr std::array<uint8_t, 256> TT53{
|
||||
constexpr std::array<uint8_t, 256> const TT53{
|
||||
{0x02, 0xD6, 0x9E, 0x6F, 0xF9, 0x1D, 0x04, 0xAB, 0xD0, 0x22, 0x16, 0x1F,
|
||||
0xD8, 0x73, 0xA1, 0xAC, 0x3B, 0x70, 0x62, 0x96, 0x1E, 0x6E, 0x8F, 0x39,
|
||||
0x9D, 0x05, 0x14, 0x4A, 0xA6, 0xBE, 0xAE, 0x0E, 0xCF, 0xB9, 0x9C, 0x9A,
|
||||
@ -51,7 +51,7 @@ constexpr std::array<uint8_t, 256> TT53{
|
||||
0xF1, 0xCD, 0xE4, 0x6A, 0xE7, 0xA9, 0xFD, 0xC4, 0x37, 0xC8, 0xD2, 0xF6,
|
||||
0xDF, 0x58, 0x72, 0x4E}};
|
||||
|
||||
uint8_t tran3(uint8_t a, uint8_t b, uint8_t c, uint8_t n) {
|
||||
constexpr inline uint8_t tran3(uint8_t a, uint8_t b, uint8_t c, uint8_t n) {
|
||||
return ((TT53[(a + n) & 0xFF] ^ TT53[b] * (n + n + 1)) + TT53[c ^ TT53[n]]);
|
||||
}
|
||||
|
||||
@ -98,13 +98,13 @@ class nilsimsa::impl {
|
||||
|
||||
private:
|
||||
void update_slow(uint8_t const* data, size_t size) {
|
||||
uint_fast8_t w1 = w_[0];
|
||||
uint_fast8_t w2 = w_[1];
|
||||
uint_fast8_t w3 = w_[2];
|
||||
uint_fast8_t w4 = w_[3];
|
||||
uint8_t w1 = w_[0];
|
||||
uint8_t w2 = w_[1];
|
||||
uint8_t w3 = w_[2];
|
||||
uint8_t w4 = w_[3];
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
uint_fast8_t w0 = data[i];
|
||||
uint8_t w0 = data[i];
|
||||
|
||||
if (size_ + i > 1) {
|
||||
++acc_[tran3(w0, w1, w2, 0)];
|
||||
@ -137,48 +137,43 @@ class nilsimsa::impl {
|
||||
size_ += size;
|
||||
}
|
||||
|
||||
// Macro whose expansion is the full definition of the member function
// update_fast(data, size), so that the same body can be emitted at more than
// one place (it is expanded twice when DWARFS_MULTIVERSIONING is defined).
//
// The generated function:
//   - copies the four saved window bytes w_[0..3] into locals w1..w4,
//   - for each input byte w0, increments eight acc_ counters whose indices
//     come from tran3() applied to combinations of w0 and w1..w4 with
//     selectors 0..7,
//   - shifts the window by one byte (w1 becomes the byte just consumed),
//   - writes the window back to w_ and adds `size` to size_.
//
// The expansion ends in `static_assert(true, "")` without a semicolon, so the
// `;` written after each use of the macro terminates that declaration.
//
// NOTE(review): in this diff view the macro appears to be the pre-change
// implementation that the plain update_fast method further down replaces;
// confirm against the repository before relying on it.
#define DWARFS_NILSIMSA_UPDATE_FAST_IMPL \
|
||||
void update_fast(uint8_t const* data, size_t size) { \
|
||||
uint8_t w1 = w_[0]; \
|
||||
uint8_t w2 = w_[1]; \
|
||||
uint8_t w3 = w_[2]; \
|
||||
uint8_t w4 = w_[3]; \
|
||||
\
|
||||
for (size_t i = 0; i < size; ++i) { \
|
||||
uint8_t w0 = data[i]; \
|
||||
\
|
||||
++acc_[tran3(w0, w1, w2, 0)]; \
|
||||
++acc_[tran3(w0, w1, w3, 1)]; \
|
||||
++acc_[tran3(w0, w2, w3, 2)]; \
|
||||
++acc_[tran3(w0, w1, w4, 3)]; \
|
||||
++acc_[tran3(w0, w2, w4, 4)]; \
|
||||
++acc_[tran3(w0, w3, w4, 5)]; \
|
||||
++acc_[tran3(w4, w1, w0, 6)]; \
|
||||
++acc_[tran3(w4, w3, w0, 7)]; \
|
||||
\
|
||||
w4 = w3; \
|
||||
w3 = w2; \
|
||||
w2 = w1; \
|
||||
w1 = w0; \
|
||||
} \
|
||||
\
|
||||
w_[0] = w1; \
|
||||
w_[1] = w2; \
|
||||
w_[2] = w3; \
|
||||
w_[3] = w4; \
|
||||
\
|
||||
size_ += size; \
|
||||
} \
|
||||
static_assert(true, "")
|
||||
|
||||
// Emits update_fast from DWARFS_NILSIMSA_UPDATE_FAST_IMPL.
// With DWARFS_MULTIVERSIONING defined, the body is expanded twice: once
// tagged target("avx") and once tagged target("default"). Without it, the
// body is expanded once with no target attribute.
// NOTE(review): presumably this relies on the compiler's function
// multiversioning to pick a variant at run time — confirm for the supported
// toolchains.
#ifdef DWARFS_MULTIVERSIONING
|
||||
__attribute__((target("avx"))) DWARFS_NILSIMSA_UPDATE_FAST_IMPL;
|
||||
__attribute__((target("default")))
|
||||
// __attribute__((target_clones("arch=tigerlake", "default")))
|
||||
#endif
|
||||
DWARFS_NILSIMSA_UPDATE_FAST_IMPL;
|
||||
void update_fast(uint8_t const* data, size_t size) {
|
||||
uint8_t w1 = w_[0];
|
||||
uint8_t w2 = w_[1];
|
||||
uint8_t w3 = w_[2];
|
||||
uint8_t w4 = w_[3];
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
uint8_t w0 = data[i];
|
||||
|
||||
++acc_[tran3(w0, w1, w2, 0)];
|
||||
++acc_[tran3(w0, w1, w3, 1)];
|
||||
++acc_[tran3(w0, w1, w4, 3)];
|
||||
++acc_[tran3(w0, w2, w3, 2)];
|
||||
++acc_[tran3(w0, w2, w4, 4)];
|
||||
++acc_[tran3(w0, w3, w4, 5)];
|
||||
++acc_[tran3(w4, w1, w0, 6)];
|
||||
++acc_[tran3(w4, w3, w0, 7)];
|
||||
|
||||
w4 = w3;
|
||||
w3 = w2;
|
||||
w2 = w1;
|
||||
w1 = w0;
|
||||
}
|
||||
|
||||
w_[0] = w1;
|
||||
w_[1] = w2;
|
||||
w_[2] = w3;
|
||||
w_[3] = w4;
|
||||
|
||||
size_ += size;
|
||||
}
|
||||
|
||||
std::array<size_t, 256> acc_;
|
||||
std::array<uint_fast8_t, 4> w_;
|
||||
std::array<uint8_t, 4> w_;
|
||||
size_t size_{0};
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user