From f9a1db48ee392ebd917e8c3c8c6b268bc9cf0175 Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz
Date: Mon, 4 Sep 2023 22:51:02 +0200
Subject: [PATCH] Simplify and speed up nilsimsa computation

---
Reviewer notes (text between the "---" line and the diff is not part of the
commit):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; the hunk line counts and the diffstat were used as a
  cross-check. Exact original whitespace (e.g. alignment of the macro
  backslashes) could not be recovered.
- The template arguments of std::array and the author's e-mail address
  appear to have been lost from this copy, which would explain why the
  "-"/"+" lines for w_ look identical. TODO: restore from the upstream commit.
- The new update_fast() performs the n=3 increment before the n=2 one;
  counter increments commute, so the resulting acc_ is unaffected.
- update_fast() has no counterpart to the "size_ + i > 1" check visible in
  update_slow(); presumably the caller only uses it once enough bytes have
  been seen - confirm against the call site, which is outside this patch.
- With the attribute commented out, the DWARFS_MULTIVERSIONING #ifdef now
  guards only a comment.

 src/dwarfs/nilsimsa.cpp | 85 +++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 45 deletions(-)

diff --git a/src/dwarfs/nilsimsa.cpp b/src/dwarfs/nilsimsa.cpp
index b169d7ee..ae766489 100644
--- a/src/dwarfs/nilsimsa.cpp
+++ b/src/dwarfs/nilsimsa.cpp
@@ -27,7 +27,7 @@ namespace dwarfs {
 namespace {
 
 // Nilsimsa transition table
-constexpr std::array TT53{
+constexpr std::array const TT53{
     {0x02, 0xD6, 0x9E, 0x6F, 0xF9, 0x1D, 0x04, 0xAB, 0xD0, 0x22, 0x16, 0x1F,
      0xD8, 0x73, 0xA1, 0xAC, 0x3B, 0x70, 0x62, 0x96, 0x1E, 0x6E, 0x8F, 0x39,
      0x9D, 0x05, 0x14, 0x4A, 0xA6, 0xBE, 0xAE, 0x0E, 0xCF, 0xB9, 0x9C, 0x9A,
@@ -51,7 +51,7 @@ constexpr std::array TT53{
      0xF1, 0xCD, 0xE4, 0x6A, 0xE7, 0xA9, 0xFD, 0xC4, 0x37, 0xC8, 0xD2, 0xF6,
      0xDF, 0x58, 0x72, 0x4E}};
 
-uint8_t tran3(uint8_t a, uint8_t b, uint8_t c, uint8_t n) {
+constexpr inline uint8_t tran3(uint8_t a, uint8_t b, uint8_t c, uint8_t n) {
   return ((TT53[(a + n) & 0xFF] ^ TT53[b] * (n + n + 1)) + TT53[c ^ TT53[n]]);
 }
 
@@ -98,13 +98,13 @@ class nilsimsa::impl {
 
  private:
   void update_slow(uint8_t const* data, size_t size) {
-    uint_fast8_t w1 = w_[0];
-    uint_fast8_t w2 = w_[1];
-    uint_fast8_t w3 = w_[2];
-    uint_fast8_t w4 = w_[3];
+    uint8_t w1 = w_[0];
+    uint8_t w2 = w_[1];
+    uint8_t w3 = w_[2];
+    uint8_t w4 = w_[3];
 
     for (size_t i = 0; i < size; ++i) {
-      uint_fast8_t w0 = data[i];
+      uint8_t w0 = data[i];
 
       if (size_ + i > 1) {
         ++acc_[tran3(w0, w1, w2, 0)];
@@ -137,48 +137,43 @@ class nilsimsa::impl {
     size_ += size;
   }
 
-#define DWARFS_NILSIMSA_UPDATE_FAST_IMPL \
-  void update_fast(uint8_t const* data, size_t size) { \
-    uint8_t w1 = w_[0]; \
-    uint8_t w2 = w_[1]; \
-    uint8_t w3 = w_[2]; \
-    uint8_t w4 = w_[3]; \
- \
-    for (size_t i = 0; i < size; ++i) { \
-      uint8_t w0 = data[i]; \
- \
-      ++acc_[tran3(w0, w1, w2, 0)]; \
-      ++acc_[tran3(w0, w1, w3, 1)]; \
-      ++acc_[tran3(w0, w2, w3, 2)]; \
-      ++acc_[tran3(w0, w1, w4, 3)]; \
-      ++acc_[tran3(w0, w2, w4, 4)]; \
-      ++acc_[tran3(w0, w3, w4, 5)]; \
-      ++acc_[tran3(w4, w1, w0, 6)]; \
-      ++acc_[tran3(w4, w3, w0, 7)]; \
- \
-      w4 = w3; \
-      w3 = w2; \
-      w2 = w1; \
-      w1 = w0; \
-    } \
- \
-    w_[0] = w1; \
-    w_[1] = w2; \
-    w_[2] = w3; \
-    w_[3] = w4; \
- \
-    size_ += size; \
-  } \
-  static_assert(true, "")
-
 #ifdef DWARFS_MULTIVERSIONING
-  __attribute__((target("avx"))) DWARFS_NILSIMSA_UPDATE_FAST_IMPL;
-  __attribute__((target("default")))
+  // __attribute__((target_clones("arch=tigerlake", "default")))
 #endif
-  DWARFS_NILSIMSA_UPDATE_FAST_IMPL;
+  void update_fast(uint8_t const* data, size_t size) {
+    uint8_t w1 = w_[0];
+    uint8_t w2 = w_[1];
+    uint8_t w3 = w_[2];
+    uint8_t w4 = w_[3];
+
+    for (size_t i = 0; i < size; ++i) {
+      uint8_t w0 = data[i];
+
+      ++acc_[tran3(w0, w1, w2, 0)];
+      ++acc_[tran3(w0, w1, w3, 1)];
+      ++acc_[tran3(w0, w1, w4, 3)];
+      ++acc_[tran3(w0, w2, w3, 2)];
+      ++acc_[tran3(w0, w2, w4, 4)];
+      ++acc_[tran3(w0, w3, w4, 5)];
+      ++acc_[tran3(w4, w1, w0, 6)];
+      ++acc_[tran3(w4, w3, w0, 7)];
+
+      w4 = w3;
+      w3 = w2;
+      w2 = w1;
+      w1 = w0;
+    }
+
+    w_[0] = w1;
+    w_[1] = w2;
+    w_[2] = w3;
+    w_[3] = w4;
+
+    size_ += size;
+  }
 
   std::array acc_;
-  std::array w_;
+  std::array w_;
   size_t size_{0};
 };
 