Enable multiversioned nilsimsa similarity computation

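This significantly improves inode ordering speed. Overall inode
ordering is about twice as fast as before. The two profiles below show
the hottest symbols before (mkdwarfs-original) and after
(mkdwarfs-nilsimsa) this change.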

    32.88%  mkdwarfs-origin  mkdwarfs-original    [.] dwarfs::nilsimsa::similarity
    18.51%  mkdwarfs-origin  mkdwarfs-original    [.] dwarfs::inode_manager_<dwarfs::prod_logger_policy>::order_inodes_by_nilsimsa
     4.07%  blockify1        mkdwarfs-original    [.] dwarfs::block_manager_<dwarfs::prod_logger_policy>::segment_and_add_data
     3.31%  mkdwarfs-origin  mkdwarfs-original    [.] dwarfs::(anonymous namespace)::inode_::nilsimsa_similarity_hash
     2.91%  blockify1        mkdwarfs-original    [.] dwarfs::active_block::append
     2.65%  writer           libcrypto.so.1.1     [.] sha512_block_data_order_avx2

    24.19%  mkdwarfs-nilsim  mkdwarfs-nilsimsa    [.] dwarfs::nilsimsa::similarity
    11.27%  mkdwarfs-nilsim  mkdwarfs-nilsimsa    [.] dwarfs::inode_manager_<dwarfs::prod_logger_policy>::order_inodes_by_nilsimsa
     5.42%  blockify1        mkdwarfs-nilsimsa    [.] dwarfs::block_manager_<dwarfs::prod_logger_policy>::segment_and_add_data
     4.18%  mkdwarfs-nilsim  mkdwarfs-nilsimsa    [.] dwarfs::(anonymous namespace)::inode_::nilsimsa_similarity_hash
     3.84%  blockify1        mkdwarfs-nilsimsa    [.] dwarfs::active_block::append
     3.47%  writer           libcrypto.so.1.1     [.] sha512_block_data_order_avx2
This commit is contained in:
Marcus Holland-Moritz 2021-03-09 17:11:32 +01:00
parent ef06b6749e
commit 22d41effa6
3 changed files with 45 additions and 12 deletions

View File

@ -28,3 +28,11 @@
#define DWARFS_LIKELY(x) (x)
#define DWARFS_UNLIKELY(x) (x)
#endif
// Define DWARFS_SANITIZE_THREAD to 1 when the build appears to be
// instrumented by ThreadSanitizer; leave it undefined otherwise.
// GCC announces -fsanitize=thread through the predefined macro
// __SANITIZE_THREAD__, Clang through __has_feature(thread_sanitizer).
#if defined(__SANITIZE_THREAD__)
#define DWARFS_SANITIZE_THREAD 1
#elif defined(__has_feature)
// Deliberately nested: __has_feature(...) may only be expanded after we know
// the compiler provides it, otherwise the #if expression itself is ill-formed.
#if __has_feature(thread_sanitizer)
#define DWARFS_SANITIZE_THREAD 1
#endif
#endif

View File

@ -25,6 +25,8 @@
#include <memory>
#include <vector>
#include "dwarfs/compiler.h"
namespace dwarfs {
class nilsimsa {
@ -35,7 +37,14 @@ class nilsimsa {
void update(uint8_t const* data, size_t size);
std::vector<uint64_t> finalize() const;
static int similarity(uint64_t const* a, uint64_t const* b);
#ifndef DWARFS_SANITIZE_THREAD
__attribute__((target("popcnt"))) static int
similarity(uint64_t const* a, uint64_t const* b);
__attribute__((target("default")))
#endif
static int
similarity(uint64_t const* a, uint64_t const* b);
private:
class impl;

View File

@ -57,15 +57,6 @@ uint8_t tran3(uint8_t a, uint8_t b, uint8_t c, uint8_t n) {
return ((TT53[(a + n) & 0xFF] ^ TT53[b] * (n + n + 1)) + TT53[c ^ TT53[n]]);
}
// TODO: these helpers rely on gcc/clang builtins; porting them to other
// compilers should be straightforward.

// Number of set bits in `value` (unsigned long overload).
[[maybe_unused]] int popcount(unsigned long value) {
  int const set_bits = __builtin_popcountl(value);
  return set_bits;
}

// Number of set bits in `value` (unsigned long long overload).
[[maybe_unused]] int popcount(unsigned long long value) {
  int const set_bits = __builtin_popcountll(value);
  return set_bits;
}
} // namespace
class nilsimsa::impl {
@ -198,14 +189,39 @@ void nilsimsa::update(uint8_t const* data, size_t size) {
// Forwards to the implementation object and hands back whatever its
// finalize() yields.
std::vector<uint64_t> nilsimsa::finalize() const {
  std::vector<uint64_t> digest = impl_->finalize();
  return digest;
}
#ifndef DWARFS_SANITIZE_THREAD
// Variant of similarity() built with the "popcnt" target enabled; it is
// paired with the "default" variant defined right after it.
//
// XORs the four 64-bit words of `a` and `b`, counts the bits that differ and
// returns 255 minus that count.
__attribute__((target("popcnt"))) int
nilsimsa::similarity(uint64_t const* a, uint64_t const* b) {
  int differing = 0;

  for (int word = 0; word != 4; ++word) {
    uint64_t const delta = a[word] ^ b[word];

    // Use the builtin whose parameter type is the one uint64_t aliases here.
    if constexpr (std::is_same_v<unsigned long, uint64_t>) {
      differing += __builtin_popcountl(delta);
    } else if constexpr (std::is_same_v<unsigned long long, uint64_t>) {
      differing += __builtin_popcountll(delta);
    }
  }

  return 255 - differing;
}
__attribute__((target("default")))
#endif
// Baseline implementation of the similarity score. It carries the "default"
// target attribute placed just above it, and is the only version compiled
// when DWARFS_SANITIZE_THREAD is defined.
//
// XORs the four 64-bit words of `a` and `b`, counts the bits that differ and
// returns 255 minus that count (255 for identical inputs, -1 when all 256
// bits differ). It must produce exactly the same value as the popcnt-targeted
// variant, so every word is counted once and there is a single return.
int nilsimsa::similarity(uint64_t const* a, uint64_t const* b) {
  int bits = 0;

  for (int i = 0; i < 4; ++i) {
    // Use the builtin whose parameter type is the one uint64_t aliases here.
    if constexpr (std::is_same_v<unsigned long, uint64_t>) {
      bits += __builtin_popcountl(a[i] ^ b[i]);
    } else if constexpr (std::is_same_v<unsigned long long, uint64_t>) {
      bits += __builtin_popcountll(a[i] ^ b[i]);
    }
  }

  return 255 - bits;
}
// similarity() picks its popcount builtin with `if constexpr` on exactly these
// two conditions; if neither held, no bits would be counted at all. Fail the
// build with an explicit explanation instead.
static_assert(std::is_same_v<unsigned long, uint64_t> ||
                  std::is_same_v<unsigned long long, uint64_t>,
              "nilsimsa::similarity requires uint64_t to be unsigned long or "
              "unsigned long long");
} // namespace dwarfs