mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
refactor: replace multiversioning
This commit is contained in:
parent
514ca07e64
commit
cac5b778e8
@ -62,7 +62,7 @@ if(NOT WIN32)
|
||||
if(LDD_EXE)
|
||||
execute_process(COMMAND ${LDD_EXE} --version ERROR_VARIABLE LDD_VERSION)
|
||||
if(LDD_VERSION MATCHES "musl libc")
|
||||
add_compile_definitions(DWARFS_MUSL=1 _LARGEFILE64_SOURCE)
|
||||
add_compile_definitions(_LARGEFILE64_SOURCE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -33,9 +33,10 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(DWARFS_SANITIZE_THREAD) && !defined(DWARFS_MUSL) && \
|
||||
defined(__x86_64__) && __has_attribute(target_clones)
|
||||
#define DWARFS_MULTIVERSIONING 1
|
||||
#if defined(__has_builtin)
|
||||
#if __has_builtin(__builtin_cpu_supports) && __has_attribute(target)
|
||||
#define DWARFS_USE_CPU_FEATURES 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
|
@ -140,9 +140,6 @@ class nilsimsa::impl {
|
||||
size_ += size;
|
||||
}
|
||||
|
||||
#ifdef DWARFS_MULTIVERSIONING
|
||||
// __attribute__((target_clones("arch=tigerlake", "default")))
|
||||
#endif
|
||||
void update_fast(uint8_t const* data, size_t size) {
|
||||
uint8_t w1 = w_[0];
|
||||
uint8_t w2 = w_[1];
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
@ -78,46 +79,111 @@ template <typename T, size_t N>
|
||||
int distance(std::array<T, N> const& a, std::array<T, N> const& b) {
|
||||
int d = 0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
d += folly::popcount(a[i] ^ b[i]);
|
||||
d += std::popcount(a[i] ^ b[i]);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
// Enable the runtime-dispatched popcnt code path only when CPU feature
// detection is available and the target is x86-64. The former
// target_clones-based multiversioning guard is gone; leaving its
// `#ifdef` lines behind would open conditionals that are never closed.
#if defined(DWARFS_USE_CPU_FEATURES) && defined(__x86_64__)
#define DWARFS_USE_POPCNT
#endif
|
||||
|
||||
// CPU feature levels that the dispatch helpers can select between.
enum class cpu_feature {
  none,   // no special instruction set; use the default implementation
  popcnt, // implementation compiled with the "popcnt" target is usable
};
|
||||
|
||||
// Returns the CPU feature level to dispatch on.
//
// With DWARFS_USE_POPCNT defined, the CPU is queried through
// __builtin_cpu_supports("popcnt") and the answer is kept in a
// function-local static, so the query runs only on the first call.
// Without it, cpu_feature::none is always returned.
cpu_feature detect_cpu_feature() {
#ifdef DWARFS_USE_POPCNT
  static cpu_feature const feature = [] {
    if (__builtin_cpu_supports("popcnt")) {
      return cpu_feature::popcnt;
    }
    return cpu_feature::none;
  }();
  return feature;
#else
  return cpu_feature::none;
#endif
}
|
||||
|
||||
template <typename Fn, typename... Args>
|
||||
decltype(auto) cpu_dispatch(Args&&... args) {
|
||||
#ifdef DWARFS_USE_POPCNT
|
||||
auto feature = detect_cpu_feature();
|
||||
switch (feature) {
|
||||
case cpu_feature::popcnt:
|
||||
return Fn::template call<cpu_feature::popcnt>(std::forward<Args>(args)...);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
int distance(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||
return Fn::template call<cpu_feature::none>(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
int distance_default(std::array<uint64_t, 4> const& a,
|
||||
std::array<uint64_t, 4> const& b) {
|
||||
return distance<uint64_t, 4>(a, b);
|
||||
}
|
||||
|
||||
#ifdef DWARFS_USE_POPCNT
// Same computation as distance_default, but compiled with the "popcnt"
// target attribute. Callers must only use it after detect_cpu_feature()
// reported cpu_feature::popcnt.
__attribute__((__target__("popcnt"))) int
distance_popcnt(std::array<uint64_t, 4> const& a,
                std::array<uint64_t, 4> const& b) {
  return distance<uint64_t, 4>(a, b);
}
#endif
|
||||
|
||||
struct distance_cpu {
|
||||
template <cpu_feature CpuFeature>
|
||||
static int
|
||||
call(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||
#ifdef DWARFS_USE_POPCNT
|
||||
if constexpr (CpuFeature == cpu_feature::popcnt) {
|
||||
return distance_popcnt(a, b);
|
||||
}
|
||||
#endif
|
||||
return distance_default(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
struct order_by_shortest_path_cpu {
|
||||
template <cpu_feature CpuFeature, typename GetI, typename GetK, typename Swap>
|
||||
static void
|
||||
call(size_t count, GetI const& geti, GetK const& getk, Swap const& swapper) {
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
auto bi = geti(i);
|
||||
int best_distance = std::numeric_limits<int>::max();
|
||||
size_t best_index = 0;
|
||||
|
||||
for (size_t k = i + 1; k < count; ++k) {
|
||||
auto bk = getk(k);
|
||||
auto d = distance_cpu::template call<CpuFeature>(*bi, *bk);
|
||||
if (d < best_distance) {
|
||||
best_distance = d;
|
||||
best_index = k;
|
||||
if (best_distance <= 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best_index > 0 && i + 1 != best_index) {
|
||||
swapper(i + 1, best_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
int distance(std::array<uint64_t, 4> const& a,
|
||||
std::array<uint64_t, 4> const& b) {
|
||||
return cpu_dispatch<distance_cpu>(a, b);
|
||||
}
|
||||
|
||||
template <typename GetI, typename GetK, typename Swap>
|
||||
void order_by_shortest_path(size_t count, GetI const& geti, GetK const& getk,
|
||||
Swap const& swapper) {
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
auto bi = geti(i);
|
||||
int best_distance = std::numeric_limits<int>::max();
|
||||
size_t best_index = 0;
|
||||
|
||||
for (size_t k = i + 1; k < count; ++k) {
|
||||
auto bk = getk(k);
|
||||
auto d = distance(*bi, *bk);
|
||||
if (d < best_distance) {
|
||||
best_distance = d;
|
||||
best_index = k;
|
||||
if (best_distance <= 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best_index > 0 && i + 1 != best_index) {
|
||||
swapper(i + 1, best_index);
|
||||
}
|
||||
}
|
||||
cpu_dispatch<order_by_shortest_path_cpu>(count, geti, getk, swapper);
|
||||
}
|
||||
|
||||
template <size_t Bits, typename BitsType = uint64_t,
|
||||
|
@ -45,17 +45,73 @@ int distance(std::array<T, N> const& a, std::array<T, N> const& b) {
|
||||
return d;
|
||||
}
|
||||
|
||||
// Enable the runtime-dispatched popcnt code path only when CPU feature
// detection is available and the target is x86-64. The former
// target_clones-based multiversioning guard is gone; leaving its
// `#ifdef` lines behind would open conditionals that are never closed.
#if defined(DWARFS_USE_CPU_FEATURES) && defined(__x86_64__)
#define DWARFS_USE_POPCNT
#endif
|
||||
|
||||
// CPU feature levels that the benchmarked dispatch helpers select between.
enum class cpu_feature {
  none,   // no special instruction set; use the default implementation
  popcnt, // implementation compiled with the "popcnt" target is usable
};
|
||||
|
||||
// Returns the CPU feature level to dispatch on.
//
// With DWARFS_USE_POPCNT defined, the CPU is queried through
// __builtin_cpu_supports("popcnt") and the answer is kept in a
// function-local static, so the query runs only on the first call.
// Without it, cpu_feature::none is always returned.
cpu_feature detect_cpu_feature() {
#ifdef DWARFS_USE_POPCNT
  static cpu_feature const feature = [] {
    if (__builtin_cpu_supports("popcnt")) {
      return cpu_feature::popcnt;
    }
    return cpu_feature::none;
  }();
  return feature;
#else
  return cpu_feature::none;
#endif
}
|
||||
|
||||
template <typename Fn, typename... Args>
|
||||
decltype(auto) cpu_dispatch(Args&&... args) {
|
||||
#ifdef DWARFS_USE_POPCNT
|
||||
auto feature = detect_cpu_feature();
|
||||
switch (feature) {
|
||||
case cpu_feature::popcnt:
|
||||
return Fn::template call<cpu_feature::popcnt>(std::forward<Args>(args)...);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
int distance(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||
return Fn::template call<cpu_feature::none>(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
int distance_default(std::array<uint64_t, 4> const& a,
|
||||
std::array<uint64_t, 4> const& b) {
|
||||
return distance<uint64_t, 4>(a, b);
|
||||
}
|
||||
|
||||
#ifdef DWARFS_USE_POPCNT
// Same computation as distance_default, but compiled with the "popcnt"
// target attribute. Callers must only use it after detect_cpu_feature()
// reported cpu_feature::popcnt.
__attribute__((__target__("popcnt"))) int
distance_popcnt(std::array<uint64_t, 4> const& a,
                std::array<uint64_t, 4> const& b) {
  return distance<uint64_t, 4>(a, b);
}
#endif
|
||||
|
||||
struct distance_cpu {
|
||||
template <cpu_feature CpuFeature>
|
||||
static int
|
||||
call(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||
#ifdef DWARFS_USE_POPCNT
|
||||
if constexpr (CpuFeature == cpu_feature::popcnt) {
|
||||
return distance_popcnt(a, b);
|
||||
}
|
||||
#endif
|
||||
return distance_default(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
int distance(std::array<uint64_t, 4> const& a,
|
||||
std::array<uint64_t, 4> const& b) {
|
||||
return cpu_dispatch<distance_cpu>(a, b);
|
||||
}
|
||||
void nilsimsa_distance(::benchmark::State& state) {
|
||||
std::independent_bits_engine<std::mt19937_64,
|
||||
std::numeric_limits<uint64_t>::digits, uint64_t>
|
||||
@ -74,6 +130,38 @@ void nilsimsa_distance(::benchmark::State& state) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DWARFS_USE_POPCNT
// Benchmarks distance_cpu::call with the CPU feature resolved once, outside
// the timed loop, so the per-call runtime dispatch is not part of the
// measurement.
void nilsimsa_distance_cpu(::benchmark::State& state) {
  std::independent_bits_engine<std::mt19937_64,
                               std::numeric_limits<uint64_t>::digits, uint64_t>
      rng;
  static constexpr unsigned const kNumData{1024};
  // Default-seeded generator: the input data is the same on every run.
  std::vector<std::array<uint64_t, 4>> data(kNumData);
  for (auto& a : data) {
    std::generate(begin(a), end(a), std::ref(rng));
  }
  // Two cursors offset by one cycle through the data set, pairing each
  // element with its successor.
  unsigned i{0}, k{1};
  int d{};

  switch (detect_cpu_feature()) {
  case cpu_feature::popcnt:
    for (auto _ : state) {
      ::benchmark::DoNotOptimize(
          d = distance_cpu::template call<cpu_feature::popcnt>(
              data[i++ % kNumData], data[k++ % kNumData]));
    }
    break;
  default:
    for (auto _ : state) {
      ::benchmark::DoNotOptimize(
          d = distance_cpu::template call<cpu_feature::none>(
              data[i++ % kNumData], data[k++ % kNumData]));
    }
    break;
  }
}
#endif
|
||||
|
||||
void nilsimsa_update(::benchmark::State& state) {
|
||||
std::independent_bits_engine<std::mt19937_64,
|
||||
std::numeric_limits<uint8_t>::digits, uint16_t>
|
||||
@ -92,6 +180,9 @@ void nilsimsa_update(::benchmark::State& state) {
|
||||
} // namespace
|
||||
|
||||
BENCHMARK(nilsimsa_distance);
|
||||
#ifdef DWARFS_USE_POPCNT
|
||||
BENCHMARK(nilsimsa_distance_cpu);
|
||||
#endif
|
||||
BENCHMARK(nilsimsa_update);
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
Loading…
x
Reference in New Issue
Block a user