mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
refactor: replace multiversioning
This commit is contained in:
parent
514ca07e64
commit
cac5b778e8
@ -62,7 +62,7 @@ if(NOT WIN32)
|
|||||||
if(LDD_EXE)
|
if(LDD_EXE)
|
||||||
execute_process(COMMAND ${LDD_EXE} --version ERROR_VARIABLE LDD_VERSION)
|
execute_process(COMMAND ${LDD_EXE} --version ERROR_VARIABLE LDD_VERSION)
|
||||||
if(LDD_VERSION MATCHES "musl libc")
|
if(LDD_VERSION MATCHES "musl libc")
|
||||||
add_compile_definitions(DWARFS_MUSL=1 _LARGEFILE64_SOURCE)
|
add_compile_definitions(_LARGEFILE64_SOURCE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -33,9 +33,10 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(DWARFS_SANITIZE_THREAD) && !defined(DWARFS_MUSL) && \
|
#if defined(__has_builtin)
|
||||||
defined(__x86_64__) && __has_attribute(target_clones)
|
#if __has_builtin(__builtin_cpu_supports) && __has_attribute(target)
|
||||||
#define DWARFS_MULTIVERSIONING 1
|
#define DWARFS_USE_CPU_FEATURES 1
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
@ -140,9 +140,6 @@ class nilsimsa::impl {
|
|||||||
size_ += size;
|
size_ += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DWARFS_MULTIVERSIONING
|
|
||||||
// __attribute__((target_clones("arch=tigerlake", "default")))
|
|
||||||
#endif
|
|
||||||
void update_fast(uint8_t const* data, size_t size) {
|
void update_fast(uint8_t const* data, size_t size) {
|
||||||
uint8_t w1 = w_[0];
|
uint8_t w1 = w_[0];
|
||||||
uint8_t w2 = w_[1];
|
uint8_t w2 = w_[1];
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <bit>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
@ -78,46 +79,111 @@ template <typename T, size_t N>
|
|||||||
int distance(std::array<T, N> const& a, std::array<T, N> const& b) {
|
int distance(std::array<T, N> const& a, std::array<T, N> const& b) {
|
||||||
int d = 0;
|
int d = 0;
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
d += folly::popcount(a[i] ^ b[i]);
|
d += std::popcount(a[i] ^ b[i]);
|
||||||
}
|
}
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DWARFS_MULTIVERSIONING
|
#if defined(DWARFS_USE_CPU_FEATURES) && defined(__x86_64__)
|
||||||
#ifdef __clang__
|
#define DWARFS_USE_POPCNT
|
||||||
__attribute__((target_clones("avx512vpopcntdq", "popcnt", "default")))
|
#endif
|
||||||
|
|
||||||
|
enum class cpu_feature {
|
||||||
|
none,
|
||||||
|
popcnt,
|
||||||
|
};
|
||||||
|
|
||||||
|
cpu_feature detect_cpu_feature() {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
static cpu_feature const feature = [] {
|
||||||
|
if (__builtin_cpu_supports("popcnt")) {
|
||||||
|
return cpu_feature::popcnt;
|
||||||
|
}
|
||||||
|
return cpu_feature::none;
|
||||||
|
}();
|
||||||
|
return feature;
|
||||||
#else
|
#else
|
||||||
__attribute__((target_clones("popcnt", "default")))
|
return cpu_feature::none;
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Fn, typename... Args>
|
||||||
|
decltype(auto) cpu_dispatch(Args&&... args) {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
auto feature = detect_cpu_feature();
|
||||||
|
switch (feature) {
|
||||||
|
case cpu_feature::popcnt:
|
||||||
|
return Fn::template call<cpu_feature::popcnt>(std::forward<Args>(args)...);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
int distance(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
return Fn::template call<cpu_feature::none>(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
int distance_default(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
return distance<uint64_t, 4>(a, b);
|
return distance<uint64_t, 4>(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
__attribute__((__target__("popcnt"))) int
|
||||||
|
distance_popcnt(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
|
return distance<uint64_t, 4>(a, b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct distance_cpu {
|
||||||
|
template <cpu_feature CpuFeature>
|
||||||
|
static int
|
||||||
|
call(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
if constexpr (CpuFeature == cpu_feature::popcnt) {
|
||||||
|
return distance_popcnt(a, b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return distance_default(a, b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct order_by_shortest_path_cpu {
|
||||||
|
template <cpu_feature CpuFeature, typename GetI, typename GetK, typename Swap>
|
||||||
|
static void
|
||||||
|
call(size_t count, GetI const& geti, GetK const& getk, Swap const& swapper) {
|
||||||
|
for (size_t i = 0; i < count - 1; ++i) {
|
||||||
|
auto bi = geti(i);
|
||||||
|
int best_distance = std::numeric_limits<int>::max();
|
||||||
|
size_t best_index = 0;
|
||||||
|
|
||||||
|
for (size_t k = i + 1; k < count; ++k) {
|
||||||
|
auto bk = getk(k);
|
||||||
|
auto d = distance_cpu::template call<CpuFeature>(*bi, *bk);
|
||||||
|
if (d < best_distance) {
|
||||||
|
best_distance = d;
|
||||||
|
best_index = k;
|
||||||
|
if (best_distance <= 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (best_index > 0 && i + 1 != best_index) {
|
||||||
|
swapper(i + 1, best_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int distance(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
|
return cpu_dispatch<distance_cpu>(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GetI, typename GetK, typename Swap>
|
template <typename GetI, typename GetK, typename Swap>
|
||||||
void order_by_shortest_path(size_t count, GetI const& geti, GetK const& getk,
|
void order_by_shortest_path(size_t count, GetI const& geti, GetK const& getk,
|
||||||
Swap const& swapper) {
|
Swap const& swapper) {
|
||||||
for (size_t i = 0; i < count - 1; ++i) {
|
cpu_dispatch<order_by_shortest_path_cpu>(count, geti, getk, swapper);
|
||||||
auto bi = geti(i);
|
|
||||||
int best_distance = std::numeric_limits<int>::max();
|
|
||||||
size_t best_index = 0;
|
|
||||||
|
|
||||||
for (size_t k = i + 1; k < count; ++k) {
|
|
||||||
auto bk = getk(k);
|
|
||||||
auto d = distance(*bi, *bk);
|
|
||||||
if (d < best_distance) {
|
|
||||||
best_distance = d;
|
|
||||||
best_index = k;
|
|
||||||
if (best_distance <= 1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (best_index > 0 && i + 1 != best_index) {
|
|
||||||
swapper(i + 1, best_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t Bits, typename BitsType = uint64_t,
|
template <size_t Bits, typename BitsType = uint64_t,
|
||||||
|
@ -45,17 +45,73 @@ int distance(std::array<T, N> const& a, std::array<T, N> const& b) {
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DWARFS_MULTIVERSIONING
|
#if defined(DWARFS_USE_CPU_FEATURES) && defined(__x86_64__)
|
||||||
#ifdef __clang__
|
#define DWARFS_USE_POPCNT
|
||||||
__attribute__((target_clones("avx512vpopcntdq", "popcnt", "default")))
|
#endif
|
||||||
|
|
||||||
|
enum class cpu_feature {
|
||||||
|
none,
|
||||||
|
popcnt,
|
||||||
|
};
|
||||||
|
|
||||||
|
cpu_feature detect_cpu_feature() {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
static cpu_feature const feature = [] {
|
||||||
|
if (__builtin_cpu_supports("popcnt")) {
|
||||||
|
return cpu_feature::popcnt;
|
||||||
|
}
|
||||||
|
return cpu_feature::none;
|
||||||
|
}();
|
||||||
|
return feature;
|
||||||
#else
|
#else
|
||||||
__attribute__((target_clones("popcnt", "default")))
|
return cpu_feature::none;
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Fn, typename... Args>
|
||||||
|
decltype(auto) cpu_dispatch(Args&&... args) {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
auto feature = detect_cpu_feature();
|
||||||
|
switch (feature) {
|
||||||
|
case cpu_feature::popcnt:
|
||||||
|
return Fn::template call<cpu_feature::popcnt>(std::forward<Args>(args)...);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
int distance(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
return Fn::template call<cpu_feature::none>(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
int distance_default(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
return distance<uint64_t, 4>(a, b);
|
return distance<uint64_t, 4>(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
__attribute__((__target__("popcnt"))) int
|
||||||
|
distance_popcnt(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
|
return distance<uint64_t, 4>(a, b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct distance_cpu {
|
||||||
|
template <cpu_feature CpuFeature>
|
||||||
|
static int
|
||||||
|
call(std::array<uint64_t, 4> const& a, std::array<uint64_t, 4> const& b) {
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
if constexpr (CpuFeature == cpu_feature::popcnt) {
|
||||||
|
return distance_popcnt(a, b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return distance_default(a, b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int distance(std::array<uint64_t, 4> const& a,
|
||||||
|
std::array<uint64_t, 4> const& b) {
|
||||||
|
return cpu_dispatch<distance_cpu>(a, b);
|
||||||
|
}
|
||||||
void nilsimsa_distance(::benchmark::State& state) {
|
void nilsimsa_distance(::benchmark::State& state) {
|
||||||
std::independent_bits_engine<std::mt19937_64,
|
std::independent_bits_engine<std::mt19937_64,
|
||||||
std::numeric_limits<uint64_t>::digits, uint64_t>
|
std::numeric_limits<uint64_t>::digits, uint64_t>
|
||||||
@ -74,6 +130,38 @@ void nilsimsa_distance(::benchmark::State& state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
void nilsimsa_distance_cpu(::benchmark::State& state) {
|
||||||
|
std::independent_bits_engine<std::mt19937_64,
|
||||||
|
std::numeric_limits<uint64_t>::digits, uint64_t>
|
||||||
|
rng;
|
||||||
|
static constexpr unsigned const kNumData{1024};
|
||||||
|
std::vector<std::array<uint64_t, 4>> data(kNumData);
|
||||||
|
for (auto& a : data) {
|
||||||
|
std::generate(begin(a), end(a), std::ref(rng));
|
||||||
|
}
|
||||||
|
unsigned i{0}, k{1};
|
||||||
|
int d;
|
||||||
|
|
||||||
|
switch (detect_cpu_feature()) {
|
||||||
|
case cpu_feature::popcnt:
|
||||||
|
for (auto _ : state) {
|
||||||
|
::benchmark::DoNotOptimize(
|
||||||
|
d = distance_cpu::template call<cpu_feature::popcnt>(
|
||||||
|
data[i++ % kNumData], data[k++ % kNumData]));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
for (auto _ : state) {
|
||||||
|
::benchmark::DoNotOptimize(
|
||||||
|
d = distance_cpu::template call<cpu_feature::none>(
|
||||||
|
data[i++ % kNumData], data[k++ % kNumData]));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void nilsimsa_update(::benchmark::State& state) {
|
void nilsimsa_update(::benchmark::State& state) {
|
||||||
std::independent_bits_engine<std::mt19937_64,
|
std::independent_bits_engine<std::mt19937_64,
|
||||||
std::numeric_limits<uint8_t>::digits, uint16_t>
|
std::numeric_limits<uint8_t>::digits, uint16_t>
|
||||||
@ -92,6 +180,9 @@ void nilsimsa_update(::benchmark::State& state) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
BENCHMARK(nilsimsa_distance);
|
BENCHMARK(nilsimsa_distance);
|
||||||
|
#ifdef DWARFS_USE_POPCNT
|
||||||
|
BENCHMARK(nilsimsa_distance_cpu);
|
||||||
|
#endif
|
||||||
BENCHMARK(nilsimsa_update);
|
BENCHMARK(nilsimsa_update);
|
||||||
|
|
||||||
BENCHMARK_MAIN();
|
BENCHMARK_MAIN();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user