mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 13:30:47 -04:00
feat(segmenter): slightly improve speed
This commit is contained in:
parent
e21b1578eb
commit
19a14cf55a
@ -37,3 +37,11 @@
|
|||||||
__has_attribute(target_clones)
|
__has_attribute(target_clones)
|
||||||
#define DWARFS_MULTIVERSIONING 1
|
#define DWARFS_MULTIVERSIONING 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#define DWARFS_FORCE_INLINE inline __attribute__((__always_inline__))
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
#define DWARFS_FORCE_INLINE __forceinline
|
||||||
|
#else
|
||||||
|
#define DWARFS_FORCE_INLINE inline
|
||||||
|
#endif
|
||||||
|
@ -24,33 +24,38 @@
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include "dwarfs/compiler.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
class rsync_hash {
|
class rsync_hash {
|
||||||
public:
|
public:
|
||||||
rsync_hash() = default;
|
rsync_hash() = default;
|
||||||
|
|
||||||
uint32_t operator()() const { return a_ | (uint32_t(b_) << 16); }
|
DWARFS_FORCE_INLINE uint32_t operator()() const {
|
||||||
|
return a_ | (uint32_t(b_) << 16);
|
||||||
|
}
|
||||||
|
|
||||||
void update(uint8_t inbyte) {
|
DWARFS_FORCE_INLINE void update(uint8_t inbyte) {
|
||||||
a_ += inbyte;
|
a_ += inbyte;
|
||||||
b_ += a_;
|
b_ += a_;
|
||||||
++len_;
|
++len_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void update(uint8_t outbyte, uint8_t inbyte) {
|
DWARFS_FORCE_INLINE void update(uint8_t outbyte, uint8_t inbyte) {
|
||||||
a_ = a_ - outbyte + inbyte;
|
a_ = a_ - outbyte + inbyte;
|
||||||
b_ -= len_ * outbyte;
|
b_ -= len_ * outbyte;
|
||||||
b_ += a_;
|
b_ += a_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
DWARFS_FORCE_INLINE void clear() {
|
||||||
a_ = 0;
|
a_ = 0;
|
||||||
b_ = 0;
|
b_ = 0;
|
||||||
len_ = 0;
|
len_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr uint32_t repeating_window(uint8_t byte, size_t length) {
|
static DWARFS_FORCE_INLINE constexpr uint32_t
|
||||||
|
repeating_window(uint8_t byte, size_t length) {
|
||||||
uint16_t v = static_cast<uint16_t>(byte);
|
uint16_t v = static_cast<uint16_t>(byte);
|
||||||
uint16_t a{static_cast<uint16_t>(v * length)};
|
uint16_t a{static_cast<uint16_t>(v * length)};
|
||||||
uint16_t b{static_cast<uint16_t>(v * (length * (length + 1)) / 2)};
|
uint16_t b{static_cast<uint16_t>(v * (length * (length + 1)) / 2)};
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
#include "dwarfs/block_data.h"
|
#include "dwarfs/block_data.h"
|
||||||
#include "dwarfs/block_manager.h"
|
#include "dwarfs/block_manager.h"
|
||||||
#include "dwarfs/chunkable.h"
|
#include "dwarfs/chunkable.h"
|
||||||
|
#include "dwarfs/compiler.h"
|
||||||
#include "dwarfs/compression_constraints.h"
|
#include "dwarfs/compression_constraints.h"
|
||||||
#include "dwarfs/cyclic_hash.h"
|
#include "dwarfs/cyclic_hash.h"
|
||||||
#include "dwarfs/entry.h"
|
#include "dwarfs/entry.h"
|
||||||
@ -103,14 +104,14 @@ class fast_multimap {
|
|||||||
using collision_t = phmap::flat_hash_map<KeyT, collision_vector>;
|
using collision_t = phmap::flat_hash_map<KeyT, collision_vector>;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void insert(KeyT const& key, ValT const& val) {
|
DWARFS_FORCE_INLINE void insert(KeyT const& key, ValT const& val) {
|
||||||
if (!values_.insert(std::make_pair(key, val)).second) [[unlikely]] {
|
if (!values_.insert(std::make_pair(key, val)).second) [[unlikely]] {
|
||||||
collisions_[key].emplace_back(val);
|
collisions_[key].emplace_back(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
void for_each_value(KeyT const& key, F&& func) const {
|
DWARFS_FORCE_INLINE void for_each_value(KeyT const& key, F&& func) const {
|
||||||
if (auto it = values_.find(key); it != values_.end()) [[unlikely]] {
|
if (auto it = values_.find(key); it != values_.end()) [[unlikely]] {
|
||||||
func(it->second);
|
func(it->second);
|
||||||
if (auto it2 = collisions_.find(key); it2 != collisions_.end())
|
if (auto it2 = collisions_.find(key); it2 != collisions_.end())
|
||||||
@ -123,7 +124,7 @@ class fast_multimap {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
bool any_value_is(KeyT const& key, F&& func) const {
|
DWARFS_FORCE_INLINE bool any_value_is(KeyT const& key, F&& func) const {
|
||||||
if (auto it = values_.find(key); it != values_.end()) [[unlikely]] {
|
if (auto it = values_.find(key); it != values_.end()) [[unlikely]] {
|
||||||
if (func(it->second)) {
|
if (func(it->second)) {
|
||||||
return true;
|
return true;
|
||||||
@ -145,8 +146,10 @@ class fast_multimap {
|
|||||||
collisions_.clear();
|
collisions_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
blockhash_t const& values() const { return values_; };
|
DWARFS_FORCE_INLINE blockhash_t const& values() const { return values_; };
|
||||||
collision_t const& collisions() const { return collisions_; };
|
DWARFS_FORCE_INLINE collision_t const& collisions() const {
|
||||||
|
return collisions_;
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
blockhash_t values_;
|
blockhash_t values_;
|
||||||
@ -204,14 +207,14 @@ class alignas(64) bloom_filter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(size_t ix) {
|
DWARFS_FORCE_INLINE void add(size_t ix) {
|
||||||
auto bits = bits_;
|
auto bits = bits_;
|
||||||
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
||||||
bits[(ix >> index_shift) & index_mask_] |= static_cast<bits_type>(1)
|
bits[(ix >> index_shift) & index_mask_] |= static_cast<bits_type>(1)
|
||||||
<< (ix & value_mask);
|
<< (ix & value_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool test(size_t ix) const {
|
DWARFS_FORCE_INLINE bool test(size_t ix) const {
|
||||||
auto bits = bits_;
|
auto bits = bits_;
|
||||||
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
||||||
return bits[(ix >> index_shift) & index_mask_] &
|
return bits[(ix >> index_shift) & index_mask_] &
|
||||||
@ -219,7 +222,7 @@ class alignas(64) bloom_filter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// size in bits
|
// size in bits
|
||||||
size_t size() const { return size_; }
|
DWARFS_FORCE_INLINE size_t size() const { return size_; }
|
||||||
|
|
||||||
void clear() { std::fill(begin(), end(), 0); }
|
void clear() { std::fill(begin(), end(), 0); }
|
||||||
|
|
||||||
@ -231,10 +234,14 @@ class alignas(64) bloom_filter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bits_type const* cbegin() const { return bits_; }
|
DWARFS_FORCE_INLINE bits_type const* cbegin() const { return bits_; }
|
||||||
bits_type const* cend() const { return bits_ + (size_ >> index_shift); }
|
DWARFS_FORCE_INLINE bits_type const* cend() const {
|
||||||
bits_type* begin() { return bits_; }
|
return bits_ + (size_ >> index_shift);
|
||||||
bits_type* end() { return bits_ + (size_ >> index_shift); }
|
}
|
||||||
|
DWARFS_FORCE_INLINE bits_type* begin() { return bits_; }
|
||||||
|
DWARFS_FORCE_INLINE bits_type* end() {
|
||||||
|
return bits_ + (size_ >> index_shift);
|
||||||
|
}
|
||||||
|
|
||||||
bits_type* bits_{nullptr};
|
bits_type* bits_{nullptr};
|
||||||
size_t const index_mask_;
|
size_t const index_mask_;
|
||||||
@ -294,11 +301,12 @@ class ConstantGranularityPolicy : private GranularityPolicyBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
static void add_match(T& matches, U const* block, uint32_t off) {
|
static DWARFS_FORCE_INLINE void
|
||||||
|
add_match(T& matches, U const* block, uint32_t off) {
|
||||||
matches.emplace_back(block, off);
|
matches.emplace_back(block, off);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_valid_granularity_size(auto size) {
|
static DWARFS_FORCE_INLINE bool is_valid_granularity_size(auto size) {
|
||||||
if constexpr (kGranularity > 1) {
|
if constexpr (kGranularity > 1) {
|
||||||
return size % kGranularity == 0;
|
return size % kGranularity == 0;
|
||||||
} else {
|
} else {
|
||||||
@ -306,14 +314,14 @@ class ConstantGranularityPolicy : private GranularityPolicyBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_chunkable_size(auto size) {
|
static DWARFS_FORCE_INLINE void check_chunkable_size(auto size) {
|
||||||
if constexpr (kGranularity > 1) {
|
if constexpr (kGranularity > 1) {
|
||||||
DWARFS_CHECK(is_valid_granularity_size(size),
|
DWARFS_CHECK(is_valid_granularity_size(size),
|
||||||
chunkable_size_fail_message(size, kGranularity));
|
chunkable_size_fail_message(size, kGranularity));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t constrained_block_size(size_t size) {
|
static DWARFS_FORCE_INLINE size_t constrained_block_size(size_t size) {
|
||||||
if constexpr (kGranularity > 1) {
|
if constexpr (kGranularity > 1) {
|
||||||
size -= size % kGranularity;
|
size -= size % kGranularity;
|
||||||
}
|
}
|
||||||
@ -321,32 +329,37 @@ class ConstantGranularityPolicy : private GranularityPolicyBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
static T create(Args&&... args) {
|
static DWARFS_FORCE_INLINE T create(Args&&... args) {
|
||||||
return T(std::forward<Args>(args)...);
|
return T(std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t bytes_to_frames(size_t size) {
|
static DWARFS_FORCE_INLINE size_t bytes_to_frames(size_t size) {
|
||||||
assert(size % kGranularity == 0);
|
assert(size % kGranularity == 0);
|
||||||
return size / kGranularity;
|
return size / kGranularity;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t frames_to_bytes(size_t size) { return size * kGranularity; }
|
static DWARFS_FORCE_INLINE size_t frames_to_bytes(size_t size) {
|
||||||
|
return size * kGranularity;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void for_bytes_in_frame(T&& func) {
|
static DWARFS_FORCE_INLINE void for_bytes_in_frame(T&& func) {
|
||||||
for (size_t i = 0; i < kGranularity; ++i) {
|
for (size_t i = 0; i < kGranularity; ++i) {
|
||||||
func();
|
func();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint_fast32_t granularity_bytes() { return kGranularity; }
|
static DWARFS_FORCE_INLINE uint_fast32_t granularity_bytes() {
|
||||||
|
return kGranularity;
|
||||||
|
}
|
||||||
|
|
||||||
static bool compile_time_granularity() { return true; }
|
static DWARFS_FORCE_INLINE bool compile_time_granularity() { return true; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class VariableGranularityPolicy : private GranularityPolicyBase {
|
class VariableGranularityPolicy : private GranularityPolicyBase {
|
||||||
public:
|
public:
|
||||||
explicit VariableGranularityPolicy(uint32_t granularity) noexcept
|
explicit DWARFS_FORCE_INLINE
|
||||||
|
VariableGranularityPolicy(uint32_t granularity) noexcept
|
||||||
: granularity_{granularity} {}
|
: granularity_{granularity} {}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -360,22 +373,23 @@ class VariableGranularityPolicy : private GranularityPolicyBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
void add_match(T& matches, U const* block, uint32_t off) const {
|
DWARFS_FORCE_INLINE void
|
||||||
|
add_match(T& matches, U const* block, uint32_t off) const {
|
||||||
matches.emplace_back(block, off, granularity_);
|
matches.emplace_back(block, off, granularity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_valid_granularity_size(auto size) const {
|
DWARFS_FORCE_INLINE bool is_valid_granularity_size(auto size) const {
|
||||||
return size % granularity_ == 0;
|
return size % granularity_ == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void check_chunkable_size(auto size) const {
|
DWARFS_FORCE_INLINE void check_chunkable_size(auto size) const {
|
||||||
if (granularity_ > 1) {
|
if (granularity_ > 1) {
|
||||||
DWARFS_CHECK(is_valid_granularity_size(size),
|
DWARFS_CHECK(is_valid_granularity_size(size),
|
||||||
chunkable_size_fail_message(size, granularity_));
|
chunkable_size_fail_message(size, granularity_));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t constrained_block_size(size_t size) const {
|
DWARFS_FORCE_INLINE size_t constrained_block_size(size_t size) const {
|
||||||
if (granularity_ > 1) {
|
if (granularity_ > 1) {
|
||||||
size -= size % granularity_;
|
size -= size % granularity_;
|
||||||
}
|
}
|
||||||
@ -383,27 +397,31 @@ class VariableGranularityPolicy : private GranularityPolicyBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
T create(Args&&... args) const {
|
DWARFS_FORCE_INLINE T create(Args&&... args) const {
|
||||||
return T(std::forward<Args>(args)..., granularity_);
|
return T(std::forward<Args>(args)..., granularity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t bytes_to_frames(size_t size) const {
|
DWARFS_FORCE_INLINE size_t bytes_to_frames(size_t size) const {
|
||||||
assert(size % granularity_ == 0);
|
assert(size % granularity_ == 0);
|
||||||
return size / granularity_;
|
return size / granularity_;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t frames_to_bytes(size_t size) const { return size * granularity_; }
|
DWARFS_FORCE_INLINE size_t frames_to_bytes(size_t size) const {
|
||||||
|
return size * granularity_;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void for_bytes_in_frame(T&& func) const {
|
DWARFS_FORCE_INLINE void for_bytes_in_frame(T&& func) const {
|
||||||
for (size_t i = 0; i < granularity_; ++i) {
|
for (size_t i = 0; i < granularity_; ++i) {
|
||||||
func();
|
func();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint_fast32_t granularity_bytes() const { return granularity_; }
|
DWARFS_FORCE_INLINE uint_fast32_t granularity_bytes() const {
|
||||||
|
return granularity_;
|
||||||
|
}
|
||||||
|
|
||||||
static bool compile_time_granularity() { return false; }
|
static DWARFS_FORCE_INLINE bool compile_time_granularity() { return false; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint_fast32_t const granularity_;
|
uint_fast32_t const granularity_;
|
||||||
@ -413,28 +431,33 @@ template <typename T, typename GranularityPolicy>
|
|||||||
class granular_span_adapter : private GranularityPolicy {
|
class granular_span_adapter : private GranularityPolicy {
|
||||||
public:
|
public:
|
||||||
template <typename... PolicyArgs>
|
template <typename... PolicyArgs>
|
||||||
|
DWARFS_FORCE_INLINE
|
||||||
granular_span_adapter(std::span<T> s, PolicyArgs&&... args)
|
granular_span_adapter(std::span<T> s, PolicyArgs&&... args)
|
||||||
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
||||||
, s_{s} {}
|
, s_{s} {}
|
||||||
|
|
||||||
size_t size() const { return this->bytes_to_frames(s_.size()); }
|
DWARFS_FORCE_INLINE size_t size() const {
|
||||||
|
return this->bytes_to_frames(s_.size());
|
||||||
|
}
|
||||||
|
|
||||||
std::span<T> raw() const { return s_; }
|
DWARFS_FORCE_INLINE std::span<T> raw() const { return s_; }
|
||||||
|
|
||||||
granular_span_adapter subspan(size_t offset, size_t count) const {
|
DWARFS_FORCE_INLINE granular_span_adapter subspan(size_t offset,
|
||||||
|
size_t count) const {
|
||||||
return this->template create<granular_span_adapter<T, GranularityPolicy>>(
|
return this->template create<granular_span_adapter<T, GranularityPolicy>>(
|
||||||
s_.subspan(this->frames_to_bytes(offset),
|
s_.subspan(this->frames_to_bytes(offset),
|
||||||
this->frames_to_bytes(count)));
|
this->frames_to_bytes(count)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename H>
|
template <typename H>
|
||||||
void update_hash(H& hasher, size_t offset) const {
|
DWARFS_FORCE_INLINE void update_hash(H& hasher, size_t offset) const {
|
||||||
offset = this->frames_to_bytes(offset);
|
offset = this->frames_to_bytes(offset);
|
||||||
this->for_bytes_in_frame([&] { hasher.update(s_[offset++]); });
|
this->for_bytes_in_frame([&] { hasher.update(s_[offset++]); });
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename H>
|
template <typename H>
|
||||||
void update_hash(H& hasher, size_t from, size_t to) const {
|
DWARFS_FORCE_INLINE void
|
||||||
|
update_hash(H& hasher, size_t from, size_t to) const {
|
||||||
from = this->frames_to_bytes(from);
|
from = this->frames_to_bytes(from);
|
||||||
to = this->frames_to_bytes(to);
|
to = this->frames_to_bytes(to);
|
||||||
this->for_bytes_in_frame([&] { hasher.update(s_[from++], s_[to++]); });
|
this->for_bytes_in_frame([&] { hasher.update(s_[from++], s_[to++]); });
|
||||||
@ -450,14 +473,16 @@ class BasicSegmentationPolicy : public GranularityPolicy {
|
|||||||
using GranularityPolicyT = GranularityPolicy;
|
using GranularityPolicyT = GranularityPolicy;
|
||||||
|
|
||||||
template <typename... PolicyArgs>
|
template <typename... PolicyArgs>
|
||||||
BasicSegmentationPolicy(PolicyArgs&&... args)
|
DWARFS_FORCE_INLINE BasicSegmentationPolicy(PolicyArgs&&... args)
|
||||||
: GranularityPolicy(std::forward<PolicyArgs>(args)...) {}
|
: GranularityPolicy(std::forward<PolicyArgs>(args)...) {}
|
||||||
|
|
||||||
static constexpr bool is_segmentation_enabled() {
|
static DWARFS_FORCE_INLINE constexpr bool is_segmentation_enabled() {
|
||||||
return SegmentationEnabled;
|
return SegmentationEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr bool is_multi_block_mode() { return MultiBlock; }
|
static DWARFS_FORCE_INLINE constexpr bool is_multi_block_mode() {
|
||||||
|
return MultiBlock;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GranularityPolicy>
|
template <typename GranularityPolicy>
|
||||||
@ -476,21 +501,25 @@ template <typename T, typename GranularityPolicy>
|
|||||||
class granular_vector_adapter : private GranularityPolicy {
|
class granular_vector_adapter : private GranularityPolicy {
|
||||||
public:
|
public:
|
||||||
template <typename... PolicyArgs>
|
template <typename... PolicyArgs>
|
||||||
|
DWARFS_FORCE_INLINE
|
||||||
granular_vector_adapter(std::vector<T>& v, PolicyArgs&&... args)
|
granular_vector_adapter(std::vector<T>& v, PolicyArgs&&... args)
|
||||||
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
||||||
, v_{v} {}
|
, v_{v} {}
|
||||||
|
|
||||||
size_t size() const { return this->bytes_to_frames(v_.size()); }
|
DWARFS_FORCE_INLINE size_t size() const {
|
||||||
|
return this->bytes_to_frames(v_.size());
|
||||||
|
}
|
||||||
|
|
||||||
void append(granular_span_adapter<T const, GranularityPolicy> const& span) {
|
DWARFS_FORCE_INLINE void
|
||||||
|
append(granular_span_adapter<T const, GranularityPolicy> const& span) {
|
||||||
auto raw = span.raw();
|
auto raw = span.raw();
|
||||||
auto off = v_.size();
|
auto off = v_.size();
|
||||||
v_.resize(off + raw.size());
|
v_.resize(off + raw.size());
|
||||||
::memcpy(v_.data() + off, raw.data(), raw.size());
|
::memcpy(v_.data() + off, raw.data(), raw.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
int compare(
|
DWARFS_FORCE_INLINE int
|
||||||
size_t offset,
|
compare(size_t offset,
|
||||||
granular_span_adapter<T const, GranularityPolicy> const& span) const {
|
granular_span_adapter<T const, GranularityPolicy> const& span) const {
|
||||||
auto raw = span.raw();
|
auto raw = span.raw();
|
||||||
return std::memcmp(v_.data() + this->frames_to_bytes(offset), raw.data(),
|
return std::memcmp(v_.data() + this->frames_to_bytes(offset), raw.data(),
|
||||||
@ -498,13 +527,14 @@ class granular_vector_adapter : private GranularityPolicy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename H>
|
template <typename H>
|
||||||
void update_hash(H& hasher, size_t offset) const {
|
DWARFS_FORCE_INLINE void update_hash(H& hasher, size_t offset) const {
|
||||||
offset = this->frames_to_bytes(offset);
|
offset = this->frames_to_bytes(offset);
|
||||||
this->for_bytes_in_frame([&] { hasher.update(v_[offset++]); });
|
this->for_bytes_in_frame([&] { hasher.update(v_[offset++]); });
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename H>
|
template <typename H>
|
||||||
void update_hash(H& hasher, size_t from, size_t to) const {
|
DWARFS_FORCE_INLINE void
|
||||||
|
update_hash(H& hasher, size_t from, size_t to) const {
|
||||||
from = this->frames_to_bytes(from);
|
from = this->frames_to_bytes(from);
|
||||||
to = this->frames_to_bytes(to);
|
to = this->frames_to_bytes(to);
|
||||||
this->for_bytes_in_frame([&] { hasher.update(v_[from++], v_[to++]); });
|
this->for_bytes_in_frame([&] { hasher.update(v_[from++], v_[to++]); });
|
||||||
@ -543,33 +573,38 @@ class active_block : private GranularityPolicy {
|
|||||||
data_->reserve(this->frames_to_bytes(capacity_in_frames_));
|
data_->reserve(this->frames_to_bytes(capacity_in_frames_));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t num() const { return num_; }
|
DWARFS_FORCE_INLINE size_t num() const { return num_; }
|
||||||
|
|
||||||
size_t size_in_frames() const { return this->bytes_to_frames(data_->size()); }
|
DWARFS_FORCE_INLINE size_t size_in_frames() const {
|
||||||
|
return this->bytes_to_frames(data_->size());
|
||||||
|
}
|
||||||
|
|
||||||
bool full() const { return size_in_frames() == capacity_in_frames_; }
|
DWARFS_FORCE_INLINE bool full() const {
|
||||||
|
return size_in_frames() == capacity_in_frames_;
|
||||||
|
}
|
||||||
|
|
||||||
std::shared_ptr<block_data> data() const { return data_; }
|
DWARFS_FORCE_INLINE std::shared_ptr<block_data> data() const { return data_; }
|
||||||
|
|
||||||
void append_bytes(std::span<uint8_t const> data, bloom_filter& global_filter);
|
DWARFS_FORCE_INLINE void
|
||||||
|
append_bytes(std::span<uint8_t const> data, bloom_filter& global_filter);
|
||||||
|
|
||||||
size_t next_hash_distance_in_frames() const {
|
DWARFS_FORCE_INLINE size_t next_hash_distance_in_frames() const {
|
||||||
return window_step_mask_ + 1 - (size_in_frames() & window_step_mask_);
|
return window_step_mask_ + 1 - (size_in_frames() & window_step_mask_);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
void for_each_offset(hash_t key, F&& func) const {
|
DWARFS_FORCE_INLINE void for_each_offset(hash_t key, F&& func) const {
|
||||||
offsets_.for_each_value(key, std::forward<F>(func));
|
offsets_.for_each_value(key, std::forward<F>(func));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
void for_each_offset_filter(hash_t key, F&& func) const {
|
DWARFS_FORCE_INLINE void for_each_offset_filter(hash_t key, F&& func) const {
|
||||||
if (filter_.test(key)) [[unlikely]] {
|
if (filter_.test(key)) [[unlikely]] {
|
||||||
offsets_.for_each_value(key, std::forward<F>(func));
|
offsets_.for_each_value(key, std::forward<F>(func));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void finalize(segmenter_stats& stats) {
|
DWARFS_FORCE_INLINE void finalize(segmenter_stats& stats) {
|
||||||
stats.total_hashes += offsets_.values().size();
|
stats.total_hashes += offsets_.values().size();
|
||||||
for (auto& c : offsets_.collisions()) {
|
for (auto& c : offsets_.collisions()) {
|
||||||
stats.total_hashes += c.second.size();
|
stats.total_hashes += c.second.size();
|
||||||
@ -578,10 +613,11 @@ class active_block : private GranularityPolicy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bloom_filter const& filter() const { return filter_; }
|
DWARFS_FORCE_INLINE bloom_filter const& filter() const { return filter_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool is_existing_repeating_sequence(hash_t hashval, size_t offset);
|
DWARFS_FORCE_INLINE bool
|
||||||
|
is_existing_repeating_sequence(hash_t hashval, size_t offset);
|
||||||
|
|
||||||
static constexpr size_t num_inline_offsets = 4;
|
static constexpr size_t num_inline_offsets = 4;
|
||||||
|
|
||||||
@ -685,15 +721,18 @@ class segmenter_ final : public segmenter::impl, private SegmentingPolicy {
|
|||||||
size_t size_in_frames{0};
|
size_t size_in_frames{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
void block_ready();
|
DWARFS_FORCE_INLINE void block_ready();
|
||||||
void finish_chunk(chunkable& chkable);
|
void finish_chunk(chunkable& chkable);
|
||||||
void append_to_block(chunkable& chkable, size_t offset_in_frames,
|
DWARFS_FORCE_INLINE void
|
||||||
|
append_to_block(chunkable& chkable, size_t offset_in_frames,
|
||||||
size_t size_in_frames);
|
size_t size_in_frames);
|
||||||
void
|
void
|
||||||
add_data(chunkable& chkable, size_t offset_in_frames, size_t size_in_frames);
|
add_data(chunkable& chkable, size_t offset_in_frames, size_t size_in_frames);
|
||||||
void segment_and_add_data(chunkable& chkable, size_t size_in_frames);
|
DWARFS_FORCE_INLINE void
|
||||||
|
segment_and_add_data(chunkable& chkable, size_t size_in_frames);
|
||||||
|
|
||||||
size_t bloom_filter_size(const segmenter::config& cfg) const {
|
DWARFS_FORCE_INLINE size_t
|
||||||
|
bloom_filter_size(const segmenter::config& cfg) const {
|
||||||
if constexpr (is_segmentation_enabled()) {
|
if constexpr (is_segmentation_enabled()) {
|
||||||
auto hash_count =
|
auto hash_count =
|
||||||
std::bit_ceil(std::max<size_t>(1, cfg.max_active_blocks) *
|
std::bit_ceil(std::max<size_t>(1, cfg.max_active_blocks) *
|
||||||
@ -704,17 +743,18 @@ class segmenter_ final : public segmenter::impl, private SegmentingPolicy {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t window_size(const segmenter::config& cfg) {
|
static DWARFS_FORCE_INLINE size_t window_size(const segmenter::config& cfg) {
|
||||||
return cfg.blockhash_window_size > 0
|
return cfg.blockhash_window_size > 0
|
||||||
? static_cast<size_t>(1) << cfg.blockhash_window_size
|
? static_cast<size_t>(1) << cfg.blockhash_window_size
|
||||||
: 0;
|
: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t window_step(const segmenter::config& cfg) {
|
static DWARFS_FORCE_INLINE size_t window_step(const segmenter::config& cfg) {
|
||||||
return std::max<size_t>(1, window_size(cfg) >> cfg.window_increment_shift);
|
return std::max<size_t>(1, window_size(cfg) >> cfg.window_increment_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t block_size_in_frames(const segmenter::config& cfg) const {
|
size_t DWARFS_FORCE_INLINE
|
||||||
|
block_size_in_frames(const segmenter::config& cfg) const {
|
||||||
auto raw_size = static_cast<size_t>(1) << cfg.block_size_bits;
|
auto raw_size = static_cast<size_t>(1) << cfg.block_size_bits;
|
||||||
return bytes_to_frames(constrained_block_size(raw_size));
|
return bytes_to_frames(constrained_block_size(raw_size));
|
||||||
}
|
}
|
||||||
@ -756,7 +796,7 @@ class segment_match : private GranularityPolicy {
|
|||||||
using active_block_type = active_block<LoggerPolicy, GranularityPolicy>;
|
using active_block_type = active_block<LoggerPolicy, GranularityPolicy>;
|
||||||
|
|
||||||
template <typename... PolicyArgs>
|
template <typename... PolicyArgs>
|
||||||
segment_match(active_block_type const* blk, uint32_t off,
|
DWARFS_FORCE_INLINE segment_match(active_block_type const* blk, uint32_t off,
|
||||||
PolicyArgs&&... args) noexcept
|
PolicyArgs&&... args) noexcept
|
||||||
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
: GranularityPolicy(std::forward<PolicyArgs>(args)...)
|
||||||
, block_{blk}
|
, block_{blk}
|
||||||
@ -766,17 +806,17 @@ class segment_match : private GranularityPolicy {
|
|||||||
granular_span_adapter<uint8_t const, GranularityPolicy> const& data,
|
granular_span_adapter<uint8_t const, GranularityPolicy> const& data,
|
||||||
size_t pos, size_t len, size_t begin, size_t end);
|
size_t pos, size_t len, size_t begin, size_t end);
|
||||||
|
|
||||||
bool operator<(segment_match const& rhs) const {
|
DWARFS_FORCE_INLINE bool operator<(segment_match const& rhs) const {
|
||||||
return size_ < rhs.size_ ||
|
return size_ < rhs.size_ ||
|
||||||
(size_ == rhs.size_ &&
|
(size_ == rhs.size_ &&
|
||||||
(block_->num() < rhs.block_->num() ||
|
(block_->num() < rhs.block_->num() ||
|
||||||
(block_->num() == rhs.block_->num() && offset_ < rhs.offset_)));
|
(block_->num() == rhs.block_->num() && offset_ < rhs.offset_)));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t pos() const { return pos_; }
|
DWARFS_FORCE_INLINE size_t pos() const { return pos_; }
|
||||||
uint32_t size() const { return size_; }
|
DWARFS_FORCE_INLINE uint32_t size() const { return size_; }
|
||||||
uint32_t offset() const { return offset_; }
|
DWARFS_FORCE_INLINE uint32_t offset() const { return offset_; }
|
||||||
size_t block_num() const { return block_->num(); }
|
DWARFS_FORCE_INLINE size_t block_num() const { return block_->num(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
active_block_type const* block_;
|
active_block_type const* block_;
|
||||||
@ -786,8 +826,9 @@ class segment_match : private GranularityPolicy {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename GranularityPolicy>
|
template <typename LoggerPolicy, typename GranularityPolicy>
|
||||||
bool active_block<LoggerPolicy, GranularityPolicy>::
|
DWARFS_FORCE_INLINE bool
|
||||||
is_existing_repeating_sequence(hash_t hashval, size_t offset) {
|
active_block<LoggerPolicy, GranularityPolicy>::is_existing_repeating_sequence(
|
||||||
|
hash_t hashval, size_t offset) {
|
||||||
if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] {
|
if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] {
|
||||||
auto& raw = data_->vec();
|
auto& raw = data_->vec();
|
||||||
auto winbeg = raw.begin() + frames_to_bytes(offset);
|
auto winbeg = raw.begin() + frames_to_bytes(offset);
|
||||||
@ -822,7 +863,8 @@ bool active_block<LoggerPolicy, GranularityPolicy>::
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename GranularityPolicy>
|
template <typename LoggerPolicy, typename GranularityPolicy>
|
||||||
void active_block<LoggerPolicy, GranularityPolicy>::append_bytes(
|
DWARFS_FORCE_INLINE void
|
||||||
|
active_block<LoggerPolicy, GranularityPolicy>::append_bytes(
|
||||||
std::span<uint8_t const> data, bloom_filter& global_filter) {
|
std::span<uint8_t const> data, bloom_filter& global_filter) {
|
||||||
auto src = this->template create<
|
auto src = this->template create<
|
||||||
granular_span_adapter<uint8_t const, GranularityPolicy>>(data);
|
granular_span_adapter<uint8_t const, GranularityPolicy>>(data);
|
||||||
@ -977,7 +1019,8 @@ void segmenter_<LoggerPolicy, SegmentingPolicy>::finish() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename SegmentingPolicy>
|
template <typename LoggerPolicy, typename SegmentingPolicy>
|
||||||
void segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
DWARFS_FORCE_INLINE void
|
||||||
|
segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
||||||
auto& block = blocks_.back();
|
auto& block = blocks_.back();
|
||||||
block.finalize(stats_);
|
block.finalize(stats_);
|
||||||
block_ready_(block.data(), block.num());
|
block_ready_(block.data(), block.num());
|
||||||
@ -985,7 +1028,8 @@ void segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename SegmentingPolicy>
|
template <typename LoggerPolicy, typename SegmentingPolicy>
|
||||||
void segmenter_<LoggerPolicy, SegmentingPolicy>::append_to_block(
|
DWARFS_FORCE_INLINE void
|
||||||
|
segmenter_<LoggerPolicy, SegmentingPolicy>::append_to_block(
|
||||||
chunkable& chkable, size_t offset_in_frames, size_t size_in_frames) {
|
chunkable& chkable, size_t offset_in_frames, size_t size_in_frames) {
|
||||||
if (blocks_.empty() or blocks_.back().full()) [[unlikely]] {
|
if (blocks_.empty() or blocks_.back().full()) [[unlikely]] {
|
||||||
if (blocks_.size() >= std::max<size_t>(1, cfg_.max_active_blocks)) {
|
if (blocks_.size() >= std::max<size_t>(1, cfg_.max_active_blocks)) {
|
||||||
@ -1062,7 +1106,8 @@ void segmenter_<LoggerPolicy, SegmentingPolicy>::finish_chunk(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename SegmentingPolicy>
|
template <typename LoggerPolicy, typename SegmentingPolicy>
|
||||||
void segmenter_<LoggerPolicy, SegmentingPolicy>::segment_and_add_data(
|
DWARFS_FORCE_INLINE void
|
||||||
|
segmenter_<LoggerPolicy, SegmentingPolicy>::segment_and_add_data(
|
||||||
chunkable& chkable, size_t size_in_frames) {
|
chunkable& chkable, size_t size_in_frames) {
|
||||||
rsync_hash hasher;
|
rsync_hash hasher;
|
||||||
size_t offset_in_frames = 0;
|
size_t offset_in_frames = 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user