From 19a14cf55aa1703b1e55336d5d17546dd041d780 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 24 Feb 2024 07:41:24 +0100 Subject: [PATCH] feat(segmenter): slightly improve speed --- include/dwarfs/compiler.h | 8 ++ include/dwarfs/cyclic_hash.h | 15 ++- src/dwarfs/segmenter.cpp | 205 +++++++++++++++++++++-------------- 3 files changed, 143 insertions(+), 85 deletions(-) diff --git a/include/dwarfs/compiler.h b/include/dwarfs/compiler.h index f75def96..23fd7169 100644 --- a/include/dwarfs/compiler.h +++ b/include/dwarfs/compiler.h @@ -37,3 +37,11 @@ __has_attribute(target_clones) #define DWARFS_MULTIVERSIONING 1 #endif + +#if defined(__GNUC__) || defined(__clang__) +#define DWARFS_FORCE_INLINE inline __attribute__((__always_inline__)) +#elif defined(_MSC_VER) +#define DWARFS_FORCE_INLINE __forceinline +#else +#define DWARFS_FORCE_INLINE inline +#endif diff --git a/include/dwarfs/cyclic_hash.h b/include/dwarfs/cyclic_hash.h index 8cb69892..c02412cb 100644 --- a/include/dwarfs/cyclic_hash.h +++ b/include/dwarfs/cyclic_hash.h @@ -24,33 +24,38 @@ #include #include +#include "dwarfs/compiler.h" + namespace dwarfs { class rsync_hash { public: rsync_hash() = default; - uint32_t operator()() const { return a_ | (uint32_t(b_) << 16); } + DWARFS_FORCE_INLINE uint32_t operator()() const { + return a_ | (uint32_t(b_) << 16); + } - void update(uint8_t inbyte) { + DWARFS_FORCE_INLINE void update(uint8_t inbyte) { a_ += inbyte; b_ += a_; ++len_; } - void update(uint8_t outbyte, uint8_t inbyte) { + DWARFS_FORCE_INLINE void update(uint8_t outbyte, uint8_t inbyte) { a_ = a_ - outbyte + inbyte; b_ -= len_ * outbyte; b_ += a_; } - void clear() { + DWARFS_FORCE_INLINE void clear() { a_ = 0; b_ = 0; len_ = 0; } - static constexpr uint32_t repeating_window(uint8_t byte, size_t length) { + static DWARFS_FORCE_INLINE constexpr uint32_t + repeating_window(uint8_t byte, size_t length) { uint16_t v = static_cast(byte); uint16_t a{static_cast(v * length)}; uint16_t b{static_cast(v * (length * (length + 1)) / 2)}; diff --git a/src/dwarfs/segmenter.cpp b/src/dwarfs/segmenter.cpp index 6c0289f9..598cd9cb 100644 --- a/src/dwarfs/segmenter.cpp +++ b/src/dwarfs/segmenter.cpp @@ -44,6 +44,7 @@ #include "dwarfs/block_data.h" #include "dwarfs/block_manager.h" #include "dwarfs/chunkable.h" +#include "dwarfs/compiler.h" #include "dwarfs/compression_constraints.h" #include "dwarfs/cyclic_hash.h" #include "dwarfs/entry.h" @@ -103,14 +104,14 @@ class fast_multimap { using collision_t = phmap::flat_hash_map; public: - void insert(KeyT const& key, ValT const& val) { + DWARFS_FORCE_INLINE void insert(KeyT const& key, ValT const& val) { if (!values_.insert(std::make_pair(key, val)).second) [[unlikely]] { collisions_[key].emplace_back(val); } } template - void for_each_value(KeyT const& key, F&& func) const { + DWARFS_FORCE_INLINE void for_each_value(KeyT const& key, F&& func) const { if (auto it = values_.find(key); it != values_.end()) [[unlikely]] { func(it->second); if (auto it2 = collisions_.find(key); it2 != collisions_.end()) @@ -123,7 +124,7 @@ class fast_multimap { } template - bool any_value_is(KeyT const& key, F&& func) const { + DWARFS_FORCE_INLINE bool any_value_is(KeyT const& key, F&& func) const { if (auto it = values_.find(key); it != values_.end()) [[unlikely]] { if (func(it->second)) { return true; @@ -145,8 +146,10 @@ class fast_multimap { collisions_.clear(); } - blockhash_t const& values() const { return values_; }; - collision_t const& collisions() const { return collisions_; }; + DWARFS_FORCE_INLINE blockhash_t const& values() const { return values_; }; + DWARFS_FORCE_INLINE collision_t const& collisions() const { + return collisions_; + }; private: blockhash_t values_; @@ -204,14 +207,14 @@ class alignas(64) bloom_filter { } } - void add(size_t ix) { + DWARFS_FORCE_INLINE void add(size_t ix) { auto bits = bits_; BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type)); bits[(ix >> index_shift) & index_mask_] |= static_cast(1) << (ix & value_mask); } - bool test(size_t ix) const { + DWARFS_FORCE_INLINE bool test(size_t ix) const { auto bits = bits_; BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type)); return bits[(ix >> index_shift) & index_mask_] & @@ -219,7 +222,7 @@ class alignas(64) bloom_filter { } // size in bits - size_t size() const { return size_; } + DWARFS_FORCE_INLINE size_t size() const { return size_; } void clear() { std::fill(begin(), end(), 0); } @@ -231,10 +234,14 @@ class alignas(64) bloom_filter { } private: - bits_type const* cbegin() const { return bits_; } - bits_type const* cend() const { return bits_ + (size_ >> index_shift); } - bits_type* begin() { return bits_; } - bits_type* end() { return bits_ + (size_ >> index_shift); } + DWARFS_FORCE_INLINE bits_type const* cbegin() const { return bits_; } + DWARFS_FORCE_INLINE bits_type const* cend() const { + return bits_ + (size_ >> index_shift); + } + DWARFS_FORCE_INLINE bits_type* begin() { return bits_; } + DWARFS_FORCE_INLINE bits_type* end() { + return bits_ + (size_ >> index_shift); + } bits_type* bits_{nullptr}; size_t const index_mask_; @@ -294,11 +301,12 @@ class ConstantGranularityPolicy : private GranularityPolicyBase { } template - static void add_match(T& matches, U const* block, uint32_t off) { + static DWARFS_FORCE_INLINE void + add_match(T& matches, U const* block, uint32_t off) { matches.emplace_back(block, off); } - static bool is_valid_granularity_size(auto size) { + static DWARFS_FORCE_INLINE bool is_valid_granularity_size(auto size) { if constexpr (kGranularity > 1) { return size % kGranularity == 0; } else { @@ -306,14 +314,14 @@ class ConstantGranularityPolicy : private GranularityPolicyBase { } } - static void check_chunkable_size(auto size) { + static DWARFS_FORCE_INLINE void check_chunkable_size(auto size) { if constexpr (kGranularity > 1) { DWARFS_CHECK(is_valid_granularity_size(size), chunkable_size_fail_message(size, kGranularity)); } } - static size_t constrained_block_size(size_t size) { + static DWARFS_FORCE_INLINE size_t constrained_block_size(size_t size) { if constexpr (kGranularity > 1) { size -= size % kGranularity; } @@ -321,32 +329,37 @@ class ConstantGranularityPolicy : private GranularityPolicyBase { } template - static T create(Args&&... args) { + static DWARFS_FORCE_INLINE T create(Args&&... args) { return T(std::forward(args)...); } - static size_t bytes_to_frames(size_t size) { + static DWARFS_FORCE_INLINE size_t bytes_to_frames(size_t size) { assert(size % kGranularity == 0); return size / kGranularity; } - static size_t frames_to_bytes(size_t size) { return size * kGranularity; } + static DWARFS_FORCE_INLINE size_t frames_to_bytes(size_t size) { + return size * kGranularity; + } template - static void for_bytes_in_frame(T&& func) { + static DWARFS_FORCE_INLINE void for_bytes_in_frame(T&& func) { for (size_t i = 0; i < kGranularity; ++i) { func(); } } - static uint_fast32_t granularity_bytes() { return kGranularity; } + static DWARFS_FORCE_INLINE uint_fast32_t granularity_bytes() { + return kGranularity; + } - static bool compile_time_granularity() { return true; } + static DWARFS_FORCE_INLINE bool compile_time_granularity() { return true; } }; class VariableGranularityPolicy : private GranularityPolicyBase { public: - explicit VariableGranularityPolicy(uint32_t granularity) noexcept + explicit DWARFS_FORCE_INLINE + VariableGranularityPolicy(uint32_t granularity) noexcept : granularity_{granularity} {} template @@ -360,22 +373,23 @@ class VariableGranularityPolicy : private GranularityPolicyBase { } template - void add_match(T& matches, U const* block, uint32_t off) const { + DWARFS_FORCE_INLINE void + add_match(T& matches, U const* block, uint32_t off) const { matches.emplace_back(block, off, granularity_); } - bool is_valid_granularity_size(auto size) const { + DWARFS_FORCE_INLINE bool is_valid_granularity_size(auto size) const { return size % granularity_ == 0; } - void check_chunkable_size(auto size) const { + DWARFS_FORCE_INLINE void check_chunkable_size(auto size) const { if (granularity_ > 1) { DWARFS_CHECK(is_valid_granularity_size(size), chunkable_size_fail_message(size, granularity_)); } } - size_t constrained_block_size(size_t size) const { + DWARFS_FORCE_INLINE size_t constrained_block_size(size_t size) const { if (granularity_ > 1) { size -= size % granularity_; } @@ -383,27 +397,31 @@ class VariableGranularityPolicy : private GranularityPolicyBase { } template - T create(Args&&... args) const { + DWARFS_FORCE_INLINE T create(Args&&... args) const { return T(std::forward(args)..., granularity_); } - size_t bytes_to_frames(size_t size) const { + DWARFS_FORCE_INLINE size_t bytes_to_frames(size_t size) const { assert(size % granularity_ == 0); return size / granularity_; } - size_t frames_to_bytes(size_t size) const { return size * granularity_; } + DWARFS_FORCE_INLINE size_t frames_to_bytes(size_t size) const { + return size * granularity_; + } template - void for_bytes_in_frame(T&& func) const { + DWARFS_FORCE_INLINE void for_bytes_in_frame(T&& func) const { for (size_t i = 0; i < granularity_; ++i) { func(); } } - uint_fast32_t granularity_bytes() const { return granularity_; } + DWARFS_FORCE_INLINE uint_fast32_t granularity_bytes() const { + return granularity_; + } - static bool compile_time_granularity() { return false; } + static DWARFS_FORCE_INLINE bool compile_time_granularity() { return false; } private: uint_fast32_t const granularity_; @@ -413,28 +431,33 @@ template class granular_span_adapter : private GranularityPolicy { public: template + DWARFS_FORCE_INLINE granular_span_adapter(std::span s, PolicyArgs&&... args) : GranularityPolicy(std::forward(args)...) , s_{s} {} - size_t size() const { return this->bytes_to_frames(s_.size()); } + DWARFS_FORCE_INLINE size_t size() const { + return this->bytes_to_frames(s_.size()); + } - std::span raw() const { return s_; } + DWARFS_FORCE_INLINE std::span raw() const { return s_; } - granular_span_adapter subspan(size_t offset, size_t count) const { + DWARFS_FORCE_INLINE granular_span_adapter subspan(size_t offset, + size_t count) const { return this->template create>( s_.subspan(this->frames_to_bytes(offset), this->frames_to_bytes(count))); } template - void update_hash(H& hasher, size_t offset) const { + DWARFS_FORCE_INLINE void update_hash(H& hasher, size_t offset) const { offset = this->frames_to_bytes(offset); this->for_bytes_in_frame([&] { hasher.update(s_[offset++]); }); } template - void update_hash(H& hasher, size_t from, size_t to) const { + DWARFS_FORCE_INLINE void + update_hash(H& hasher, size_t from, size_t to) const { from = this->frames_to_bytes(from); to = this->frames_to_bytes(to); this->for_bytes_in_frame([&] { hasher.update(s_[from++], s_[to++]); }); @@ -450,14 +473,16 @@ class BasicSegmentationPolicy : public GranularityPolicy { using GranularityPolicyT = GranularityPolicy; template - BasicSegmentationPolicy(PolicyArgs&&... args) + DWARFS_FORCE_INLINE BasicSegmentationPolicy(PolicyArgs&&... args) : GranularityPolicy(std::forward(args)...) {} - static constexpr bool is_segmentation_enabled() { + static DWARFS_FORCE_INLINE constexpr bool is_segmentation_enabled() { return SegmentationEnabled; } - static constexpr bool is_multi_block_mode() { return MultiBlock; } + static DWARFS_FORCE_INLINE constexpr bool is_multi_block_mode() { + return MultiBlock; + } }; template @@ -476,35 +501,40 @@ template class granular_vector_adapter : private GranularityPolicy { public: template + DWARFS_FORCE_INLINE granular_vector_adapter(std::vector& v, PolicyArgs&&... args) : GranularityPolicy(std::forward(args)...) , v_{v} {} - size_t size() const { return this->bytes_to_frames(v_.size()); } + DWARFS_FORCE_INLINE size_t size() const { + return this->bytes_to_frames(v_.size()); + } - void append(granular_span_adapter const& span) { + DWARFS_FORCE_INLINE void + append(granular_span_adapter const& span) { auto raw = span.raw(); auto off = v_.size(); v_.resize(off + raw.size()); ::memcpy(v_.data() + off, raw.data(), raw.size()); } - int compare( - size_t offset, - granular_span_adapter const& span) const { + DWARFS_FORCE_INLINE int + compare(size_t offset, + granular_span_adapter const& span) const { auto raw = span.raw(); return std::memcmp(v_.data() + this->frames_to_bytes(offset), raw.data(), raw.size()); } template - void update_hash(H& hasher, size_t offset) const { + DWARFS_FORCE_INLINE void update_hash(H& hasher, size_t offset) const { offset = this->frames_to_bytes(offset); this->for_bytes_in_frame([&] { hasher.update(v_[offset++]); }); } template - void update_hash(H& hasher, size_t from, size_t to) const { + DWARFS_FORCE_INLINE void + update_hash(H& hasher, size_t from, size_t to) const { from = this->frames_to_bytes(from); to = this->frames_to_bytes(to); this->for_bytes_in_frame([&] { hasher.update(v_[from++], v_[to++]); }); @@ -543,33 +573,38 @@ class active_block : private GranularityPolicy { data_->reserve(this->frames_to_bytes(capacity_in_frames_)); } - size_t num() const { return num_; } + DWARFS_FORCE_INLINE size_t num() const { return num_; } - size_t size_in_frames() const { return this->bytes_to_frames(data_->size()); } + DWARFS_FORCE_INLINE size_t size_in_frames() const { + return this->bytes_to_frames(data_->size()); + } - bool full() const { return size_in_frames() == capacity_in_frames_; } + DWARFS_FORCE_INLINE bool full() const { + return size_in_frames() == capacity_in_frames_; + } - std::shared_ptr data() const { return data_; } + DWARFS_FORCE_INLINE std::shared_ptr data() const { return data_; } - void append_bytes(std::span data, bloom_filter& global_filter); + DWARFS_FORCE_INLINE void + append_bytes(std::span data, bloom_filter& global_filter); - size_t next_hash_distance_in_frames() const { + DWARFS_FORCE_INLINE size_t next_hash_distance_in_frames() const { return window_step_mask_ + 1 - (size_in_frames() & window_step_mask_); } template - void for_each_offset(hash_t key, F&& func) const { + DWARFS_FORCE_INLINE void for_each_offset(hash_t key, F&& func) const { offsets_.for_each_value(key, std::forward(func)); } template - void for_each_offset_filter(hash_t key, F&& func) const { + DWARFS_FORCE_INLINE void for_each_offset_filter(hash_t key, F&& func) const { if (filter_.test(key)) [[unlikely]] { offsets_.for_each_value(key, std::forward(func)); } } - void finalize(segmenter_stats& stats) { + DWARFS_FORCE_INLINE void finalize(segmenter_stats& stats) { stats.total_hashes += offsets_.values().size(); for (auto& c : offsets_.collisions()) { stats.total_hashes += c.second.size(); @@ -578,10 +613,11 @@ class active_block : private GranularityPolicy { } } - bloom_filter const& filter() const { return filter_; } + DWARFS_FORCE_INLINE bloom_filter const& filter() const { return filter_; } private: - bool is_existing_repeating_sequence(hash_t hashval, size_t offset); + DWARFS_FORCE_INLINE bool + is_existing_repeating_sequence(hash_t hashval, size_t offset); static constexpr size_t num_inline_offsets = 4; @@ -685,15 +721,18 @@ class segmenter_ final : public segmenter::impl, private SegmentingPolicy { size_t size_in_frames{0}; }; - void block_ready(); + DWARFS_FORCE_INLINE void block_ready(); void finish_chunk(chunkable& chkable); - void append_to_block(chunkable& chkable, size_t offset_in_frames, - size_t size_in_frames); + DWARFS_FORCE_INLINE void + append_to_block(chunkable& chkable, size_t offset_in_frames, + size_t size_in_frames); void add_data(chunkable& chkable, size_t offset_in_frames, size_t size_in_frames); - void segment_and_add_data(chunkable& chkable, size_t size_in_frames); + DWARFS_FORCE_INLINE void + segment_and_add_data(chunkable& chkable, size_t size_in_frames); - size_t bloom_filter_size(const segmenter::config& cfg) const { + DWARFS_FORCE_INLINE size_t + bloom_filter_size(const segmenter::config& cfg) const { if constexpr (is_segmentation_enabled()) { auto hash_count = std::bit_ceil(std::max(1, cfg.max_active_blocks) * @@ -704,17 +743,18 @@ class segmenter_ final : public segmenter::impl, private SegmentingPolicy { return 0; } - static size_t window_size(const segmenter::config& cfg) { + static DWARFS_FORCE_INLINE size_t window_size(const segmenter::config& cfg) { return cfg.blockhash_window_size > 0 ? static_cast(1) << cfg.blockhash_window_size : 0; } - static size_t window_step(const segmenter::config& cfg) { + static DWARFS_FORCE_INLINE size_t window_step(const segmenter::config& cfg) { return std::max(1, window_size(cfg) >> cfg.window_increment_shift); } - size_t block_size_in_frames(const segmenter::config& cfg) const { + size_t DWARFS_FORCE_INLINE + block_size_in_frames(const segmenter::config& cfg) const { auto raw_size = static_cast(1) << cfg.block_size_bits; return bytes_to_frames(constrained_block_size(raw_size)); } @@ -756,8 +796,8 @@ class segment_match : private GranularityPolicy { using active_block_type = active_block; template - segment_match(active_block_type const* blk, uint32_t off, - PolicyArgs&&... args) noexcept + DWARFS_FORCE_INLINE segment_match(active_block_type const* blk, uint32_t off, + PolicyArgs&&... args) noexcept : GranularityPolicy(std::forward(args)...) , block_{blk} , offset_{off} {} @@ -766,17 +806,17 @@ class segment_match : private GranularityPolicy { granular_span_adapter const& data, size_t pos, size_t len, size_t begin, size_t end); - bool operator<(segment_match const& rhs) const { + DWARFS_FORCE_INLINE bool operator<(segment_match const& rhs) const { return size_ < rhs.size_ || (size_ == rhs.size_ && (block_->num() < rhs.block_->num() || (block_->num() == rhs.block_->num() && offset_ < rhs.offset_))); } - size_t pos() const { return pos_; } - uint32_t size() const { return size_; } - uint32_t offset() const { return offset_; } - size_t block_num() const { return block_->num(); } + DWARFS_FORCE_INLINE size_t pos() const { return pos_; } + DWARFS_FORCE_INLINE uint32_t size() const { return size_; } + DWARFS_FORCE_INLINE uint32_t offset() const { return offset_; } + DWARFS_FORCE_INLINE size_t block_num() const { return block_->num(); } private: active_block_type const* block_; @@ -786,8 +826,9 @@ class segment_match : private GranularityPolicy { }; template -bool active_block:: - is_existing_repeating_sequence(hash_t hashval, size_t offset) { +DWARFS_FORCE_INLINE bool +active_block::is_existing_repeating_sequence( + hash_t hashval, size_t offset) { if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] { auto& raw = data_->vec(); auto winbeg = raw.begin() + frames_to_bytes(offset); @@ -822,7 +863,8 @@ bool active_block:: } template -void active_block::append_bytes( +DWARFS_FORCE_INLINE void +active_block::append_bytes( std::span data, bloom_filter& global_filter) { auto src = this->template create< granular_span_adapter>(data); @@ -977,7 +1019,8 @@ void segmenter_::finish() { } template -void segmenter_::block_ready() { +DWARFS_FORCE_INLINE void +segmenter_::block_ready() { auto& block = blocks_.back(); block.finalize(stats_); block_ready_(block.data(), block.num()); @@ -985,7 +1028,8 @@ void segmenter_::block_ready() { } template -void segmenter_::append_to_block( +DWARFS_FORCE_INLINE void +segmenter_::append_to_block( chunkable& chkable, size_t offset_in_frames, size_t size_in_frames) { if (blocks_.empty() or blocks_.back().full()) [[unlikely]] { if (blocks_.size() >= std::max(1, cfg_.max_active_blocks)) { @@ -1062,7 +1106,8 @@ void segmenter_::finish_chunk( } template -void segmenter_::segment_and_add_data( +DWARFS_FORCE_INLINE void +segmenter_::segment_and_add_data( chunkable& chkable, size_t size_in_frames) { rsync_hash hasher; size_t offset_in_frames = 0;