mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-14 14:59:52 -04:00
Some alignment optimisation of bloom filter
This commit is contained in:
parent
ba84b130cf
commit
89df6add69
@ -28,6 +28,8 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <boost/align.hpp>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include <parallel_hashmap/phmap.h>
|
#include <parallel_hashmap/phmap.h>
|
||||||
@ -164,45 +166,62 @@ class bloom_filter {
|
|||||||
|
|
||||||
static constexpr size_t value_mask = 8 * sizeof(bits_type) - 1;
|
static constexpr size_t value_mask = 8 * sizeof(bits_type) - 1;
|
||||||
static constexpr size_t index_shift = bitcount(value_mask);
|
static constexpr size_t index_shift = bitcount(value_mask);
|
||||||
|
static constexpr size_t alignment = 64;
|
||||||
|
|
||||||
bloom_filter(size_t size)
|
bloom_filter(size_t size)
|
||||||
: index_mask_{(std::max(size, value_mask + 1) >> index_shift) - 1} {
|
: index_mask_{(std::max(size, value_mask + 1) >> index_shift) - 1}
|
||||||
|
, size_{std::max(size, value_mask + 1)} {
|
||||||
if (size & (size - 1)) {
|
if (size & (size - 1)) {
|
||||||
throw std::runtime_error("size must be a power of two");
|
throw std::runtime_error("size must be a power of two");
|
||||||
}
|
}
|
||||||
bits_.resize(std::max(size, value_mask + 1) >> index_shift);
|
bits_ = reinterpret_cast<bits_type*>(
|
||||||
|
boost::alignment::aligned_alloc(alignment, size_ / 8));
|
||||||
|
if (!bits_) {
|
||||||
|
throw std::runtime_error("failed to allocate aligned memory");
|
||||||
|
}
|
||||||
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(size_t val) { set(val); }
|
~bloom_filter() { boost::alignment::aligned_free(bits_); }
|
||||||
|
|
||||||
bool test(size_t val) const { return isset(val); }
|
void add(size_t ix) {
|
||||||
|
auto bits = bits_;
|
||||||
size_t size() const { return bits_.size() << index_shift; }
|
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
||||||
|
bits[(ix >> index_shift) & index_mask_] |= static_cast<bits_type>(1)
|
||||||
void clear() { std::fill(bits_.begin(), bits_.end(), 0); }
|
|
||||||
|
|
||||||
void merge(bloom_filter const& other) {
|
|
||||||
if (bits_.size() != other.bits_.size()) {
|
|
||||||
throw std::runtime_error("size mismatch");
|
|
||||||
}
|
|
||||||
std::transform(bits_.cbegin(), bits_.cend(), other.bits_.cbegin(),
|
|
||||||
bits_.begin(), std::bit_or<>{});
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
void set(size_t ix) {
|
|
||||||
bits_[(ix >> index_shift) & index_mask_] |= UINT64_C(1)
|
|
||||||
<< (ix & value_mask);
|
<< (ix & value_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isset(size_t ix) const {
|
bool test(size_t ix) const {
|
||||||
return bits_[(ix >> index_shift) & index_mask_] &
|
auto bits = bits_;
|
||||||
(UINT64_C(1) << (ix & value_mask));
|
BOOST_ALIGN_ASSUME_ALIGNED(bits, sizeof(bits_type));
|
||||||
|
return bits[(ix >> index_shift) & index_mask_] &
|
||||||
|
(static_cast<bits_type>(1) << (ix & value_mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<bits_type> bits_;
|
// size in bits
|
||||||
|
size_t size() const { return size_; }
|
||||||
|
|
||||||
|
void clear() { std::fill(begin(), end(), 0); }
|
||||||
|
|
||||||
|
void merge(bloom_filter const& other) {
|
||||||
|
if (size() != other.size()) {
|
||||||
|
throw std::runtime_error("size mismatch");
|
||||||
|
}
|
||||||
|
std::transform(cbegin(), cend(), other.cbegin(), begin(), std::bit_or<>{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bits_type const* cbegin() const { return bits_; }
|
||||||
|
bits_type const* cend() const { return bits_ + (size_ >> index_shift); }
|
||||||
|
bits_type const* begin() const { return bits_; }
|
||||||
|
bits_type const* end() const { return bits_ + (size_ >> index_shift); }
|
||||||
|
bits_type* begin() { return bits_; }
|
||||||
|
bits_type* end() { return bits_ + (size_ >> index_shift); }
|
||||||
|
|
||||||
|
bits_type* bits_;
|
||||||
size_t const index_mask_;
|
size_t const index_mask_;
|
||||||
};
|
size_t const size_;
|
||||||
|
} __attribute__((aligned(64)));
|
||||||
|
|
||||||
class active_block {
|
class active_block {
|
||||||
private:
|
private:
|
||||||
@ -278,13 +297,10 @@ class block_manager_ final : public block_manager::impl {
|
|||||||
, cfg_{cfg}
|
, cfg_{cfg}
|
||||||
, os_{std::move(os)}
|
, os_{std::move(os)}
|
||||||
, fsw_{fsw}
|
, fsw_{fsw}
|
||||||
, window_size_{cfg.blockhash_window_size > 0
|
, window_size_{window_size(cfg)}
|
||||||
? static_cast<size_t>(1) << cfg.blockhash_window_size
|
, window_step_{window_step(cfg)}
|
||||||
: 0}
|
, block_size_{block_size(cfg)}
|
||||||
, window_step_{std::max<size_t>(1, window_size_ >>
|
, filter_{bloom_filter_size(cfg)} {
|
||||||
cfg.window_increment_shift)}
|
|
||||||
, block_size_{static_cast<size_t>(1) << cfg.block_size_bits}
|
|
||||||
, filter_{bloom_filter_size()} {
|
|
||||||
if (segmentation_enabled()) {
|
if (segmentation_enabled()) {
|
||||||
LOG_INFO << "using a " << size_with_unit(window_size_) << " window at "
|
LOG_INFO << "using a " << size_with_unit(window_size_) << " window at "
|
||||||
<< size_with_unit(window_step_) << " steps for segment analysis";
|
<< size_with_unit(window_step_) << " steps for segment analysis";
|
||||||
@ -310,10 +326,25 @@ class block_manager_ final : public block_manager::impl {
|
|||||||
void append_to_block(inode& ino, mmif& mm, size_t offset, size_t size);
|
void append_to_block(inode& ino, mmif& mm, size_t offset, size_t size);
|
||||||
void add_data(inode& ino, mmif& mm, size_t offset, size_t size);
|
void add_data(inode& ino, mmif& mm, size_t offset, size_t size);
|
||||||
void segment_and_add_data(inode& ino, mmif& mm, size_t size);
|
void segment_and_add_data(inode& ino, mmif& mm, size_t size);
|
||||||
size_t bloom_filter_size() const {
|
|
||||||
auto hash_count = pow2ceil(std::max<size_t>(1, cfg_.max_active_blocks)) *
|
static size_t bloom_filter_size(const block_manager::config& cfg) {
|
||||||
(block_size_ / window_step_);
|
auto hash_count = pow2ceil(std::max<size_t>(1, cfg.max_active_blocks)) *
|
||||||
return (1 << cfg_.bloom_filter_size) * hash_count;
|
(block_size(cfg) / window_step(cfg));
|
||||||
|
return (1 << cfg.bloom_filter_size) * hash_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t window_size(const block_manager::config& cfg) {
|
||||||
|
return cfg.blockhash_window_size > 0
|
||||||
|
? static_cast<size_t>(1) << cfg.blockhash_window_size
|
||||||
|
: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t window_step(const block_manager::config& cfg) {
|
||||||
|
return std::max<size_t>(1, window_size(cfg) >> cfg.window_increment_shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t block_size(const block_manager::config& cfg) {
|
||||||
|
return static_cast<size_t>(1) << cfg.block_size_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user