mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-14 14:59:52 -04:00
chore: add segmenter memory usage estimation
This commit is contained in:
parent
2e78d049a1
commit
44bb5d7357
@ -61,6 +61,9 @@ class segmenter {
|
|||||||
using block_ready_cb =
|
using block_ready_cb =
|
||||||
std::function<void(shared_byte_buffer, size_t logical_block_num)>;
|
std::function<void(shared_byte_buffer, size_t logical_block_num)>;
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
estimate_memory_usage(config const& cfg, compression_constraints const& cc);
|
||||||
|
|
||||||
segmenter(logger& lgr, writer_progress& prog,
|
segmenter(logger& lgr, writer_progress& prog,
|
||||||
std::shared_ptr<internal::block_manager> blkmgr, config const& cfg,
|
std::shared_ptr<internal::block_manager> blkmgr, config const& cfg,
|
||||||
compression_constraints const& cc, size_t total_size,
|
compression_constraints const& cc, size_t total_size,
|
||||||
|
@ -67,6 +67,11 @@ class segmenter_factory {
|
|||||||
|
|
||||||
size_t get_block_size() const { return impl_->get_block_size(); }
|
size_t get_block_size() const { return impl_->get_block_size(); }
|
||||||
|
|
||||||
|
// Forwards the memory usage estimate request for category `cat` under
// compression constraints `cc` to the factory implementation and returns
// whatever the implementation reports.
size_t estimate_memory_usage(fragment_category cat,
                             compression_constraints const& cc) const {
  auto const estimate = impl_->estimate_memory_usage(cat, cc);
  return estimate;
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
public:
|
public:
|
||||||
virtual ~impl() = default;
|
virtual ~impl() = default;
|
||||||
@ -76,6 +81,9 @@ class segmenter_factory {
|
|||||||
std::shared_ptr<internal::block_manager> blkmgr,
|
std::shared_ptr<internal::block_manager> blkmgr,
|
||||||
segmenter::block_ready_cb block_ready) const = 0;
|
segmenter::block_ready_cb block_ready) const = 0;
|
||||||
virtual size_t get_block_size() const = 0;
|
virtual size_t get_block_size() const = 0;
|
||||||
|
virtual size_t
|
||||||
|
estimate_memory_usage(fragment_category cat,
|
||||||
|
compression_constraints const& cc) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -814,6 +814,11 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
auto cc = fsw.get_compression_constraints(category.value(), meta);
|
auto cc = fsw.get_compression_constraints(category.value(), meta);
|
||||||
|
|
||||||
|
LOG_DEBUG << category_prefix(catmgr, category)
|
||||||
|
<< "segmenter will use up to "
|
||||||
|
<< size_with_unit(
|
||||||
|
segmenter_factory_.estimate_memory_usage(category, cc));
|
||||||
|
|
||||||
wg_blockify.add_job([this, catmgr, blockmgr, category, cat_size, meta, cc,
|
wg_blockify.add_job([this, catmgr, blockmgr, category, cat_size, meta, cc,
|
||||||
&prog, &fsw, &im, &wg_ordering] {
|
&prog, &fsw, &im, &wg_ordering] {
|
||||||
auto span = im.ordered_span(category, wg_ordering);
|
auto span = im.ordered_span(category, wg_ordering);
|
||||||
|
@ -711,6 +711,16 @@ class segmenter_progress : public progress::context {
|
|||||||
size_t const bytes_total_;
|
size_t const bytes_total_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the block hash window size for `cfg`, i.e. 2^blockhash_window_size,
// or 0 when `blockhash_window_size` is not greater than zero.
DWARFS_FORCE_INLINE size_t window_size(segmenter::config const& cfg) {
  if (cfg.blockhash_window_size > 0) {
    return static_cast<size_t>(1) << cfg.blockhash_window_size;
  }

  return 0;
}
|
||||||
|
|
||||||
|
// Returns the window step for `cfg`: the window size shifted right by
// `window_increment_shift`, but never less than 1.
DWARFS_FORCE_INLINE size_t window_step(segmenter::config const& cfg) {
  size_t const shifted = window_size(cfg) >> cfg.window_increment_shift;
  return shifted > 0 ? shifted : 1;
}
|
||||||
|
|
||||||
template <typename LoggerPolicy, typename SegmentingPolicy>
|
template <typename LoggerPolicy, typename SegmentingPolicy>
|
||||||
class segmenter_ final : public segmenter::impl, private SegmentingPolicy {
|
class segmenter_ final : public segmenter::impl, private SegmentingPolicy {
|
||||||
private:
|
private:
|
||||||
@ -794,16 +804,6 @@ class segmenter_ final : public segmenter::impl, private SegmentingPolicy {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DWARFS_FORCE_INLINE size_t window_size(segmenter::config const& cfg) {
|
|
||||||
return cfg.blockhash_window_size > 0
|
|
||||||
? static_cast<size_t>(1) << cfg.blockhash_window_size
|
|
||||||
: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static DWARFS_FORCE_INLINE size_t window_step(segmenter::config const& cfg) {
|
|
||||||
return std::max<size_t>(1, window_size(cfg) >> cfg.window_increment_shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t DWARFS_FORCE_INLINE
|
size_t DWARFS_FORCE_INLINE
|
||||||
block_size_in_frames(segmenter::config const& cfg) const {
|
block_size_in_frames(segmenter::config const& cfg) const {
|
||||||
auto raw_size = static_cast<size_t>(1) << cfg.block_size_bits;
|
auto raw_size = static_cast<size_t>(1) << cfg.block_size_bits;
|
||||||
@ -1419,4 +1419,37 @@ segmenter::segmenter(logger& lgr, writer_progress& prog,
|
|||||||
std::move(blkmgr), cfg, cc, total_size,
|
std::move(blkmgr), cfg, cc, total_size,
|
||||||
std::move(block_ready))) {}
|
std::move(block_ready))) {}
|
||||||
|
|
||||||
|
/**
 * Estimate the memory used by a segmenter's lookup structures.
 *
 * The estimate covers the per-block offset storage and the bloom filters;
 * it deliberately does not include the block data buffers themselves.
 *
 * \param cfg  Segmenter configuration (active block count, window size,
 *             window increment shift, bloom filter size, block size bits).
 * \param cc   Compression constraints; only `granularity` is consulted and
 *             defaults to 1 when unset.
 *
 * \return Estimated number of bytes, or 0 if segmenting is disabled
 *         (`max_active_blocks == 0` or `blockhash_window_size == 0`).
 */
size_t segmenter::estimate_memory_usage(config const& cfg,
                                        compression_constraints const& cc) {
  if (cfg.max_active_blocks == 0 or cfg.blockhash_window_size == 0) {
    return 0;
  }

  // NOTE(review): 8 / 0.4375 is ~18.3, rounded up to 19; presumably the
  // divisor is the minimum load factor of the offset container -- confirm.
  static constexpr size_t kWorstCaseBytesPerOffset = 19; // 8 bytes / 0.4375

  size_t const granularity = cc.granularity.value_or(1);
  size_t const block_size_in_frames =
      (static_cast<size_t>(1) << cfg.block_size_bits) / granularity;

  size_t const win_size = internal::window_size(cfg);
  size_t const win_step = internal::window_step(cfg);

  // All quantities are unsigned, so the subtractions below must saturate at
  // zero: a block that is smaller (in frames) than the window overlap holds
  // no offsets at all. Without the guards the difference would wrap around
  // and produce an absurdly large estimate.
  size_t const win_overlap = win_size > win_step ? win_size - win_step : 0;
  size_t const max_offset_count =
      block_size_in_frames > win_overlap
          ? (block_size_in_frames - win_overlap) / win_step
          : 0;

  // Filter size in bits, converted to bytes.
  size_t const bloom_filter_mem =
      ((static_cast<size_t>(1) << cfg.bloom_filter_size) *
       std::bit_ceil(cfg.max_active_blocks *
                     (block_size_in_frames / win_step))) /
      8;

  // Single active block uses memory for:
  // - offsets
  // - bloom filter (only with MultiBlockSegmentationPolicy)
  // We do *not* consider the memory for the block data buffer here
  size_t const active_block_mem_usage =
      (max_offset_count * kWorstCaseBytesPerOffset) +
      (cfg.max_active_blocks > 1 ? bloom_filter_mem : 0);

  // One additional bloom filter is accounted for on top of the per-block
  // usage.
  return cfg.max_active_blocks * active_block_mem_usage + bloom_filter_mem;
}
|
||||||
|
|
||||||
} // namespace dwarfs::writer
|
} // namespace dwarfs::writer
|
||||||
|
@ -42,6 +42,22 @@ class segmenter_factory_ final : public segmenter_factory::impl {
|
|||||||
compression_constraints const& cc,
|
compression_constraints const& cc,
|
||||||
std::shared_ptr<block_manager> blkmgr,
|
std::shared_ptr<block_manager> blkmgr,
|
||||||
segmenter::block_ready_cb block_ready) const override {
|
segmenter::block_ready_cb block_ready) const override {
|
||||||
|
return {lgr_, prog_, std::move(blkmgr), make_segmenter_config(cat),
|
||||||
|
cc, cat_size, std::move(block_ready)};
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_block_size() const override {
|
||||||
|
return static_cast<size_t>(1) << cfg_.block_size_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
estimate_memory_usage(fragment_category cat,
|
||||||
|
compression_constraints const& cc) const override {
|
||||||
|
return segmenter::estimate_memory_usage(make_segmenter_config(cat), cc);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
segmenter::config make_segmenter_config(fragment_category cat) const {
|
||||||
segmenter::config cfg;
|
segmenter::config cfg;
|
||||||
|
|
||||||
if (catmgr_) {
|
if (catmgr_) {
|
||||||
@ -54,15 +70,9 @@ class segmenter_factory_ final : public segmenter_factory::impl {
|
|||||||
cfg.bloom_filter_size = cfg_.bloom_filter_size.get(cat);
|
cfg.bloom_filter_size = cfg_.bloom_filter_size.get(cat);
|
||||||
cfg.block_size_bits = cfg_.block_size_bits;
|
cfg.block_size_bits = cfg_.block_size_bits;
|
||||||
|
|
||||||
return {lgr_, prog_, std::move(blkmgr), cfg,
|
return cfg;
|
||||||
cc, cat_size, std::move(block_ready)};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_block_size() const override {
|
|
||||||
return static_cast<size_t>(1) << cfg_.block_size_bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
logger& lgr_;
|
logger& lgr_;
|
||||||
writer_progress& prog_;
|
writer_progress& prog_;
|
||||||
std::shared_ptr<categorizer_manager> catmgr_;
|
std::shared_ptr<categorizer_manager> catmgr_;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user