From 442e7b8fd5269c913358a9bd3b548ba972ecb24b Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 20 Nov 2023 15:55:59 +0100 Subject: [PATCH] refactor: use callback for delayed updating of physical block numbers --- include/dwarfs/filesystem_writer.h | 29 ++++++++++++++++++++++------- include/dwarfs/segmenter.h | 4 +++- src/dwarfs/filesystem_writer.cpp | 27 +++++++++++++++++++++------ src/dwarfs/scanner.cpp | 5 +++-- src/dwarfs/segmenter.cpp | 7 +++++-- test/segmenter_benchmark.cpp | 5 +++-- 6 files changed, 57 insertions(+), 20 deletions(-) diff --git a/include/dwarfs/filesystem_writer.h b/include/dwarfs/filesystem_writer.h index 969f43b6..e7070496 100644 --- a/include/dwarfs/filesystem_writer.h +++ b/include/dwarfs/filesystem_writer.h @@ -28,6 +28,8 @@ #include #include +#include + #include "dwarfs/compression_constraints.h" #include "dwarfs/fragment_category.h" #include "dwarfs/fstypes.h" @@ -44,6 +46,8 @@ class worker_group; class filesystem_writer { public: + using physical_block_cb_type = folly::Function; + filesystem_writer( std::ostream& os, logger& lgr, worker_group& wg, progress& prog, const block_compressor& schema_bc, const block_compressor& metadata_bc, @@ -69,10 +73,17 @@ class filesystem_writer { impl_->copy_header(header); } - uint32_t write_block(fragment_category::value_type cat, - std::shared_ptr&& data, - std::optional meta = std::nullopt) { - return impl_->write_block(cat, std::move(data), std::move(meta)); + void write_block(fragment_category cat, std::shared_ptr&& data, + physical_block_cb_type physical_block_cb, + std::optional meta = std::nullopt) { + impl_->write_block(cat, std::move(data), std::move(physical_block_cb), + std::move(meta)); + } + + void write_block(fragment_category::value_type cat, + std::shared_ptr&& data, + std::optional meta = std::nullopt) { + impl_->write_block(cat, std::move(data), std::move(meta)); } void write_metadata_v2_schema(std::shared_ptr&& data) { @@ -103,9 +114,13 @@ class filesystem_writer { get_compression_constraints(fragment_category::value_type cat, std::string const& metadata) const = 0; virtual void copy_header(std::span header) = 0; - virtual uint32_t write_block(fragment_category::value_type cat, - std::shared_ptr&& data, - std::optional meta) = 0; + virtual void + write_block(fragment_category cat, std::shared_ptr&& data, + physical_block_cb_type physical_block_cb, + std::optional meta) = 0; + virtual void write_block(fragment_category::value_type cat, + std::shared_ptr&& data, + std::optional meta) = 0; virtual void write_metadata_v2_schema(std::shared_ptr&& data) = 0; virtual void write_metadata_v2(std::shared_ptr&& data) = 0; diff --git a/include/dwarfs/segmenter.h b/include/dwarfs/segmenter.h index 090f9d8b..0ead74db 100644 --- a/include/dwarfs/segmenter.h +++ b/include/dwarfs/segmenter.h @@ -48,7 +48,9 @@ class segmenter { unsigned block_size_bits{22}; }; - using block_ready_cb = folly::Function)>; + using block_ready_cb = + folly::Function, + folly::Function physical_block_cb)>; segmenter(logger& lgr, progress& prog, std::shared_ptr blkmgr, config const& cfg, compression_constraints const& cc, diff --git a/src/dwarfs/filesystem_writer.cpp b/src/dwarfs/filesystem_writer.cpp index 1315b57f..795a5997 100644 --- a/src/dwarfs/filesystem_writer.cpp +++ b/src/dwarfs/filesystem_writer.cpp @@ -284,6 +284,8 @@ void fsblock::build_section_header(section_header_v2& sh, template class filesystem_writer_ final : public filesystem_writer::impl { public: + using physical_block_cb_type = filesystem_writer::physical_block_cb_type; + filesystem_writer_(logger& lgr, std::ostream& os, worker_group& wg, progress& prog, const block_compressor& schema_bc, const block_compressor& metadata_bc, @@ -298,9 +300,12 @@ class filesystem_writer_ final : public filesystem_writer::impl { get_compression_constraints(fragment_category::value_type cat, std::string const& metadata) const override; void copy_header(std::span header) override; - uint32_t write_block(fragment_category::value_type cat, - std::shared_ptr&& data, - std::optional meta) override; + void write_block(fragment_category cat, std::shared_ptr&& data, + physical_block_cb_type physical_block_cb, + std::optional meta) override; + void write_block(fragment_category::value_type cat, + std::shared_ptr&& data, + std::optional meta) override; void write_metadata_v2_schema(std::shared_ptr&& data) override; void write_metadata_v2(std::shared_ptr&& data) override; void write_compressed_section(section_type type, compression_type compression, @@ -566,11 +571,21 @@ void filesystem_writer_::copy_header( } template -uint32_t filesystem_writer_::write_block( +void filesystem_writer_::write_block( + fragment_category cat, std::shared_ptr&& data, + physical_block_cb_type physical_block_cb, std::optional meta) { + auto physical_block = + write_section(section_type::BLOCK, std::move(data), + compressor_for_category(cat.value()), std::move(meta)); + physical_block_cb(physical_block); +} + +template +void filesystem_writer_::write_block( fragment_category::value_type cat, std::shared_ptr&& data, std::optional meta) { - return write_section(section_type::BLOCK, std::move(data), - compressor_for_category(cat), std::move(meta)); + write_section(section_type::BLOCK, std::move(data), + compressor_for_category(cat), std::move(meta)); } template diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 18dc4cb3..e43a4eca 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -698,8 +698,9 @@ void scanner_::scan( auto seg = segmenter_factory_->create( category, cat_size, cc, blockmgr, - [category, meta, &fsw](auto block) { - return fsw.write_block(category.value(), std::move(block), meta); + [category, meta, &fsw](auto block, auto physical_block_cb) { + fsw.write_block(category, std::move(block), + std::move(physical_block_cb), meta); }); for (auto ino : span) { diff --git a/src/dwarfs/segmenter.cpp b/src/dwarfs/segmenter.cpp index 03fa9bef..ccdff23b 100644 --- a/src/dwarfs/segmenter.cpp +++ b/src/dwarfs/segmenter.cpp @@ -969,8 +969,11 @@ template void segmenter_::block_ready() { auto& block = blocks_.back(); block.finalize(stats_); - auto written_block_num = block_ready_(block.data()); - blkmgr_->set_written_block(block.num(), written_block_num); + block_ready_(block.data(), [blkmgr = blkmgr_, + logical_block_num = + block.num()](size_t physical_block_num) { + blkmgr->set_written_block(logical_block_num, physical_block_num); + }); ++prog_.block_count; } diff --git a/test/segmenter_benchmark.cpp b/test/segmenter_benchmark.cpp index f9ed8b0a..e66732dd 100644 --- a/test/segmenter_benchmark.cpp +++ b/test/segmenter_benchmark.cpp @@ -145,10 +145,11 @@ void run_segmenter_test(unsigned iters, unsigned granularity, std::vector> written; dwarfs::segmenter seg(lgr, prog, blkmgr, cfg, cc, total_size, - [&written](std::shared_ptr blk) { + [&written](std::shared_ptr blk, + auto physical_block_cb) { size_t num = written.size(); written.push_back(blk); - return num; + physical_block_cb(num); }); suspender.dismiss();