diff --git a/include/dwarfs/block_manager.h b/include/dwarfs/block_manager.h index 9576d3ab..69cb8f31 100644 --- a/include/dwarfs/block_manager.h +++ b/include/dwarfs/block_manager.h @@ -25,6 +25,7 @@ #include #include +#include "dwarfs/fragment_category.h" #include "dwarfs/gen-cpp2/metadata_types.h" namespace dwarfs { @@ -34,13 +35,17 @@ class block_manager { using chunk_type = thrift::metadata::chunk; size_t get_logical_block() const; - void set_written_block(size_t logical_block, size_t written_block); + void set_written_block(size_t logical_block, size_t written_block, + fragment_category::value_type category); void map_logical_blocks(std::vector& vec); + std::vector + get_written_block_categories() const; private: std::mutex mutable mx_; size_t mutable num_blocks_{0}; - std::vector> block_map_; + std::vector>> + block_map_; }; } // namespace dwarfs diff --git a/include/dwarfs/segmenter.h b/include/dwarfs/segmenter.h index 0ead74db..53542f17 100644 --- a/include/dwarfs/segmenter.h +++ b/include/dwarfs/segmenter.h @@ -48,9 +48,8 @@ class segmenter { unsigned block_size_bits{22}; }; - using block_ready_cb = - folly::Function, - folly::Function physical_block_cb)>; + using block_ready_cb = folly::Function, + size_t logical_block_num)>; segmenter(logger& lgr, progress& prog, std::shared_ptr blkmgr, config const& cfg, compression_constraints const& cc, diff --git a/src/dwarfs/block_manager.cpp b/src/dwarfs/block_manager.cpp index 6c1c8f80..e33244ab 100644 --- a/src/dwarfs/block_manager.cpp +++ b/src/dwarfs/block_manager.cpp @@ -35,13 +35,14 @@ size_t block_manager::get_logical_block() const { } void block_manager::set_written_block(size_t logical_block, - size_t written_block) { + size_t written_block, + fragment_category::value_type category) { std::lock_guard lock{mx_}; assert(logical_block < num_blocks_); if (block_map_.size() < num_blocks_) { block_map_.resize(num_blocks_); } - block_map_[logical_block] = written_block; + block_map_[logical_block] = 
std::make_pair(written_block, category); } void block_manager::map_logical_blocks(std::vector& vec) { @@ -49,8 +50,26 @@ void block_manager::map_logical_blocks(std::vector& vec) { for (auto& c : vec) { size_t block = c.get_block(); assert(block < num_blocks_); - c.block() = block_map_[block].value(); + c.block() = block_map_[block].value().first; } } +std::vector +block_manager::get_written_block_categories() const { + std::vector result; + + { + std::lock_guard lock{mx_}; + + result.resize(num_blocks_); + + for (auto& b : block_map_) { + auto& mapping = b.value(); + result[mapping.first] = mapping.second; + } + } + + return result; +} + } // namespace dwarfs diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index b3e6e397..dba6003a 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -700,9 +700,17 @@ void scanner_::scan( auto seg = segmenter_factory_->create( category, cat_size, cc, blockmgr, - [category, meta, &fsw](auto block, auto physical_block_cb) { - fsw.write_block(category, std::move(block), - std::move(physical_block_cb), meta); + [category, meta, blockmgr, &fsw](auto block, + auto logical_block_num) { + fsw.write_block( + category, std::move(block), + [blockmgr, logical_block_num, + category](auto physical_block_num) { + blockmgr->set_written_block(logical_block_num, + physical_block_num, + category.value()); + }, + meta); }); for (auto ino : span) { @@ -862,6 +870,33 @@ void scanner_::scan( mv2.preferred_path_separator() = static_cast(std::filesystem::path::preferred_separator); + if (auto catmgr = options_.inode.categorizer_mgr) { + std::unordered_map + category_indices; + std::vector category_names; + + category_indices.reserve(frag_info.info.size()); + category_names.reserve(frag_info.info.size()); + + for (auto const& ci : frag_info.info) { + auto [it, inserted] = + category_indices.emplace(ci.category, category_names.size()); + if (inserted) { + category_names.emplace_back(catmgr->category_name(ci.category)); + } + } + + 
auto written_categories = blockmgr->get_written_block_categories(); + + std::transform(written_categories.begin(), written_categories.end(), + written_categories.begin(), + [&](auto const& cat) { return category_indices.at(cat); }); + + mv2.category_names() = std::move(category_names); + mv2.block_categories() = std::move(written_categories); + } + auto [schema, data] = metadata_v2::freeze(mv2); LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size()); diff --git a/src/dwarfs/segmenter.cpp b/src/dwarfs/segmenter.cpp index ccdff23b..f852f005 100644 --- a/src/dwarfs/segmenter.cpp +++ b/src/dwarfs/segmenter.cpp @@ -969,11 +969,7 @@ template void segmenter_::block_ready() { auto& block = blocks_.back(); block.finalize(stats_); - block_ready_(block.data(), [blkmgr = blkmgr_, - logical_block_num = - block.num()](size_t physical_block_num) { - blkmgr->set_written_block(logical_block_num, physical_block_num); - }); + block_ready_(block.data(), block.num()); ++prog_.block_count; } diff --git a/test/segmenter_benchmark.cpp b/test/segmenter_benchmark.cpp index e66732dd..2c3bd84f 100644 --- a/test/segmenter_benchmark.cpp +++ b/test/segmenter_benchmark.cpp @@ -144,13 +144,14 @@ void run_segmenter_test(unsigned iters, unsigned granularity, std::vector> written; - dwarfs::segmenter seg(lgr, prog, blkmgr, cfg, cc, total_size, - [&written](std::shared_ptr blk, - auto physical_block_cb) { - size_t num = written.size(); - written.push_back(blk); - physical_block_cb(num); - }); + dwarfs::segmenter seg( + lgr, prog, blkmgr, cfg, cc, total_size, + [&written, blkmgr](std::shared_ptr blk, + auto logical_block_num) { + auto physical_block_num = written.size(); + written.push_back(blk); + blkmgr->set_written_block(logical_block_num, physical_block_num, 0); + }); suspender.dismiss(); diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index 0ff3b2bb..bd3faa56 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -361,8 +361,27 @@ struct metadata { 
// fields added with dwarfs-0.7.3, file system version 2.5 // //=========================================================// + // We don't need to increment the file system minor version + // as file systems created with this new version are still + // readable by older binaries as long as they don't use any + // unsupported features (e.g. FLAC compression). + // The set of features used in this file system image. As long // as an older binary supports all features, it will be able // to use images created with newer versions. 27: optional set features + + //=========================================================// + // fields added with dwarfs-0.8.0, file system version 2.5 // + //=========================================================// + + // The set of categories used in this file system image. Used + // for display and for selecting compression algorithms when + // recompressing the image. + 28: optional list category_names + + // The category of each block in the file system image. The + // index into this list is the block number and the value + // is an index into `category_names`. + 29: optional list block_categories }