From 2c91e8011983b87efab51342319348765ece1feb Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 17 Dec 2023 09:14:09 +0100 Subject: [PATCH] feat(mkdwarfs): support for rewriting file systems with categories At the same time, this finally adds multi-threaded decompression when rewriting a file system. --- include/dwarfs/filesystem_v2.h | 13 +- include/dwarfs/filesystem_writer.h | 31 ++++ include/dwarfs/options.h | 4 +- src/dwarfs/filesystem_v2.cpp | 203 +++++++++++++--------- src/dwarfs/filesystem_writer.cpp | 262 +++++++++++++++++++++++++++-- src/mkdwarfs_main.cpp | 75 ++++++--- test/dwarfs_compat.cpp | 30 ++-- 7 files changed, 478 insertions(+), 140 deletions(-) diff --git a/include/dwarfs/filesystem_v2.h b/include/dwarfs/filesystem_v2.h index d00a8b2f..e6122aaa 100644 --- a/include/dwarfs/filesystem_v2.h +++ b/include/dwarfs/filesystem_v2.h @@ -67,10 +67,6 @@ class filesystem_v2 { filesystem_options const& options, int inode_offset = 0, std::shared_ptr perfmon = nullptr); - static void - rewrite_deprecated(logger& lgr, progress& prog, std::shared_ptr mm, - filesystem_writer& writer, rewrite_options const& opts); - static int identify(logger& lgr, std::shared_ptr mm, std::ostream& os, int detail_level = 0, size_t num_readers = 1, @@ -184,6 +180,12 @@ class filesystem_v2 { return impl_->get_all_block_categories(); } + void rewrite(progress& prog, filesystem_writer& writer, + category_resolver const& cat_resolver, + rewrite_options const& opts) const { + return impl_->rewrite(prog, writer, cat_resolver, opts); + } + class impl { public: virtual ~impl() = default; @@ -226,6 +228,9 @@ class filesystem_v2 { virtual history const& get_history() const = 0; virtual folly::dynamic get_inode_info(inode_view entry) const = 0; virtual std::vector get_all_block_categories() const = 0; + virtual void rewrite(progress& prog, filesystem_writer& writer, + category_resolver const& cat_resolver, + rewrite_options const& opts) const = 0; }; private: diff --git a/include/dwarfs/filesystem_writer.h b/include/dwarfs/filesystem_writer.h index 7e112717..37946644 100644 --- a/include/dwarfs/filesystem_writer.h +++ b/include/dwarfs/filesystem_writer.h @@ -71,6 +71,12 @@ class filesystem_writer { return impl_->get_compression_constraints(cat, metadata); } + block_compressor const& get_compressor( + section_type type, + std::optional cat = std::nullopt) const { + return impl_->get_compressor(type, cat); + } + void configure(std::vector const& expected_categories, size_t max_active_slots) { impl_->configure(expected_categories, max_active_slots); @@ -80,6 +86,8 @@ class filesystem_writer { impl_->copy_header(header); } + // TODO: check which write_block() API is actually used + void write_block(fragment_category cat, std::shared_ptr&& data, physical_block_cb_type physical_block_cb, std::optional meta = std::nullopt) { @@ -107,6 +115,19 @@ class filesystem_writer { impl_->write_history(std::move(data)); } + void check_block_compression( + compression_type compression, std::span data, + std::optional cat = std::nullopt) { + impl_->check_block_compression(compression, data, cat); + } + + void write_section( + section_type type, compression_type compression, + std::span data, + std::optional cat = std::nullopt) { + impl_->write_section(type, compression, data, cat); + } + void write_compressed_section(section_type type, compression_type compression, std::span data) { impl_->write_compressed_section(type, compression, data); @@ -126,6 +147,9 @@ class filesystem_writer { virtual compression_constraints get_compression_constraints(fragment_category::value_type cat, std::string const& metadata) const = 0; + virtual block_compressor const& + get_compressor(section_type type, + std::optional cat) const = 0; virtual void configure(std::vector const& expected_categories, size_t max_active_slots) = 0; @@ -142,6 +166,13 @@ class filesystem_writer { write_metadata_v2_schema(std::shared_ptr&& data) = 0; virtual void write_metadata_v2(std::shared_ptr&& data) = 0; virtual void write_history(std::shared_ptr&& data) = 0; + virtual void check_block_compression( + compression_type compression, std::span data, + std::optional cat) = 0; + virtual void + write_section(section_type type, compression_type compression, + std::span data, + std::optional cat) = 0; virtual void write_compressed_section(section_type type, compression_type compression, std::span data) = 0; diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 72fa8c7d..483670e1 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -136,7 +136,9 @@ struct scanner_options { struct rewrite_options { bool recompress_block{false}; bool recompress_metadata{false}; - file_off_t image_offset{filesystem_options::IMAGE_OFFSET_AUTO}; + bool enable_history{true}; + std::optional> command_line_arguments; + history_config history; }; std::ostream& operator<<(std::ostream& os, file_order_mode mode); diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 72696600..ba72975d 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -34,6 +34,7 @@ #include "dwarfs/block_compressor.h" #include "dwarfs/block_data.h" #include "dwarfs/categorizer.h" +#include "dwarfs/category_resolver.h" #include "dwarfs/error.h" #include "dwarfs/filesystem_v2.h" #include "dwarfs/filesystem_writer.h" @@ -393,6 +394,9 @@ class filesystem_ final : public filesystem_v2::impl { std::vector get_all_block_categories() const override { return meta_.get_all_block_categories(); } + void rewrite(progress& prog, filesystem_writer& writer, + category_resolver const& cat_resolver, + rewrite_options const& opts) const override; private: filesystem_info const& get_info() const; @@ -545,6 +549,125 @@ filesystem_::filesystem_( } } +template +void filesystem_::rewrite(progress& prog, + filesystem_writer& writer, + category_resolver const& cat_resolver, + rewrite_options const& opts) const { + if (opts.recompress_block) { + size_t block_no{0}; + parser_.rewind(); + + while (auto s = parser_.next_section()) { + if (s->type() == section_type::BLOCK) { + if (auto catstr = meta_.get_block_category(block_no)) { + if (auto cat = cat_resolver.category_value(catstr.value())) { + writer.check_block_compression(s->compression(), s->data(*mm_), + cat); + } + } + ++block_no; + } + } + } + + prog.original_size = mm_->size(); + prog.filesystem_size = mm_->size(); + prog.block_count = num_blocks(); + + if (header_) { + writer.copy_header(*header_); + } + + size_t block_no{0}; + + auto log_recompress = + [&](const auto& s, + std::optional const& cat = + std::nullopt) { + std::string catinfo; + if (cat) { + catinfo = fmt::format(", {}", cat_resolver.category_name(*cat)); + } + LOG_VERBOSE << "recompressing " << get_section_name(s->type()) << " (" + << get_compression_name(s->compression()) << catinfo + << ") using '" + << writer.get_compressor(s->type(), cat).describe() << "'"; + }; + + auto copy_compressed = [&](const auto& s) { + LOG_VERBOSE << "copying " << get_section_name(s->type()) << " (" + << get_compression_name(s->compression()) << ")"; + writer.write_compressed_section(s->type(), s->compression(), s->data(*mm_)); + }; + + parser_.rewind(); + + while (auto s = parser_.next_section()) { + switch (s->type()) { + case section_type::BLOCK: + if (opts.recompress_block) { + std::optional cat; + + if (auto catstr = meta_.get_block_category(block_no)) { + cat = cat_resolver.category_value(catstr.value()); + if (!cat) { + LOG_ERROR << "unknown category '" << catstr.value() + << "' for block " << block_no; + } + } + + log_recompress(s, cat); + + writer.write_section(section_type::BLOCK, s->compression(), + s->data(*mm_), cat); + } else { + copy_compressed(s); + } + ++block_no; + break; + + case section_type::METADATA_V2_SCHEMA: + case section_type::METADATA_V2: + if (opts.recompress_metadata) { + log_recompress(s); + writer.write_section(s->type(), s->compression(), s->data(*mm_)); + } else { + copy_compressed(s); + } + break; + + case section_type::HISTORY: + if (opts.enable_history) { + history hist{opts.history}; + hist.parse(history_.serialize()); + hist.append(opts.command_line_arguments); + + LOG_VERBOSE << "updating " << get_section_name(s->type()) << " (" + << get_compression_name(s->compression()) + << "), compressing using '" + << writer.get_compressor(s->type()).describe() << "'"; + + writer.write_history(std::make_shared(hist.serialize())); + } else { + LOG_VERBOSE << "removing " << get_section_name(s->type()); + } + break; + + case section_type::SECTION_INDEX: + // this will be automatically added by the filesystem_writer + break; + + default: + // verbatim copy everything else + copy_compressed(s); + break; + } + } + + writer.flush(); +} + template void filesystem_::dump(std::ostream& os, int detail_level) const { if (detail_level > 1) { @@ -718,86 +841,6 @@ filesystem_v2::filesystem_v2(logger& lgr, std::shared_ptr mm, logger_policies>( lgr, std::move(mm), options, inode_offset, std::move(perfmon))) {} -void filesystem_v2::rewrite_deprecated(logger& lgr, progress& prog, - std::shared_ptr mm, - filesystem_writer& writer, - rewrite_options const& opts) { - // TODO: - LOG_PROXY(debug_logger_policy, lgr); - filesystem_parser parser(mm, opts.image_offset); - - if (auto hdr = parser.header()) { - writer.copy_header(*hdr); - } - - std::vector section_types; - section_map sections; - - while (auto s = parser.next_section()) { - check_section_logger(lgr, *s); - - if (!s->check_fast(*mm)) { - DWARFS_THROW(runtime_error, "checksum error in section: " + s->name()); - } - if (!s->verify(*mm)) { - DWARFS_THROW(runtime_error, - "integrity check error in section: " + s->name()); - } - prog.original_size += s->length(); - prog.filesystem_size += s->length(); - if (s->type() == section_type::BLOCK) { - ++prog.block_count; - } else if (s->type() != section_type::SECTION_INDEX) { - auto& secvec = sections[s->type()]; - if (secvec.empty()) { - section_types.push_back(s->type()); - } - secvec.push_back(*s); - } - } - - std::vector schema_raw; - std::vector meta_raw; - - // force metadata check - make_metadata(lgr, mm, sections, schema_raw, meta_raw, metadata_options(), 0, - true, mlock_mode::NONE, !parser.has_checksums()); - - parser.rewind(); - - while (auto s = parser.next_section()) { - // TODO: multi-thread this? - if (s->type() == section_type::BLOCK) { - if (opts.recompress_block) { - auto block = - std::make_shared(block_decompressor::decompress( - s->compression(), mm->as(s->start()), s->length())); - // TODO: re-write with different categories - writer.write_block(categorizer_manager::default_category().value(), - std::move(block)); - } else { - writer.write_compressed_section(s->type(), s->compression(), - s->data(*mm)); - } - } - } - - if (opts.recompress_metadata) { - writer.write_metadata_v2_schema( - std::make_shared(std::move(schema_raw))); - writer.write_metadata_v2(std::make_shared(std::move(meta_raw))); - } else { - for (auto type : section_types) { - auto& secvec = DWARFS_NOTHROW(sections.at(type)); - for (auto& sec : secvec) { - writer.write_compressed_section(type, sec.compression(), sec.data(*mm)); - } - } - } - - writer.flush(); -} - int filesystem_v2::identify(logger& lgr, std::shared_ptr mm, std::ostream& os, int detail_level, size_t num_readers, bool check_integrity, diff --git a/src/dwarfs/filesystem_writer.cpp b/src/dwarfs/filesystem_writer.cpp index 278065d5..9701dbec 100644 --- a/src/dwarfs/filesystem_writer.cpp +++ b/src/dwarfs/filesystem_writer.cpp @@ -36,6 +36,7 @@ #include "dwarfs/block_compressor.h" #include "dwarfs/block_data.h" #include "dwarfs/checksum.h" +#include "dwarfs/compression_metadata_requirements.h" #include "dwarfs/filesystem_writer.h" #include "dwarfs/fstypes.h" #include "dwarfs/logger.h" @@ -85,6 +86,10 @@ class fsblock { fsblock(section_type type, compression_type compression, std::span data); + fsblock(section_type type, block_compressor const& bc, + std::span data, compression_type data_comp_type, + std::shared_ptr pctx); + void compress(worker_group& wg, std::optional meta = std::nullopt) { impl_->compress(wg, std::move(meta)); @@ -292,6 +297,112 @@ class compressed_fsblock : public fsblock::impl { section_header_v2 header_; }; +class rewritten_fsblock : public fsblock::impl { + public: + rewritten_fsblock(section_type type, block_compressor const& bc, + std::span data, + compression_type data_comp_type, + std::shared_ptr pctx) + : type_{type} + , bc_{bc} + , data_{data} + , comp_type_{bc_.type()} + , pctx_{std::move(pctx)} + , data_comp_type_{data_comp_type} {} + + void compress(worker_group& wg, std::optional meta) override { + std::promise prom; + future_ = prom.get_future(); + + wg.add_job([this, prom = std::move(prom), + meta = std::move(meta)]() mutable { + // TODO: we don't have to do this for uncompressed blocks + std::vector block; + block_decompressor bd(data_comp_type_, data_.data(), data_.size(), block); + bd.decompress_frame(bd.uncompressed_size()); + + if (!meta) { + meta = bd.metadata(); + } + + pctx_->bytes_in += block.size(); // TODO: data_.size()? + + try { + if (meta) { + block = bc_.compress(block, *meta); + } else { + block = bc_.compress(block); + } + } catch (bad_compression_ratio_error const&) { + comp_type_ = compression_type::NONE; + } + + pctx_->bytes_out += block.size(); + + { + std::lock_guard lock(mx_); + block_data_.swap(block); + } + + prom.set_value(); + }); + } + + void wait_until_compressed() override { future_.wait(); } + + section_type type() const override { return type_; } + + compression_type compression() const override { return comp_type_; } + + std::string description() const override { return bc_.describe(); } + + std::span data() const override { return block_data_; } + + size_t uncompressed_size() const override { return data_.size(); } + + size_t size() const override { + std::lock_guard lock(mx_); + return block_data_.size(); + } + + void set_block_no(uint32_t number) override { + { + std::lock_guard lock(mx_); + if (number_) { + DWARFS_THROW(runtime_error, "block number already set"); + } + number_ = number; + } + } + + uint32_t block_no() const override { + std::lock_guard lock(mx_); + return number_.value(); + } + + section_header_v2 const& header() const override { + std::lock_guard lock(mx_); + if (!header_) { + header_ = section_header_v2{}; + fsblock::build_section_header(*header_, *this); + } + return header_.value(); + } + + private: + const section_type type_; + block_compressor const& bc_; + mutable std::recursive_mutex mx_; + std::span data_; + std::vector block_data_; + std::future future_; + std::optional number_; + std::optional mutable header_; + compression_type comp_type_; + std::shared_ptr pctx_; + compression_type const data_comp_type_; +}; + fsblock::fsblock(section_type type, block_compressor const& bc, std::shared_ptr&& data, std::shared_ptr pctx, @@ -304,6 +415,12 @@ fsblock::fsblock(section_type type, compression_type compression, std::span data) : impl_(std::make_unique(type, compression, data)) {} +fsblock::fsblock(section_type type, block_compressor const& bc, + std::span data, compression_type data_comp_type, + std::shared_ptr pctx) + : impl_(std::make_unique(type, bc, data, data_comp_type, + std::move(pctx))) {} + void fsblock::build_section_header(section_header_v2& sh, fsblock::impl const& fsb) { auto range = fsb.data(); @@ -348,6 +465,9 @@ class filesystem_writer_ final : public filesystem_writer::impl { compression_constraints get_compression_constraints(fragment_category::value_type cat, std::string const& metadata) const override; + block_compressor const& get_compressor( + section_type type, + std::optional cat) const override; void configure(std::vector const& expected_categories, size_t max_active_slots) override; void copy_header(std::span header) override; @@ -361,6 +481,12 @@ class filesystem_writer_ final : public filesystem_writer::impl { void write_metadata_v2_schema(std::shared_ptr&& data) override; void write_metadata_v2(std::shared_ptr&& data) override; void write_history(std::shared_ptr&& data) override; + void check_block_compression( + compression_type compression, std::span data, + std::optional cat) override; + void write_section(section_type type, compression_type compression, + std::span data, + std::optional cat) override; void write_compressed_section(section_type type, compression_type compression, std::span data) override; void flush() override; @@ -379,9 +505,9 @@ class filesystem_writer_ final : public filesystem_writer::impl { block_compressor const& bc, std::optional meta, physical_block_cb_type physical_block_cb); void on_block_merged(block_holder_type holder); - void write_section(section_type type, std::shared_ptr&& data, - block_compressor const& bc, - std::optional meta = std::nullopt); + void write_section_impl(section_type type, std::shared_ptr&& data, + block_compressor const& bc, + std::optional meta = std::nullopt); void write(fsblock const& fsb); void write(const char* data, size_t size); template @@ -415,6 +541,8 @@ class filesystem_writer_ final : public filesystem_writer::impl { std::unique_ptr merger_; }; +// TODO: Maybe we can factor out the logic to find the right compressor +// into something that gets passed a (section_type, category) pair? template filesystem_writer_::filesystem_writer_( logger& lgr, std::ostream& os, worker_group& wg, progress& prog, @@ -430,8 +558,7 @@ filesystem_writer_::filesystem_writer_( , history_bc_(history_bc) , options_(options) , LOG_PROXY_INIT(lgr) - , flush_(false) - , writer_thread_(&filesystem_writer_::writer_thread, this) { + , flush_{true} { if (header_) { if (options_.remove_header) { LOG_WARN << "header will not be written because remove_header is set"; @@ -440,6 +567,30 @@ filesystem_writer_::filesystem_writer_( header_size_ = os_.tellp(); } } + + auto check_compressor = [](std::string_view name, + block_compressor const& bc) { + if (auto reqstr = bc.metadata_requirements(); !reqstr.empty()) { + try { + auto req = compression_metadata_requirements{reqstr}; + req.check(std::nullopt); + } catch (std::exception const& e) { + auto msg = fmt::format( + "cannot use '{}' for {} compression because compression " + "metadata requirements are not met: {}", + bc.describe(), name, e.what()); + DWARFS_THROW(runtime_error, msg); + } + } + }; + + check_compressor("schema", schema_bc); + check_compressor("metadata", metadata_bc); + check_compressor("history", history_bc); + + // TODO: the whole flush & thread thing needs to be revisited + flush_ = false; + writer_thread_ = std::thread(&filesystem_writer_::writer_thread, this); } template @@ -612,7 +763,7 @@ void filesystem_writer_::finish_category(fragment_category cat) { } template -void filesystem_writer_::write_section( +void filesystem_writer_::write_section_impl( section_type type, std::shared_ptr&& data, block_compressor const& bc, std::optional meta) { uint32_t number; @@ -624,10 +775,6 @@ void filesystem_writer_::write_section( pctx_ = prog_.create_context(); } - while (mem_used() > options_.max_queue_size) { - cond_.wait(lock); - } - auto fsb = std::make_unique(type, bc, std::move(data), pctx_); number = section_number_; @@ -642,6 +789,66 @@ void filesystem_writer_::write_section( cond_.notify_one(); } +template +void filesystem_writer_::check_block_compression( + compression_type compression, std::span data, + std::optional cat) { + block_compressor const* bc{nullptr}; + + if (cat) { + bc = &compressor_for_category(*cat); + } else { + bc = &default_bc_.value(); + } + + if (auto reqstr = bc->metadata_requirements(); !reqstr.empty()) { + auto req = compression_metadata_requirements{reqstr}; + + std::vector tmp; + block_decompressor bd(compression, data.data(), data.size(), tmp); + + try { + req.check(bd.metadata()); + } catch (std::exception const& e) { + auto msg = fmt::format( + "cannot compress {} compressed block with compressor '{}' because " + "the following metadata requirements are not met: {}", + get_compression_name(compression), bc->describe(), e.what()); + DWARFS_THROW(runtime_error, msg); + } + } +} + +template +void filesystem_writer_::write_section( + section_type type, compression_type compression, + std::span data, + std::optional cat) { + { + std::unique_lock lock(mx_); + + if (!pctx_) { + pctx_ = prog_.create_context(); + } + + // TODO: do we still need this with the merger in place? + while (mem_used() > options_.max_queue_size) { + cond_.wait(lock); + } + + auto& bc = get_compressor(type, cat); + + auto fsb = std::make_unique(type, bc, data, compression, pctx_); + + fsb->set_block_no(section_number_++); + fsb->compress(wg_); + + queue_.emplace_back(std::move(fsb)); + } + + cond_.notify_one(); +} + template void filesystem_writer_::write_compressed_section( section_type type, compression_type compression, @@ -687,6 +894,30 @@ auto filesystem_writer_::get_compression_constraints( return compressor_for_category(cat).get_compression_constraints(metadata); } +template +block_compressor const& filesystem_writer_::get_compressor( + section_type type, std::optional cat) const { + switch (type) { + case section_type::METADATA_V2_SCHEMA: + return schema_bc_; + + case section_type::METADATA_V2: + return metadata_bc_; + + case section_type::HISTORY: + return history_bc_; + + default: + break; + } + + if (cat) { + return compressor_for_category(*cat); + } + + return default_bc_.value(); +} + template void filesystem_writer_::configure( std::vector const& expected_categories, @@ -726,26 +957,27 @@ template void filesystem_writer_::write_block( fragment_category::value_type cat, std::shared_ptr&& data, std::optional meta) { - write_section(section_type::BLOCK, std::move(data), - compressor_for_category(cat), std::move(meta)); + write_section_impl(section_type::BLOCK, std::move(data), + compressor_for_category(cat), std::move(meta)); } template void filesystem_writer_::write_metadata_v2_schema( std::shared_ptr&& data) { - write_section(section_type::METADATA_V2_SCHEMA, std::move(data), schema_bc_); + write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data), + schema_bc_); } template void filesystem_writer_::write_metadata_v2( std::shared_ptr&& data) { - write_section(section_type::METADATA_V2, std::move(data), metadata_bc_); + write_section_impl(section_type::METADATA_V2, std::move(data), metadata_bc_); } template void filesystem_writer_::write_history( std::shared_ptr&& data) { - write_section(section_type::HISTORY, std::move(data), history_bc_); + write_section_impl(section_type::HISTORY, std::move(data), history_bc_); } template diff --git a/src/mkdwarfs_main.cpp b/src/mkdwarfs_main.cpp index 791a7444..73ac8600 100644 --- a/src/mkdwarfs_main.cpp +++ b/src/mkdwarfs_main.cpp @@ -59,6 +59,7 @@ #include "dwarfs/console_writer.h" #include "dwarfs/entry.h" #include "dwarfs/error.h" +#include "dwarfs/filesystem_block_category_resolver.h" #include "dwarfs/filesystem_v2.h" #include "dwarfs/filesystem_writer.h" #include "dwarfs/fragment_order_parser.h" @@ -983,18 +984,18 @@ int mkdwarfs_main(int argc, sys_char** argv) { } options.enable_history = !no_history; + rw_opts.enable_history = !no_history; if (options.enable_history) { options.history.with_timestamps = !no_history_timestamps; + rw_opts.history.with_timestamps = !no_history_timestamps; + if (!no_history_command_line) { options.command_line_arguments = command_line; + rw_opts.command_line_arguments = command_line; } } - // TODO: the whole re-writing thing will be a bit weird in combination - // with categories; we'd likely require a "category"-section to be - // present (which we'll also require for bit-identical mode) - if (!categorizer_list_str.empty()) { std::vector categorizer_list; boost::split(categorizer_list, categorizer_list_str, boost::is_any_of(",")); @@ -1006,7 +1007,22 @@ int mkdwarfs_main(int argc, sys_char** argv) { } } - category_parser cp(options.inode.categorizer_mgr); + std::unique_ptr input_filesystem; + std::shared_ptr cat_resolver; + + if (recompress) { + filesystem_options fsopts; + fsopts.image_offset = filesystem_options::IMAGE_OFFSET_AUTO; + input_filesystem = std::make_unique( + lgr, std::make_shared(path), fsopts); + + cat_resolver = std::make_shared( + input_filesystem->get_all_block_categories()); + } else { + cat_resolver = options.inode.categorizer_mgr; + } + + category_parser cp(cat_resolver); try { { @@ -1061,10 +1077,13 @@ int mkdwarfs_main(int argc, sys_char** argv) { block_compressor metadata_bc(metadata_compression); block_compressor history_bc(history_compression); - filesystem_writer fsw(*os, lgr, wg_compress, prog, schema_bc, metadata_bc, - history_bc, fswopts, header_ifs.get()); + std::unique_ptr fsw; try { + fsw = std::make_unique( + *os, lgr, wg_compress, prog, schema_bc, metadata_bc, history_bc, + fswopts, header_ifs.get()); + categorized_option compression_opt; contextual_option_parser cop("--compression", compression_opt, cp, compressor_parser); @@ -1073,21 +1092,28 @@ int mkdwarfs_main(int argc, sys_char** argv) { cop.parse(compression); LOG_DEBUG << cop.as_string(); - fsw.add_default_compressor(compression_opt.get()); + fsw->add_default_compressor(compression_opt.get()); - compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr, - &fsw](auto cat, - block_compressor const& bc) { - try { - catmgr->set_metadata_requirements(cat, bc.metadata_requirements()); - fsw.add_category_compressor(cat, bc); - } catch (std::exception const& e) { - throw std::runtime_error( - fmt::format("compression '{}' cannot be used for category '{}': " - "metadata requirements not met ({})", - bc.describe(), catmgr->category_name(cat), e.what())); - } - }); + if (recompress) { + compression_opt.visit_contextual( + [catres = cat_resolver, &fsw](auto cat, block_compressor const& bc) { + fsw->add_category_compressor(cat, bc); + }); + } else { + compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr, + &fsw](auto cat, + block_compressor const& bc) { + try { + catmgr->set_metadata_requirements(cat, bc.metadata_requirements()); + fsw->add_category_compressor(cat, bc); + } catch (std::exception const& e) { + throw std::runtime_error( + fmt::format("compression '{}' cannot be used for category '{}': " + "metadata requirements not met ({})", + bc.describe(), catmgr->category_name(cat), e.what())); + } + }); + } } catch (std::exception const& e) { LOG_ERROR << e.what(); return 1; @@ -1097,8 +1123,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { try { if (recompress) { - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(path), fsw, rw_opts); + input_filesystem->rewrite(prog, *fsw, *cat_resolver, rw_opts); wg_compress.wait(); } else { auto sf = std::make_shared( @@ -1109,9 +1134,9 @@ int mkdwarfs_main(int argc, sys_char** argv) { options); if (input_list) { - s.scan(fsw, path, prog, *input_list); + s.scan(*fsw, path, prog, *input_list); } else { - s.scan(fsw, path, prog); + s.scan(*fsw, path, prog); } options.inode.categorizer_mgr.reset(); diff --git a/test/dwarfs_compat.cpp b/test/dwarfs_compat.cpp index 7e2437b4..471704df 100644 --- a/test/dwarfs_compat.cpp +++ b/test/dwarfs_compat.cpp @@ -37,6 +37,7 @@ #include "dwarfs/block_compressor.h" #include "dwarfs/file_stat.h" +#include "dwarfs/filesystem_block_category_resolver.h" #include "dwarfs/filesystem_extractor.h" #include "dwarfs/filesystem_v2.h" #include "dwarfs/filesystem_writer.h" @@ -1108,13 +1109,21 @@ TEST_P(rewrite, filesystem_rewrite) { progress prog([](const progress&, bool) {}, 1000); std::ostringstream rewritten, idss; + auto rewrite_fs = [&](auto& fsw, auto const& mm) { + filesystem_options fsopts; + fsopts.image_offset = filesystem_options::IMAGE_OFFSET_AUTO; + filesystem_v2 fs(lgr, mm, fsopts); + filesystem_block_category_resolver resolver(fs.get_all_block_categories()); + fs.rewrite(prog, fsw, resolver, opts); + }; + { filesystem_writer fsw(rewritten, lgr, wg, prog, bc, bc, bc); fsw.add_default_compressor(bc); auto mm = std::make_shared(filename); EXPECT_NO_THROW(filesystem_v2::identify(lgr, mm, idss)); EXPECT_FALSE(filesystem_v2::header(mm)); - filesystem_v2::rewrite_deprecated(lgr, prog, mm, fsw, opts); + rewrite_fs(fsw, mm); } { @@ -1134,8 +1143,7 @@ TEST_P(rewrite, filesystem_rewrite) { filesystem_writer fsw(rewritten, lgr, wg, prog, bc, bc, bc, fsw_opts, &hdr_iss); fsw.add_default_compressor(bc); - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(filename), fsw, opts); + rewrite_fs(fsw, std::make_shared(filename)); } { @@ -1161,9 +1169,7 @@ TEST_P(rewrite, filesystem_rewrite) { filesystem_writer fsw(rewritten2, lgr, wg, prog, bc, bc, bc, fsw_opts, &hdr_iss); fsw.add_default_compressor(bc); - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(rewritten.str()), fsw, - opts); + rewrite_fs(fsw, std::make_shared(rewritten.str())); } { @@ -1180,9 +1186,7 @@ TEST_P(rewrite, filesystem_rewrite) { { filesystem_writer fsw(rewritten3, lgr, wg, prog, bc, bc, bc); fsw.add_default_compressor(bc); - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(rewritten2.str()), fsw, - opts); + rewrite_fs(fsw, std::make_shared(rewritten2.str())); } { @@ -1201,9 +1205,7 @@ TEST_P(rewrite, filesystem_rewrite) { fsw_opts.remove_header = true; filesystem_writer fsw(rewritten4, lgr, wg, prog, bc, bc, bc, fsw_opts); fsw.add_default_compressor(bc); - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(rewritten3.str()), fsw, - opts); + rewrite_fs(fsw, std::make_shared(rewritten3.str())); } { @@ -1222,9 +1224,7 @@ TEST_P(rewrite, filesystem_rewrite) { fsw_opts.no_section_index = true; filesystem_writer fsw(rewritten5, lgr, wg, prog, bc, bc, bc, fsw_opts); fsw.add_default_compressor(bc); - filesystem_v2::rewrite_deprecated( - lgr, prog, std::make_shared(rewritten4.str()), fsw, - opts); + rewrite_fs(fsw, std::make_shared(rewritten4.str())); } {