feat(mkdwarfs): support for rewriting file systems with categories

At the same time, this finally adds multi-threaded decompression
when rewriting a file system.
This commit is contained in:
Marcus Holland-Moritz 2023-12-17 09:14:09 +01:00
parent e2606226bd
commit 2c91e80119
7 changed files with 478 additions and 140 deletions

View File

@ -67,10 +67,6 @@ class filesystem_v2 {
filesystem_options const& options, int inode_offset = 0,
std::shared_ptr<performance_monitor const> perfmon = nullptr);
static void
rewrite_deprecated(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
filesystem_writer& writer, rewrite_options const& opts);
static int
identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
int detail_level = 0, size_t num_readers = 1,
@ -184,6 +180,12 @@ class filesystem_v2 {
return impl_->get_all_block_categories();
}
// Rewrite this file system through `writer`.
//
// All arguments are handed unchanged to the implementation object;
// `cat_resolver` is used there to map block category names to category
// values, and `opts` selects what is recompressed.
void rewrite(progress& prog, filesystem_writer& writer,
             category_resolver const& cat_resolver,
             rewrite_options const& opts) const {
  impl_->rewrite(prog, writer, cat_resolver, opts);
}
class impl {
public:
virtual ~impl() = default;
@ -226,6 +228,9 @@ class filesystem_v2 {
virtual history const& get_history() const = 0;
virtual folly::dynamic get_inode_info(inode_view entry) const = 0;
virtual std::vector<std::string> get_all_block_categories() const = 0;
// Rewrite the file system through `writer`. `cat_resolver` maps block
// category names to category values; `opts` selects whether blocks and/or
// metadata are recompressed and how the history section is handled.
virtual void rewrite(progress& prog, filesystem_writer& writer,
category_resolver const& cat_resolver,
rewrite_options const& opts) const = 0;
};
private:

View File

@ -71,6 +71,12 @@ class filesystem_writer {
return impl_->get_compression_constraints(cat, metadata);
}
// Return the compressor the writer uses for a section of the given `type`.
// `cat` optionally names the fragment category of a block; it defaults to
// no category. Forwards to the implementation object.
block_compressor const& get_compressor(
section_type type,
std::optional<fragment_category::value_type> cat = std::nullopt) const {
return impl_->get_compressor(type, cat);
}
void configure(std::vector<fragment_category> const& expected_categories,
size_t max_active_slots) {
impl_->configure(expected_categories, max_active_slots);
@ -80,6 +86,8 @@ class filesystem_writer {
impl_->copy_header(header);
}
// TODO: check which write_block() API is actually used
void write_block(fragment_category cat, std::shared_ptr<block_data>&& data,
physical_block_cb_type physical_block_cb,
std::optional<std::string> meta = std::nullopt) {
@ -107,6 +115,19 @@ class filesystem_writer {
impl_->write_history(std::move(data));
}
// Check whether a block that is currently stored with `compression` (its
// stored bytes are `data`) can be recompressed with the compressor selected
// for `cat` (no category by default). Forwards to the implementation object.
void check_block_compression(
compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat = std::nullopt) {
impl_->check_block_compression(compression, data, cat);
}
// Write a section of the given `type` whose stored bytes `data` are
// currently compressed with `compression`; `cat` optionally names the
// fragment category (no category by default). Forwards to the
// implementation object.
// NOTE(review): `data` is a non-owning span; presumably it must stay valid
// until the writer is done with the section -- confirm against the caller.
void write_section(
section_type type, compression_type compression,
std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat = std::nullopt) {
impl_->write_section(type, compression, data, cat);
}
void write_compressed_section(section_type type, compression_type compression,
std::span<uint8_t const> data) {
impl_->write_compressed_section(type, compression, data);
@ -126,6 +147,9 @@ class filesystem_writer {
virtual compression_constraints
get_compression_constraints(fragment_category::value_type cat,
std::string const& metadata) const = 0;
// Return the compressor used for a section of `type`; `cat` optionally
// names the fragment category of a block.
virtual block_compressor const&
get_compressor(section_type type,
std::optional<fragment_category::value_type> cat) const = 0;
virtual void
configure(std::vector<fragment_category> const& expected_categories,
size_t max_active_slots) = 0;
@ -142,6 +166,13 @@ class filesystem_writer {
write_metadata_v2_schema(std::shared_ptr<block_data>&& data) = 0;
virtual void write_metadata_v2(std::shared_ptr<block_data>&& data) = 0;
virtual void write_history(std::shared_ptr<block_data>&& data) = 0;
// Check whether a block stored with `compression` (stored bytes in `data`)
// can be recompressed with the compressor selected for `cat`.
virtual void check_block_compression(
compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat) = 0;
// Write a section of `type` whose stored bytes `data` are currently
// compressed with `compression`; `cat` optionally names the fragment
// category.
virtual void
write_section(section_type type, compression_type compression,
std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat) = 0;
virtual void
write_compressed_section(section_type type, compression_type compression,
std::span<uint8_t const> data) = 0;

View File

@ -136,7 +136,9 @@ struct scanner_options {
// Options controlling how an existing file system image is rewritten.
struct rewrite_options {
// Recompress BLOCK sections instead of copying them in compressed form.
bool recompress_block{false};
// Recompress the metadata sections instead of copying them in compressed
// form.
bool recompress_metadata{false};
// Offset of the image inside the input; passed to filesystem_parser by
// rewrite_deprecated(). Defaults to automatic detection.
file_off_t image_offset{filesystem_options::IMAGE_OFFSET_AUTO};
// When true, an existing HISTORY section is updated and rewritten;
// when false, it is removed from the output.
bool enable_history{true};
// Command line that is appended to the history when it is updated.
std::optional<std::vector<std::string>> command_line_arguments;
// Configuration used to construct the history object that is updated.
history_config history;
};
std::ostream& operator<<(std::ostream& os, file_order_mode mode);

View File

@ -34,6 +34,7 @@
#include "dwarfs/block_compressor.h"
#include "dwarfs/block_data.h"
#include "dwarfs/categorizer.h"
#include "dwarfs/category_resolver.h"
#include "dwarfs/error.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/filesystem_writer.h"
@ -393,6 +394,9 @@ class filesystem_ final : public filesystem_v2::impl {
std::vector<std::string> get_all_block_categories() const override {
return meta_.get_all_block_categories();
}
// Rewrite this file system through `writer`; defined out of line below.
void rewrite(progress& prog, filesystem_writer& writer,
category_resolver const& cat_resolver,
rewrite_options const& opts) const override;
private:
filesystem_info const& get_info() const;
@ -545,6 +549,125 @@ filesystem_<LoggerPolicy>::filesystem_(
}
}
// Rewrite this file system through `writer`.
//
// The image is walked section by section (twice when blocks are being
// recompressed):
//
//   1. If `opts.recompress_block` is set, every BLOCK whose category can be
//      resolved is first validated against the target compressor via
//      `writer.check_block_compression()`.
//   2. Every section is then either recompressed, copied in its compressed
//      form, updated (HISTORY) or dropped (HISTORY when history is disabled,
//      SECTION_INDEX always), and finally the writer is flushed.
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::rewrite(progress& prog,
                                        filesystem_writer& writer,
                                        category_resolver const& cat_resolver,
                                        rewrite_options const& opts) const {
  using category_type = std::optional<fragment_category::value_type>;

  // Validation pass: only blocks that have a category name *and* whose name
  // is known to the resolver are checked.
  // NOTE(review): blocks without a (resolvable) category are not checked
  // here although they are recompressed below without a category -- confirm
  // that this is intended.
  if (opts.recompress_block) {
    size_t index{0};

    parser_.rewind();

    while (auto sec = parser_.next_section()) {
      if (sec->type() != section_type::BLOCK) {
        continue;
      }

      if (auto name = meta_.get_block_category(index)) {
        if (auto cat = cat_resolver.category_value(name.value())) {
          writer.check_block_compression(sec->compression(), sec->data(*mm_),
                                         cat);
        }
      }

      ++index;
    }
  }

  prog.original_size = mm_->size();
  prog.filesystem_size = mm_->size();
  prog.block_count = num_blocks();

  if (header_) {
    writer.copy_header(*header_);
  }

  // Logs which compressor a section is about to be recompressed with.
  auto log_recompress = [&](const auto& sec, category_type const& cat) {
    std::string const catinfo =
        cat ? fmt::format(", {}", cat_resolver.category_name(*cat))
            : std::string{};

    LOG_VERBOSE << "recompressing " << get_section_name(sec->type()) << " ("
                << get_compression_name(sec->compression()) << catinfo
                << ") using '"
                << writer.get_compressor(sec->type(), cat).describe() << "'";
  };

  // Copies a section to the output without touching its compressed payload.
  auto copy_compressed = [&](const auto& sec) {
    LOG_VERBOSE << "copying " << get_section_name(sec->type()) << " ("
                << get_compression_name(sec->compression()) << ")";
    writer.write_compressed_section(sec->type(), sec->compression(),
                                    sec->data(*mm_));
  };

  size_t block_no{0};

  parser_.rewind();

  while (auto sec = parser_.next_section()) {
    auto const type = sec->type();

    if (type == section_type::BLOCK) {
      if (opts.recompress_block) {
        category_type cat;

        if (auto name = meta_.get_block_category(block_no)) {
          cat = cat_resolver.category_value(name.value());
          if (!cat) {
            LOG_ERROR << "unknown category '" << name.value()
                      << "' for block " << block_no;
          }
        }

        log_recompress(sec, cat);
        writer.write_section(section_type::BLOCK, sec->compression(),
                             sec->data(*mm_), cat);
      } else {
        copy_compressed(sec);
      }

      ++block_no;
    } else if (type == section_type::METADATA_V2_SCHEMA ||
               type == section_type::METADATA_V2) {
      if (opts.recompress_metadata) {
        log_recompress(sec, std::nullopt);
        writer.write_section(type, sec->compression(), sec->data(*mm_));
      } else {
        copy_compressed(sec);
      }
    } else if (type == section_type::HISTORY) {
      if (opts.enable_history) {
        // Re-create the history from the existing one and append the
        // command line of this run before writing it out again.
        history hist{opts.history};
        hist.parse(history_.serialize());
        hist.append(opts.command_line_arguments);

        LOG_VERBOSE << "updating " << get_section_name(sec->type()) << " ("
                    << get_compression_name(sec->compression())
                    << "), compressing using '"
                    << writer.get_compressor(sec->type()).describe() << "'";

        writer.write_history(std::make_shared<block_data>(hist.serialize()));
      } else {
        LOG_VERBOSE << "removing " << get_section_name(sec->type());
      }
    } else if (type == section_type::SECTION_INDEX) {
      // this will be automatically added by the filesystem_writer
    } else {
      // verbatim copy everything else
      copy_compressed(sec);
    }
  }

  writer.flush();
}
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
if (detail_level > 1) {
@ -718,86 +841,6 @@ filesystem_v2::filesystem_v2(logger& lgr, std::shared_ptr<mmif> mm,
logger_policies>(
lgr, std::move(mm), options, inode_offset, std::move(perfmon))) {}
// Legacy rewrite of the file system image in `mm` through `writer`.
//
// The image is parsed twice: the first pass verifies every section and
// collects all sections other than BLOCK and SECTION_INDEX; the second pass
// writes the BLOCK sections (recompressed or copied, depending on
// `opts.recompress_block`). Afterwards, either the schema and metadata are
// rewritten (`opts.recompress_metadata`) or all collected sections are copied
// in compressed form. Throws a runtime_error (via DWARFS_THROW) if a section
// fails its checksum or integrity check.
void filesystem_v2::rewrite_deprecated(logger& lgr, progress& prog,
std::shared_ptr<mmif> mm,
filesystem_writer& writer,
rewrite_options const& opts) {
// TODO:
LOG_PROXY(debug_logger_policy, lgr);
filesystem_parser parser(mm, opts.image_offset);
// Carry over the header of the input image, if it has one.
if (auto hdr = parser.header()) {
writer.copy_header(*hdr);
}
// Section types in order of first appearance, and the sections per type.
std::vector<section_type> section_types;
section_map sections;
// First pass: verify each section and update the progress counters.
while (auto s = parser.next_section()) {
check_section_logger(lgr, *s);
if (!s->check_fast(*mm)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
}
if (!s->verify(*mm)) {
DWARFS_THROW(runtime_error,
"integrity check error in section: " + s->name());
}
prog.original_size += s->length();
prog.filesystem_size += s->length();
if (s->type() == section_type::BLOCK) {
++prog.block_count;
} else if (s->type() != section_type::SECTION_INDEX) {
// Remember every other section so that it can be copied later.
auto& secvec = sections[s->type()];
if (secvec.empty()) {
section_types.push_back(s->type());
}
secvec.push_back(*s);
}
}
// Passed to make_metadata() and written out below when the metadata is
// recompressed (presumably filled with the raw schema/metadata -- confirm).
std::vector<uint8_t> schema_raw;
std::vector<uint8_t> meta_raw;
// force metadata check
make_metadata(lgr, mm, sections, schema_raw, meta_raw, metadata_options(), 0,
true, mlock_mode::NONE, !parser.has_checksums());
// Second pass: only BLOCK sections are handled in this loop.
parser.rewind();
while (auto s = parser.next_section()) {
// TODO: multi-thread this?
if (s->type() == section_type::BLOCK) {
if (opts.recompress_block) {
// Decompress the block synchronously and hand it to the writer.
auto block =
std::make_shared<block_data>(block_decompressor::decompress(
s->compression(), mm->as<uint8_t>(s->start()), s->length()));
// TODO: re-write with different categories
writer.write_block(categorizer_manager::default_category().value(),
std::move(block));
} else {
writer.write_compressed_section(s->type(), s->compression(),
s->data(*mm));
}
}
}
if (opts.recompress_metadata) {
// Only the schema and the metadata are written in this branch; the other
// sections collected in the first pass are not written by this function.
writer.write_metadata_v2_schema(
std::make_shared<block_data>(std::move(schema_raw)));
writer.write_metadata_v2(std::make_shared<block_data>(std::move(meta_raw)));
} else {
// Copy all collected sections, grouped by type in order of first
// appearance.
for (auto type : section_types) {
auto& secvec = DWARFS_NOTHROW(sections.at(type));
for (auto& sec : secvec) {
writer.write_compressed_section(type, sec.compression(), sec.data(*mm));
}
}
}
writer.flush();
}
int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
std::ostream& os, int detail_level,
size_t num_readers, bool check_integrity,

View File

@ -36,6 +36,7 @@
#include "dwarfs/block_compressor.h"
#include "dwarfs/block_data.h"
#include "dwarfs/checksum.h"
#include "dwarfs/compression_metadata_requirements.h"
#include "dwarfs/filesystem_writer.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
@ -85,6 +86,10 @@ class fsblock {
fsblock(section_type type, compression_type compression,
std::span<uint8_t const> data);
fsblock(section_type type, block_compressor const& bc,
std::span<uint8_t const> data, compression_type data_comp_type,
std::shared_ptr<compression_progress> pctx);
void
compress(worker_group& wg, std::optional<std::string> meta = std::nullopt) {
impl_->compress(wg, std::move(meta));
@ -292,6 +297,112 @@ class compressed_fsblock : public fsblock::impl {
section_header_v2 header_;
};
class rewritten_fsblock : public fsblock::impl {
public:
rewritten_fsblock(section_type type, block_compressor const& bc,
std::span<uint8_t const> data,
compression_type data_comp_type,
std::shared_ptr<compression_progress> pctx)
: type_{type}
, bc_{bc}
, data_{data}
, comp_type_{bc_.type()}
, pctx_{std::move(pctx)}
, data_comp_type_{data_comp_type} {}
void compress(worker_group& wg, std::optional<std::string> meta) override {
std::promise<void> prom;
future_ = prom.get_future();
wg.add_job([this, prom = std::move(prom),
meta = std::move(meta)]() mutable {
// TODO: we don't have to do this for uncompressed blocks
std::vector<uint8_t> block;
block_decompressor bd(data_comp_type_, data_.data(), data_.size(), block);
bd.decompress_frame(bd.uncompressed_size());
if (!meta) {
meta = bd.metadata();
}
pctx_->bytes_in += block.size(); // TODO: data_.size()?
try {
if (meta) {
block = bc_.compress(block, *meta);
} else {
block = bc_.compress(block);
}
} catch (bad_compression_ratio_error const&) {
comp_type_ = compression_type::NONE;
}
pctx_->bytes_out += block.size();
{
std::lock_guard lock(mx_);
block_data_.swap(block);
}
prom.set_value();
});
}
void wait_until_compressed() override { future_.wait(); }
section_type type() const override { return type_; }
compression_type compression() const override { return comp_type_; }
std::string description() const override { return bc_.describe(); }
std::span<uint8_t const> data() const override { return block_data_; }
size_t uncompressed_size() const override { return data_.size(); }
size_t size() const override {
std::lock_guard lock(mx_);
return block_data_.size();
}
void set_block_no(uint32_t number) override {
{
std::lock_guard lock(mx_);
if (number_) {
DWARFS_THROW(runtime_error, "block number already set");
}
number_ = number;
}
}
uint32_t block_no() const override {
std::lock_guard lock(mx_);
return number_.value();
}
section_header_v2 const& header() const override {
std::lock_guard lock(mx_);
if (!header_) {
header_ = section_header_v2{};
fsblock::build_section_header(*header_, *this);
}
return header_.value();
}
private:
const section_type type_;
block_compressor const& bc_;
mutable std::recursive_mutex mx_;
std::span<uint8_t const> data_;
std::vector<uint8_t> block_data_;
std::future<void> future_;
std::optional<uint32_t> number_;
std::optional<section_header_v2> mutable header_;
compression_type comp_type_;
std::shared_ptr<compression_progress> pctx_;
compression_type const data_comp_type_;
};
fsblock::fsblock(section_type type, block_compressor const& bc,
std::shared_ptr<block_data>&& data,
std::shared_ptr<compression_progress> pctx,
@ -304,6 +415,12 @@ fsblock::fsblock(section_type type, compression_type compression,
std::span<uint8_t const> data)
: impl_(std::make_unique<compressed_fsblock>(type, compression, data)) {}
// Construct an fsblock that recompresses existing section data: `data` holds
// the stored bytes, compressed with `data_comp_type`, and `bc` is the
// compressor used for the rewritten section. Backed by rewritten_fsblock.
fsblock::fsblock(section_type type, block_compressor const& bc,
std::span<uint8_t const> data, compression_type data_comp_type,
std::shared_ptr<compression_progress> pctx)
: impl_(std::make_unique<rewritten_fsblock>(type, bc, data, data_comp_type,
std::move(pctx))) {}
void fsblock::build_section_header(section_header_v2& sh,
fsblock::impl const& fsb) {
auto range = fsb.data();
@ -348,6 +465,9 @@ class filesystem_writer_ final : public filesystem_writer::impl {
compression_constraints
get_compression_constraints(fragment_category::value_type cat,
std::string const& metadata) const override;
// Select the compressor for a section of `type` and optional category `cat`.
block_compressor const& get_compressor(
section_type type,
std::optional<fragment_category::value_type> cat) const override;
void configure(std::vector<fragment_category> const& expected_categories,
size_t max_active_slots) override;
void copy_header(std::span<uint8_t const> header) override;
@ -361,6 +481,12 @@ class filesystem_writer_ final : public filesystem_writer::impl {
void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) override;
void write_metadata_v2(std::shared_ptr<block_data>&& data) override;
void write_history(std::shared_ptr<block_data>&& data) override;
// Throws if the stored block cannot be recompressed with the compressor
// selected for `cat` because its metadata requirements are not met.
void check_block_compression(
compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat) override;
// Queue an already stored section (`data`, compressed with `compression`)
// for recompression with the compressor selected for `type` / `cat`.
void write_section(section_type type, compression_type compression,
std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat) override;
void write_compressed_section(section_type type, compression_type compression,
std::span<uint8_t const> data) override;
void flush() override;
@ -379,9 +505,9 @@ class filesystem_writer_ final : public filesystem_writer::impl {
block_compressor const& bc, std::optional<std::string> meta,
physical_block_cb_type physical_block_cb);
void on_block_merged(block_holder_type holder);
void write_section(section_type type, std::shared_ptr<block_data>&& data,
block_compressor const& bc,
std::optional<std::string> meta = std::nullopt);
// Internal helper used by write_block(), write_metadata_v2_schema(),
// write_metadata_v2() and write_history(): writes uncompressed `data` as a
// section of `type` using compressor `bc`, with optional metadata `meta`.
void write_section_impl(section_type type, std::shared_ptr<block_data>&& data,
block_compressor const& bc,
std::optional<std::string> meta = std::nullopt);
void write(fsblock const& fsb);
void write(const char* data, size_t size);
template <typename T>
@ -415,6 +541,8 @@ class filesystem_writer_ final : public filesystem_writer::impl {
std::unique_ptr<block_merger_type> merger_;
};
// TODO: Maybe we can factor out the logic to find the right compressor
// into something that gets passed a (section_type, category) pair?
template <typename LoggerPolicy>
filesystem_writer_<LoggerPolicy>::filesystem_writer_(
logger& lgr, std::ostream& os, worker_group& wg, progress& prog,
@ -430,8 +558,7 @@ filesystem_writer_<LoggerPolicy>::filesystem_writer_(
, history_bc_(history_bc)
, options_(options)
, LOG_PROXY_INIT(lgr)
, flush_(false)
, writer_thread_(&filesystem_writer_::writer_thread, this) {
, flush_{true} {
if (header_) {
if (options_.remove_header) {
LOG_WARN << "header will not be written because remove_header is set";
@ -440,6 +567,30 @@ filesystem_writer_<LoggerPolicy>::filesystem_writer_(
header_size_ = os_.tellp();
}
}
auto check_compressor = [](std::string_view name,
block_compressor const& bc) {
if (auto reqstr = bc.metadata_requirements(); !reqstr.empty()) {
try {
auto req = compression_metadata_requirements<folly::dynamic>{reqstr};
req.check(std::nullopt);
} catch (std::exception const& e) {
auto msg = fmt::format(
"cannot use '{}' for {} compression because compression "
"metadata requirements are not met: {}",
bc.describe(), name, e.what());
DWARFS_THROW(runtime_error, msg);
}
}
};
check_compressor("schema", schema_bc);
check_compressor("metadata", metadata_bc);
check_compressor("history", history_bc);
// TODO: the whole flush & thread thing needs to be revisited
flush_ = false;
writer_thread_ = std::thread(&filesystem_writer_::writer_thread, this);
}
template <typename LoggerPolicy>
@ -612,7 +763,7 @@ void filesystem_writer_<LoggerPolicy>::finish_category(fragment_category cat) {
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_section(
void filesystem_writer_<LoggerPolicy>::write_section_impl(
section_type type, std::shared_ptr<block_data>&& data,
block_compressor const& bc, std::optional<std::string> meta) {
uint32_t number;
@ -624,10 +775,6 @@ void filesystem_writer_<LoggerPolicy>::write_section(
pctx_ = prog_.create_context<compression_progress>();
}
while (mem_used() > options_.max_queue_size) {
cond_.wait(lock);
}
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data), pctx_);
number = section_number_;
@ -642,6 +789,66 @@ void filesystem_writer_<LoggerPolicy>::write_section(
cond_.notify_one();
}
// Verify that a stored block can be recompressed with the compressor that
// would be used for `cat` (the default compressor if `cat` is not set).
//
// Nothing is checked if that compressor has no metadata requirements.
// Otherwise the metadata reported by a decompressor for the stored block
// (`data`, compressed with `compression`) is checked against the
// requirements, and a runtime_error is thrown (via DWARFS_THROW) if they are
// not met.
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::check_block_compression(
    compression_type compression, std::span<uint8_t const> data,
    std::optional<fragment_category::value_type> cat) {
  block_compressor const& bc = [&]() -> block_compressor const& {
    if (cat) {
      return compressor_for_category(*cat);
    }
    return default_bc_.value();
  }();

  auto reqstr = bc.metadata_requirements();

  if (reqstr.empty()) {
    return;
  }

  auto req = compression_metadata_requirements<folly::dynamic>{reqstr};

  // The decompressor is only set up to get at the block's metadata.
  std::vector<uint8_t> scratch;
  block_decompressor bd(compression, data.data(), data.size(), scratch);

  try {
    req.check(bd.metadata());
  } catch (std::exception const& e) {
    auto msg = fmt::format(
        "cannot compress {} compressed block with compressor '{}' because "
        "the following metadata requirements are not met: {}",
        get_compression_name(compression), bc.describe(), e.what());
    DWARFS_THROW(runtime_error, msg);
  }
}
// Queue an already stored section for recompression.
//
// `data` holds the stored bytes, compressed with `compression`. The target
// compressor is chosen via get_compressor(type, cat). The call blocks while
// the memory used by the writer exceeds `options_.max_queue_size`; after
// that the new block gets the next section number, its compression job is
// submitted to the worker group and it is appended to the write queue.
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_section(
    section_type type, compression_type compression,
    std::span<uint8_t const> data,
    std::optional<fragment_category::value_type> cat) {
  {
    std::unique_lock lock(mx_);

    if (!pctx_) {
      pctx_ = prog_.create_context<compression_progress>();
    }

    // TODO: do we still need this with the merger in place?
    cond_.wait(lock,
               [this] { return !(mem_used() > options_.max_queue_size); });

    auto& compressor = get_compressor(type, cat);
    auto block =
        std::make_unique<fsblock>(type, compressor, data, compression, pctx_);

    block->set_block_no(section_number_++);
    block->compress(wg_);

    queue_.emplace_back(std::move(block));
  }

  // Wake up a thread waiting on the queue, outside of the lock.
  cond_.notify_one();
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_compressed_section(
section_type type, compression_type compression,
@ -687,6 +894,30 @@ auto filesystem_writer_<LoggerPolicy>::get_compression_constraints(
return compressor_for_category(cat).get_compression_constraints(metadata);
}
// Select the compressor for a section.
//
// Schema, metadata and history sections each have a dedicated compressor.
// Every other section type uses the compressor for `cat` if a category is
// given, and the default compressor otherwise.
template <typename LoggerPolicy>
block_compressor const& filesystem_writer_<LoggerPolicy>::get_compressor(
    section_type type, std::optional<fragment_category::value_type> cat) const {
  if (type == section_type::METADATA_V2_SCHEMA) {
    return schema_bc_;
  }

  if (type == section_type::METADATA_V2) {
    return metadata_bc_;
  }

  if (type == section_type::HISTORY) {
    return history_bc_;
  }

  if (!cat) {
    return default_bc_.value();
  }

  return compressor_for_category(*cat);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::configure(
std::vector<fragment_category> const& expected_categories,
@ -726,26 +957,27 @@ template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_block(
fragment_category::value_type cat, std::shared_ptr<block_data>&& data,
std::optional<std::string> meta) {
write_section(section_type::BLOCK, std::move(data),
compressor_for_category(cat), std::move(meta));
write_section_impl(section_type::BLOCK, std::move(data),
compressor_for_category(cat), std::move(meta));
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2_schema(
std::shared_ptr<block_data>&& data) {
write_section(section_type::METADATA_V2_SCHEMA, std::move(data), schema_bc_);
write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data),
schema_bc_);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2(
std::shared_ptr<block_data>&& data) {
write_section(section_type::METADATA_V2, std::move(data), metadata_bc_);
write_section_impl(section_type::METADATA_V2, std::move(data), metadata_bc_);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_history(
std::shared_ptr<block_data>&& data) {
write_section(section_type::HISTORY, std::move(data), history_bc_);
write_section_impl(section_type::HISTORY, std::move(data), history_bc_);
}
template <typename LoggerPolicy>

View File

@ -59,6 +59,7 @@
#include "dwarfs/console_writer.h"
#include "dwarfs/entry.h"
#include "dwarfs/error.h"
#include "dwarfs/filesystem_block_category_resolver.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/filesystem_writer.h"
#include "dwarfs/fragment_order_parser.h"
@ -983,18 +984,18 @@ int mkdwarfs_main(int argc, sys_char** argv) {
}
options.enable_history = !no_history;
rw_opts.enable_history = !no_history;
if (options.enable_history) {
options.history.with_timestamps = !no_history_timestamps;
rw_opts.history.with_timestamps = !no_history_timestamps;
if (!no_history_command_line) {
options.command_line_arguments = command_line;
rw_opts.command_line_arguments = command_line;
}
}
// TODO: the whole re-writing thing will be a bit weird in combination
// with categories; we'd likely require a "category"-section to be
// present (which we'll also require for bit-identical mode)
if (!categorizer_list_str.empty()) {
std::vector<std::string> categorizer_list;
boost::split(categorizer_list, categorizer_list_str, boost::is_any_of(","));
@ -1006,7 +1007,22 @@ int mkdwarfs_main(int argc, sys_char** argv) {
}
}
category_parser cp(options.inode.categorizer_mgr);
std::unique_ptr<filesystem_v2> input_filesystem;
std::shared_ptr<category_resolver> cat_resolver;
if (recompress) {
filesystem_options fsopts;
fsopts.image_offset = filesystem_options::IMAGE_OFFSET_AUTO;
input_filesystem = std::make_unique<filesystem_v2>(
lgr, std::make_shared<dwarfs::mmap>(path), fsopts);
cat_resolver = std::make_shared<filesystem_block_category_resolver>(
input_filesystem->get_all_block_categories());
} else {
cat_resolver = options.inode.categorizer_mgr;
}
category_parser cp(cat_resolver);
try {
{
@ -1061,10 +1077,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
block_compressor metadata_bc(metadata_compression);
block_compressor history_bc(history_compression);
filesystem_writer fsw(*os, lgr, wg_compress, prog, schema_bc, metadata_bc,
history_bc, fswopts, header_ifs.get());
std::unique_ptr<filesystem_writer> fsw;
try {
fsw = std::make_unique<filesystem_writer>(
*os, lgr, wg_compress, prog, schema_bc, metadata_bc, history_bc,
fswopts, header_ifs.get());
categorized_option<block_compressor> compression_opt;
contextual_option_parser cop("--compression", compression_opt, cp,
compressor_parser);
@ -1073,21 +1092,28 @@ int mkdwarfs_main(int argc, sys_char** argv) {
cop.parse(compression);
LOG_DEBUG << cop.as_string();
fsw.add_default_compressor(compression_opt.get());
fsw->add_default_compressor(compression_opt.get());
compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr,
&fsw](auto cat,
block_compressor const& bc) {
try {
catmgr->set_metadata_requirements(cat, bc.metadata_requirements());
fsw.add_category_compressor(cat, bc);
} catch (std::exception const& e) {
throw std::runtime_error(
fmt::format("compression '{}' cannot be used for category '{}': "
"metadata requirements not met ({})",
bc.describe(), catmgr->category_name(cat), e.what()));
}
});
if (recompress) {
compression_opt.visit_contextual(
[catres = cat_resolver, &fsw](auto cat, block_compressor const& bc) {
fsw->add_category_compressor(cat, bc);
});
} else {
compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr,
&fsw](auto cat,
block_compressor const& bc) {
try {
catmgr->set_metadata_requirements(cat, bc.metadata_requirements());
fsw->add_category_compressor(cat, bc);
} catch (std::exception const& e) {
throw std::runtime_error(
fmt::format("compression '{}' cannot be used for category '{}': "
"metadata requirements not met ({})",
bc.describe(), catmgr->category_name(cat), e.what()));
}
});
}
} catch (std::exception const& e) {
LOG_ERROR << e.what();
return 1;
@ -1097,8 +1123,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
try {
if (recompress) {
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw, rw_opts);
input_filesystem->rewrite(prog, *fsw, *cat_resolver, rw_opts);
wg_compress.wait();
} else {
auto sf = std::make_shared<segmenter_factory>(
@ -1109,9 +1134,9 @@ int mkdwarfs_main(int argc, sys_char** argv) {
options);
if (input_list) {
s.scan(fsw, path, prog, *input_list);
s.scan(*fsw, path, prog, *input_list);
} else {
s.scan(fsw, path, prog);
s.scan(*fsw, path, prog);
}
options.inode.categorizer_mgr.reset();

View File

@ -37,6 +37,7 @@
#include "dwarfs/block_compressor.h"
#include "dwarfs/file_stat.h"
#include "dwarfs/filesystem_block_category_resolver.h"
#include "dwarfs/filesystem_extractor.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/filesystem_writer.h"
@ -1108,13 +1109,21 @@ TEST_P(rewrite, filesystem_rewrite) {
progress prog([](const progress&, bool) {}, 1000);
std::ostringstream rewritten, idss;
auto rewrite_fs = [&](auto& fsw, auto const& mm) {
filesystem_options fsopts;
fsopts.image_offset = filesystem_options::IMAGE_OFFSET_AUTO;
filesystem_v2 fs(lgr, mm, fsopts);
filesystem_block_category_resolver resolver(fs.get_all_block_categories());
fs.rewrite(prog, fsw, resolver, opts);
};
{
filesystem_writer fsw(rewritten, lgr, wg, prog, bc, bc, bc);
fsw.add_default_compressor(bc);
auto mm = std::make_shared<mmap>(filename);
EXPECT_NO_THROW(filesystem_v2::identify(lgr, mm, idss));
EXPECT_FALSE(filesystem_v2::header(mm));
filesystem_v2::rewrite_deprecated(lgr, prog, mm, fsw, opts);
rewrite_fs(fsw, mm);
}
{
@ -1134,8 +1143,7 @@ TEST_P(rewrite, filesystem_rewrite) {
filesystem_writer fsw(rewritten, lgr, wg, prog, bc, bc, bc, fsw_opts,
&hdr_iss);
fsw.add_default_compressor(bc);
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<mmap>(filename), fsw, opts);
rewrite_fs(fsw, std::make_shared<mmap>(filename));
}
{
@ -1161,9 +1169,7 @@ TEST_P(rewrite, filesystem_rewrite) {
filesystem_writer fsw(rewritten2, lgr, wg, prog, bc, bc, bc, fsw_opts,
&hdr_iss);
fsw.add_default_compressor(bc);
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<test::mmap_mock>(rewritten.str()), fsw,
opts);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten.str()));
}
{
@ -1180,9 +1186,7 @@ TEST_P(rewrite, filesystem_rewrite) {
{
filesystem_writer fsw(rewritten3, lgr, wg, prog, bc, bc, bc);
fsw.add_default_compressor(bc);
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<test::mmap_mock>(rewritten2.str()), fsw,
opts);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten2.str()));
}
{
@ -1201,9 +1205,7 @@ TEST_P(rewrite, filesystem_rewrite) {
fsw_opts.remove_header = true;
filesystem_writer fsw(rewritten4, lgr, wg, prog, bc, bc, bc, fsw_opts);
fsw.add_default_compressor(bc);
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<test::mmap_mock>(rewritten3.str()), fsw,
opts);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten3.str()));
}
{
@ -1222,9 +1224,7 @@ TEST_P(rewrite, filesystem_rewrite) {
fsw_opts.no_section_index = true;
filesystem_writer fsw(rewritten5, lgr, wg, prog, bc, bc, bc, fsw_opts);
fsw.add_default_compressor(bc);
filesystem_v2::rewrite_deprecated(
lgr, prog, std::make_shared<test::mmap_mock>(rewritten4.str()), fsw,
opts);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten4.str()));
}
{