mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 11:59:48 -04:00
feat(mkdwarfs): allow selection of categories to recompress
This commit is contained in:
parent
2c91e80119
commit
bbeffbfd98
@ -27,6 +27,7 @@
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/categorized_option.h"
|
||||
@ -136,6 +137,8 @@ struct scanner_options {
|
||||
struct rewrite_options {
|
||||
bool recompress_block{false};
|
||||
bool recompress_metadata{false};
|
||||
std::unordered_set<std::string> recompress_categories;
|
||||
bool recompress_categories_exclude{false};
|
||||
bool enable_history{true};
|
||||
std::optional<std::vector<std::string>> command_line_arguments;
|
||||
history_config history;
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include "dwarfs/options.h"
|
||||
#include "dwarfs/performance_monitor.h"
|
||||
#include "dwarfs/progress.h"
|
||||
#include "dwarfs/util.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -581,51 +582,79 @@ void filesystem_<LoggerPolicy>::rewrite(progress& prog,
|
||||
|
||||
size_t block_no{0};
|
||||
|
||||
auto log_recompress =
|
||||
[&](const auto& s,
|
||||
std::optional<fragment_category::value_type> const& cat =
|
||||
std::nullopt) {
|
||||
auto log_rewrite =
|
||||
[&](bool compressing, const auto& s,
|
||||
std::optional<fragment_category::value_type> const& cat) {
|
||||
auto prefix = compressing ? "recompressing" : "copying";
|
||||
std::string catinfo;
|
||||
std::string compinfo;
|
||||
if (cat) {
|
||||
catinfo = fmt::format(", {}", cat_resolver.category_name(*cat));
|
||||
}
|
||||
LOG_VERBOSE << "recompressing " << get_section_name(s->type()) << " ("
|
||||
<< get_compression_name(s->compression()) << catinfo
|
||||
<< ") using '"
|
||||
<< writer.get_compressor(s->type(), cat).describe() << "'";
|
||||
if (compressing) {
|
||||
compinfo = fmt::format(
|
||||
" using '{}'", writer.get_compressor(s->type(), cat).describe());
|
||||
}
|
||||
LOG_VERBOSE << prefix << " " << size_with_unit(s->length()) << " "
|
||||
<< get_section_name(s->type()) << " ("
|
||||
<< get_compression_name(s->compression()) << catinfo << ")"
|
||||
<< compinfo;
|
||||
};
|
||||
|
||||
auto copy_compressed = [&](const auto& s) {
|
||||
LOG_VERBOSE << "copying " << get_section_name(s->type()) << " ("
|
||||
<< get_compression_name(s->compression()) << ")";
|
||||
writer.write_compressed_section(s->type(), s->compression(), s->data(*mm_));
|
||||
};
|
||||
auto log_recompress =
|
||||
[&](const auto& s,
|
||||
std::optional<fragment_category::value_type> const& cat =
|
||||
std::nullopt) { log_rewrite(true, s, cat); };
|
||||
|
||||
auto copy_compressed =
|
||||
[&](const auto& s,
|
||||
std::optional<fragment_category::value_type> const& cat =
|
||||
std::nullopt) {
|
||||
log_rewrite(false, s, cat);
|
||||
writer.write_compressed_section(s->type(), s->compression(),
|
||||
s->data(*mm_));
|
||||
};
|
||||
|
||||
parser_.rewind();
|
||||
|
||||
while (auto s = parser_.next_section()) {
|
||||
switch (s->type()) {
|
||||
case section_type::BLOCK:
|
||||
if (opts.recompress_block) {
|
||||
std::optional<fragment_category::value_type> cat;
|
||||
case section_type::BLOCK: {
|
||||
std::optional<fragment_category::value_type> cat;
|
||||
bool recompress_block{true};
|
||||
|
||||
if (auto catstr = meta_.get_block_category(block_no)) {
|
||||
if (opts.recompress_block) {
|
||||
auto catstr = meta_.get_block_category(block_no);
|
||||
|
||||
if (catstr) {
|
||||
cat = cat_resolver.category_value(catstr.value());
|
||||
|
||||
if (!cat) {
|
||||
LOG_ERROR << "unknown category '" << catstr.value()
|
||||
<< "' for block " << block_no;
|
||||
}
|
||||
}
|
||||
|
||||
if (!opts.recompress_categories.empty()) {
|
||||
bool is_in_set{opts.recompress_categories.count(catstr.value()) >
|
||||
0};
|
||||
|
||||
recompress_block =
|
||||
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (recompress_block) {
|
||||
log_recompress(s, cat);
|
||||
|
||||
writer.write_section(section_type::BLOCK, s->compression(),
|
||||
s->data(*mm_), cat);
|
||||
} else {
|
||||
copy_compressed(s);
|
||||
copy_compressed(s, cat);
|
||||
}
|
||||
|
||||
++block_no;
|
||||
break;
|
||||
} break;
|
||||
|
||||
case section_type::METADATA_V2_SCHEMA:
|
||||
case section_type::METADATA_V2:
|
||||
|
@ -281,7 +281,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
metadata_compression, log_level_str, timestamp, time_resolution,
|
||||
progress_mode, recompress_opts, pack_metadata, file_hash_algo,
|
||||
debug_filter, max_similarity_size, input_list_str, chmod_str,
|
||||
categorizer_list_str, history_compression;
|
||||
categorizer_list_str, history_compression, recompress_categories;
|
||||
std::vector<sys_string> filter;
|
||||
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
||||
bloom_filter_size, compression;
|
||||
@ -376,6 +376,9 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
("recompress",
|
||||
po::value<std::string>(&recompress_opts)->implicit_value("all"),
|
||||
"recompress an existing filesystem (none, block, metadata, all)")
|
||||
("recompress-categories",
|
||||
po::value<std::string>(&recompress_categories),
|
||||
"only recompress blocks of these categories")
|
||||
("categorize",
|
||||
po::value<std::string>(&categorizer_list_str)
|
||||
->implicit_value("pcmaudio,incompressible"),
|
||||
@ -383,7 +386,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
("order",
|
||||
po::value<std::vector<std::string>>(&order)
|
||||
->value_name("[cat::]arg")->multitoken()->composing(),
|
||||
order_desc.c_str()) // TODO
|
||||
order_desc.c_str())
|
||||
("max-similarity-size",
|
||||
po::value<std::string>(&max_similarity_size),
|
||||
"maximum file size to compute similarity")
|
||||
@ -714,6 +717,15 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
std::cerr << "invalid recompress mode: " << recompress_opts << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!recompress_categories.empty()) {
|
||||
std::string_view input = recompress_categories;
|
||||
if (input.front() == '!') {
|
||||
rw_opts.recompress_categories_exclude = true;
|
||||
input.remove_prefix(1);
|
||||
}
|
||||
boost::split(rw_opts.recompress_categories, input, boost::is_any_of(","));
|
||||
}
|
||||
}
|
||||
|
||||
if (file_hash_algo == "none") {
|
||||
@ -1018,6 +1030,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
|
||||
cat_resolver = std::make_shared<filesystem_block_category_resolver>(
|
||||
input_filesystem->get_all_block_categories());
|
||||
|
||||
for (auto const& cat : rw_opts.recompress_categories) {
|
||||
if (!cat_resolver->category_value(cat)) {
|
||||
std::cerr << "error: no category '" << cat << "' in input filesystem\n";
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cat_resolver = options.inode.categorizer_mgr;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user