mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-10 04:50:31 -04:00
feat(mkdwarfs): allow selection of categories to recompress
This commit is contained in:
parent
2c91e80119
commit
bbeffbfd98
@ -27,6 +27,7 @@
|
|||||||
#include <iosfwd>
|
#include <iosfwd>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "dwarfs/categorized_option.h"
|
#include "dwarfs/categorized_option.h"
|
||||||
@ -136,6 +137,8 @@ struct scanner_options {
|
|||||||
struct rewrite_options {
|
struct rewrite_options {
|
||||||
bool recompress_block{false};
|
bool recompress_block{false};
|
||||||
bool recompress_metadata{false};
|
bool recompress_metadata{false};
|
||||||
|
std::unordered_set<std::string> recompress_categories;
|
||||||
|
bool recompress_categories_exclude{false};
|
||||||
bool enable_history{true};
|
bool enable_history{true};
|
||||||
std::optional<std::vector<std::string>> command_line_arguments;
|
std::optional<std::vector<std::string>> command_line_arguments;
|
||||||
history_config history;
|
history_config history;
|
||||||
|
@ -48,6 +48,7 @@
|
|||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
#include "dwarfs/performance_monitor.h"
|
#include "dwarfs/performance_monitor.h"
|
||||||
#include "dwarfs/progress.h"
|
#include "dwarfs/progress.h"
|
||||||
|
#include "dwarfs/util.h"
|
||||||
#include "dwarfs/worker_group.h"
|
#include "dwarfs/worker_group.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -581,51 +582,79 @@ void filesystem_<LoggerPolicy>::rewrite(progress& prog,
|
|||||||
|
|
||||||
size_t block_no{0};
|
size_t block_no{0};
|
||||||
|
|
||||||
auto log_recompress =
|
auto log_rewrite =
|
||||||
[&](const auto& s,
|
[&](bool compressing, const auto& s,
|
||||||
std::optional<fragment_category::value_type> const& cat =
|
std::optional<fragment_category::value_type> const& cat) {
|
||||||
std::nullopt) {
|
auto prefix = compressing ? "recompressing" : "copying";
|
||||||
std::string catinfo;
|
std::string catinfo;
|
||||||
|
std::string compinfo;
|
||||||
if (cat) {
|
if (cat) {
|
||||||
catinfo = fmt::format(", {}", cat_resolver.category_name(*cat));
|
catinfo = fmt::format(", {}", cat_resolver.category_name(*cat));
|
||||||
}
|
}
|
||||||
LOG_VERBOSE << "recompressing " << get_section_name(s->type()) << " ("
|
if (compressing) {
|
||||||
<< get_compression_name(s->compression()) << catinfo
|
compinfo = fmt::format(
|
||||||
<< ") using '"
|
" using '{}'", writer.get_compressor(s->type(), cat).describe());
|
||||||
<< writer.get_compressor(s->type(), cat).describe() << "'";
|
}
|
||||||
|
LOG_VERBOSE << prefix << " " << size_with_unit(s->length()) << " "
|
||||||
|
<< get_section_name(s->type()) << " ("
|
||||||
|
<< get_compression_name(s->compression()) << catinfo << ")"
|
||||||
|
<< compinfo;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto copy_compressed = [&](const auto& s) {
|
auto log_recompress =
|
||||||
LOG_VERBOSE << "copying " << get_section_name(s->type()) << " ("
|
[&](const auto& s,
|
||||||
<< get_compression_name(s->compression()) << ")";
|
std::optional<fragment_category::value_type> const& cat =
|
||||||
writer.write_compressed_section(s->type(), s->compression(), s->data(*mm_));
|
std::nullopt) { log_rewrite(true, s, cat); };
|
||||||
|
|
||||||
|
auto copy_compressed =
|
||||||
|
[&](const auto& s,
|
||||||
|
std::optional<fragment_category::value_type> const& cat =
|
||||||
|
std::nullopt) {
|
||||||
|
log_rewrite(false, s, cat);
|
||||||
|
writer.write_compressed_section(s->type(), s->compression(),
|
||||||
|
s->data(*mm_));
|
||||||
};
|
};
|
||||||
|
|
||||||
parser_.rewind();
|
parser_.rewind();
|
||||||
|
|
||||||
while (auto s = parser_.next_section()) {
|
while (auto s = parser_.next_section()) {
|
||||||
switch (s->type()) {
|
switch (s->type()) {
|
||||||
case section_type::BLOCK:
|
case section_type::BLOCK: {
|
||||||
if (opts.recompress_block) {
|
|
||||||
std::optional<fragment_category::value_type> cat;
|
std::optional<fragment_category::value_type> cat;
|
||||||
|
bool recompress_block{true};
|
||||||
|
|
||||||
if (auto catstr = meta_.get_block_category(block_no)) {
|
if (opts.recompress_block) {
|
||||||
|
auto catstr = meta_.get_block_category(block_no);
|
||||||
|
|
||||||
|
if (catstr) {
|
||||||
cat = cat_resolver.category_value(catstr.value());
|
cat = cat_resolver.category_value(catstr.value());
|
||||||
|
|
||||||
if (!cat) {
|
if (!cat) {
|
||||||
LOG_ERROR << "unknown category '" << catstr.value()
|
LOG_ERROR << "unknown category '" << catstr.value()
|
||||||
<< "' for block " << block_no;
|
<< "' for block " << block_no;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!opts.recompress_categories.empty()) {
|
||||||
|
bool is_in_set{opts.recompress_categories.count(catstr.value()) >
|
||||||
|
0};
|
||||||
|
|
||||||
|
recompress_block =
|
||||||
|
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (recompress_block) {
|
||||||
log_recompress(s, cat);
|
log_recompress(s, cat);
|
||||||
|
|
||||||
writer.write_section(section_type::BLOCK, s->compression(),
|
writer.write_section(section_type::BLOCK, s->compression(),
|
||||||
s->data(*mm_), cat);
|
s->data(*mm_), cat);
|
||||||
} else {
|
} else {
|
||||||
copy_compressed(s);
|
copy_compressed(s, cat);
|
||||||
}
|
}
|
||||||
|
|
||||||
++block_no;
|
++block_no;
|
||||||
break;
|
} break;
|
||||||
|
|
||||||
case section_type::METADATA_V2_SCHEMA:
|
case section_type::METADATA_V2_SCHEMA:
|
||||||
case section_type::METADATA_V2:
|
case section_type::METADATA_V2:
|
||||||
|
@ -281,7 +281,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
metadata_compression, log_level_str, timestamp, time_resolution,
|
metadata_compression, log_level_str, timestamp, time_resolution,
|
||||||
progress_mode, recompress_opts, pack_metadata, file_hash_algo,
|
progress_mode, recompress_opts, pack_metadata, file_hash_algo,
|
||||||
debug_filter, max_similarity_size, input_list_str, chmod_str,
|
debug_filter, max_similarity_size, input_list_str, chmod_str,
|
||||||
categorizer_list_str, history_compression;
|
categorizer_list_str, history_compression, recompress_categories;
|
||||||
std::vector<sys_string> filter;
|
std::vector<sys_string> filter;
|
||||||
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
||||||
bloom_filter_size, compression;
|
bloom_filter_size, compression;
|
||||||
@ -376,6 +376,9 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
("recompress",
|
("recompress",
|
||||||
po::value<std::string>(&recompress_opts)->implicit_value("all"),
|
po::value<std::string>(&recompress_opts)->implicit_value("all"),
|
||||||
"recompress an existing filesystem (none, block, metadata, all)")
|
"recompress an existing filesystem (none, block, metadata, all)")
|
||||||
|
("recompress-categories",
|
||||||
|
po::value<std::string>(&recompress_categories),
|
||||||
|
"only recompress blocks of these categories")
|
||||||
("categorize",
|
("categorize",
|
||||||
po::value<std::string>(&categorizer_list_str)
|
po::value<std::string>(&categorizer_list_str)
|
||||||
->implicit_value("pcmaudio,incompressible"),
|
->implicit_value("pcmaudio,incompressible"),
|
||||||
@ -383,7 +386,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
("order",
|
("order",
|
||||||
po::value<std::vector<std::string>>(&order)
|
po::value<std::vector<std::string>>(&order)
|
||||||
->value_name("[cat::]arg")->multitoken()->composing(),
|
->value_name("[cat::]arg")->multitoken()->composing(),
|
||||||
order_desc.c_str()) // TODO
|
order_desc.c_str())
|
||||||
("max-similarity-size",
|
("max-similarity-size",
|
||||||
po::value<std::string>(&max_similarity_size),
|
po::value<std::string>(&max_similarity_size),
|
||||||
"maximum file size to compute similarity")
|
"maximum file size to compute similarity")
|
||||||
@ -714,6 +717,15 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
std::cerr << "invalid recompress mode: " << recompress_opts << "\n";
|
std::cerr << "invalid recompress mode: " << recompress_opts << "\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!recompress_categories.empty()) {
|
||||||
|
std::string_view input = recompress_categories;
|
||||||
|
if (input.front() == '!') {
|
||||||
|
rw_opts.recompress_categories_exclude = true;
|
||||||
|
input.remove_prefix(1);
|
||||||
|
}
|
||||||
|
boost::split(rw_opts.recompress_categories, input, boost::is_any_of(","));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file_hash_algo == "none") {
|
if (file_hash_algo == "none") {
|
||||||
@ -1018,6 +1030,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
|
|
||||||
cat_resolver = std::make_shared<filesystem_block_category_resolver>(
|
cat_resolver = std::make_shared<filesystem_block_category_resolver>(
|
||||||
input_filesystem->get_all_block_categories());
|
input_filesystem->get_all_block_categories());
|
||||||
|
|
||||||
|
for (auto const& cat : rw_opts.recompress_categories) {
|
||||||
|
if (!cat_resolver->category_value(cat)) {
|
||||||
|
std::cerr << "error: no category '" << cat << "' in input filesystem\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
cat_resolver = options.inode.categorizer_mgr;
|
cat_resolver = options.inode.categorizer_mgr;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user