feat(mkdwarfs): improve --categorize defaults

This commit is contained in:
Marcus Holland-Moritz 2023-12-17 17:06:41 +01:00
parent 711b87fd95
commit cc5af7bad8
2 changed files with 71 additions and 20 deletions

View File

@ -33,6 +33,11 @@
namespace dwarfs {
/// Selects how registering a contextual value reacts when the insertion
/// for a context does not take place.
enum class contextual_option_policy {
  /// A value that could not be inserted is reported as a failure.
  normal,
  /// Registration is always reported as successful, even when the
  /// insertion did not take place.
  fallback
};
template <typename OptionType, typename ContextParser, typename OptionParser>
class contextual_option_parser;
@ -53,8 +58,11 @@ class contextual_option {
void set_default(value_type const& val) { default_ = val; }
bool add_contextual(context_type const& ctx, value_type const& val) {
return contextual_.emplace(ctx, val).second;
/// Tries to register `val` for the context `ctx`.
///
/// @param ctx     context the value applies to
/// @param val     value to store for that context
/// @param policy  with `fallback`, the call reports success regardless of
///                whether the insertion took place
/// @return true if the value was inserted or the policy is `fallback`,
///         false otherwise
bool add_contextual(
    context_type const& ctx, value_type const& val,
    contextual_option_policy policy = contextual_option_policy::normal) {
  // The insertion is attempted under either policy; only the way its
  // outcome is reported differs.
  bool const inserted = contextual_.emplace(ctx, val).second;
  if (policy == contextual_option_policy::fallback) {
    return true;
  }
  return inserted;
}
std::optional<value_type>
@ -127,7 +135,8 @@ class contextual_option_parser {
, op_{op}
, name_{name} {}
void parse(std::string_view arg) const {
void parse(std::string_view arg, contextual_option_policy policy =
contextual_option_policy::normal) const {
try {
auto pos = arg.find("::");
@ -140,10 +149,10 @@ class contextual_option_parser {
std::invoke_result_t<decltype(&ContextParser::parse),
ContextParser, decltype(ctx)>,
typename option_type::context_type>) {
add_contextual(cp_.parse(ctx), val);
add_contextual(cp_.parse(ctx), val, policy);
} else {
for (auto c : cp_.parse(ctx)) {
add_contextual(c, val);
add_contextual(c, val, policy);
}
}
}
@ -154,6 +163,10 @@ class contextual_option_parser {
}
}
/// Parses a single argument using the `fallback` policy instead of the
/// default `normal` policy; otherwise identical to calling
/// `parse(arg, policy)` directly.
void parse_fallback(std::string_view arg) const {
  constexpr auto policy = contextual_option_policy::fallback;
  parse(arg, policy);
}
void parse(std::span<std::string const> list) const {
for (auto const& arg : list) {
parse(arg);
@ -187,8 +200,9 @@ class contextual_option_parser {
private:
void add_contextual(typename option_type::context_type const& ctx,
typename option_type::value_type const& val) const {
if (!opt_.add_contextual(ctx, val)) {
typename option_type::value_type const& val,
contextual_option_policy policy) const {
if (!opt_.add_contextual(ctx, val, policy)) {
throw std::runtime_error(fmt::format(
"duplicate context '{}' for option '{}'", cp_.to_string(ctx), name_));
}

View File

@ -268,6 +268,29 @@ constexpr std::array<level_defaults, 10> levels{{
constexpr unsigned default_level = 7;
/// Option value that carries a string together with a flag telling
/// whether it was marked as explicit when constructed.
class categorize_optval {
 public:
  /// The option text; empty by default.
  std::string value;
  /// True only when the constructor was called with `expl == true`.
  bool is_explicit{false};

  categorize_optval() = default;

  /// @param val   option text to store
  /// @param expl  whether the value should be flagged as explicit
  categorize_optval(std::string const& val, bool expl = false)
      : value(val)
      , is_explicit(expl) {}

  /// @return true when a non-empty value is present that was not flagged
  ///         as explicit; false in every other case.
  bool add_implicit_defaults() const {
    if (is_explicit) {
      return false;
    }
    return !value.empty();
  }
};
/// Streams the option text, followed by " (explicit)" when the value is
/// flagged as explicit.
std::ostream& operator<<(std::ostream& os, categorize_optval const& optval) {
  char const* const suffix = optval.is_explicit ? " (explicit)" : "";
  os << optval.value;
  os << suffix;
  return os;
}
/// Validator overload for `categorize_optval`: stores the single string
/// taken from `values` into `v`, flagged as explicit.
///
/// @param v       storage that receives the resulting `categorize_optval`
/// @param values  the string tokens supplied for the option
void validate(boost::any& v, std::vector<std::string> const& values,
              categorize_optval*, int) {
  po::validators::check_first_occurrence(v);
  auto const& text = po::validators::get_single_string(values);
  // Values built here are always marked explicit.
  v = categorize_optval{text, true};
}
} // namespace
int mkdwarfs_main(int argc, sys_char** argv) {
@ -281,7 +304,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
metadata_compression, log_level_str, timestamp, time_resolution,
progress_mode, recompress_opts, pack_metadata, file_hash_algo,
debug_filter, max_similarity_size, input_list_str, chmod_str,
categorizer_list_str, history_compression, recompress_categories;
history_compression, recompress_categories;
std::vector<sys_string> filter;
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
bloom_filter_size, compression;
@ -292,6 +315,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
unsigned level;
int compress_niceness;
uint16_t uid, gid;
categorize_optval categorizer_list;
integral_value_parser<size_t> max_lookback_parser;
integral_value_parser<unsigned> window_size_parser(0, 24);
@ -380,8 +404,8 @@ int mkdwarfs_main(int argc, sys_char** argv) {
po::value<std::string>(&recompress_categories),
"only recompress blocks of these categories")
("categorize",
po::value<std::string>(&categorizer_list_str)
->implicit_value("pcmaudio,incompressible"),
po::value<categorize_optval>(&categorizer_list)
->implicit_value(categorize_optval("pcmaudio,incompressible")),
categorize_desc.c_str())
("order",
po::value<std::vector<std::string>>(&order)
@ -1008,13 +1032,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
}
}
if (!categorizer_list_str.empty()) {
std::vector<std::string> categorizer_list;
boost::split(categorizer_list, categorizer_list_str, boost::is_any_of(","));
if (!categorizer_list.value.empty()) {
std::vector<std::string> categorizers;
boost::split(categorizers, categorizer_list.value, boost::is_any_of(","));
options.inode.categorizer_mgr = std::make_shared<categorizer_manager>(lgr);
for (auto const& name : categorizer_list) {
for (auto const& name : categorizers) {
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));
}
}
@ -1049,7 +1073,10 @@ int mkdwarfs_main(int argc, sys_char** argv) {
order_parser);
cop.parse(defaults.order);
cop.parse(order);
LOG_DEBUG << cop.as_string();
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::revpath");
}
LOG_VERBOSE << cop.as_string();
}
{
@ -1058,7 +1085,10 @@ int mkdwarfs_main(int argc, sys_char** argv) {
max_lookback_parser);
sf_config.max_active_blocks.set_default(1);
cop.parse(max_lookback_blocks);
LOG_DEBUG << cop.as_string();
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::0");
}
LOG_VERBOSE << cop.as_string();
}
{
@ -1067,7 +1097,10 @@ int mkdwarfs_main(int argc, sys_char** argv) {
window_size_parser);
sf_config.blockhash_window_size.set_default(defaults.window_size);
cop.parse(window_size);
LOG_DEBUG << cop.as_string();
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::0");
}
LOG_VERBOSE << cop.as_string();
}
{
@ -1076,7 +1109,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
window_step_parser);
sf_config.window_increment_shift.set_default(defaults.window_step);
cop.parse(window_step);
LOG_DEBUG << cop.as_string();
LOG_VERBOSE << cop.as_string();
}
{
@ -1085,7 +1118,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
bloom_filter_size_parser);
sf_config.bloom_filter_size.set_default(4);
cop.parse(bloom_filter_size);
LOG_DEBUG << cop.as_string();
LOG_VERBOSE << cop.as_string();
}
} catch (std::exception const& e) {
LOG_ERROR << e.what();
@ -1109,7 +1142,11 @@ int mkdwarfs_main(int argc, sys_char** argv) {
compression_opt.set_default(
block_compressor(std::string(defaults.data_compression)));
cop.parse(compression);
LOG_DEBUG << cop.as_string();
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("incompressible::null");
cop.parse_fallback("pcmaudio/waveform::flac");
}
LOG_VERBOSE << cop.as_string();
fsw->add_default_compressor(compression_opt.get());