From cc5af7bad8602855fe28aa28dba5220e2e798738 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 17 Dec 2023 17:06:41 +0100 Subject: [PATCH] feat(mkdwarfs): improve --categorize defaults --- include/dwarfs/contextual_option.h | 28 +++++++++---- src/mkdwarfs_main.cpp | 63 ++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/include/dwarfs/contextual_option.h b/include/dwarfs/contextual_option.h index ad347af9..be0c4051 100644 --- a/include/dwarfs/contextual_option.h +++ b/include/dwarfs/contextual_option.h @@ -33,6 +33,11 @@ namespace dwarfs { +enum class contextual_option_policy { + normal, + fallback, +}; + template class contextual_option_parser; @@ -53,8 +58,11 @@ class contextual_option { void set_default(value_type const& val) { default_ = val; } - bool add_contextual(context_type const& ctx, value_type const& val) { - return contextual_.emplace(ctx, val).second; + bool add_contextual( + context_type const& ctx, value_type const& val, + contextual_option_policy policy = contextual_option_policy::normal) { + return contextual_.emplace(ctx, val).second || + policy == contextual_option_policy::fallback; } std::optional @@ -127,7 +135,8 @@ class contextual_option_parser { , op_{op} , name_{name} {} - void parse(std::string_view arg) const { + void parse(std::string_view arg, contextual_option_policy policy = + contextual_option_policy::normal) const { try { auto pos = arg.find("::"); @@ -140,10 +149,10 @@ class contextual_option_parser { std::invoke_result_t, typename option_type::context_type>) { - add_contextual(cp_.parse(ctx), val); + add_contextual(cp_.parse(ctx), val, policy); } else { for (auto c : cp_.parse(ctx)) { - add_contextual(c, val); + add_contextual(c, val, policy); } } } @@ -154,6 +163,10 @@ class contextual_option_parser { } } + void parse_fallback(std::string_view arg) const { + parse(arg, contextual_option_policy::fallback); + } + void parse(std::span list) const { for (auto const& arg : list) { parse(arg); @@ -187,8 +200,9 @@ class contextual_option_parser { private: void add_contextual(typename option_type::context_type const& ctx, - typename option_type::value_type const& val) const { - if (!opt_.add_contextual(ctx, val)) { + typename option_type::value_type const& val, + contextual_option_policy policy) const { + if (!opt_.add_contextual(ctx, val, policy)) { throw std::runtime_error(fmt::format( "duplicate context '{}' for option '{}'", cp_.to_string(ctx), name_)); } diff --git a/src/mkdwarfs_main.cpp b/src/mkdwarfs_main.cpp index b1d5f209..d5f57230 100644 --- a/src/mkdwarfs_main.cpp +++ b/src/mkdwarfs_main.cpp @@ -268,6 +268,29 @@ constexpr std::array levels{{ constexpr unsigned default_level = 7; +class categorize_optval { + public: + std::string value; + bool is_explicit{false}; + + categorize_optval() = default; + categorize_optval(std::string const& val, bool expl = false) + : value{val} + , is_explicit{expl} {} + + bool add_implicit_defaults() const { return !value.empty() && !is_explicit; } +}; + +std::ostream& operator<<(std::ostream& os, categorize_optval const& optval) { + return os << optval.value << (optval.is_explicit ? " (explicit)" : ""); +} + +void validate(boost::any& v, std::vector const& values, + categorize_optval*, int) { + po::validators::check_first_occurrence(v); + v = categorize_optval{po::validators::get_single_string(values), true}; +} + } // namespace int mkdwarfs_main(int argc, sys_char** argv) { @@ -281,7 +304,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { metadata_compression, log_level_str, timestamp, time_resolution, progress_mode, recompress_opts, pack_metadata, file_hash_algo, debug_filter, max_similarity_size, input_list_str, chmod_str, - categorizer_list_str, history_compression, recompress_categories; + history_compression, recompress_categories; std::vector filter; std::vector order, max_lookback_blocks, window_size, window_step, bloom_filter_size, compression; @@ -292,6 +315,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { unsigned level; int compress_niceness; uint16_t uid, gid; + categorize_optval categorizer_list; integral_value_parser max_lookback_parser; integral_value_parser window_size_parser(0, 24); @@ -380,8 +404,8 @@ int mkdwarfs_main(int argc, sys_char** argv) { po::value(&recompress_categories), "only recompress blocks of these categories") ("categorize", - po::value(&categorizer_list_str) - ->implicit_value("pcmaudio,incompressible"), + po::value(&categorizer_list) + ->implicit_value(categorize_optval("pcmaudio,incompressible")), categorize_desc.c_str()) ("order", po::value>(&order) @@ -1008,13 +1032,13 @@ int mkdwarfs_main(int argc, sys_char** argv) { } } - if (!categorizer_list_str.empty()) { - std::vector categorizer_list; - boost::split(categorizer_list, categorizer_list_str, boost::is_any_of(",")); + if (!categorizer_list.value.empty()) { + std::vector categorizers; + boost::split(categorizers, categorizer_list.value, boost::is_any_of(",")); options.inode.categorizer_mgr = std::make_shared(lgr); - for (auto const& name : categorizer_list) { + for (auto const& name : categorizers) { options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm)); } } @@ -1049,7 +1073,10 @@ int mkdwarfs_main(int argc, sys_char** argv) { order_parser); cop.parse(defaults.order); cop.parse(order); - LOG_DEBUG << cop.as_string(); + if (categorizer_list.add_implicit_defaults()) { + cop.parse_fallback("pcmaudio/waveform::revpath"); + } + LOG_VERBOSE << cop.as_string(); } { @@ -1058,7 +1085,10 @@ int mkdwarfs_main(int argc, sys_char** argv) { max_lookback_parser); sf_config.max_active_blocks.set_default(1); cop.parse(max_lookback_blocks); - LOG_DEBUG << cop.as_string(); + if (categorizer_list.add_implicit_defaults()) { + cop.parse_fallback("pcmaudio/waveform::0"); + } + LOG_VERBOSE << cop.as_string(); } { @@ -1067,7 +1097,10 @@ int mkdwarfs_main(int argc, sys_char** argv) { window_size_parser); sf_config.blockhash_window_size.set_default(defaults.window_size); cop.parse(window_size); - LOG_DEBUG << cop.as_string(); + if (categorizer_list.add_implicit_defaults()) { + cop.parse_fallback("pcmaudio/waveform::0"); + } + LOG_VERBOSE << cop.as_string(); } { @@ -1076,7 +1109,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { window_step_parser); sf_config.window_increment_shift.set_default(defaults.window_step); cop.parse(window_step); - LOG_DEBUG << cop.as_string(); + LOG_VERBOSE << cop.as_string(); } { @@ -1085,7 +1118,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { bloom_filter_size_parser); sf_config.bloom_filter_size.set_default(4); cop.parse(bloom_filter_size); - LOG_DEBUG << cop.as_string(); + LOG_VERBOSE << cop.as_string(); } } catch (std::exception const& e) { LOG_ERROR << e.what(); @@ -1109,7 +1142,11 @@ int mkdwarfs_main(int argc, sys_char** argv) { compression_opt.set_default( block_compressor(std::string(defaults.data_compression))); cop.parse(compression); - LOG_DEBUG << cop.as_string(); + if (categorizer_list.add_implicit_defaults()) { + cop.parse_fallback("incompressible::null"); + cop.parse_fallback("pcmaudio/waveform::flac"); + } + LOG_VERBOSE << cop.as_string(); fsw->add_default_compressor(compression_opt.get());