diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md index d6d99d99..adb2b993 100644 --- a/doc/mkdwarfs.md +++ b/doc/mkdwarfs.md @@ -693,10 +693,11 @@ Patterns support `?` and `*` wildcards matching a single character and any number of characters, respectively. These patterns don't match across directory separators (`/`). -Patterns also support the `**` wildcard, which matches across directory -separators. +Patterns also support the `**` globstar wildcard, which matches across +directory separators. -Patterns also support character classes. +Patterns also support character classes (`[avt]`), ranges (`[a-h]`), +and complementation (`[!a-h]`). Here's an exemplary rule set: diff --git a/src/writer/rule_based_entry_filter.cpp b/src/writer/rule_based_entry_filter.cpp index 60e0a0c7..4a1df973 100644 --- a/src/writer/rule_based_entry_filter.cpp +++ b/src/writer/rule_based_entry_filter.cpp @@ -31,6 +31,8 @@ #include #include +#include + namespace dwarfs::writer { namespace internal { @@ -51,12 +53,22 @@ struct filter_rule { }; filter_rule(rule_type type, bool floating, std::string const& re, - std::string const& rule) + std::string_view rule, bool ignore_case = false) : type{type} , floating{floating} - , re{re} + , re{re, regex_flags(ignore_case)} , rule{rule} {} + static constexpr std::regex_constants::syntax_option_type + regex_flags(bool ignore_case) { + auto flags = + std::regex_constants::ECMAScript | std::regex_constants::optimize; + if (ignore_case) { + flags |= std::regex_constants::icase; + } + return flags; + } + rule_type type; bool floating; std::regex re; @@ -89,14 +101,34 @@ class rule_based_entry_filter_ : public rule_based_entry_filter::impl { template auto rule_based_entry_filter_::compile_filter_rule( - std::string_view rule_sv) -> filter_rule { - std::string rule{rule_sv}; + std::string_view rule) -> filter_rule { std::string re; filter_rule::rule_type type; - auto* p = rule.c_str(); + if (rule.empty()) { + throw std::runtime_error("empty filter rule"); + } - switch (*p) { + auto splitpos = rule.find_first_of(' '); + + if (splitpos == std::string::npos) { + throw std::runtime_error("invalid filter rule: " + std::string(rule)); + } + + auto pattern_start = rule.find_first_not_of(' ', splitpos); + + if (pattern_start == std::string::npos) { + throw std::runtime_error("no pattern in filter rule: " + std::string(rule)); + } + + auto prefix = rule.substr(0, splitpos); + auto pattern = rule.substr(pattern_start); + + if (prefix.empty()) { + throw std::runtime_error("no prefix in filter rule: " + std::string(rule)); + } + + switch (prefix[0]) { case '+': type = filter_rule::rule_type::include; break; @@ -107,76 +139,34 @@ auto rule_based_entry_filter_::compile_filter_rule( throw std::runtime_error("rules must start with + or -"); } - while (*++p == ' ') - ; + bool ignore_case{false}; + + prefix.remove_prefix(1); + for (auto opt : prefix) { + switch (opt) { + case 'i': + ignore_case = true; + break; + + default: + throw std::runtime_error( + fmt::format("unknown option '{}' in filter rule: {}", opt, rule)); + } + } // If the start of the pattern is not explicitly anchored, make it floating. - bool floating = *p && *p != '/'; + bool floating = pattern[0] != '/'; if (floating) { re += ".*/"; } - while (*p) { - switch (*p) { - case '\\': - re += *p++; - if (p) { - re += *p++; - } - continue; - - case '*': { - int nstar = 1; - while (*++p == '*') { - ++nstar; - } - switch (nstar) { - case 1: - if (re.ends_with('/') and (*p == '/' or *p == '\0')) { - re += "[^/]+"; - } else { - re += "[^/]*"; - } - break; - case 2: - re += ".*"; - break; - default: - throw std::runtime_error("too many *s"); - } - } - continue; - - case '?': - re += "[^/]"; - break; - - case '.': - case '+': - case '^': - case '$': - case '(': - case ')': - case '{': - case '}': - case '|': - re += '\\'; - re += *p; - break; - - default: - re += *p; - break; - } - - ++p; - } + re += dwarfs::internal::glob_to_regex_string(pattern) + "$"; LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating - << "]"; + << ", ignore_case=" << ignore_case << "]"; - return filter_rule(type, floating, re, rule); + return filter_rule(type, floating, re, rule, ignore_case); } template diff --git a/test/filter_test_data.cpp b/test/filter_test_data.cpp index 65630da8..54f80db1 100644 --- a/test/filter_test_data.cpp +++ b/test/filter_test_data.cpp @@ -79,6 +79,30 @@ R"( "usr/lib/Mcrt1.o", "usr/lib64", "usr/lib64/gcrt1.o", +}}, +{ + "NoIgnoreCase", +R"( ++ mc* ++ *led* +- * +)", { + "", +}}, +{ + "IgnoreCase", +R"( ++i mc* ++i *led* +- * +)", { + "", + "usr", + "usr/lib", + "usr/lib/Mcrt1.o", + "usr/lib64", + "usr/lib64/xtables", + "usr/lib64/xtables/libxt_LED.so", }}, // clang-format on }; diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp index ea8ceae0..6a1b4fc2 100644 --- a/test/tool_main_test.cpp +++ b/test/tool_main_test.cpp @@ -1370,6 +1370,43 @@ TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) { ::testing::HasSubstr("cannot combine --input-list and --filter")); } +TEST(mkdwarfs_test, rules_must_start_with_plus_or_minus) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "% *"})); + EXPECT_THAT(t.err(), ::testing::HasSubstr("rules must start with + or -")); +} + +TEST(mkdwarfs_test, empty_filter_rule) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", ""})); + EXPECT_THAT(t.err(), ::testing::HasSubstr("empty filter rule")); +} + +TEST(mkdwarfs_test, invalid_filter_rule) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+i"})); + EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid filter rule")); +} + +TEST(mkdwarfs_test, no_pattern_in_filter_rule) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+ "})); + EXPECT_THAT(t.err(), ::testing::HasSubstr("no pattern in filter rule")); +} + +TEST(mkdwarfs_test, no_prefix_in_filter_rule) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", " foo"})); + EXPECT_THAT(t.err(), ::testing::HasSubstr("no prefix in filter rule")); +} + +TEST(mkdwarfs_test, unknown_option_in_filter_rule) { + auto t = mkdwarfs_tester::create_empty(); + EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+x foo"})); + EXPECT_THAT(t.err(), + ::testing::HasSubstr("unknown option 'x' in filter rule")); +} + TEST(mkdwarfs_test, cannot_open_input_list_file) { mkdwarfs_tester t; EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"})); @@ -1639,12 +1676,6 @@ TEST(mkdwarfs_test, invalid_progress_mode) { EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode")); } -TEST(mkdwarfs_test, invalid_filter_rule) { - mkdwarfs_tester t; - EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "grmpf"})); - EXPECT_THAT(t.err(), ::testing::HasSubstr("could not parse filter rule")); -} - TEST(mkdwarfs_test, time_resolution_zero) { mkdwarfs_tester t; EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"}));