feat: use glob-to-regex to simplify and enhance rule_based_entry_filter

This commit is contained in:
Marcus Holland-Moritz 2024-11-16 17:37:08 +01:00
parent 5d19513829
commit c427808bb1
4 changed files with 121 additions and 75 deletions

View File

@ -693,10 +693,11 @@ Patterns support `?` and `*` wildcards matching a single character
and any number of characters, respectively. These patterns don't match and any number of characters, respectively. These patterns don't match
across directory separators (`/`). across directory separators (`/`).
Patterns also support the `**` wildcard, which matches across directory Patterns also support the `**` globstar wildcard, which matches across
separators. directory separators.
Patterns also support character classes. Patterns also support character classes (`[avt]`), ranges (`[a-h]`),
and complementation (`[!a-h]`).
Here's an exemplary rule set: Here's an exemplary rule set:

View File

@ -31,6 +31,8 @@
#include <dwarfs/writer/entry_interface.h> #include <dwarfs/writer/entry_interface.h>
#include <dwarfs/writer/rule_based_entry_filter.h> #include <dwarfs/writer/rule_based_entry_filter.h>
#include <dwarfs/internal/glob_to_regex.h>
namespace dwarfs::writer { namespace dwarfs::writer {
namespace internal { namespace internal {
@ -51,12 +53,22 @@ struct filter_rule {
}; };
filter_rule(rule_type type, bool floating, std::string const& re, filter_rule(rule_type type, bool floating, std::string const& re,
std::string const& rule) std::string_view rule, bool ignore_case = false)
: type{type} : type{type}
, floating{floating} , floating{floating}
, re{re} , re{re, regex_flags(ignore_case)}
, rule{rule} {} , rule{rule} {}
static constexpr std::regex_constants::syntax_option_type
regex_flags(bool ignore_case) {
auto flags =
std::regex_constants::ECMAScript | std::regex_constants::optimize;
if (ignore_case) {
flags |= std::regex_constants::icase;
}
return flags;
}
rule_type type; rule_type type;
bool floating; bool floating;
std::regex re; std::regex re;
@ -89,14 +101,34 @@ class rule_based_entry_filter_ : public rule_based_entry_filter::impl {
template <typename LoggerPolicy> template <typename LoggerPolicy>
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule( auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
std::string_view rule_sv) -> filter_rule { std::string_view rule) -> filter_rule {
std::string rule{rule_sv};
std::string re; std::string re;
filter_rule::rule_type type; filter_rule::rule_type type;
auto* p = rule.c_str(); if (rule.empty()) {
throw std::runtime_error("empty filter rule");
}
switch (*p) { auto splitpos = rule.find_first_of(' ');
if (splitpos == std::string::npos) {
throw std::runtime_error("invalid filter rule: " + std::string(rule));
}
auto pattern_start = rule.find_first_not_of(' ', splitpos);
if (pattern_start == std::string::npos) {
throw std::runtime_error("no pattern in filter rule: " + std::string(rule));
}
auto prefix = rule.substr(0, splitpos);
auto pattern = rule.substr(pattern_start);
if (prefix.empty()) {
throw std::runtime_error("no prefix in filter rule: " + std::string(rule));
}
switch (prefix[0]) {
case '+': case '+':
type = filter_rule::rule_type::include; type = filter_rule::rule_type::include;
break; break;
@ -107,76 +139,34 @@ auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
throw std::runtime_error("rules must start with + or -"); throw std::runtime_error("rules must start with + or -");
} }
while (*++p == ' ') bool ignore_case{false};
;
prefix.remove_prefix(1);
for (auto opt : prefix) {
switch (opt) {
case 'i':
ignore_case = true;
break;
default:
throw std::runtime_error(
fmt::format("unknown option '{}' in filter rule: {}", opt, rule));
}
}
// If the start of the pattern is not explicitly anchored, make it floating. // If the start of the pattern is not explicitly anchored, make it floating.
bool floating = *p && *p != '/'; bool floating = pattern[0] != '/';
if (floating) { if (floating) {
re += ".*/"; re += ".*/";
} }
while (*p) { re += dwarfs::internal::glob_to_regex_string(pattern) + "$";
switch (*p) {
case '\\':
re += *p++;
if (p) {
re += *p++;
}
continue;
case '*': {
int nstar = 1;
while (*++p == '*') {
++nstar;
}
switch (nstar) {
case 1:
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
re += "[^/]+";
} else {
re += "[^/]*";
}
break;
case 2:
re += ".*";
break;
default:
throw std::runtime_error("too many *s");
}
}
continue;
case '?':
re += "[^/]";
break;
case '.':
case '+':
case '^':
case '$':
case '(':
case ')':
case '{':
case '}':
case '|':
re += '\\';
re += *p;
break;
default:
re += *p;
break;
}
++p;
}
LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
<< "]"; << ", ignore_case=" << ignore_case << "]";
return filter_rule(type, floating, re, rule); return filter_rule(type, floating, re, rule, ignore_case);
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>

View File

@ -79,6 +79,30 @@ R"(
"usr/lib/Mcrt1.o", "usr/lib/Mcrt1.o",
"usr/lib64", "usr/lib64",
"usr/lib64/gcrt1.o", "usr/lib64/gcrt1.o",
}},
{
"NoIgnoreCase",
R"(
+ mc*
+ *led*
- *
)", {
"",
}},
{
"IgnoreCase",
R"(
+i mc*
+i *led*
- *
)", {
"",
"usr",
"usr/lib",
"usr/lib/Mcrt1.o",
"usr/lib64",
"usr/lib64/xtables",
"usr/lib64/xtables/libxt_LED.so",
}}, }},
// clang-format on // clang-format on
}; };

View File

@ -1370,6 +1370,43 @@ TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) {
::testing::HasSubstr("cannot combine --input-list and --filter")); ::testing::HasSubstr("cannot combine --input-list and --filter"));
} }
TEST(mkdwarfs_test, rules_must_start_with_plus_or_minus) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "% *"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("rules must start with + or -"));
}
TEST(mkdwarfs_test, empty_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", ""}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("empty filter rule"));
}
TEST(mkdwarfs_test, invalid_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+i"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid filter rule"));
}
TEST(mkdwarfs_test, no_pattern_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+ "}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("no pattern in filter rule"));
}
TEST(mkdwarfs_test, no_prefix_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", " foo"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("no prefix in filter rule"));
}
TEST(mkdwarfs_test, unknown_option_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+x foo"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr("unknown option 'x' in filter rule"));
}
TEST(mkdwarfs_test, cannot_open_input_list_file) { TEST(mkdwarfs_test, cannot_open_input_list_file) {
mkdwarfs_tester t; mkdwarfs_tester t;
EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"})); EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"}));
@ -1639,12 +1676,6 @@ TEST(mkdwarfs_test, invalid_progress_mode) {
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode")); EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode"));
} }
TEST(mkdwarfs_test, invalid_filter_rule) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("could not parse filter rule"));
}
TEST(mkdwarfs_test, time_resolution_zero) { TEST(mkdwarfs_test, time_resolution_zero) {
mkdwarfs_tester t; mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"})); EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"}));