mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 04:19:10 -04:00
feat: use glob-to-regex to simplify and enhance rule_based_entry_filter
This commit is contained in:
parent
5d19513829
commit
c427808bb1
@ -693,10 +693,11 @@ Patterns support `?` and `*` wildcards matching a single character
|
||||
and any number of characters, respectively. These patterns don't match
|
||||
across directory separators (`/`).
|
||||
|
||||
Patterns also support the `**` wildcard, which matches across directory
|
||||
separators.
|
||||
Patterns also support the `**` globstar wildcard, which matches across
|
||||
directory separators.
|
||||
|
||||
Patterns also support character classes.
|
||||
Patterns also support character classes (`[avt]`), ranges (`[a-h]`),
|
||||
and complementation (`[!a-h]`).
|
||||
|
||||
Here's an exemplary rule set:
|
||||
|
||||
|
@ -31,6 +31,8 @@
|
||||
#include <dwarfs/writer/entry_interface.h>
|
||||
#include <dwarfs/writer/rule_based_entry_filter.h>
|
||||
|
||||
#include <dwarfs/internal/glob_to_regex.h>
|
||||
|
||||
namespace dwarfs::writer {
|
||||
|
||||
namespace internal {
|
||||
@ -51,12 +53,22 @@ struct filter_rule {
|
||||
};
|
||||
|
||||
filter_rule(rule_type type, bool floating, std::string const& re,
|
||||
std::string const& rule)
|
||||
std::string_view rule, bool ignore_case = false)
|
||||
: type{type}
|
||||
, floating{floating}
|
||||
, re{re}
|
||||
, re{re, regex_flags(ignore_case)}
|
||||
, rule{rule} {}
|
||||
|
||||
// Computes the std::regex syntax options used when compiling a rule's
// regular expression. The result is always ECMAScript | optimize; icase is
// added on top when `ignore_case` is true.
static constexpr std::regex_constants::syntax_option_type
|
||||
regex_flags(bool ignore_case) {
|
||||
auto flags =
|
||||
std::regex_constants::ECMAScript | std::regex_constants::optimize;
|
||||
if (ignore_case) {
|
||||
flags |= std::regex_constants::icase;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
rule_type type;
|
||||
bool floating;
|
||||
std::regex re;
|
||||
@ -89,14 +101,34 @@ class rule_based_entry_filter_ : public rule_based_entry_filter::impl {
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
|
||||
std::string_view rule_sv) -> filter_rule {
|
||||
std::string rule{rule_sv};
|
||||
std::string_view rule) -> filter_rule {
|
||||
std::string re;
|
||||
filter_rule::rule_type type;
|
||||
|
||||
auto* p = rule.c_str();
|
||||
if (rule.empty()) {
|
||||
throw std::runtime_error("empty filter rule");
|
||||
}
|
||||
|
||||
switch (*p) {
|
||||
auto splitpos = rule.find_first_of(' ');
|
||||
|
||||
if (splitpos == std::string::npos) {
|
||||
throw std::runtime_error("invalid filter rule: " + std::string(rule));
|
||||
}
|
||||
|
||||
auto pattern_start = rule.find_first_not_of(' ', splitpos);
|
||||
|
||||
if (pattern_start == std::string::npos) {
|
||||
throw std::runtime_error("no pattern in filter rule: " + std::string(rule));
|
||||
}
|
||||
|
||||
auto prefix = rule.substr(0, splitpos);
|
||||
auto pattern = rule.substr(pattern_start);
|
||||
|
||||
if (prefix.empty()) {
|
||||
throw std::runtime_error("no prefix in filter rule: " + std::string(rule));
|
||||
}
|
||||
|
||||
switch (prefix[0]) {
|
||||
case '+':
|
||||
type = filter_rule::rule_type::include;
|
||||
break;
|
||||
@ -107,76 +139,34 @@ auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
|
||||
throw std::runtime_error("rules must start with + or -");
|
||||
}
|
||||
|
||||
while (*++p == ' ')
|
||||
;
|
||||
bool ignore_case{false};
|
||||
|
||||
prefix.remove_prefix(1);
|
||||
for (auto opt : prefix) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
ignore_case = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
fmt::format("unknown option '{}' in filter rule: {}", opt, rule));
|
||||
}
|
||||
}
|
||||
|
||||
// If the start of the pattern is not explicitly anchored, make it floating.
|
||||
bool floating = *p && *p != '/';
|
||||
bool floating = pattern[0] != '/';
|
||||
|
||||
if (floating) {
|
||||
re += ".*/";
|
||||
}
|
||||
|
||||
while (*p) {
|
||||
switch (*p) {
|
||||
case '\\':
|
||||
re += *p++;
|
||||
if (p) {
|
||||
re += *p++;
|
||||
}
|
||||
continue;
|
||||
|
||||
case '*': {
|
||||
int nstar = 1;
|
||||
while (*++p == '*') {
|
||||
++nstar;
|
||||
}
|
||||
switch (nstar) {
|
||||
case 1:
|
||||
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
|
||||
re += "[^/]+";
|
||||
} else {
|
||||
re += "[^/]*";
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
re += ".*";
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("too many *s");
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
||||
case '?':
|
||||
re += "[^/]";
|
||||
break;
|
||||
|
||||
case '.':
|
||||
case '+':
|
||||
case '^':
|
||||
case '$':
|
||||
case '(':
|
||||
case ')':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
re += '\\';
|
||||
re += *p;
|
||||
break;
|
||||
|
||||
default:
|
||||
re += *p;
|
||||
break;
|
||||
}
|
||||
|
||||
++p;
|
||||
}
|
||||
re += dwarfs::internal::glob_to_regex_string(pattern) + "$";
|
||||
|
||||
LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
|
||||
<< "]";
|
||||
<< ", ignore_case=" << ignore_case << "]";
|
||||
|
||||
return filter_rule(type, floating, re, rule);
|
||||
return filter_rule(type, floating, re, rule, ignore_case);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
|
@ -79,6 +79,30 @@ R"(
|
||||
"usr/lib/Mcrt1.o",
|
||||
"usr/lib64",
|
||||
"usr/lib64/gcrt1.o",
|
||||
}},
|
||||
{
|
||||
"NoIgnoreCase",
|
||||
R"(
|
||||
+ mc*
|
||||
+ *led*
|
||||
- *
|
||||
)", {
|
||||
"",
|
||||
}},
|
||||
{
|
||||
"IgnoreCase",
|
||||
R"(
|
||||
+i mc*
|
||||
+i *led*
|
||||
- *
|
||||
)", {
|
||||
"",
|
||||
"usr",
|
||||
"usr/lib",
|
||||
"usr/lib/Mcrt1.o",
|
||||
"usr/lib64",
|
||||
"usr/lib64/xtables",
|
||||
"usr/lib64/xtables/libxt_LED.so",
|
||||
}},
|
||||
// clang-format on
|
||||
};
|
||||
|
@ -1370,6 +1370,43 @@ TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) {
|
||||
::testing::HasSubstr("cannot combine --input-list and --filter"));
|
||||
}
|
||||
|
||||
// Passes the filter rule "% *" (first character is neither '+' nor '-') via
// -F and expects a non-zero return from run() plus the matching message in
// t.err().
TEST(mkdwarfs_test, rules_must_start_with_plus_or_minus) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "% *"}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("rules must start with + or -"));
|
||||
}
|
||||
|
||||
// Passes an empty string as the -F filter rule and expects a non-zero return
// from run() with "empty filter rule" reported in t.err().
TEST(mkdwarfs_test, empty_filter_rule) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", ""}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("empty filter rule"));
|
||||
}
|
||||
|
||||
// Passes the rule "+i", which contains no space and therefore no separate
// pattern part, and expects a non-zero return from run() with
// "invalid filter rule" reported in t.err().
TEST(mkdwarfs_test, invalid_filter_rule) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+i"}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid filter rule"));
|
||||
}
|
||||
|
||||
// Passes the rule "+ " (a '+' followed only by a space, so nothing follows
// the separator) and expects a non-zero return from run() with
// "no pattern in filter rule" reported in t.err().
TEST(mkdwarfs_test, no_pattern_in_filter_rule) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+ "}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("no pattern in filter rule"));
|
||||
}
|
||||
|
||||
// Passes the rule " foo" (begins with a space, so nothing precedes the
// separator) and expects a non-zero return from run() with
// "no prefix in filter rule" reported in t.err().
TEST(mkdwarfs_test, no_prefix_in_filter_rule) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", " foo"}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("no prefix in filter rule"));
|
||||
}
|
||||
|
||||
// Passes the rule "+x foo", where 'x' follows the '+' in the prefix, and
// expects a non-zero return from run() with the unknown-option message
// naming 'x' reported in t.err().
TEST(mkdwarfs_test, unknown_option_in_filter_rule) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+x foo"}));
|
||||
EXPECT_THAT(t.err(),
|
||||
::testing::HasSubstr("unknown option 'x' in filter rule"));
|
||||
}
|
||||
|
||||
TEST(mkdwarfs_test, cannot_open_input_list_file) {
|
||||
mkdwarfs_tester t;
|
||||
EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"}));
|
||||
@ -1639,12 +1676,6 @@ TEST(mkdwarfs_test, invalid_progress_mode) {
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode"));
|
||||
}
|
||||
|
||||
TEST(mkdwarfs_test, invalid_filter_rule) {
|
||||
mkdwarfs_tester t;
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "grmpf"}));
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("could not parse filter rule"));
|
||||
}
|
||||
|
||||
TEST(mkdwarfs_test, time_resolution_zero) {
|
||||
mkdwarfs_tester t;
|
||||
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"}));
|
||||
|
Loading…
x
Reference in New Issue
Block a user