mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-22 10:51:59 -04:00
feat: add glob matcher class and glob-to-regex transformer
This commit is contained in:
parent
86d1ef655c
commit
5d19513829
@ -437,6 +437,7 @@ if(WITH_TESTS)
|
||||
filesystem_writer_test
|
||||
fits_categorizer_test
|
||||
fragment_category_test
|
||||
glob_matcher_test
|
||||
global_metadata_test
|
||||
incompressible_categorizer_test
|
||||
integral_value_parser_test
|
||||
|
@ -30,6 +30,7 @@ add_library(
|
||||
src/file_stat.cpp
|
||||
src/file_util.cpp
|
||||
src/fstypes.cpp
|
||||
src/glob_matcher.cpp
|
||||
src/history.cpp
|
||||
src/library_dependencies.cpp
|
||||
src/logger.cpp
|
||||
@ -46,6 +47,7 @@ add_library(
|
||||
src/internal/features.cpp
|
||||
src/internal/file_status_conv.cpp
|
||||
src/internal/fs_section.cpp
|
||||
src/internal/glob_to_regex.cpp
|
||||
src/internal/string_table.cpp
|
||||
src/internal/wcwidth.c
|
||||
src/internal/worker_group.cpp
|
||||
|
70
include/dwarfs/glob_matcher.h
Normal file
70
include/dwarfs/glob_matcher.h
Normal file
@ -0,0 +1,70 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class glob_matcher {
|
||||
public:
|
||||
struct options {
|
||||
bool ignorecase{false};
|
||||
};
|
||||
|
||||
glob_matcher();
|
||||
explicit glob_matcher(std::initializer_list<std::string const> patterns);
|
||||
explicit glob_matcher(std::span<std::string const> patterns);
|
||||
glob_matcher(std::initializer_list<std::string const> patterns,
|
||||
options const& opts);
|
||||
glob_matcher(std::span<std::string const> patterns, options const& opts);
|
||||
~glob_matcher();
|
||||
|
||||
void add_pattern(std::string_view pattern) { impl_->add_pattern(pattern); }
|
||||
|
||||
void add_pattern(std::string_view pattern, options const& opts) {
|
||||
impl_->add_pattern(pattern, opts);
|
||||
}
|
||||
|
||||
bool match(std::string_view sv) const { return impl_->match(sv); }
|
||||
bool match(char c) const { return impl_->match(std::string_view(&c, 1)); }
|
||||
|
||||
bool operator()(std::string_view sv) const { return match(sv); }
|
||||
bool operator()(char c) const { return match(c); }
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
virtual void add_pattern(std::string_view pattern) = 0;
|
||||
virtual void add_pattern(std::string_view pattern, options const& opts) = 0;
|
||||
virtual bool match(std::string_view sv) const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
std::unique_ptr<impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
31
include/dwarfs/internal/glob_to_regex.h
Normal file
31
include/dwarfs/internal/glob_to_regex.h
Normal file
@ -0,0 +1,31 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace dwarfs::internal {
|
||||
|
||||
std::string glob_to_regex_string(std::string_view pattern);
|
||||
|
||||
} // namespace dwarfs::internal
|
116
src/glob_matcher.cpp
Normal file
116
src/glob_matcher.cpp
Normal file
@ -0,0 +1,116 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/glob_matcher.h>
|
||||
|
||||
#include <dwarfs/internal/glob_to_regex.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::regex_constants::syntax_option_type
|
||||
regex_flags(glob_matcher::options const& opts) {
|
||||
auto flags =
|
||||
std::regex_constants::ECMAScript | std::regex_constants::optimize;
|
||||
if (opts.ignorecase) {
|
||||
flags |= std::regex_constants::icase;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
std::regex
|
||||
glob_to_regex(std::string_view pattern, glob_matcher::options const& opts) {
|
||||
return std::regex("(?:^" + internal::glob_to_regex_string(pattern) + "$)",
|
||||
regex_flags(opts));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class glob_matcher_ final : public glob_matcher::impl {
|
||||
public:
|
||||
glob_matcher_() = default;
|
||||
|
||||
explicit glob_matcher_(std::span<std::string const> patterns) {
|
||||
for (auto const& p : patterns) {
|
||||
add_pattern(p);
|
||||
}
|
||||
}
|
||||
|
||||
glob_matcher_(std::span<std::string const> patterns,
|
||||
glob_matcher::options const& opts) {
|
||||
for (auto const& p : patterns) {
|
||||
add_pattern(p, opts);
|
||||
}
|
||||
}
|
||||
|
||||
void add_pattern(std::string_view pattern) override {
|
||||
glob_matcher::options opts;
|
||||
|
||||
if (pattern.starts_with("i:")) {
|
||||
opts.ignorecase = true;
|
||||
pattern.remove_prefix(2);
|
||||
} else if (pattern.starts_with(":")) {
|
||||
pattern.remove_prefix(1);
|
||||
}
|
||||
|
||||
add_pattern(pattern, opts);
|
||||
}
|
||||
|
||||
void add_pattern(std::string_view pattern,
|
||||
glob_matcher::options const& opts) override {
|
||||
m_.push_back(glob_to_regex(pattern, opts));
|
||||
}
|
||||
|
||||
bool match(std::string_view sv) const override {
|
||||
return std::any_of(m_.begin(), m_.end(), [&sv](auto const& re) {
|
||||
return std::regex_match(sv.begin(), sv.end(), re);
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::regex> m_;
|
||||
};
|
||||
|
||||
glob_matcher::glob_matcher()
|
||||
: impl_{std::make_unique<glob_matcher_>()} {}
|
||||
|
||||
glob_matcher::glob_matcher(std::initializer_list<std::string const> patterns)
|
||||
: impl_{std::make_unique<glob_matcher_>(patterns)} {}
|
||||
|
||||
glob_matcher::glob_matcher(std::span<std::string const> patterns)
|
||||
: impl_{std::make_unique<glob_matcher_>(patterns)} {}
|
||||
|
||||
glob_matcher::glob_matcher(std::initializer_list<std::string const> patterns,
|
||||
options const& opts)
|
||||
: impl_{std::make_unique<glob_matcher_>(patterns, opts)} {}
|
||||
|
||||
glob_matcher::glob_matcher(std::span<std::string const> patterns,
|
||||
options const& opts)
|
||||
: impl_{std::make_unique<glob_matcher_>(patterns, opts)} {}
|
||||
|
||||
glob_matcher::~glob_matcher() = default;
|
||||
|
||||
} // namespace dwarfs
|
196
src/internal/glob_to_regex.cpp
Normal file
196
src/internal/glob_to_regex.cpp
Normal file
@ -0,0 +1,196 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <dwarfs/internal/glob_to_regex.h>
|
||||
|
||||
namespace dwarfs::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
std::string_view constexpr special_chars = R"(.^$|()[]{}+?*\)";
|
||||
|
||||
std::string escape_special(char c) {
|
||||
std::string esc;
|
||||
if (special_chars.find(c) != std::string_view::npos) {
|
||||
esc = '\\';
|
||||
}
|
||||
return esc + c;
|
||||
}
|
||||
|
||||
std::pair<std::string, size_t>
|
||||
handle_char_set(std::string_view sv, size_t pos) {
|
||||
size_t const len = sv.size();
|
||||
std::string char_class = "[";
|
||||
auto subpat = sv.substr(pos);
|
||||
size_t firstchar = pos + 1;
|
||||
|
||||
if (subpat.starts_with("[!]")) {
|
||||
char_class += R"(^\])";
|
||||
pos += 2;
|
||||
++firstchar;
|
||||
} else if (subpat.starts_with("[!")) {
|
||||
char_class += R"(^)";
|
||||
pos += 1;
|
||||
++firstchar;
|
||||
} else if (subpat.starts_with("[]")) {
|
||||
char_class += R"(\])";
|
||||
pos += 1;
|
||||
} else if (subpat.starts_with("[^")) {
|
||||
char_class += R"(\^)";
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
while (++pos < len) {
|
||||
char c = sv[pos];
|
||||
char_class += c;
|
||||
|
||||
switch (c) {
|
||||
case ']':
|
||||
return {char_class, pos + 1};
|
||||
|
||||
case '\\':
|
||||
char_class += '\\';
|
||||
break;
|
||||
|
||||
case '-':
|
||||
if (pos > firstchar && pos + 1 < len && sv[pos + 1] != ']') {
|
||||
auto from = sv[pos - 1];
|
||||
auto to = sv[pos + 1];
|
||||
|
||||
if (from <= '/' && '/' <= to) {
|
||||
char_class += ".0-";
|
||||
} else if (from > to) {
|
||||
throw std::runtime_error(fmt::format("invalid range '{}-{}' in "
|
||||
"character class in pattern: {}",
|
||||
from, to, sv));
|
||||
}
|
||||
firstchar = pos + 2;
|
||||
}
|
||||
break;
|
||||
|
||||
case '/':
|
||||
throw std::runtime_error(
|
||||
"invalid character '/' in character class in pattern: " +
|
||||
std::string(sv));
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
throw std::runtime_error("unmatched '[' in pattern: " + std::string(sv));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string glob_to_regex_string(std::string_view pattern) {
|
||||
std::string regex;
|
||||
size_t const len = pattern.size();
|
||||
size_t pos = 0;
|
||||
size_t brace_depth = 0;
|
||||
while (pos < len) {
|
||||
char c = pattern[pos];
|
||||
switch (c) {
|
||||
case '\\':
|
||||
if (++pos >= len) {
|
||||
throw std::runtime_error("trailing backslash in pattern: " +
|
||||
std::string(pattern));
|
||||
}
|
||||
regex += escape_special(pattern[pos]);
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '*':
|
||||
if (pos + 1 < len && pattern[pos + 1] == '*') {
|
||||
if (pos + 2 < len && pattern[pos + 2] == '/' &&
|
||||
(pos == 0 || pattern[pos - 1] == '/')) {
|
||||
pos += 3;
|
||||
} else {
|
||||
pos += 2;
|
||||
}
|
||||
regex += ".*";
|
||||
} else {
|
||||
bool onlystar = (pos == 0 || pattern[pos - 1] == '/') &&
|
||||
(pos + 1 == len || pattern[pos + 1] == '/');
|
||||
++pos;
|
||||
regex += "[^/]";
|
||||
regex += onlystar ? '+' : '*';
|
||||
}
|
||||
break;
|
||||
|
||||
case '?':
|
||||
regex += "[^/]";
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '[': {
|
||||
auto [char_class, end] = handle_char_set(pattern, pos);
|
||||
regex += char_class;
|
||||
pos = end;
|
||||
} break;
|
||||
|
||||
case '{':
|
||||
++brace_depth;
|
||||
regex += "(?:";
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ',':
|
||||
regex += brace_depth > 0 ? '|' : c;
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '}':
|
||||
if (brace_depth == 0) {
|
||||
throw std::runtime_error("unmatched '}' in pattern: " +
|
||||
std::string(pattern));
|
||||
}
|
||||
--brace_depth;
|
||||
regex += ")";
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ']':
|
||||
throw std::runtime_error("unmatched ']' in pattern: " +
|
||||
std::string(pattern));
|
||||
|
||||
default:
|
||||
regex += escape_special(c);
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (brace_depth > 0) {
|
||||
throw std::runtime_error("unmatched '{' in pattern: " +
|
||||
std::string(pattern));
|
||||
}
|
||||
|
||||
return regex;
|
||||
}
|
||||
|
||||
} // namespace dwarfs::internal
|
459
test/glob_matcher_test.cpp
Normal file
459
test/glob_matcher_test.cpp
Normal file
@ -0,0 +1,459 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <dwarfs/glob_matcher.h>
|
||||
|
||||
using dwarfs::glob_matcher;
|
||||
|
||||
TEST(glob_matcher_test, simple_patterns) {
|
||||
std::vector<std::string> patterns = {"*.cpp", "*.h"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("main.cpp"));
|
||||
EXPECT_TRUE(matcher("utils.h"));
|
||||
EXPECT_FALSE(matcher("README.md"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, brace_expansion) {
|
||||
std::vector<std::string> patterns = {"{README,CONTRIBUTING,LICENSE}.md"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("README.md"));
|
||||
EXPECT_TRUE(matcher("CONTRIBUTING.md"));
|
||||
EXPECT_TRUE(matcher("LICENSE.md"));
|
||||
EXPECT_FALSE(matcher("INSTALL.md"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, nested_brace_expansion) {
|
||||
std::vector<std::string> patterns = {"file{1,{2,3}}.txt"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("file1.txt"));
|
||||
EXPECT_TRUE(matcher("file2.txt"));
|
||||
EXPECT_TRUE(matcher("file3.txt"));
|
||||
EXPECT_FALSE(matcher("file4.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, single_character_wildcard) {
|
||||
std::vector<std::string> patterns = {"data?.csv"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("data1.csv"));
|
||||
EXPECT_TRUE(matcher("dataA.csv"));
|
||||
EXPECT_FALSE(matcher("data10.csv"));
|
||||
EXPECT_FALSE(matcher("data.csv"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, character_class) {
|
||||
std::vector<std::string> patterns = {"log[0-9].txt"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("log0.txt"));
|
||||
EXPECT_TRUE(matcher("log5.txt"));
|
||||
EXPECT_FALSE(matcher("log10.txt"));
|
||||
EXPECT_FALSE(matcher("logA.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, negated_character_class) {
|
||||
std::vector<std::string> patterns = {"log[!0-9].txt"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("logA.txt"));
|
||||
EXPECT_TRUE(matcher("log_.txt"));
|
||||
EXPECT_FALSE(matcher("log0.txt"));
|
||||
EXPECT_FALSE(matcher("log5.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, globstar) {
|
||||
std::vector<std::string> patterns = {"src/**/main.cpp"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("src/main.cpp"));
|
||||
EXPECT_TRUE(matcher("src/utils/main.cpp"));
|
||||
EXPECT_TRUE(matcher("src/utils/helpers/main.cpp"));
|
||||
EXPECT_FALSE(matcher("main.cpp"));
|
||||
EXPECT_FALSE(matcher("src/main.c"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, globstar_at_start) {
|
||||
std::vector<std::string> patterns = {"**/test.cpp"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("test.cpp"));
|
||||
EXPECT_TRUE(matcher("src/test.cpp"));
|
||||
EXPECT_TRUE(matcher("src/utils/test.cpp"));
|
||||
EXPECT_FALSE(matcher("test.c"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, globstar_at_end) {
|
||||
std::vector<std::string> patterns = {"src/**"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("src/"));
|
||||
EXPECT_TRUE(matcher("src/main.cpp"));
|
||||
EXPECT_TRUE(matcher("src/utils/helper.hpp"));
|
||||
EXPECT_FALSE(matcher("include/main.hpp"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, complex_patterns) {
|
||||
std::vector<std::string> patterns = {"build/{debug,release}/**/*.o",
|
||||
"logs/**/*.log", "**/*.{png,jpg,jpeg}"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("build/debug/main.o"));
|
||||
EXPECT_TRUE(matcher("build/release/utils/helper.o"));
|
||||
EXPECT_FALSE(matcher("build/profile/main.o"));
|
||||
|
||||
EXPECT_TRUE(matcher("logs/app.log"));
|
||||
EXPECT_TRUE(matcher("logs/2021/01/01/system.log"));
|
||||
EXPECT_FALSE(matcher("logs/app.txt"));
|
||||
|
||||
EXPECT_TRUE(matcher("image.png"));
|
||||
EXPECT_TRUE(matcher("assets/images/photo.jpg"));
|
||||
EXPECT_TRUE(matcher("screenshots/test.jpeg"));
|
||||
EXPECT_FALSE(matcher("document.pdf"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, edge_cases) {
|
||||
// Character class edge cases
|
||||
{
|
||||
glob_matcher matcher{"[][!]"};
|
||||
for (char c : {'[', ']', '!'}) {
|
||||
EXPECT_TRUE(matcher(c));
|
||||
}
|
||||
for (char c : {'a', 'b', 'c'}) {
|
||||
EXPECT_FALSE(matcher(c));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
glob_matcher matcher{"[]-]"};
|
||||
for (char c : {']', '-'}) {
|
||||
EXPECT_TRUE(matcher(c));
|
||||
}
|
||||
for (char c : {'[', '/', 'a'}) {
|
||||
EXPECT_FALSE(matcher(c));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
glob_matcher matcher{"[,----0]"};
|
||||
for (char c : {',', '-', '.', '0'}) {
|
||||
EXPECT_TRUE(matcher(c));
|
||||
}
|
||||
for (char c : {'[', '/', 'a'}) {
|
||||
EXPECT_FALSE(matcher(c));
|
||||
}
|
||||
}
|
||||
|
||||
// Invalid / in character class
|
||||
EXPECT_THAT(
|
||||
[] { glob_matcher{"foo[a/b]"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"invalid character '/' in character class in pattern: foo[a/b]"));
|
||||
|
||||
// Unmatched brace
|
||||
EXPECT_THAT([] { glob_matcher{"file{1,2.txt"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"unmatched '{' in pattern: file{1,2.txt"));
|
||||
EXPECT_THAT([] { glob_matcher{"file{1,2.txt}3}"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"unmatched '}' in pattern: file{1,2.txt}3}"));
|
||||
|
||||
// Unmatched bracket
|
||||
EXPECT_THAT([] { glob_matcher{"file[1-2.txt"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"unmatched '[' in pattern: file[1-2.txt"));
|
||||
EXPECT_THAT([] { glob_matcher{"file[1-2]].txt"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"unmatched ']' in pattern: file[1-2]].txt"));
|
||||
|
||||
// Trailing backslash
|
||||
EXPECT_THAT([] { glob_matcher{"file.txt\\"}; },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"trailing backslash in pattern: file.txt\\"));
|
||||
|
||||
// Patterns that should match files in the root directory only
|
||||
std::vector<std::string> root_patterns = {"/*.txt"};
|
||||
glob_matcher matcher(root_patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("/file.txt"));
|
||||
EXPECT_FALSE(matcher("/dir/file.txt"));
|
||||
EXPECT_FALSE(matcher("file.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, escaped_characters) {
|
||||
std::vector<std::string> patterns = {"data\\*.csv"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("data*.csv"));
|
||||
EXPECT_FALSE(matcher("data123.csv"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, literal_dots) {
|
||||
std::vector<std::string> patterns = {".*rc"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher(".bashrc"));
|
||||
EXPECT_TRUE(matcher(".vimrc"));
|
||||
EXPECT_FALSE(matcher("myrc"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, multiple_patterns) {
|
||||
std::vector<std::string> patterns = {"*.cpp", "src/**/test{1,2}.cpp",
|
||||
"include/*.{h,hpp}", "docs/README.md"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("main.cpp"));
|
||||
EXPECT_TRUE(matcher("src/test1.cpp"));
|
||||
EXPECT_TRUE(matcher("src/utils/test2.cpp"));
|
||||
EXPECT_TRUE(matcher("include/main.h"));
|
||||
EXPECT_TRUE(matcher("docs/README.md"));
|
||||
EXPECT_FALSE(matcher("include/utils/helper.hpp"));
|
||||
EXPECT_FALSE(matcher("main.c"));
|
||||
EXPECT_FALSE(matcher("docs/CONTRIBUTING.md"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, hidden_files) {
|
||||
std::vector<std::string> patterns = {".*"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher(".gitignore"));
|
||||
EXPECT_TRUE(matcher(".env"));
|
||||
EXPECT_FALSE(matcher("README.md"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, directory_patterns) {
|
||||
std::vector<std::string> patterns = {"*/", "src/*/", "docs/**/"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("bin/"));
|
||||
EXPECT_TRUE(matcher("src/utils/"));
|
||||
EXPECT_TRUE(matcher("docs/"));
|
||||
EXPECT_TRUE(matcher("docs/guides/"));
|
||||
EXPECT_FALSE(matcher("README.md"));
|
||||
EXPECT_FALSE(matcher("src/main.cpp"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, escaped_braces) {
|
||||
std::vector<std::string> patterns = {"src/\\{test\\}.cpp",
|
||||
"data/\\{2020,2021\\}/report.txt",
|
||||
"docs/\\{README\\}.md"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("src/{test}.cpp"));
|
||||
EXPECT_TRUE(matcher("data/{2020,2021}/report.txt"));
|
||||
EXPECT_TRUE(matcher("docs/{README}.md"));
|
||||
EXPECT_FALSE(matcher("src/test.cpp"));
|
||||
EXPECT_FALSE(matcher("data/2020/report.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, mixed_escaped_and_unescaped_braces) {
|
||||
std::vector<std::string> patterns = {"src/{test,prod}/\\{config\\}.json"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("src/test/{config}.json"));
|
||||
EXPECT_TRUE(matcher("src/prod/{config}.json"));
|
||||
EXPECT_FALSE(matcher("src/test/config.json"));
|
||||
EXPECT_FALSE(matcher("src/{test}/config.json"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, escaped_commas_in_braces) {
|
||||
std::vector<std::string> patterns = {"file{one\\,two,three}.txt"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("fileone,two.txt"));
|
||||
EXPECT_TRUE(matcher("filethree.txt"));
|
||||
EXPECT_FALSE(matcher("fileonetwo.txt"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, escaped_characters_in_braces) {
|
||||
std::vector<std::string> patterns = {"dir/{sub\\{dir\\},other}"};
|
||||
glob_matcher matcher(patterns);
|
||||
|
||||
EXPECT_TRUE(matcher("dir/sub{dir}"));
|
||||
EXPECT_TRUE(matcher("dir/other"));
|
||||
EXPECT_FALSE(matcher("dir/subdir"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, python_fnmatch) {
|
||||
EXPECT_TRUE(glob_matcher{"abc"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"?*?"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"???*"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"*???"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"???"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"*"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"ab[cd]"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"ab[!de]"}("abc"));
|
||||
EXPECT_FALSE(glob_matcher{"ab[de]"}("abc"));
|
||||
EXPECT_FALSE(glob_matcher{"??"}("a"));
|
||||
EXPECT_FALSE(glob_matcher{"b"}("a"));
|
||||
EXPECT_TRUE(glob_matcher{"[\\]"}("\\"));
|
||||
EXPECT_TRUE(glob_matcher{"[!\\]"}("a"));
|
||||
EXPECT_FALSE(glob_matcher{"[!\\]"}("\\"));
|
||||
EXPECT_TRUE(glob_matcher{"foo*"}("foo\nbar"));
|
||||
EXPECT_TRUE(glob_matcher{"foo*"}("foo\nbar\n"));
|
||||
EXPECT_FALSE(glob_matcher{"foo*"}("\nfoo"));
|
||||
EXPECT_TRUE(glob_matcher{"*"}("\n"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, python_case) {
|
||||
EXPECT_TRUE(glob_matcher{"abc"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher{":abc"}("abc"));
|
||||
EXPECT_FALSE(glob_matcher{"AbC"}("abc"));
|
||||
EXPECT_TRUE(glob_matcher({"AbC"}, {.ignorecase = true})("abc"));
|
||||
EXPECT_TRUE(glob_matcher{"i:AbC"}("abc"));
|
||||
EXPECT_FALSE(glob_matcher{"abc"}("AbC"));
|
||||
EXPECT_TRUE(glob_matcher({"abc"}, {.ignorecase = true})("AbC"));
|
||||
EXPECT_TRUE(glob_matcher{"i:abc"}("AbC"));
|
||||
EXPECT_TRUE(glob_matcher{"AbC"}("AbC"));
|
||||
EXPECT_TRUE(glob_matcher{":AbC"}("AbC"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, python_char_set) {
|
||||
static std::string_view constexpr testcases =
|
||||
R"(abcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)";
|
||||
static std::string_view constexpr uppercase = R"(ABCDEFGHIJKLMNOPQRSTUVWXYZ)";
|
||||
using namespace std::literals;
|
||||
|
||||
for (char c : testcases) {
|
||||
glob_matcher positive{"[az]"};
|
||||
glob_matcher negative{"[!az]"};
|
||||
if (c == 'a' || c == 'z') {
|
||||
EXPECT_TRUE(positive(c));
|
||||
EXPECT_FALSE(negative(c));
|
||||
} else {
|
||||
EXPECT_FALSE(positive(c));
|
||||
EXPECT_TRUE(negative(c));
|
||||
}
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ("az"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[az]"}(c));
|
||||
EXPECT_EQ("az"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[AZ]"}(c));
|
||||
EXPECT_EQ("az"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!az]"}(c));
|
||||
EXPECT_EQ("az"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!AZ]"}(c));
|
||||
}
|
||||
|
||||
for (char c : uppercase) {
|
||||
EXPECT_EQ("AZ"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[az]"}(c));
|
||||
EXPECT_EQ("AZ"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[AZ]"}(c));
|
||||
EXPECT_EQ("AZ"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!az]"}(c));
|
||||
EXPECT_EQ("AZ"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!AZ]"}(c));
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
glob_matcher matcher{"[aa]"};
|
||||
if (c == 'a') {
|
||||
EXPECT_TRUE(matcher(c));
|
||||
} else {
|
||||
EXPECT_FALSE(matcher(c));
|
||||
}
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ(c == '^' || c == 'a' || c == 'z', glob_matcher{"[^az]"}(c));
|
||||
EXPECT_EQ(c == '[' || c == 'a' || c == 'z', glob_matcher{"[[az]"}(c));
|
||||
EXPECT_EQ(c != ']', glob_matcher{"[!]]"}(c));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, python_range) {
|
||||
static std::string_view constexpr testcases =
|
||||
R"(abcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)";
|
||||
static std::string_view constexpr uppercase = R"(ABCDEFGHIJKLMNOPQRSTUVWXYZ)";
|
||||
using namespace std::literals;
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ("bcd"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"[b-d]"}(c));
|
||||
EXPECT_EQ("bcd"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"[!b-d]"}(c));
|
||||
EXPECT_EQ("bcdxyz"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"[b-dx-z]"}(c));
|
||||
EXPECT_EQ("bcdxyz"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"[!b-dx-z]"}(c));
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ("bcd"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[B-D]"}(c));
|
||||
EXPECT_EQ("bcd"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!B-D]"}(c));
|
||||
}
|
||||
|
||||
for (char c : uppercase) {
|
||||
EXPECT_EQ("BCD"sv.find(c) != std::string_view::npos,
|
||||
glob_matcher{"i:[b-d]"}(c));
|
||||
EXPECT_EQ("BCD"sv.find(c) == std::string_view::npos,
|
||||
glob_matcher{"i:[!b-d]"}(c));
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ(c == 'b', glob_matcher{"[b-b]"}(c));
|
||||
}
|
||||
|
||||
for (char c : testcases) {
|
||||
EXPECT_EQ(c != '-' && c != '#', glob_matcher{"[!-#]"}(c));
|
||||
EXPECT_EQ(c != '-' && c != '.', glob_matcher{"[!--.]"}(c));
|
||||
EXPECT_EQ(c == '^' || c == '_' || c == '`', glob_matcher{"[^-`]"}(c));
|
||||
EXPECT_EQ(c == '[' || c == '\\' || c == ']' || c == '^',
|
||||
glob_matcher{"[[-^]"}(c))
|
||||
<< c;
|
||||
EXPECT_EQ(c == '\\' || c == ']' || c == '^', glob_matcher{R"([\-^])"}(c));
|
||||
EXPECT_EQ(c == '-' || c == 'b', glob_matcher{"[-b]"}(c));
|
||||
EXPECT_EQ(c != '-' && c != 'b', glob_matcher{"[!-b]"}(c));
|
||||
EXPECT_EQ(c == '-' || c == 'b', glob_matcher{"[-b]"}(c));
|
||||
EXPECT_EQ(c != '-' && c != 'b', glob_matcher{"[!-b]"}(c));
|
||||
EXPECT_EQ(c == '-', glob_matcher{"[-]"}(c));
|
||||
EXPECT_EQ(c != '-', glob_matcher{"[!-]"}(c));
|
||||
}
|
||||
|
||||
EXPECT_THAT([] { glob_matcher{"[d-b]"}('a'); },
|
||||
testing::ThrowsMessage<std::runtime_error>(
|
||||
"invalid range 'd-b' in character class in pattern: [d-b]"));
|
||||
}
|
||||
|
||||
TEST(glob_matcher_test, multi_pattern) {
|
||||
glob_matcher matcher;
|
||||
matcher.add_pattern("*.cpp");
|
||||
matcher.add_pattern("*.txt", {.ignorecase = true});
|
||||
|
||||
EXPECT_TRUE(matcher("main.cpp"));
|
||||
EXPECT_TRUE(matcher("README.txt"));
|
||||
EXPECT_TRUE(matcher("CHANGELOG.TXT"));
|
||||
EXPECT_FALSE(matcher("main.c"));
|
||||
EXPECT_FALSE(matcher("UTILS.CPP"));
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user