From 5d19513829607dc6f43e9f3e9e60993fd9a688ca Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 16 Nov 2024 13:03:36 +0100 Subject: [PATCH] feat: add glob matcher class and glob-to-regex transformer --- CMakeLists.txt | 1 + cmake/libdwarfs.cmake | 2 + include/dwarfs/glob_matcher.h | 70 ++++ include/dwarfs/internal/glob_to_regex.h | 31 ++ src/glob_matcher.cpp | 116 ++++++ src/internal/glob_to_regex.cpp | 196 ++++++++++ test/glob_matcher_test.cpp | 459 ++++++++++++++++++++++++ 7 files changed, 875 insertions(+) create mode 100644 include/dwarfs/glob_matcher.h create mode 100644 include/dwarfs/internal/glob_to_regex.h create mode 100644 src/glob_matcher.cpp create mode 100644 src/internal/glob_to_regex.cpp create mode 100644 test/glob_matcher_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 94cc6561..51b6b4f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -437,6 +437,7 @@ if(WITH_TESTS) filesystem_writer_test fits_categorizer_test fragment_category_test + glob_matcher_test global_metadata_test incompressible_categorizer_test integral_value_parser_test diff --git a/cmake/libdwarfs.cmake b/cmake/libdwarfs.cmake index feb4474a..9907dff0 100644 --- a/cmake/libdwarfs.cmake +++ b/cmake/libdwarfs.cmake @@ -30,6 +30,7 @@ add_library( src/file_stat.cpp src/file_util.cpp src/fstypes.cpp + src/glob_matcher.cpp src/history.cpp src/library_dependencies.cpp src/logger.cpp @@ -46,6 +47,7 @@ add_library( src/internal/features.cpp src/internal/file_status_conv.cpp src/internal/fs_section.cpp + src/internal/glob_to_regex.cpp src/internal/string_table.cpp src/internal/wcwidth.c src/internal/worker_group.cpp diff --git a/include/dwarfs/glob_matcher.h b/include/dwarfs/glob_matcher.h new file mode 100644 index 00000000..cbb5e04b --- /dev/null +++ b/include/dwarfs/glob_matcher.h @@ -0,0 +1,70 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is 
part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <initializer_list>
+#include <memory>
+#include <span>
+#include <string>
+#include <string_view>
+
+namespace dwarfs {
+
+/**
+ * Matches strings against a set of glob patterns.
+ *
+ * A string matches if at least one of the registered patterns matches it.
+ * All work is forwarded to a polymorphic `impl` object that is owned
+ * through a `std::unique_ptr`.
+ *
+ * Because the class declares a destructor and holds a `std::unique_ptr`,
+ * it is neither copyable nor movable.
+ */
+class glob_matcher {
+ public:
+  /// Per-pattern matching options.
+  struct options {
+    /// Match without regard to letter case.
+    bool ignorecase{false};
+  };
+
+  /// Create a matcher without any patterns.
+  glob_matcher();
+
+  /// Create a matcher and register each pattern via `add_pattern(pattern)`.
+  explicit glob_matcher(std::initializer_list<std::string const> patterns);
+  explicit glob_matcher(std::span<std::string const> patterns);
+
+  /// Create a matcher and register each pattern with the given options.
+  glob_matcher(std::initializer_list<std::string const> patterns,
+               options const& opts);
+  glob_matcher(std::span<std::string const> patterns, options const& opts);
+
+  ~glob_matcher();
+
+  /// Register a pattern; options are chosen by the implementation.
+  void add_pattern(std::string_view pattern) { impl_->add_pattern(pattern); }
+
+  /// Register a pattern with explicitly specified options.
+  void add_pattern(std::string_view pattern, options const& opts) {
+    impl_->add_pattern(pattern, opts);
+  }
+
+  /// Return true if `sv` is matched by at least one registered pattern.
+  bool match(std::string_view sv) const { return impl_->match(sv); }
+
+  /// Convenience overload: match the one-character string made of `c`.
+  bool match(char c) const { return impl_->match(std::string_view(&c, 1)); }
+
+  /// Function-call syntax for `match()`, so a matcher can act as predicate.
+  bool operator()(std::string_view sv) const { return match(sv); }
+  bool operator()(char c) const { return match(c); }
+
+  /// Interface that the actual matching back end has to implement.
+  class impl {
+   public:
+    virtual ~impl() = default;
+    virtual void add_pattern(std::string_view pattern) = 0;
+    virtual void add_pattern(std::string_view pattern, options const& opts) = 0;
+    virtual bool match(std::string_view sv) const = 0;
+  };
+
+ private:
+  std::unique_ptr<impl> impl_;
+};
+
+} // namespace dwarfs
diff --git a/include/dwarfs/internal/glob_to_regex.h b/include/dwarfs/internal/glob_to_regex.h
new file mode 100644
index 00000000..68fd2100
---
/dev/null
+++ b/include/dwarfs/internal/glob_to_regex.h
@@ -0,0 +1,45 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+namespace dwarfs::internal {
+
+/**
+ * Translate a glob pattern into regular expression text.
+ *
+ * Supported syntax: `*` and `?` (which never match `/`), `**` (which
+ * matches across `/`), character sets `[...]` with `[!...]` negation,
+ * alternatives `{a,b}` and backslash escapes. The result carries no
+ * anchors; callers that need a full match have to add them.
+ *
+ * \param pattern glob pattern to translate
+ * \return regular expression text for `pattern`
+ * \throws std::runtime_error if the pattern is malformed (unbalanced
+ *         brackets or braces, a trailing backslash, or a `/` or reversed
+ *         range inside a character set)
+ */
+std::string glob_to_regex_string(std::string_view pattern);
+
+} // namespace dwarfs::internal
diff --git a/src/glob_matcher.cpp b/src/glob_matcher.cpp
new file mode 100644
index 00000000..f2d605eb
--- /dev/null
+++ b/src/glob_matcher.cpp
@@ -0,0 +1,116 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <algorithm>
+#include <regex>
+#include <vector>
+
+#include <dwarfs/glob_matcher.h>
+
+#include <dwarfs/internal/glob_to_regex.h>
+
+namespace dwarfs {
+
+namespace {
+
+/**
+ * Build the `std::regex` syntax flags for the given matcher options.
+ *
+ * The ECMAScript grammar and the `optimize` hint are always selected;
+ * `icase` is added when `opts.ignorecase` is set.
+ */
+constexpr std::regex_constants::syntax_option_type
+regex_flags(glob_matcher::options const& opts) {
+  auto flags =
+      std::regex_constants::ECMAScript | std::regex_constants::optimize;
+  if (opts.ignorecase) {
+    flags |= std::regex_constants::icase;
+  }
+  return flags;
+}
+
+/**
+ * Compile a glob pattern into a `std::regex`.
+ *
+ * The translated pattern is wrapped in `(?:^ ... $)` so that alternatives
+ * produced by the translation cannot escape the anchors.
+ */
+std::regex
+glob_to_regex(std::string_view pattern, glob_matcher::options const& opts) {
+  return std::regex("(?:^" + internal::glob_to_regex_string(pattern) + "$)",
+                    regex_flags(opts));
+}
+
+} // namespace
+
+/**
+ * `std::regex` based implementation of `glob_matcher::impl`.
+ *
+ * Every pattern is compiled once when it is added; `match()` then tries
+ * the compiled expressions in insertion order.
+ */
+class glob_matcher_ final : public glob_matcher::impl {
+ public:
+  glob_matcher_() = default;
+
+  /// Add all patterns, honoring the optional `i:` / `:` prefixes.
+  explicit glob_matcher_(std::span<std::string const> patterns) {
+    for (auto const& p : patterns) {
+      add_pattern(p);
+    }
+  }
+
+  /// Add all patterns verbatim with the same explicit options.
+  glob_matcher_(std::span<std::string const> patterns,
+                glob_matcher::options const& opts) {
+    for (auto const& p : patterns) {
+      add_pattern(p, opts);
+    }
+  }
+
+  /**
+   * Add a pattern, deriving the options from an optional prefix.
+   *
+   * A leading `i:` selects case-insensitive matching. A leading `:` is
+   * stripped without changing the options, which allows patterns that
+   * themselves start with `i:` to be written as `:i:...`.
+   */
+  void add_pattern(std::string_view pattern) override {
+    glob_matcher::options opts;
+
+    if (pattern.starts_with("i:")) {
+      opts.ignorecase = true;
+      pattern.remove_prefix(2);
+    } else if (pattern.starts_with(":")) {
+      pattern.remove_prefix(1);
+    }
+
+    add_pattern(pattern, opts);
+  }
+
+  /// Add a pattern verbatim (no prefix handling) with the given options.
+  void add_pattern(std::string_view pattern,
+                   glob_matcher::options const& opts) override {
+    m_.push_back(glob_to_regex(pattern, opts));
+  }
+
+  /// Return true if any stored expression matches all of `sv`.
+  bool match(std::string_view sv) const override {
+    return std::any_of(m_.begin(), m_.end(), [&sv](auto const& re) {
+      return std::regex_match(sv.begin(), sv.end(), re);
+    });
+  }
+
+ private:
+  std::vector<std::regex> m_;
+};
+
+glob_matcher::glob_matcher()
+    : impl_{std::make_unique<glob_matcher_>()} {}
+
+glob_matcher::glob_matcher(std::initializer_list<std::string const> patterns)
+    : impl_{std::make_unique<glob_matcher_>(patterns)} {}
+
+glob_matcher::glob_matcher(std::span<std::string const> patterns)
+    : impl_{std::make_unique<glob_matcher_>(patterns)} {}
+
+glob_matcher::glob_matcher(std::initializer_list<std::string const> patterns,
+                           options const& opts)
+    : impl_{std::make_unique<glob_matcher_>(patterns, opts)} {}
+
+glob_matcher::glob_matcher(std::span<std::string const> patterns,
+                           options const& opts)
+    : impl_{std::make_unique<glob_matcher_>(patterns, opts)} {}
+
+glob_matcher::~glob_matcher() = default;
+
+} // namespace dwarfs
diff --git a/src/internal/glob_to_regex.cpp b/src/internal/glob_to_regex.cpp
new file mode 100644
index 00000000..e5efc37c
--- /dev/null
+++ b/src/internal/glob_to_regex.cpp
@@ -0,0 +1,196 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include <dwarfs/internal/glob_to_regex.h>
+
+namespace dwarfs::internal {
+
+namespace {
+
+// Characters with a special meaning in a regular expression; they need a
+// backslash in front of them in order to be matched literally.
+std::string_view constexpr special_chars = R"(.^$|()[]{}+?*\)";
+
+// Return `c` as regex text, backslash-escaped if it is a special character.
+std::string escape_special(char c) {
+  std::string esc;
+  if (special_chars.find(c) != std::string_view::npos) {
+    esc = '\\';
+  }
+  return esc + c;
+}
+
+// Translate the glob character set that starts at `sv[pos]` (which must be
+// the opening '[') into a regex character class.
+//
+// Returns the regex text of the class together with the index of the first
+// pattern character after the closing ']'.
+//
+// Throws std::runtime_error if the set is not terminated, if it contains a
+// '/', or if it contains a range whose end points are in reverse order.
+std::pair<std::string, size_t>
+handle_char_set(std::string_view sv, size_t pos) {
+  size_t const len = sv.size();
+  std::string char_class = "[";
+  auto subpat = sv.substr(pos);
+  // Index of the first character that may serve as the start of a range;
+  // a '-' at or before this index is always a literal.
+  size_t firstchar = pos + 1;
+
+  // Handle the prefixes whose meaning differs between glob and regex:
+  // '!' negates the set, a leading ']' is a literal, and a leading '^' is
+  // a literal in glob syntax and thus has to be escaped.
+  if (subpat.starts_with("[!]")) {
+    char_class += R"(^\])";
+    pos += 2;
+    ++firstchar;
+  } else if (subpat.starts_with("[!")) {
+    char_class += R"(^)";
+    pos += 1;
+    ++firstchar;
+  } else if (subpat.starts_with("[]")) {
+    char_class += R"(\])";
+    pos += 1;
+  } else if (subpat.starts_with("[^")) {
+    char_class += R"(\^)";
+    pos += 1;
+  }
+
+  while (++pos < len) {
+    char c = sv[pos];
+
+    if (c == '[') {
+      // A '[' is always a literal inside a glob set. It must be escaped,
+      // because the regex grammar would otherwise treat a following '.',
+      // ':' or '=' as the start of a collating / class / equivalence
+      // element and reject patterns such as "[[.]".
+      char_class += '\\';
+    }
+
+    char_class += c;
+
+    switch (c) {
+    case ']':
+      return {char_class, pos + 1};
+
+    case '\\':
+      // A backslash is a literal inside a glob set; double it for regex.
+      char_class += '\\';
+      break;
+
+    case '-':
+      // Only a '-' that sits between two set members denotes a range.
+      if (pos > firstchar && pos + 1 < len && sv[pos + 1] != ']') {
+        auto from = sv[pos - 1];
+        auto to = sv[pos + 1];
+
+        if (from <= '/' && '/' <= to) {
+          // The range spans '/', which a set must never match: split it
+          // into "from-." and "0-to" ('.' and '0' are the neighbours of
+          // '/'). The "from-" part and `to` are emitted by the loop.
+          char_class += ".0-";
+        } else if (from > to) {
+          throw std::runtime_error(fmt::format("invalid range '{}-{}' in "
+                                               "character class in pattern: {}",
+                                               from, to, sv));
+        }
+        // The end point of this range cannot start another range.
+        firstchar = pos + 2;
+      }
+      break;
+
+    case '/':
+      throw std::runtime_error(
+          "invalid character '/' in character class in pattern: " +
+          std::string(sv));
+
+    default:
+      break;
+    }
+  }
+
+  throw std::runtime_error("unmatched '[' in pattern: " + std::string(sv));
+}
+
+} // namespace
+
+// Translate a glob pattern into (unanchored) regular expression text.
+//
+//   \x      the character x, literally
+//   **/     as a whole path component: zero or more complete directories
+//   **      any sequence of characters, including '/'
+//   *       any sequence of characters other than '/'; at least one
+//           character if the '*' is a whole path component
+//   ?       exactly one character other than '/'
+//   [...]   character set, see handle_char_set()
+//   {a,b}   alternatives, translated to (?:a|b); may be nested
+//
+// Throws std::runtime_error if the pattern is malformed.
+std::string glob_to_regex_string(std::string_view pattern) {
+  std::string regex;
+  size_t const len = pattern.size();
+  size_t pos = 0;
+  size_t brace_depth = 0;
+
+  while (pos < len) {
+    char c = pattern[pos];
+    switch (c) {
+    case '\\':
+      if (++pos >= len) {
+        throw std::runtime_error("trailing backslash in pattern: " +
+                                 std::string(pattern));
+      }
+      regex += escape_special(pattern[pos]);
+      ++pos;
+      break;
+
+    case '*':
+      if (pos + 1 < len && pattern[pos + 1] == '*') {
+        if (pos + 2 < len && pattern[pos + 2] == '/' &&
+            (pos == 0 || pattern[pos - 1] == '/')) {
+          // "**/" as a whole component stands for zero or more complete
+          // directories. A plain ".*" would also match a partial name,
+          // e.g. "**/test.cpp" would match "mytest.cpp".
+          regex += "(?:.*/)?";
+          pos += 3;
+        } else {
+          regex += ".*";
+          pos += 2;
+        }
+      } else {
+        // A '*' that forms a whole path component must match at least one
+        // character, otherwise "a/*/b" would match "a//b".
+        bool onlystar = (pos == 0 || pattern[pos - 1] == '/') &&
+                        (pos + 1 == len || pattern[pos + 1] == '/');
+        ++pos;
+        regex += "[^/]";
+        regex += onlystar ? '+' : '*';
+      }
+      break;
+
+    case '?':
+      regex += "[^/]";
+      ++pos;
+      break;
+
+    case '[': {
+      auto [char_class, end] = handle_char_set(pattern, pos);
+      regex += char_class;
+      pos = end;
+    } break;
+
+    case '{':
+      ++brace_depth;
+      regex += "(?:";
+      ++pos;
+      break;
+
+    case ',':
+      // A comma separates alternatives only inside braces.
+      regex += brace_depth > 0 ? '|' : c;
+      ++pos;
+      break;
+
+    case '}':
+      if (brace_depth == 0) {
+        throw std::runtime_error("unmatched '}' in pattern: " +
+                                 std::string(pattern));
+      }
+      --brace_depth;
+      regex += ")";
+      ++pos;
+      break;
+
+    case ']':
+      // Every valid ']' is consumed by handle_char_set().
+      throw std::runtime_error("unmatched ']' in pattern: " +
+                               std::string(pattern));
+
+    default:
+      regex += escape_special(c);
+      ++pos;
+      break;
+    }
+  }
+
+  if (brace_depth > 0) {
+    throw std::runtime_error("unmatched '{' in pattern: " +
+                             std::string(pattern));
+  }
+
+  return regex;
+}
+
+} // namespace dwarfs::internal
diff --git a/test/glob_matcher_test.cpp b/test/glob_matcher_test.cpp
new file mode 100644
index 00000000..01aa021b
--- /dev/null
+++ b/test/glob_matcher_test.cpp
@@ -0,0 +1,459 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <dwarfs/glob_matcher.h>
+
+using dwarfs::glob_matcher;
+
+// `*` matches within a single file name.
+TEST(glob_matcher_test, simple_patterns) {
+  std::vector<std::string> patterns = {"*.cpp", "*.h"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("main.cpp"));
+  EXPECT_TRUE(matcher("utils.h"));
+  EXPECT_FALSE(matcher("README.md"));
+}
+
+// `{a,b,c}` selects one of several alternatives.
+TEST(glob_matcher_test, brace_expansion) {
+  std::vector<std::string> patterns = {"{README,CONTRIBUTING,LICENSE}.md"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("README.md"));
+  EXPECT_TRUE(matcher("CONTRIBUTING.md"));
+  EXPECT_TRUE(matcher("LICENSE.md"));
+  EXPECT_FALSE(matcher("INSTALL.md"));
+}
+
+// Alternatives may be nested.
+TEST(glob_matcher_test, nested_brace_expansion) {
+  std::vector<std::string> patterns = {"file{1,{2,3}}.txt"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("file1.txt"));
+  EXPECT_TRUE(matcher("file2.txt"));
+  EXPECT_TRUE(matcher("file3.txt"));
+  EXPECT_FALSE(matcher("file4.txt"));
+}
+
+// `?` matches exactly one character.
+TEST(glob_matcher_test, single_character_wildcard) {
+  std::vector<std::string> patterns = {"data?.csv"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("data1.csv"));
+  EXPECT_TRUE(matcher("dataA.csv"));
+  EXPECT_FALSE(matcher("data10.csv"));
+  EXPECT_FALSE(matcher("data.csv"));
+}
+
+// `[a-b]` matches one character out of a range.
+TEST(glob_matcher_test, character_class) {
+  std::vector<std::string> patterns = {"log[0-9].txt"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("log0.txt"));
+  EXPECT_TRUE(matcher("log5.txt"));
+  EXPECT_FALSE(matcher("log10.txt"));
+  EXPECT_FALSE(matcher("logA.txt"));
+}
+
+// `[!a-b]` matches one character outside of a range.
+TEST(glob_matcher_test, negated_character_class) {
+  std::vector<std::string> patterns = {"log[!0-9].txt"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("logA.txt"));
+  EXPECT_TRUE(matcher("log_.txt"));
+  EXPECT_FALSE(matcher("log0.txt"));
+  EXPECT_FALSE(matcher("log5.txt"));
+}
+
+// `**/` in the middle of a pattern matches any number of directories.
+TEST(glob_matcher_test, globstar) {
+  std::vector<std::string> patterns = {"src/**/main.cpp"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("src/main.cpp"));
+  EXPECT_TRUE(matcher("src/utils/main.cpp"));
+  EXPECT_TRUE(matcher("src/utils/helpers/main.cpp"));
+  EXPECT_FALSE(matcher("main.cpp"));
+  EXPECT_FALSE(matcher("src/main.c"));
+}
+
+// A leading `**/` also matches in the top-level directory.
+TEST(glob_matcher_test, globstar_at_start) {
+  std::vector<std::string> patterns = {"**/test.cpp"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("test.cpp"));
+  EXPECT_TRUE(matcher("src/test.cpp"));
+  EXPECT_TRUE(matcher("src/utils/test.cpp"));
+  EXPECT_FALSE(matcher("test.c"));
+}
+
+// A trailing `**` matches everything below a directory.
+TEST(glob_matcher_test, globstar_at_end) {
+  std::vector<std::string> patterns = {"src/**"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("src/"));
+  EXPECT_TRUE(matcher("src/main.cpp"));
+  EXPECT_TRUE(matcher("src/utils/helper.hpp"));
+  EXPECT_FALSE(matcher("include/main.hpp"));
+}
+
+// Combinations of braces, `**` and `*` across several patterns.
+TEST(glob_matcher_test, complex_patterns) {
+  std::vector<std::string> patterns = {"build/{debug,release}/**/*.o",
+                                       "logs/**/*.log", "**/*.{png,jpg,jpeg}"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("build/debug/main.o"));
+  EXPECT_TRUE(matcher("build/release/utils/helper.o"));
+  EXPECT_FALSE(matcher("build/profile/main.o"));
+
+  EXPECT_TRUE(matcher("logs/app.log"));
+  EXPECT_TRUE(matcher("logs/2021/01/01/system.log"));
+  EXPECT_FALSE(matcher("logs/app.txt"));
+
+  EXPECT_TRUE(matcher("image.png"));
+  EXPECT_TRUE(matcher("assets/images/photo.jpg"));
+  EXPECT_TRUE(matcher("screenshots/test.jpeg"));
+  EXPECT_FALSE(matcher("document.pdf"));
+}
+
+// Unusual character sets and all error conditions.
+TEST(glob_matcher_test, edge_cases) {
+  // Character class edge cases
+  {
+    glob_matcher matcher{"[][!]"};
+    for (char c : {'[', ']', '!'}) {
+      EXPECT_TRUE(matcher(c));
+    }
+    for (char c : {'a', 'b', 'c'}) {
+      EXPECT_FALSE(matcher(c));
+    }
+  }
+
+  {
+    glob_matcher matcher{"[]-]"};
+    for (char c : {']', '-'}) {
+      EXPECT_TRUE(matcher(c));
+    }
+    for (char c : {'[', '/', 'a'}) {
+      EXPECT_FALSE(matcher(c));
+    }
+  }
+
+  {
+    glob_matcher matcher{"[,----0]"};
+    for (char c : {',', '-', '.', '0'}) {
+      EXPECT_TRUE(matcher(c));
+    }
+    for (char c : {'[', '/', 'a'}) {
+      EXPECT_FALSE(matcher(c));
+    }
+  }
+
+  // Invalid / in character class
+  EXPECT_THAT(
+      [] { glob_matcher{"foo[a/b]"}; },
+      testing::ThrowsMessage<std::runtime_error>(
+          "invalid character '/' in character class in pattern: foo[a/b]"));
+
+  // Unmatched brace
+  EXPECT_THAT([] { glob_matcher{"file{1,2.txt"}; },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "unmatched '{' in pattern: file{1,2.txt"));
+  EXPECT_THAT([] { glob_matcher{"file{1,2.txt}3}"}; },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "unmatched '}' in pattern: file{1,2.txt}3}"));
+
+  // Unmatched bracket
+  EXPECT_THAT([] { glob_matcher{"file[1-2.txt"}; },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "unmatched '[' in pattern: file[1-2.txt"));
+  EXPECT_THAT([] { glob_matcher{"file[1-2]].txt"}; },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "unmatched ']' in pattern: file[1-2]].txt"));
+
+  // Trailing backslash
+  EXPECT_THAT([] { glob_matcher{"file.txt\\"}; },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "trailing backslash in pattern: file.txt\\"));
+
+  // Patterns that should match files in the root directory only
+  std::vector<std::string> root_patterns = {"/*.txt"};
+  glob_matcher matcher(root_patterns);
+
+  EXPECT_TRUE(matcher("/file.txt"));
+  EXPECT_FALSE(matcher("/dir/file.txt"));
+  EXPECT_FALSE(matcher("file.txt"));
+}
+
+// A backslash turns a wildcard into a literal.
+TEST(glob_matcher_test, escaped_characters) {
+  std::vector<std::string> patterns = {"data\\*.csv"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("data*.csv"));
+  EXPECT_FALSE(matcher("data123.csv"));
+}
+
+// A dot is a literal dot, not "any character".
+TEST(glob_matcher_test, literal_dots) {
+  std::vector<std::string> patterns = {".*rc"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher(".bashrc"));
+  EXPECT_TRUE(matcher(".vimrc"));
+  EXPECT_FALSE(matcher("myrc"));
+}
+
+// A string matches if any one of the patterns matches.
+TEST(glob_matcher_test, multiple_patterns) {
+  std::vector<std::string> patterns = {"*.cpp", "src/**/test{1,2}.cpp",
+                                       "include/*.{h,hpp}", "docs/README.md"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("main.cpp"));
+  EXPECT_TRUE(matcher("src/test1.cpp"));
+  EXPECT_TRUE(matcher("src/utils/test2.cpp"));
+  EXPECT_TRUE(matcher("include/main.h"));
+  EXPECT_TRUE(matcher("docs/README.md"));
+  EXPECT_FALSE(matcher("include/utils/helper.hpp"));
+  EXPECT_FALSE(matcher("main.c"));
+  EXPECT_FALSE(matcher("docs/CONTRIBUTING.md"));
+}
+
+// `.*` matches names starting with a dot.
+TEST(glob_matcher_test, hidden_files) {
+  std::vector<std::string> patterns = {".*"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher(".gitignore"));
+  EXPECT_TRUE(matcher(".env"));
+  EXPECT_FALSE(matcher("README.md"));
+}
+
+// Patterns ending in `/` match directory paths only.
+TEST(glob_matcher_test, directory_patterns) {
+  std::vector<std::string> patterns = {"*/", "src/*/", "docs/**/"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("bin/"));
+  EXPECT_TRUE(matcher("src/utils/"));
+  EXPECT_TRUE(matcher("docs/"));
+  EXPECT_TRUE(matcher("docs/guides/"));
+  EXPECT_FALSE(matcher("README.md"));
+  EXPECT_FALSE(matcher("src/main.cpp"));
+}
+
+// Escaped braces (and the commas between them) are literals.
+TEST(glob_matcher_test, escaped_braces) {
+  std::vector<std::string> patterns = {"src/\\{test\\}.cpp",
+                                       "data/\\{2020,2021\\}/report.txt",
+                                       "docs/\\{README\\}.md"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("src/{test}.cpp"));
+  EXPECT_TRUE(matcher("data/{2020,2021}/report.txt"));
+  EXPECT_TRUE(matcher("docs/{README}.md"));
+  EXPECT_FALSE(matcher("src/test.cpp"));
+  EXPECT_FALSE(matcher("data/2020/report.txt"));
+}
+
+// Escaped and unescaped braces can be mixed in one pattern.
+TEST(glob_matcher_test, mixed_escaped_and_unescaped_braces) {
+  std::vector<std::string> patterns = {"src/{test,prod}/\\{config\\}.json"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("src/test/{config}.json"));
+  EXPECT_TRUE(matcher("src/prod/{config}.json"));
+  EXPECT_FALSE(matcher("src/test/config.json"));
+  EXPECT_FALSE(matcher("src/{test}/config.json"));
+}
+
+// An escaped comma does not separate alternatives.
+TEST(glob_matcher_test, escaped_commas_in_braces) {
+  std::vector<std::string> patterns = {"file{one\\,two,three}.txt"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("fileone,two.txt"));
+  EXPECT_TRUE(matcher("filethree.txt"));
+  EXPECT_FALSE(matcher("fileonetwo.txt"));
+}
+
+// Escaped braces inside an alternative are literals.
+TEST(glob_matcher_test, escaped_characters_in_braces) {
+  std::vector<std::string> patterns = {"dir/{sub\\{dir\\},other}"};
+  glob_matcher matcher(patterns);
+
+  EXPECT_TRUE(matcher("dir/sub{dir}"));
+  EXPECT_TRUE(matcher("dir/other"));
+  EXPECT_FALSE(matcher("dir/subdir"));
+}
+
+// Basic wildcard, set and newline cases.
+TEST(glob_matcher_test, python_fnmatch) {
+  EXPECT_TRUE(glob_matcher{"abc"}("abc"));
+  EXPECT_TRUE(glob_matcher{"?*?"}("abc"));
+  EXPECT_TRUE(glob_matcher{"???*"}("abc"));
+  EXPECT_TRUE(glob_matcher{"*???"}("abc"));
+  EXPECT_TRUE(glob_matcher{"???"}("abc"));
+  EXPECT_TRUE(glob_matcher{"*"}("abc"));
+  EXPECT_TRUE(glob_matcher{"ab[cd]"}("abc"));
+  EXPECT_TRUE(glob_matcher{"ab[!de]"}("abc"));
+  EXPECT_FALSE(glob_matcher{"ab[de]"}("abc"));
+  EXPECT_FALSE(glob_matcher{"??"}("a"));
+  EXPECT_FALSE(glob_matcher{"b"}("a"));
+  EXPECT_TRUE(glob_matcher{"[\\]"}("\\"));
+  EXPECT_TRUE(glob_matcher{"[!\\]"}("a"));
+  EXPECT_FALSE(glob_matcher{"[!\\]"}("\\"));
+  EXPECT_TRUE(glob_matcher{"foo*"}("foo\nbar"));
+  EXPECT_TRUE(glob_matcher{"foo*"}("foo\nbar\n"));
+  EXPECT_FALSE(glob_matcher{"foo*"}("\nfoo"));
+  EXPECT_TRUE(glob_matcher{"*"}("\n"));
+}
+
+// Case sensitivity, selected via options or via the `i:` / `:` prefixes.
+TEST(glob_matcher_test, python_case) {
+  EXPECT_TRUE(glob_matcher{"abc"}("abc"));
+  EXPECT_TRUE(glob_matcher{":abc"}("abc"));
+  EXPECT_FALSE(glob_matcher{"AbC"}("abc"));
+  EXPECT_TRUE(glob_matcher({"AbC"}, {.ignorecase = true})("abc"));
+  EXPECT_TRUE(glob_matcher{"i:AbC"}("abc"));
+  EXPECT_FALSE(glob_matcher{"abc"}("AbC"));
+  EXPECT_TRUE(glob_matcher({"abc"}, {.ignorecase = true})("AbC"));
+  EXPECT_TRUE(glob_matcher{"i:abc"}("AbC"));
+  EXPECT_TRUE(glob_matcher{"AbC"}("AbC"));
+  EXPECT_TRUE(glob_matcher{":AbC"}("AbC"));
+}
+
+// Character sets without ranges, checked against every test character.
+TEST(glob_matcher_test, python_char_set) {
+  static std::string_view constexpr testcases =
+      R"(abcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)";
+  static std::string_view constexpr uppercase = R"(ABCDEFGHIJKLMNOPQRSTUVWXYZ)";
+  using namespace std::literals;
+
+  for (char c : testcases) {
+    glob_matcher positive{"[az]"};
+    glob_matcher negative{"[!az]"};
+    if (c == 'a' || c == 'z') {
+      EXPECT_TRUE(positive(c));
+      EXPECT_FALSE(negative(c));
+    } else {
+      EXPECT_FALSE(positive(c));
+      EXPECT_TRUE(negative(c));
+    }
+  }
+
+  for (char c : testcases) {
+    EXPECT_EQ("az"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[az]"}(c));
+    EXPECT_EQ("az"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[AZ]"}(c));
+    EXPECT_EQ("az"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!az]"}(c));
+    EXPECT_EQ("az"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!AZ]"}(c));
+  }
+
+  for (char c : uppercase) {
+    EXPECT_EQ("AZ"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[az]"}(c));
+    EXPECT_EQ("AZ"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[AZ]"}(c));
+    EXPECT_EQ("AZ"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!az]"}(c));
+    EXPECT_EQ("AZ"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!AZ]"}(c));
+  }
+
+  for (char c : testcases) {
+    glob_matcher matcher{"[aa]"};
+    if (c == 'a') {
+      EXPECT_TRUE(matcher(c));
+    } else {
+      EXPECT_FALSE(matcher(c));
+    }
+  }
+
+  for (char c : testcases) {
+    EXPECT_EQ(c == '^' || c == 'a' || c == 'z', glob_matcher{"[^az]"}(c));
+    EXPECT_EQ(c == '[' || c == 'a' || c == 'z', glob_matcher{"[[az]"}(c));
+    EXPECT_EQ(c != ']', glob_matcher{"[!]]"}(c));
+  }
+}
+
+// Character sets with ranges, checked against every test character.
+TEST(glob_matcher_test, python_range) {
+  static std::string_view constexpr testcases =
+      R"(abcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)";
+  static std::string_view constexpr uppercase = R"(ABCDEFGHIJKLMNOPQRSTUVWXYZ)";
+  using namespace std::literals;
+
+  for (char c : testcases) {
+    EXPECT_EQ("bcd"sv.find(c) != std::string_view::npos,
+              glob_matcher{"[b-d]"}(c));
+    EXPECT_EQ("bcd"sv.find(c) == std::string_view::npos,
+              glob_matcher{"[!b-d]"}(c));
+    EXPECT_EQ("bcdxyz"sv.find(c) != std::string_view::npos,
+              glob_matcher{"[b-dx-z]"}(c));
+    EXPECT_EQ("bcdxyz"sv.find(c) == std::string_view::npos,
+              glob_matcher{"[!b-dx-z]"}(c));
+  }
+
+  for (char c : testcases) {
+    EXPECT_EQ("bcd"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[B-D]"}(c));
+    EXPECT_EQ("bcd"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!B-D]"}(c));
+  }
+
+  for (char c : uppercase) {
+    EXPECT_EQ("BCD"sv.find(c) != std::string_view::npos,
+              glob_matcher{"i:[b-d]"}(c));
+    EXPECT_EQ("BCD"sv.find(c) == std::string_view::npos,
+              glob_matcher{"i:[!b-d]"}(c));
+  }
+
+  for (char c : testcases) {
+    EXPECT_EQ(c == 'b', glob_matcher{"[b-b]"}(c));
+  }
+
+  for (char c : testcases) {
+    EXPECT_EQ(c != '-' && c != '#', glob_matcher{"[!-#]"}(c));
+    EXPECT_EQ(c != '-' && c != '.', glob_matcher{"[!--.]"}(c));
+    EXPECT_EQ(c == '^' || c == '_' || c == '`', glob_matcher{"[^-`]"}(c));
+    EXPECT_EQ(c == '[' || c == '\\' || c == ']' || c == '^',
+              glob_matcher{"[[-^]"}(c))
+        << c;
+    EXPECT_EQ(c == '\\' || c == ']' || c == '^', glob_matcher{R"([\-^])"}(c));
+    EXPECT_EQ(c == '-' || c == 'b', glob_matcher{"[-b]"}(c));
+    EXPECT_EQ(c != '-' && c != 'b', glob_matcher{"[!-b]"}(c));
+    EXPECT_EQ(c == '-' || c == 'b', glob_matcher{"[-b]"}(c));
+    EXPECT_EQ(c != '-' && c != 'b', glob_matcher{"[!-b]"}(c));
+    EXPECT_EQ(c == '-', glob_matcher{"[-]"}(c));
+    EXPECT_EQ(c != '-', glob_matcher{"[!-]"}(c));
+  }
+
+  EXPECT_THAT([] { glob_matcher{"[d-b]"}('a'); },
+              testing::ThrowsMessage<std::runtime_error>(
+                  "invalid range 'd-b' in character class in pattern: [d-b]"));
+}
+
+// Patterns added one by one can carry individual options.
+TEST(glob_matcher_test, multi_pattern) {
+  glob_matcher matcher;
+  matcher.add_pattern("*.cpp");
+  matcher.add_pattern("*.txt", {.ignorecase = true});
+
+  EXPECT_TRUE(matcher("main.cpp"));
+  EXPECT_TRUE(matcher("README.txt"));
+  EXPECT_TRUE(matcher("CHANGELOG.TXT"));
+  EXPECT_FALSE(matcher("main.c"));
+  EXPECT_FALSE(matcher("UTILS.CPP"));
+}