diff --git a/data/botPolicies.json b/data/botPolicies.json index dad04e8..160bbf0 100644 --- a/data/botPolicies.json +++ b/data/botPolicies.json @@ -41,7 +41,7 @@ }, { "name": "generic-browser", - "user_agent_regex": "Mozilla|Opera\n", + "user_agent_regex": "Mozilla|Opera", "action": "CHALLENGE" } ], diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 585be15..51af499 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -43,7 +43,7 @@ bots: # Generic catchall rule - name: generic-browser - user_agent_regex: > + user_agent_regex: >- Mozilla|Opera action: CHALLENGE diff --git a/data/bots/ai-robots-txt.yaml b/data/bots/ai-robots-txt.yaml index 19cbe93..ef2790c 100644 --- a/data/bots/ai-robots-txt.yaml +++ b/data/bots/ai-robots-txt.yaml @@ -1,4 +1,4 @@ - name: "ai-robots-txt" - user_agent_regex: > + user_agent_regex: >- AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot action: DENY \ No newline at end of file diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index bedd38e..2e7c311 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372)) - Add documentation for default allow behavior (implicit rule) - Enable [importing configuration snippets](./admin/configuration/import.mdx) ([#321](https://github.com/TecharoHQ/anubis/pull/321)) - Refactor check logic to be more generic and work on a Checker type diff --git a/lib/policy/checker.go b/lib/policy/checker.go index ad98ced..51223cd 100644 --- a/lib/policy/checker.go +++ b/lib/policy/checker.go @@ -110,11 +110,11 @@ func NewUserAgentChecker(rexStr string) (Checker, error) { } func NewHeaderMatchesChecker(header, rexStr string) (Checker, error) { - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) } - return &HeaderMatchesChecker{header, rex, internal.SHA256sum(header + ": " + rexStr)}, nil + return &HeaderMatchesChecker{strings.TrimSpace(header), rex, internal.SHA256sum(header + ": " + rexStr)}, nil } func (hmc *HeaderMatchesChecker) Check(r *http.Request) (bool, error) { @@ -135,7 +135,7 @@ type PathChecker struct { } func NewPathChecker(rexStr string) (Checker, error) { - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) } @@ -155,7 +155,7 @@ func (pc *PathChecker) Hash() string { } func NewHeaderExistsChecker(key string) Checker { - return headerExistsChecker{key} + return headerExistsChecker{strings.TrimSpace(key)} } type headerExistsChecker struct { @@ -180,11 +180,11 @@ func NewHeadersChecker(headermap map[string]string) (Checker, error) { for key, rexStr := range headermap { if rexStr == ".*" { - result = append(result, headerExistsChecker{key}) + result = append(result, headerExistsChecker{strings.TrimSpace(key)}) continue } - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { errs = append(errs, fmt.Errorf("while compiling header %s regex %s: %w", key, rexStr, err)) continue diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 627e9cf..c670bac 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -24,6 +24,7 @@ var ( ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex") ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex") ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR") + ErrRegexEndsWithNewline = errors.New("config.Bot: regular expression ends with newline (try >- instead of > in yaml)") ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file") ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time") ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both") @@ -91,12 +92,20 @@ func (b BotConfig) Valid() error { } if b.UserAgentRegex != nil { + if strings.HasSuffix(*b.UserAgentRegex, "\n") { + errs = append(errs, fmt.Errorf("%w: user agent regex: %q", ErrRegexEndsWithNewline, *b.UserAgentRegex)) + } + if _, err := regexp.Compile(*b.UserAgentRegex); err != nil { errs = append(errs, ErrInvalidUserAgentRegex, err) } } if b.PathRegex != nil { + if strings.HasSuffix(*b.PathRegex, "\n") { + errs = append(errs, fmt.Errorf("%w: path regex: %q", ErrRegexEndsWithNewline, *b.PathRegex)) + } + if _, err := regexp.Compile(*b.PathRegex); err != nil { errs = append(errs, ErrInvalidPathRegex, err) } @@ -108,6 +117,10 @@ func (b BotConfig) Valid() error { continue } + if strings.HasSuffix(expr, "\n") { + errs = append(errs, fmt.Errorf("%w: header %s regex: %q", ErrRegexEndsWithNewline, name, expr)) + } + if _, err := regexp.Compile(expr); err != nil { errs = append(errs, ErrInvalidHeadersRegex, err) } diff --git a/lib/policy/config/testdata/bad/regex_ends_newline.json b/lib/policy/config/testdata/bad/regex_ends_newline.json new file mode 100644 index 0000000..14c7fa9 --- /dev/null +++ b/lib/policy/config/testdata/bad/regex_ends_newline.json @@ -0,0 +1,21 @@ +{ + "bots": [ + { + "name": "user-agent-ends-newline", + "user_agent_regex": "Mozilla\n", + "action": "CHALLENGE" + }, + { + "name": "path-ends-newline", + "path_regex": "^/evil/.*$\n", + "action": "CHALLENGE" + }, + { + "name": "headers-ends-newline", + "headers_regex": { + "CF-Worker": ".*\n" + }, + "action": "CHALLENGE" + } + ] +} diff --git a/lib/policy/config/testdata/bad/regex_ends_newline.yaml b/lib/policy/config/testdata/bad/regex_ends_newline.yaml new file mode 100644 index 0000000..1f0ae85 --- /dev/null +++ b/lib/policy/config/testdata/bad/regex_ends_newline.yaml @@ -0,0 +1,17 @@ +bots: +- name: user-agent-ends-newline + # Subtle bug: this ends with a newline + user_agent_regex: > + Mozilla + action: CHALLENGE +- name: path-ends-newline + # Subtle bug: this ends with a newline + path_regex: > + ^/evil/.*$ + action: CHALLENGE +- name: headers-ends-newline + # Subtle bug: this ends with a newline + headers_regex: + CF-Worker: > + .* + action: CHALLENGE \ No newline at end of file