From c638653172700f06f494bc7dd8d3ca8d11af5cc9 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Mon, 9 Jun 2025 15:25:04 -0400 Subject: [PATCH] feat(lib): implement request weight (#621) * feat(lib): implement request weight Replaces #608 This is a big one and will be what makes Anubis a generic web application firewall. This introduces the WEIGH option, allowing administrators to have facets of request metadata add or remove "weight", or the level of suspicion. This really makes Anubis weigh the soul of requests. Signed-off-by: Xe Iaso * fix(lib): maintain legacy challenge behavior Signed-off-by: Xe Iaso * fix(lib): make weight have dedicated checkers for the hashes Signed-off-by: Xe Iaso * feat(data): convert some rules over to weight points Signed-off-by: Xe Iaso * docs: document request weight Signed-off-by: Xe Iaso * fix(CHANGELOG): spelling error Signed-off-by: Xe Iaso * chore: spelling Signed-off-by: Xe Iaso * docs: fix links to challenge information Signed-off-by: Xe Iaso * docs(policies): fix formatting Signed-off-by: Xe Iaso * fix(config): make default weight adjustment 5 Signed-off-by: Xe Iaso --------- Signed-off-by: Xe Iaso --- .github/actions/spelling/expect.txt | 2 + data/botPolicies.yaml | 4 +- data/bots/aggressive-brazilian-scrapers.yaml | 48 ++++++++-------- data/bots/cloudflare-workers.yaml | 4 +- .../small-internet-browsers/_permissive.yaml | 2 + .../small-internet-browsers/netsurf.yaml | 5 ++ .../small-internet-browsers/palemoon.yaml | 5 ++ data/clients/x-firefox-ai.yaml | 4 +- data/common/allow-private-addresses.yaml | 12 ++-- docs/docs/CHANGELOG.md | 4 +- docs/docs/admin/policies.mdx | 36 ++++++++++++ lib/anubis.go | 55 +++++++++++++++++-- lib/policy/bot.go | 1 + lib/policy/checker.go | 14 +++++ lib/policy/checkresult.go | 9 ++- lib/policy/config/config.go | 20 ++++--- lib/policy/config/config_test.go | 19 +++++++ lib/policy/config/expressionorlist.go | 4 +- .../config/testdata/good/simple-weight.yaml | 6 ++ .../testdata/good/weight-no-weight.yaml 
| 4 ++ lib/policy/config/weight.go | 5 ++ lib/policy/policy.go | 4 ++ 22 files changed, 214 insertions(+), 53 deletions(-) create mode 100644 data/clients/small-internet-browsers/_permissive.yaml create mode 100644 data/clients/small-internet-browsers/netsurf.yaml create mode 100644 data/clients/small-internet-browsers/palemoon.yaml create mode 100644 lib/policy/config/testdata/good/simple-weight.yaml create mode 100644 lib/policy/config/testdata/good/weight-no-weight.yaml create mode 100644 lib/policy/config/weight.go diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 651d788..843b4a7 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -158,6 +158,7 @@ Mojeek mojeekbot mozilla nbf +netsurf nginx nobots NONINFRINGEMENT @@ -170,6 +171,7 @@ onionservice openai openrc pag +palemoon Pangu parseable passthrough diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 5e444fe..78fb087 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -55,7 +55,9 @@ bots: - name: generic-browser user_agent_regex: >- Mozilla|Opera - action: CHALLENGE + action: WEIGH + weight: + adjust: 10 dnsbl: false diff --git a/data/bots/aggressive-brazilian-scrapers.yaml b/data/bots/aggressive-brazilian-scrapers.yaml index 140811a..ce0d7c9 100644 --- a/data/bots/aggressive-brazilian-scrapers.yaml +++ b/data/bots/aggressive-brazilian-scrapers.yaml @@ -1,28 +1,26 @@ - name: deny-aggressive-brazilian-scrapers - action: DENY + action: WEIGH + weight: + adjust: 20 expression: any: - # Internet Explorer should be out of support - - userAgent.contains("MSIE") - # Trident is the Internet Explorer browser engine - - userAgent.contains("Trident") - # Opera is a fork of chrome now - - userAgent.contains("Presto") - # Windows CE is discontinued - - userAgent.contains("Windows CE") - # Windows 95 is discontinued - - userAgent.contains("Windows 95") - # Windows 98 is discontinued - - 
userAgent.contains("Windows 98") - # Windows 9.x is discontinued - - userAgent.contains("Win 9x") - # Amazon does not have an Alexa Toolbar. - - userAgent.contains("Alexa Toolbar") -- name: challenge-aggressive-brazilian-scrapers - action: CHALLENGE - expression: - any: - # This is not released, even Windows 11 calls itself Windows 10 - - userAgent.contains("Windows NT 11.0") - # iPods are not in common use - - userAgent.contains("iPod") \ No newline at end of file + # Internet Explorer should be out of support + - userAgent.contains("MSIE") + # Trident is the Internet Explorer browser engine + - userAgent.contains("Trident") + # Opera is a fork of chrome now + - userAgent.contains("Presto") + # Windows CE is discontinued + - userAgent.contains("Windows CE") + # Windows 95 is discontinued + - userAgent.contains("Windows 95") + # Windows 98 is discontinued + - userAgent.contains("Windows 98") + # Windows 9.x is discontinued + - userAgent.contains("Win 9x") + # Amazon does not have an Alexa Toolbar. 
+ - userAgent.contains("Alexa Toolbar") + # This is not released, even Windows 11 calls itself Windows 10 + - userAgent.contains("Windows NT 11.0") + # iPods are not in common use + - userAgent.contains("iPod") diff --git a/data/bots/cloudflare-workers.yaml b/data/bots/cloudflare-workers.yaml index 3fe051b..86683fe 100644 --- a/data/bots/cloudflare-workers.yaml +++ b/data/bots/cloudflare-workers.yaml @@ -1,4 +1,6 @@ - name: cloudflare-workers headers_regex: CF-Worker: .* - action: DENY \ No newline at end of file + action: WEIGH + weight: + adjust: 15 diff --git a/data/clients/small-internet-browsers/_permissive.yaml b/data/clients/small-internet-browsers/_permissive.yaml new file mode 100644 index 0000000..395a28f --- /dev/null +++ b/data/clients/small-internet-browsers/_permissive.yaml @@ -0,0 +1,2 @@ +- import: (data)/clients/small-internet-browsers/netsurf.yaml +- import: (data)/clients/small-internet-browsers/palemoon.yaml diff --git a/data/clients/small-internet-browsers/netsurf.yaml b/data/clients/small-internet-browsers/netsurf.yaml new file mode 100644 index 0000000..413f965 --- /dev/null +++ b/data/clients/small-internet-browsers/netsurf.yaml @@ -0,0 +1,5 @@ +- name: "reduce-weight-netsurf" + user_agent_regex: "NetSurf" + action: WEIGH + weight: + adjust: -5 diff --git a/data/clients/small-internet-browsers/palemoon.yaml b/data/clients/small-internet-browsers/palemoon.yaml new file mode 100644 index 0000000..876aa02 --- /dev/null +++ b/data/clients/small-internet-browsers/palemoon.yaml @@ -0,0 +1,5 @@ +- name: "reduce-weight-palemoon" + user_agent_regex: "PaleMoon" + action: WEIGH + weight: + adjust: -5 diff --git a/data/clients/x-firefox-ai.yaml b/data/clients/x-firefox-ai.yaml index 3a1e3b7..c87e14d 100644 --- a/data/clients/x-firefox-ai.yaml +++ b/data/clients/x-firefox-ai.yaml @@ -1,4 +1,6 @@ # https://connect.mozilla.org/t5/firefox-labs/try-out-link-previews-in-firefox-labs-138-and-share-your/td-p/92012 - name: x-firefox-ai - action: CHALLENGE + 
action: WEIGH expression: '"X-Firefox-Ai" in headers' + weight: + adjust: 5 diff --git a/data/common/allow-private-addresses.yaml b/data/common/allow-private-addresses.yaml index 3a3c0dc..694a806 100644 --- a/data/common/allow-private-addresses.yaml +++ b/data/common/allow-private-addresses.yaml @@ -1,15 +1,15 @@ - name: ipv4-rfc-1918 action: ALLOW remote_addresses: - - 10.0.0.0/8 - - 172.16.0.0/12 - - 192.168.0.0/16 - - 100.64.0.0/10 + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 + - 100.64.0.0/10 - name: ipv6-ula action: ALLOW remote_addresses: - - fc00::/7 + - fc00::/7 - name: ipv6-link-local action: ALLOW remote_addresses: - - fe80::/10 \ No newline at end of file + - fe80::/10 diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 2aba60d..0011ba9 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -10,11 +10,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] + - Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in development - - - Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not +- Requests can have their weight adjusted; if a request weighs zero or less, then it is allowed through - Refactor challenge presentation logic to use a challenge registry - Allow challenge implementations to register HTTP routes - Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95)) diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index 8722d9e..d5acb71 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -244,3 +244,39 @@ In case your service needs it for risk calculation reasons, Anubis exposes infor | `X-Anubis-Status` | The status and how strict Anubis was in its checks | `PASS` | Policy rules are matched using [Go's standard library regular expressions package](https://pkg.go.dev/regexp). You can mess around with the syntax at [regex101.com](https://regex101.com), make sure to select the Golang option. + +## Request Weight + +Anubis rules can also add or remove "weight" from requests, allowing administrators to configure custom levels of suspicion. For example, if your application uses session tokens named `i_love_gitea`: + +```yaml +- name: gitea-session-token + action: WEIGH + expression: + all: + - '"Cookie" in headers' + - headers["Cookie"].contains("i_love_gitea=") + # Remove 5 weight points + weight: + adjust: -5 +``` + +This would remove five weight points from the request. For a browser-like client that starts at the default weight of 10, that leaves a weight of 5, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx). 
+ +### Weight Thresholds + +Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch. For now, here's how Anubis configures the weight thresholds: + +| Weight Expression | Action | +| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- | +| `weight <= 0` (weight is less than or equal to 0) | Allow the request through. | +| `weight < 10` (weight is greater than 0 and less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. | +| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. | + +### Advice + +Weight is still very new and needs work. This is an experimental feature and should be treated as such. Here's some advice to help you better tune requests: + +- The default weight for browser-like clients is 10. This triggers an aggressive challenge. +- Remove and add weight in multiples of five. +- Be careful with how you configure weight. 
diff --git a/lib/anubis.go b/lib/anubis.go index 3e40680..499747a 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -402,13 +402,20 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, redir, http.StatusFound) } -func cr(name string, rule config.Rule) policy.CheckResult { +func cr(name string, rule config.Rule, weight int) policy.CheckResult { return policy.CheckResult{ - Name: name, - Rule: rule, + Name: name, + Rule: rule, + Weight: weight, } } +var ( + weightOkayStatic = policy.NewStaticHashChecker("weight/okay") + weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion") + weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion") +) + // Check evaluates the list of rules, and returns the result func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) { host := r.Header.Get("X-Real-Ip") @@ -421,6 +428,8 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) return decaymap.Zilch[policy.CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host) } + weight := 0 + for _, b := range s.policy.Bots { match, err := b.Rules.Check(r) if err != nil { @@ -428,11 +437,47 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) } if match { - return cr("bot/"+b.Name, b.Action), &b, nil + switch b.Action { + case config.RuleDeny, config.RuleAllow, config.RuleBenchmark, config.RuleChallenge: + return cr("bot/"+b.Name, b.Action, weight), &b, nil + case config.RuleWeigh: + slog.Debug("adjusting weight", "name", b.Name, "delta", b.Weight.Adjust) + weight += b.Weight.Adjust + } } } - return cr("default/allow", config.RuleAllow), &policy.Bot{ + switch { + case weight <= 0: + return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: config.DefaultAlgorithm, + }, + 
Rules: weightOkayStatic, + }, nil + case weight > 0 && weight < 10: + return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: "metarefresh", + }, + Rules: weightMildSusStatic, + }, nil + case weight >= 10: + return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: "fast", + }, + Rules: weightVerySusStatic, + }, nil + } + + return cr("default/allow", config.RuleAllow, weight), &policy.Bot{ Challenge: &config.ChallengeRules{ Difficulty: s.policy.DefaultDifficulty, ReportAs: s.policy.DefaultDifficulty, diff --git a/lib/policy/bot.go b/lib/policy/bot.go index 3e7a63a..6107524 100644 --- a/lib/policy/bot.go +++ b/lib/policy/bot.go @@ -12,6 +12,7 @@ type Bot struct { Challenge *config.ChallengeRules Name string Action config.Rule + Weight *config.Weight } func (b Bot) Hash() string { diff --git a/lib/policy/checker.go b/lib/policy/checker.go index dd6deb1..447a7ad 100644 --- a/lib/policy/checker.go +++ b/lib/policy/checker.go @@ -47,6 +47,20 @@ func (cl CheckerList) Hash() string { return internal.SHA256sum(sb.String()) } +type staticHashChecker struct { + hash string +} + +func (staticHashChecker) Check(r *http.Request) (bool, error) { + return true, nil +} + +func (s staticHashChecker) Hash() string { return s.hash } + +func NewStaticHashChecker(hashable string) Checker { + return staticHashChecker{hash: internal.SHA256sum(hashable)} +} + type RemoteAddrChecker struct { ranger cidranger.Ranger hash string diff --git a/lib/policy/checkresult.go b/lib/policy/checkresult.go index c84f326..31737dd 100644 --- a/lib/policy/checkresult.go +++ b/lib/policy/checkresult.go @@ -7,12 +7,15 @@ import ( ) type CheckResult struct { - Name string - Rule config.Rule + Name string 
+ Rule config.Rule + Weight int } func (cr CheckResult) LogValue() slog.Value { return slog.GroupValue( slog.String("name", cr.Name), - slog.String("rule", string(cr.Rule))) + slog.String("rule", string(cr.Rule)), + slog.Int("weight", cr.Weight), + ) } diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 0bf46f7..d140549 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -39,20 +39,22 @@ const ( RuleAllow Rule = "ALLOW" RuleDeny Rule = "DENY" RuleChallenge Rule = "CHALLENGE" + RuleWeigh Rule = "WEIGH" RuleBenchmark Rule = "DEBUG_BENCHMARK" ) const DefaultAlgorithm = "fast" type BotConfig struct { - UserAgentRegex *string `json:"user_agent_regex"` - PathRegex *string `json:"path_regex"` - HeadersRegex map[string]string `json:"headers_regex"` - Expression *ExpressionOrList `json:"expression"` + UserAgentRegex *string `json:"user_agent_regex,omitempty"` + PathRegex *string `json:"path_regex,omitempty"` + HeadersRegex map[string]string `json:"headers_regex,omitempty"` + Expression *ExpressionOrList `json:"expression,omitempty"` Challenge *ChallengeRules `json:"challenge,omitempty"` + Weight *Weight `json:"weight,omitempty"` Name string `json:"name"` Action Rule `json:"action"` - RemoteAddr []string `json:"remote_addresses"` + RemoteAddr []string `json:"remote_addresses,omitempty"` } func (b BotConfig) Zero() bool { @@ -73,7 +75,7 @@ func (b BotConfig) Zero() bool { return true } -func (b BotConfig) Valid() error { +func (b *BotConfig) Valid() error { var errs []error if b.Name == "" { @@ -144,7 +146,7 @@ func (b BotConfig) Valid() error { } switch b.Action { - case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny: + case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny, RuleWeigh: // okay default: errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action)) @@ -156,6 +158,10 @@ func (b BotConfig) Valid() error { } } + if b.Action == RuleWeigh && b.Weight == nil { + b.Weight = &Weight{Adjust: 5} + } + if 
len(errs) != 0 { return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(errs...)) } diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index be603b0..0a77e04 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -168,6 +168,25 @@ func TestBotValid(t *testing.T) { }, err: nil, }, + { + name: "weight rule without weight", + bot: BotConfig{ + Name: "weight-adjust-if-mozilla", + Action: RuleWeigh, + UserAgentRegex: p("Mozilla"), + }, + }, + { + name: "weight rule with weight adjust", + bot: BotConfig{ + Name: "weight-adjust-if-mozilla", + Action: RuleWeigh, + UserAgentRegex: p("Mozilla"), + Weight: &Weight{ + Adjust: 5, + }, + }, + }, } for _, cs := range tests { diff --git a/lib/policy/config/expressionorlist.go b/lib/policy/config/expressionorlist.go index 7088c8d..8851c5b 100644 --- a/lib/policy/config/expressionorlist.go +++ b/lib/policy/config/expressionorlist.go @@ -14,8 +14,8 @@ var ( type ExpressionOrList struct { Expression string `json:"-"` - All []string `json:"all"` - Any []string `json:"any"` + All []string `json:"all,omitempty"` + Any []string `json:"any,omitempty"` } func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool { diff --git a/lib/policy/config/testdata/good/simple-weight.yaml b/lib/policy/config/testdata/good/simple-weight.yaml new file mode 100644 index 0000000..ec7a92e --- /dev/null +++ b/lib/policy/config/testdata/good/simple-weight.yaml @@ -0,0 +1,6 @@ +bots: + - name: simple-weight-adjust + action: WEIGH + user_agent_regex: Mozilla + weight: + adjust: 5 diff --git a/lib/policy/config/testdata/good/weight-no-weight.yaml b/lib/policy/config/testdata/good/weight-no-weight.yaml new file mode 100644 index 0000000..f137f8a --- /dev/null +++ b/lib/policy/config/testdata/good/weight-no-weight.yaml @@ -0,0 +1,4 @@ +bots: + - name: weight + action: WEIGH + user_agent_regex: Mozilla diff --git a/lib/policy/config/weight.go b/lib/policy/config/weight.go 
new file mode 100644 index 0000000..f408111 --- /dev/null +++ b/lib/policy/config/weight.go @@ -0,0 +1,5 @@ +package config + +type Weight struct { + Adjust int `json:"adjust"` +} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 7183d63..d67ca1c 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -117,6 +117,10 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon } } + if b.Weight != nil { + parsedBot.Weight = b.Weight + } + parsedBot.Rules = cl result.Bots = append(result.Bots, parsedBot)