diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 0c5f947..8c6e4e7 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -91,3 +91,57 @@ dnsbl: false status_codes: CHALLENGE: 200 DENY: 200 + +# The weight thresholds for when to trigger individual challenges. Any +# CHALLENGE will take precedence over this. +# +# A threshold has four configuration options: +# +# - name: the name that is reported down the stack and used for metrics +# - expression: A CEL expression with the request weight in the variable +# weight +# - action: the Anubis action to apply, similar to in a bot policy +# - challenge: which challenge to send to the user, similar to in a bot policy +# +# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more +# information. +thresholds: + # By default Anubis ships with the following thresholds: + - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather + expression: weight < 0 # a feather weighs zero units + action: ALLOW # Allow the traffic through + # For clients that had some weight reduced through custom rules, give them a + # lightweight challenge. + - name: mild-suspicion + expression: + all: + - weight >= 0 + - weight < 10 + action: CHALLENGE + challenge: + # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh + algorithm: metarefresh + difficulty: 1 + report_as: 1 + # For clients that are browser-like but have either gained points from custom rules or + # report as a standard browser. 
+ - name: moderate-suspicion + expression: + all: + - weight >= 10 + - weight < 20 + action: CHALLENGE + challenge: + # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work + algorithm: fast + difficulty: 2 # two leading zeros, very fast for most clients + report_as: 2 + # For clients that are browser like and have gained many points from custom rules + - name: extreme-suspicion + expression: weight >= 20 + action: CHALLENGE + challenge: + # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work + algorithm: fast + difficulty: 4 + report_as: 4 diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index e6fe5e8..9cc9612 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409)) - Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206)) - Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations +- Add [custom weight thresholds](./admin/configuration/thresholds.mdx) via CEL ([#688](https://github.com/TecharoHQ/anubis/pull/688)) ## v1.19.1: Jenomis cen Lexentale - Echo 1 diff --git a/docs/docs/admin/configuration/thresholds.mdx b/docs/docs/admin/configuration/thresholds.mdx new file mode 100644 index 0000000..2c99ae7 --- /dev/null +++ b/docs/docs/admin/configuration/thresholds.mdx @@ -0,0 +1,140 @@ +# Weight Threshold Configuration + +Anubis offers the ability to assign "weight" to requests. This is a custom level of suspicion that rules can add to or remove from. 
For example, here's how you assign 10 weight points to anything that might be a browser: + +```yaml +# botPolicies.yaml + +bots: + - name: generic-browser + user_agent_regex: >- + Mozilla|Opera + action: WEIGH + weight: + adjust: 10 +``` + +Thresholds let you take this per-request weight value and take actions in response to it. Thresholds are defined alongside your bot configuration in `botPolicies.yaml`. + +:::note + +Thresholds DO NOT apply when a request matches a bot rule with the CHALLENGE action. Thresholds only apply when requests don't match any terminal bot rules. + +::: + +```yaml +# botPolicies.yaml + +bots: ... + +thresholds: + - name: minimal-suspicion + expression: weight < 0 + action: ALLOW + + - name: mild-suspicion + expression: + all: + - weight >= 0 + - weight < 10 + action: CHALLENGE + challenge: + algorithm: metarefresh + difficulty: 1 + report_as: 1 + + - name: moderate-suspicion + expression: + all: + - weight >= 10 + - weight < 20 + action: CHALLENGE + challenge: + algorithm: fast + difficulty: 2 + report_as: 2 + + - name: extreme-suspicion + expression: weight >= 20 + action: CHALLENGE + challenge: + algorithm: fast + difficulty: 4 + report_as: 4 +``` + +This defines a suite of 4 thresholds: + +1. If the request weight is less than zero, allow it through. +2. If the request weight is greater than or equal to zero, but less than ten: give it [a very lightweight challenge](./challenges/metarefresh.mdx). +3. If the request weight is greater than or equal to ten, but less than twenty: give it [a slightly heavier challenge](./challenges/proof-of-work.mdx). +4. Otherwise, give it [the heaviest challenge](./challenges/proof-of-work.mdx). + +Thresholds can be configured with the following options: + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameDescriptionExample
`name`The human-readable name for this threshold. + +```yaml +name: extreme-suspicion +``` + +
`expression`A [CEL](https://cel.dev/) expression taking the request weight and returning true or false + +To check if the request weight is less than zero: + +```yaml +expression: weight < 0 +``` + +To check if it's at least 0 but less than 10 (0 is included, 10 is not): + +```yaml +expression: + all: + - weight >= 0 + - weight < 10 +``` + +
`action`The Anubis action to apply: `ALLOW`, `CHALLENGE`, or `DENY` + +```yaml +action: ALLOW +``` + +If you set the CHALLENGE action, you must set challenge details: + +```yaml +action: CHALLENGE +challenge: + algorithm: metarefresh + difficulty: 1 + report_as: 1 +``` + +
diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index d5acb71..85e7b95 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -261,17 +261,11 @@ Anubis rules can also add or remove "weight" from requests, allowing administrat adjust: -5 ``` -This would remove five weight points from the request, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx). +This would remove five weight points from the request, which would make Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) in the default configuration. ### Weight Thresholds -Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch, for now here's how Anubis configures the weight thresholds: - -| Weight Expression | Action | -| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- | -| `weight < 0` (weight is less than 0) | Allow the request through. | -| `weight < 10` (weight is less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. | -| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. 
| +For more information on configuring weight thresholds, see [Weight Threshold Configuration](./configuration/thresholds.mdx) ### Advice diff --git a/lib/anubis.go b/lib/anubis.go index c8945fb..195c10d 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -15,6 +15,7 @@ import ( "time" "github.com/golang-jwt/jwt/v5" + "github.com/google/cel-go/common/types" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -411,12 +412,6 @@ func cr(name string, rule config.Rule, weight int) policy.CheckResult { } } -var ( - weightOkayStatic = policy.NewStaticHashChecker("weight/okay") - weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion") - weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion") -) - // Check evaluates the list of rules, and returns the result func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) { host := r.Header.Get("X-Real-Ip") @@ -448,34 +443,25 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) } } - switch { - case weight <= 0: - return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{ - Challenge: &config.ChallengeRules{ - Difficulty: s.policy.DefaultDifficulty, - ReportAs: s.policy.DefaultDifficulty, - Algorithm: config.DefaultAlgorithm, - }, - Rules: weightOkayStatic, - }, nil - case weight > 0 && weight < 10: - return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{ - Challenge: &config.ChallengeRules{ - Difficulty: s.policy.DefaultDifficulty, - ReportAs: s.policy.DefaultDifficulty, - Algorithm: "metarefresh", - }, - Rules: weightMildSusStatic, - }, nil - case weight >= 10: - return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{ - Challenge: &config.ChallengeRules{ - Difficulty: s.policy.DefaultDifficulty, - ReportAs: s.policy.DefaultDifficulty, - Algorithm: "fast", - }, - Rules: weightVerySusStatic, - }, nil + for _, t := range 
s.policy.Thresholds { + result, _, err := t.Program.ContextEval(r.Context(), &policy.ThresholdRequest{Weight: weight}) + if err != nil { + slog.Error("error when evaluating threshold expression", "expression", t.Expression.String(), "err", err) + continue + } + + var matches bool + + if val, ok := result.(types.Bool); ok { + matches = bool(val) + } + + if matches { + return cr("threshold/"+t.Name, t.Action, weight), &policy.Bot{ + Challenge: t.Challenge, + Rules: &checker.List{}, + }, nil + } } return cr("default/allow", config.RuleAllow, weight), &policy.Bot{ diff --git a/lib/policy/celchecker.go b/lib/policy/celchecker.go index b19ea60..c2cc335 100644 --- a/lib/policy/celchecker.go +++ b/lib/policy/celchecker.go @@ -17,47 +17,18 @@ type CELChecker struct { } func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) { - env, err := expressions.NewEnvironment() + env, err := expressions.BotEnvironment() if err != nil { return nil, err } - var src string - var ast *cel.Ast - - if cfg.Expression != "" { - src = cfg.Expression - var iss *cel.Issues - intermediate, iss := env.Compile(src) - if iss != nil { - return nil, iss.Err() - } - - ast, iss = env.Check(intermediate) - if iss != nil { - return nil, iss.Err() - } - } - - if len(cfg.All) != 0 { - ast, err = expressions.Join(env, expressions.JoinAnd, cfg.All...) - } - - if len(cfg.Any) != 0 { - ast, err = expressions.Join(env, expressions.JoinOr, cfg.Any...) 
- } - - if err != nil { - return nil, err - } - - program, err := expressions.Compile(env, ast) + program, err := expressions.Compile(env, cfg.String()) if err != nil { return nil, fmt.Errorf("can't compile CEL program: %w", err) } return &CELChecker{ - src: src, + src: cfg.String(), program: program, }, nil } diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 78cbb97..1803f32 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -43,6 +43,15 @@ const ( RuleBenchmark Rule = "DEBUG_BENCHMARK" ) +func (r Rule) Valid() error { + switch r { + case RuleAllow, RuleDeny, RuleChallenge, RuleWeigh, RuleBenchmark: + return nil + default: + return ErrUnknownAction + } +} + const DefaultAlgorithm = "fast" type BotConfig struct { @@ -184,13 +193,18 @@ type ChallengeRules struct { } var ( - ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)") - ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)") + ErrChallengeDifficultyTooLow = errors.New("config.ChallengeRules: difficulty is too low (must be >= 1)") + ErrChallengeDifficultyTooHigh = errors.New("config.ChallengeRules: difficulty is too high (must be <= 64)") + ErrChallengeMustHaveAlgorithm = errors.New("config.ChallengeRules: must have algorithm name set") ) func (cr ChallengeRules) Valid() error { var errs []error + if cr.Algorithm == "" { + errs = append(errs, ErrChallengeMustHaveAlgorithm) + } + if cr.Difficulty < 1 { errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) } @@ -312,18 +326,19 @@ type fileConfig struct { Bots []BotOrImport `json:"bots"` DNSBL bool `json:"dnsbl"` StatusCodes StatusCodes `json:"status_codes"` + Thresholds []Threshold `json:"threshold"` } -func (c fileConfig) Valid() error { +func (c *fileConfig) Valid() error { var errs []error if len(c.Bots) == 0 { errs = append(errs, ErrNoBotRulesDefined) } - 
for _, b := range c.Bots { + for i, b := range c.Bots { if err := b.Valid(); err != nil { - errs = append(errs, err) + errs = append(errs, fmt.Errorf("bot %d: %w", i, err)) } } @@ -331,6 +346,16 @@ func (c fileConfig) Valid() error { errs = append(errs, err) } + if len(c.Thresholds) == 0 { + errs = append(errs, ErrNoThresholdRulesDefined) + } + + for i, t := range c.Thresholds { + if err := t.Valid(); err != nil { + errs = append(errs, fmt.Errorf("threshold %d: %w", i, err)) + } + } + if len(errs) != 0 { return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...)) } @@ -339,11 +364,14 @@ func (c fileConfig) Valid() error { } func Load(fin io.Reader, fname string) (*Config, error) { - var c fileConfig - c.StatusCodes = StatusCodes{ - Challenge: http.StatusOK, - Deny: http.StatusOK, + c := &fileConfig{ + StatusCodes: StatusCodes{ + Challenge: http.StatusOK, + Deny: http.StatusOK, + }, + Thresholds: DefaultThresholds, } + if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil { return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err) } @@ -379,6 +407,15 @@ func Load(fin io.Reader, fname string) (*Config, error) { } } + for _, t := range c.Thresholds { + if err := t.Valid(); err != nil { + validationErrs = append(validationErrs, err) + continue + } + + result.Thresholds = append(result.Thresholds, t) + } + if len(validationErrs) > 0 { return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...)) } @@ -388,6 +425,7 @@ func Load(fin io.Reader, fname string) (*Config, error) { type Config struct { Bots []BotConfig + Thresholds []Threshold DNSBL bool StatusCodes StatusCodes } diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index 0a77e04..730b3d6 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -8,7 +8,6 @@ import ( "testing" "github.com/TecharoHQ/anubis/data" - "k8s.io/apimachinery/pkg/util/yaml" ) func p[V any](v V) *V { 
return &v } @@ -313,12 +312,8 @@ func TestConfigValidBad(t *testing.T) { } defer fin.Close() - var c fileConfig - if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil { - t.Fatalf("can't decode file: %v", err) - } - - if err := c.Valid(); err == nil { + _, err = Load(fin, filepath.Join("testdata", "bad", st.Name())) + if err == nil { t.Fatal("validation should have failed but didn't somehow") } else { t.Log(err) diff --git a/lib/policy/config/expressionorlist.go b/lib/policy/config/expressionorlist.go index 68dafd2..b4e64c4 100644 --- a/lib/policy/config/expressionorlist.go +++ b/lib/policy/config/expressionorlist.go @@ -3,7 +3,9 @@ package config import ( "encoding/json" "errors" + "fmt" "slices" + "strings" ) var ( @@ -18,6 +20,32 @@ type ExpressionOrList struct { Any []string `json:"any,omitempty" yaml:"any,omitempty"` } +func (eol ExpressionOrList) String() string { + switch { + case len(eol.Expression) != 0: + return eol.Expression + case len(eol.All) != 0: + var sb strings.Builder + for i, pred := range eol.All { + if i != 0 { + fmt.Fprintf(&sb, " && ") + } + fmt.Fprintf(&sb, "( %s )", pred) + } + return sb.String() + case len(eol.Any) != 0: + var sb strings.Builder + for i, pred := range eol.Any { + if i != 0 { + fmt.Fprintf(&sb, " || ") + } + fmt.Fprintf(&sb, "( %s )", pred) + } + return sb.String() + } + panic("this should not happen") +} + func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool { if eol.Expression != rhs.Expression { return false diff --git a/lib/policy/config/expressionorlist_test.go b/lib/policy/config/expressionorlist_test.go index 8d0c843..a09baf3 100644 --- a/lib/policy/config/expressionorlist_test.go +++ b/lib/policy/config/expressionorlist_test.go @@ -213,3 +213,54 @@ func TestExpressionOrListUnmarshalJSON(t *testing.T) { }) } } + +func TestExpressionOrListString(t *testing.T) { + for _, tt := range []struct { + name string + in ExpressionOrList + out string + }{ + { + name: "single expression", + in: 
ExpressionOrList{ + Expression: "true", + }, + out: "true", + }, + { + name: "all", + in: ExpressionOrList{ + All: []string{"true"}, + }, + out: "( true )", + }, + { + name: "all with &&", + in: ExpressionOrList{ + All: []string{"true", "true"}, + }, + out: "( true ) && ( true )", + }, + { + name: "any", + in: ExpressionOrList{ + All: []string{"true"}, + }, + out: "( true )", + }, + { + name: "any with ||", + in: ExpressionOrList{ + Any: []string{"true", "true"}, + }, + out: "( true ) || ( true )", + }, + } { + t.Run(tt.name, func(t *testing.T) { + result := tt.in.String() + if result != tt.out { + t.Errorf("wanted %q, got: %q", tt.out, result) + } + }) + } +} diff --git a/lib/policy/config/testdata/bad/unparseable.json b/lib/policy/config/testdata/bad/unparseable.json new file mode 100644 index 0000000..ff30235 --- /dev/null +++ b/lib/policy/config/testdata/bad/unparseable.json @@ -0,0 +1 @@ +} \ No newline at end of file diff --git a/lib/policy/config/testdata/bad/unparseable.yaml b/lib/policy/config/testdata/bad/unparseable.yaml new file mode 100644 index 0000000..5c34318 --- /dev/null +++ b/lib/policy/config/testdata/bad/unparseable.yaml @@ -0,0 +1 @@ +} diff --git a/lib/policy/config/testdata/good/no-thresholds.yaml b/lib/policy/config/testdata/good/no-thresholds.yaml new file mode 100644 index 0000000..bf981fd --- /dev/null +++ b/lib/policy/config/testdata/good/no-thresholds.yaml @@ -0,0 +1,8 @@ +bots: + - name: simple-weight-adjust + action: WEIGH + user_agent_regex: Mozilla + weight: + adjust: 5 + +thresholds: [] diff --git a/lib/policy/config/testdata/good/thresholds.yaml b/lib/policy/config/testdata/good/thresholds.yaml new file mode 100644 index 0000000..75e1788 --- /dev/null +++ b/lib/policy/config/testdata/good/thresholds.yaml @@ -0,0 +1,38 @@ +bots: + - name: simple-weight-adjust + action: WEIGH + user_agent_regex: Mozilla + weight: + adjust: 5 + +thresholds: + - name: minimal-suspicion + expression: weight < 0 + action: ALLOW + - name: mild-suspicion 
+ expression: + all: + - weight >= 0 + - weight < 10 + action: CHALLENGE + challenge: + algorithm: metarefresh + difficulty: 1 + report_as: 1 + - name: moderate-suspicion + expression: + all: + - weight >= 10 + - weight < 20 + action: CHALLENGE + challenge: + algorithm: fast + difficulty: 2 + report_as: 2 + - name: extreme-suspicion + expression: weight >= 20 + action: CHALLENGE + challenge: + algorithm: fast + difficulty: 4 + report_as: 4 diff --git a/lib/policy/config/threshold.go b/lib/policy/config/threshold.go new file mode 100644 index 0000000..d9a0ed0 --- /dev/null +++ b/lib/policy/config/threshold.go @@ -0,0 +1,80 @@ +package config + +import ( + "errors" + "fmt" + + "github.com/TecharoHQ/anubis" +) + +var ( + ErrNoThresholdRulesDefined = errors.New("config: no thresholds defined") + ErrThresholdMustHaveName = errors.New("config.Threshold: must set name") + ErrThresholdMustHaveExpression = errors.New("config.Threshold: must set expression") + ErrThresholdChallengeMustHaveChallenge = errors.New("config.Threshold: a threshold with the CHALLENGE action must have challenge set") + ErrThresholdCannotHaveWeighAction = errors.New("config.Threshold: a threshold cannot have the WEIGH action") + + DefaultThresholds = []Threshold{ + { + Name: "legacy-anubis-behaviour", + Expression: &ExpressionOrList{ + Expression: "weight > 0", + }, + Action: RuleChallenge, + Challenge: &ChallengeRules{ + Algorithm: "fast", + Difficulty: anubis.DefaultDifficulty, + ReportAs: anubis.DefaultDifficulty, + }, + }, + } +) + +type Threshold struct { + Name string `json:"name" yaml:"name"` + Expression *ExpressionOrList `json:"expression" yaml:"expression"` + Action Rule `json:"action" yaml:"action"` + Challenge *ChallengeRules `json:"challenge" yaml:"challenge"` +} + +func (t Threshold) Valid() error { + var errs []error + + if len(t.Name) == 0 { + errs = append(errs, ErrThresholdMustHaveName) + } + + if t.Expression == nil { + errs = append(errs, ErrThresholdMustHaveExpression) + } + + if 
t.Expression != nil { + if err := t.Expression.Valid(); err != nil { + errs = append(errs, err) + } + } + + if err := t.Action.Valid(); err != nil { + errs = append(errs, err) + } + + if t.Action == RuleWeigh { + errs = append(errs, ErrThresholdCannotHaveWeighAction) + } + + if t.Action == RuleChallenge && t.Challenge == nil { + errs = append(errs, ErrThresholdChallengeMustHaveChallenge) + } + + if t.Challenge != nil { + if err := t.Challenge.Valid(); err != nil { + errs = append(errs, err) + } + } + + if len(errs) != 0 { + return fmt.Errorf("config: threshold entry for %q is not valid:\n%w", t.Name, errors.Join(errs...)) + } + + return nil +} diff --git a/lib/policy/config/threshold_test.go b/lib/policy/config/threshold_test.go new file mode 100644 index 0000000..cfade61 --- /dev/null +++ b/lib/policy/config/threshold_test.go @@ -0,0 +1,92 @@ +package config + +import ( + "errors" + "fmt" + "testing" +) + +func TestThresholdValid(t *testing.T) { + for _, tt := range []struct { + name string + input *Threshold + err error + }{ + { + name: "basic allow", + input: &Threshold{ + Name: "basic-allow", + Expression: &ExpressionOrList{Expression: "true"}, + Action: RuleAllow, + }, + err: nil, + }, + { + name: "basic challenge", + input: &Threshold{ + Name: "basic-challenge", + Expression: &ExpressionOrList{Expression: "true"}, + Action: RuleChallenge, + Challenge: &ChallengeRules{ + Algorithm: "fast", + Difficulty: 1, + ReportAs: 1, + }, + }, + err: nil, + }, + { + name: "no name", + input: &Threshold{}, + err: ErrThresholdMustHaveName, + }, + { + name: "no expression", + input: &Threshold{}, + err: ErrThresholdMustHaveName, + }, + { + name: "invalid expression", + input: &Threshold{ + Expression: &ExpressionOrList{}, + }, + err: ErrExpressionEmpty, + }, + { + name: "invalid action", + input: &Threshold{}, + err: ErrUnknownAction, + }, + { + name: "challenge action but no challenge", + input: &Threshold{ + Action: RuleChallenge, + }, + err: 
ErrThresholdChallengeMustHaveChallenge, + }, + { + name: "challenge invalid", + input: &Threshold{ + Action: RuleChallenge, + Challenge: &ChallengeRules{Difficulty: 0, ReportAs: 0}, + }, + err: ErrChallengeDifficultyTooLow, + }, + } { + t.Run(tt.name, func(t *testing.T) { + if err := tt.input.Valid(); !errors.Is(err, tt.err) { + t.Errorf("threshold is invalid: %v", err) + } + }) + } +} + +func TestDefaultThresholdsValid(t *testing.T) { + for i, th := range DefaultThresholds { + t.Run(fmt.Sprintf("%d %s", i, th.Name), func(t *testing.T) { + if err := th.Valid(); err != nil { + t.Errorf("threshold invalid: %v", err) + } + }) + } +} diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 474fd9e..6f46377 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -9,20 +9,12 @@ import ( "github.com/google/cel-go/ext" ) -// NewEnvironment creates a new CEL environment, this is the set of +// BotEnvironment creates a new CEL environment, this is the set of // variables and functions that are passed into the CEL scope so that // Anubis can fail loudly and early when something is invalid instead // of blowing up at runtime. -func NewEnvironment() (*cel.Env, error) { - return cel.NewEnv( - ext.Strings( - ext.StringsLocale("en_US"), - ext.StringsValidateFormatCalls(true), - ), - - // default all timestamps to UTC - cel.DefaultUTCTimeZone(true), - +func BotEnvironment() (*cel.Env, error) { + return New( // Variables exposed to CEL programs: cel.Variable("remoteAddress", cel.StringType), cel.Variable("host", cel.StringType), @@ -31,8 +23,27 @@ func NewEnvironment() (*cel.Env, error) { cel.Variable("path", cel.StringType), cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)), cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)), + ) +} - // Functions exposed to CEL programs: +// NewThreshold creates a new CEL environment for threshold checking. 
+func ThresholdEnvironment() (*cel.Env, error) { + return New( + cel.Variable("weight", cel.IntType), + ) +} + +func New(opts ...cel.EnvOption) (*cel.Env, error) { + args := []cel.EnvOption{ + ext.Strings( + ext.StringsLocale("en_US"), + ext.StringsValidateFormatCalls(true), + ), + + // default all timestamps to UTC + cel.DefaultUTCTimeZone(true), + + // Functions exposed to all CEL programs: cel.Function("randInt", cel.Overload("randInt_int", []*cel.Type{cel.IntType}, @@ -47,12 +58,25 @@ func NewEnvironment() (*cel.Env, error) { }), ), ), - ) + } + + args = append(args, opts...) + return cel.NewEnv(args...) } // Compile takes CEL environment and syntax tree then emits an optimized // Program for execution. -func Compile(env *cel.Env, ast *cel.Ast) (cel.Program, error) { +func Compile(env *cel.Env, src string) (cel.Program, error) { + intermediate, iss := env.Compile(src) + if iss != nil { + return nil, iss.Err() + } + + ast, iss := env.Check(intermediate) + if iss != nil { + return nil, iss.Err() + } + return env.Program( ast, cel.EvalOptions( diff --git a/lib/policy/expressions/join.go b/lib/policy/expressions/join.go deleted file mode 100644 index 46f2ed5..0000000 --- a/lib/policy/expressions/join.go +++ /dev/null @@ -1,104 +0,0 @@ -package expressions - -import ( - "errors" - "fmt" - "strings" - - "github.com/google/cel-go/cel" -) - -// JoinOperator is a type wrapper for and/or operators. -// -// This is a separate type so that validation can be done at the type level. -type JoinOperator string - -// Possible values for JoinOperator -const ( - JoinAnd JoinOperator = "&&" - JoinOr JoinOperator = "||" -) - -// Valid ensures that JoinOperator is semantically valid. 
-func (jo JoinOperator) Valid() error { - switch jo { - case JoinAnd, JoinOr: - return nil - default: - return ErrWrongJoinOperator - } -} - -var ( - ErrWrongJoinOperator = errors.New("expressions: invalid join operator") - ErrNoExpressions = errors.New("expressions: cannot join zero expressions") - ErrCantCompile = errors.New("expressions: can't compile one expression") -) - -// JoinClauses joins a list of compiled clauses into one big if statement. -// -// Imagine the following two clauses: -// -// ball.color == "red" -// ball.shape == "round" -// -// JoinClauses would emit one "joined" clause such as: -// -// ( ball.color == "red" ) && ( ball.shape == "round" ) -func JoinClauses(env *cel.Env, operator JoinOperator, clauses ...*cel.Ast) (*cel.Ast, error) { - if err := operator.Valid(); err != nil { - return nil, fmt.Errorf("%w: wanted && or ||, got: %q", err, operator) - } - - switch len(clauses) { - case 0: - return nil, ErrNoExpressions - case 1: - return clauses[0], nil - } - - var exprs []string - var errs []error - - for _, clause := range clauses { - clauseStr, err := cel.AstToString(clause) - if err != nil { - errs = append(errs, err) - continue - } - - exprs = append(exprs, "( "+clauseStr+" )") - } - - if len(errs) != 0 { - return nil, fmt.Errorf("errors while decompiling statements: %w", errors.Join(errs...)) - } - - statement := strings.Join(exprs, " "+string(operator)+" ") - result, iss := env.Compile(statement) - if iss != nil { - return nil, iss.Err() - } - - return result, nil -} - -func Join(env *cel.Env, operator JoinOperator, clauses ...string) (*cel.Ast, error) { - var statements []*cel.Ast - var errs []error - - for _, clause := range clauses { - stmt, iss := env.Compile(clause) - if iss != nil && iss.Err() != nil { - errs = append(errs, fmt.Errorf("%w: %q gave: %w", ErrCantCompile, clause, iss.Err())) - continue - } - statements = append(statements, stmt) - } - - if len(errs) != 0 { - return nil, fmt.Errorf("errors while joining clauses: %w", 
errors.Join(errs...)) - } - - return JoinClauses(env, operator, statements...) -} diff --git a/lib/policy/expressions/join_test.go b/lib/policy/expressions/join_test.go deleted file mode 100644 index be393c6..0000000 --- a/lib/policy/expressions/join_test.go +++ /dev/null @@ -1,90 +0,0 @@ -package expressions - -import ( - "errors" - "testing" - - "github.com/google/cel-go/cel" -) - -func TestJoin(t *testing.T) { - env, err := NewEnvironment() - if err != nil { - t.Fatal(err) - } - - for _, tt := range []struct { - err error - name string - op JoinOperator - resultStr string - clauses []string - }{ - { - name: "no-clauses", - clauses: []string{}, - op: JoinAnd, - err: ErrNoExpressions, - }, - { - name: "one-clause-identity", - clauses: []string{`remoteAddress == "8.8.8.8"`}, - op: JoinAnd, - err: nil, - resultStr: `remoteAddress == "8.8.8.8"`, - }, - { - name: "multi-clause-and", - clauses: []string{ - `remoteAddress == "8.8.8.8"`, - `host == "anubis.techaro.lol"`, - }, - op: JoinAnd, - err: nil, - resultStr: `remoteAddress == "8.8.8.8" && host == "anubis.techaro.lol"`, - }, - { - name: "multi-clause-or", - clauses: []string{ - `remoteAddress == "8.8.8.8"`, - `host == "anubis.techaro.lol"`, - }, - op: JoinOr, - err: nil, - resultStr: `remoteAddress == "8.8.8.8" || host == "anubis.techaro.lol"`, - }, - { - name: "git-user-agent", - clauses: []string{ - `userAgent.startsWith("git/") || userAgent.contains("libgit")`, - `"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"`, - }, - op: JoinAnd, - err: nil, - resultStr: `(userAgent.startsWith("git/") || userAgent.contains("libgit")) && "Git-Protocol" in headers && -headers["Git-Protocol"] == "version=2"`, - }, - } { - t.Run(tt.name, func(t *testing.T) { - result, err := Join(env, tt.op, tt.clauses...) 
- if !errors.Is(err, tt.err) { - t.Errorf("wanted error %v but got: %v", tt.err, err) - } - - if tt.err != nil { - return - } - - program, err := cel.AstToString(result) - if err != nil { - t.Fatalf("can't decompile program: %v", err) - } - - if tt.resultStr != program { - t.Logf("wanted: %s", tt.resultStr) - t.Logf("got: %s", program) - t.Error("program did not compile as expected") - } - }) - } -} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index aed30d1..9723ef2 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "log/slog" + "sync/atomic" "github.com/TecharoHQ/anubis/internal/thoth" "github.com/TecharoHQ/anubis/lib/policy/checker" @@ -21,12 +22,14 @@ var ( }, []string{"rule", "action"}) ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid") + warnedAboutThresholds = &atomic.Bool{} ) type ParsedConfig struct { orig *config.Config Bots []Bot + Thresholds []*Threshold DNSBL bool DefaultDifficulty int StatusCodes config.StatusCodes @@ -150,6 +153,26 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic result.Bots = append(result.Bots, parsedBot) } + for _, t := range c.Thresholds { + if t.Name == "legacy-anubis-behaviour" && t.Expression.String() == "true" { + if !warnedAboutThresholds.Load() { + slog.Warn("configuration file does not contain thresholds, see docs for details on how to upgrade", "fname", fname, "docs_url", "https://anubis.techaro.lol/docs/admin/configuration/thresholds/") + warnedAboutThresholds.Store(true) + } + + t.Challenge.Difficulty = defaultDifficulty + t.Challenge.ReportAs = defaultDifficulty + } + + threshold, err := ParsedThresholdFromConfig(t) + if err != nil { + validationErrs = append(validationErrs, fmt.Errorf("can't compile threshold config for %s: %w", t.Name, err)) + continue + } + + result.Thresholds = append(result.Thresholds, threshold) + } + if len(validationErrs) > 0 { return nil, fmt.Errorf("errors 
validating policy config JSON %s: %w", fname, errors.Join(validationErrs...)) } diff --git a/lib/policy/thresholds.go b/lib/policy/thresholds.go new file mode 100644 index 0000000..1f77f63 --- /dev/null +++ b/lib/policy/thresholds.go @@ -0,0 +1,47 @@ +package policy + +import ( + "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/policy/expressions" + "github.com/google/cel-go/cel" +) + +type Threshold struct { + config.Threshold + Program cel.Program +} + +func ParsedThresholdFromConfig(t config.Threshold) (*Threshold, error) { + result := &Threshold{ + Threshold: t, + } + + env, err := expressions.ThresholdEnvironment() + if err != nil { + return nil, err + } + + program, err := expressions.Compile(env, t.Expression.String()) + if err != nil { + return nil, err + } + + result.Program = program + + return result, nil +} + +type ThresholdRequest struct { + Weight int +} + +func (tr *ThresholdRequest) Parent() cel.Activation { return nil } + +func (tr *ThresholdRequest) ResolveName(name string) (any, bool) { + switch name { + case "weight": + return tr.Weight, true + default: + return nil, false + } +} diff --git a/lib/testdata/aggressive_403.yaml b/lib/testdata/aggressive_403.yaml index facafd6..ff96651 100644 --- a/lib/testdata/aggressive_403.yaml +++ b/lib/testdata/aggressive_403.yaml @@ -1,12 +1,12 @@ bots: -- name: deny - user_agent_regex: DENY - action: DENY + - name: deny + user_agent_regex: DENY + action: DENY -- name: challenge - user_agent_regex: CHALLENGE - action: CHALLENGE + - name: challenge + user_agent_regex: CHALLENGE + action: CHALLENGE status_codes: CHALLENGE: 401 - DENY: 403 \ No newline at end of file + DENY: 403