diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml
index 0c5f947..8c6e4e7 100644
--- a/data/botPolicies.yaml
+++ b/data/botPolicies.yaml
@@ -91,3 +91,57 @@ dnsbl: false
status_codes:
CHALLENGE: 200
DENY: 200
+
+# The weight thresholds for when to trigger individual challenges. A bot rule
+# with the CHALLENGE action takes precedence over these thresholds.
+#
+# A threshold has four configuration options:
+#
+# - name: the name that is reported down the stack and used for metrics
+# - expression: A CEL expression with the request weight in the variable
+# weight
+# - action: the Anubis action to apply, similar to in a bot policy
+# - challenge: which challenge to send to the user, similar to in a bot policy
+#
+# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
+# information.
+thresholds:
+ # By default Anubis ships with the following thresholds:
+ - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
+ expression: weight < 0 # a feather weighs zero units
+ action: ALLOW # Allow the traffic through
+ # For clients that had some weight reduced through custom rules, give them a
+ # lightweight challenge.
+ - name: mild-suspicion
+ expression:
+ all:
+ - weight >= 0
+ - weight < 10
+ action: CHALLENGE
+ challenge:
+ # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
+ algorithm: metarefresh
+ difficulty: 1
+ report_as: 1
+ # For clients that are browser-like but have either gained points from custom rules or
+ # report as a standard browser.
+ - name: moderate-suspicion
+ expression:
+ all:
+ - weight >= 10
+ - weight < 20
+ action: CHALLENGE
+ challenge:
+ # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
+ algorithm: fast
+ difficulty: 2 # two leading zeros, very fast for most clients
+ report_as: 2
+ # For clients that are browser-like and have gained many points from custom rules.
+ - name: extreme-suspicion
+ expression: weight >= 20
+ action: CHALLENGE
+ challenge:
+ # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
+ algorithm: fast
+ difficulty: 4
+ report_as: 4
diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md
index e6fe5e8..9cc9612 100644
--- a/docs/docs/CHANGELOG.md
+++ b/docs/docs/CHANGELOG.md
@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))
- Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206))
- Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations
+- Add [custom weight thresholds](./admin/configuration/thresholds.mdx) via CEL ([#688](https://github.com/TecharoHQ/anubis/pull/688))
## v1.19.1: Jenomis cen Lexentale - Echo 1
diff --git a/docs/docs/admin/configuration/thresholds.mdx b/docs/docs/admin/configuration/thresholds.mdx
new file mode 100644
index 0000000..2c99ae7
--- /dev/null
+++ b/docs/docs/admin/configuration/thresholds.mdx
@@ -0,0 +1,140 @@
+# Weight Threshold Configuration
+
+Anubis offers the ability to assign "weight" to requests. This is a custom level of suspicion that rules can add to or remove from. For example, here's how you assign 10 weight points to anything that might be a browser:
+
+```yaml
+# botPolicies.yaml
+
+bots:
+ - name: generic-browser
+ user_agent_regex: >-
+ Mozilla|Opera
+ action: WEIGH
+ weight:
+ adjust: 10
+```
+
+Thresholds let you take this per-request weight value and take actions in response to it. Thresholds are defined alongside your bot configuration in `botPolicies.yaml`.
+
+:::note
+
+Thresholds DO NOT apply when a request matches a bot rule with the CHALLENGE action. Thresholds only apply when requests don't match any terminal bot rules.
+
+:::
+
+```yaml
+# botPolicies.yaml
+
+bots: ...
+
+thresholds:
+ - name: minimal-suspicion
+ expression: weight < 0
+ action: ALLOW
+
+ - name: mild-suspicion
+ expression:
+ all:
+ - weight >= 0
+ - weight < 10
+ action: CHALLENGE
+ challenge:
+ algorithm: metarefresh
+ difficulty: 1
+ report_as: 1
+
+ - name: moderate-suspicion
+ expression:
+ all:
+ - weight >= 10
+ - weight < 20
+ action: CHALLENGE
+ challenge:
+ algorithm: fast
+ difficulty: 2
+ report_as: 2
+
+ - name: extreme-suspicion
+ expression: weight >= 20
+ action: CHALLENGE
+ challenge:
+ algorithm: fast
+ difficulty: 4
+ report_as: 4
+```
+
+This defines a suite of 4 thresholds:
+
+1. If the request weight is less than zero, allow it through.
+2. If the request weight is greater than or equal to zero, but less than ten: give it [a very lightweight challenge](./challenges/metarefresh.mdx).
+3. If the request weight is greater than or equal to ten, but less than twenty: give it [a slightly heavier challenge](./challenges/proof-of-work.mdx).
+4. Otherwise, give it [the heaviest challenge](./challenges/proof-of-work.mdx).
+
+Thresholds can be configured with the following options:
+
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Description</th>
+<th>Example</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>`name`</td>
+<td>The human-readable name for this threshold.</td>
+<td>
+
+```yaml
+name: extreme-suspicion
+```
+
+</td>
+</tr>
+<tr>
+<td>`expression`</td>
+<td>A [CEL](https://cel.dev/) expression taking the request weight and returning true or false</td>
+<td>
+
+To check if the request weight is less than zero:
+
+```yaml
+expression: weight < 0
+```
+
+To check if it's at least 0 but less than 10:
+
+```yaml
+expression:
+  all:
+    - weight >= 0
+    - weight < 10
+```
+
+</td>
+</tr>
+<tr>
+<td>`action`</td>
+<td>The Anubis action to apply: `ALLOW`, `CHALLENGE`, or `DENY`</td>
+<td>
+
+```yaml
+action: ALLOW
+```
+
+If you set the CHALLENGE action, you must set challenge details:
+
+```yaml
+action: CHALLENGE
+challenge:
+  algorithm: metarefresh
+  difficulty: 1
+  report_as: 1
+```
+
+</td>
+</tr>
+</tbody>
+</table>
+
diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx
index d5acb71..85e7b95 100644
--- a/docs/docs/admin/policies.mdx
+++ b/docs/docs/admin/policies.mdx
@@ -261,17 +261,11 @@ Anubis rules can also add or remove "weight" from requests, allowing administrat
adjust: -5
```
-This would remove five weight points from the request, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx).
+This would remove five weight points from the request, which would make Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) in the default configuration.
### Weight Thresholds
-Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch, for now here's how Anubis configures the weight thresholds:
-
-| Weight Expression | Action |
-| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- |
-| `weight < 0` (weight is less than 0) | Allow the request through. |
-| `weight < 10` (weight is less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. |
-| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. |
+For more information on configuring weight thresholds, see [Weight Threshold Configuration](./configuration/thresholds.mdx).
### Advice
diff --git a/lib/anubis.go b/lib/anubis.go
index c8945fb..195c10d 100644
--- a/lib/anubis.go
+++ b/lib/anubis.go
@@ -15,6 +15,7 @@ import (
"time"
"github.com/golang-jwt/jwt/v5"
+ "github.com/google/cel-go/common/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -411,12 +412,6 @@ func cr(name string, rule config.Rule, weight int) policy.CheckResult {
}
}
-var (
- weightOkayStatic = policy.NewStaticHashChecker("weight/okay")
- weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion")
- weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion")
-)
-
// Check evaluates the list of rules, and returns the result
func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) {
host := r.Header.Get("X-Real-Ip")
@@ -448,34 +443,25 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
}
}
- switch {
- case weight <= 0:
- return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{
- Challenge: &config.ChallengeRules{
- Difficulty: s.policy.DefaultDifficulty,
- ReportAs: s.policy.DefaultDifficulty,
- Algorithm: config.DefaultAlgorithm,
- },
- Rules: weightOkayStatic,
- }, nil
- case weight > 0 && weight < 10:
- return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{
- Challenge: &config.ChallengeRules{
- Difficulty: s.policy.DefaultDifficulty,
- ReportAs: s.policy.DefaultDifficulty,
- Algorithm: "metarefresh",
- },
- Rules: weightMildSusStatic,
- }, nil
- case weight >= 10:
- return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{
- Challenge: &config.ChallengeRules{
- Difficulty: s.policy.DefaultDifficulty,
- ReportAs: s.policy.DefaultDifficulty,
- Algorithm: "fast",
- },
- Rules: weightVerySusStatic,
- }, nil
+ for _, t := range s.policy.Thresholds {
+ result, _, err := t.Program.ContextEval(r.Context(), &policy.ThresholdRequest{Weight: weight})
+ if err != nil {
+ slog.Error("error when evaluating threshold expression", "expression", t.Expression.String(), "err", err)
+ continue
+ }
+
+ var matches bool
+
+ if val, ok := result.(types.Bool); ok {
+ matches = bool(val)
+ }
+
+ if matches {
+ return cr("threshold/"+t.Name, t.Action, weight), &policy.Bot{
+ Challenge: t.Challenge,
+ Rules: &checker.List{},
+ }, nil
+ }
}
return cr("default/allow", config.RuleAllow, weight), &policy.Bot{
diff --git a/lib/policy/celchecker.go b/lib/policy/celchecker.go
index b19ea60..c2cc335 100644
--- a/lib/policy/celchecker.go
+++ b/lib/policy/celchecker.go
@@ -17,47 +17,18 @@ type CELChecker struct {
}
func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) {
- env, err := expressions.NewEnvironment()
+ env, err := expressions.BotEnvironment()
if err != nil {
return nil, err
}
- var src string
- var ast *cel.Ast
-
- if cfg.Expression != "" {
- src = cfg.Expression
- var iss *cel.Issues
- intermediate, iss := env.Compile(src)
- if iss != nil {
- return nil, iss.Err()
- }
-
- ast, iss = env.Check(intermediate)
- if iss != nil {
- return nil, iss.Err()
- }
- }
-
- if len(cfg.All) != 0 {
- ast, err = expressions.Join(env, expressions.JoinAnd, cfg.All...)
- }
-
- if len(cfg.Any) != 0 {
- ast, err = expressions.Join(env, expressions.JoinOr, cfg.Any...)
- }
-
- if err != nil {
- return nil, err
- }
-
- program, err := expressions.Compile(env, ast)
+ program, err := expressions.Compile(env, cfg.String())
if err != nil {
return nil, fmt.Errorf("can't compile CEL program: %w", err)
}
return &CELChecker{
- src: src,
+ src: cfg.String(),
program: program,
}, nil
}
diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go
index 78cbb97..1803f32 100644
--- a/lib/policy/config/config.go
+++ b/lib/policy/config/config.go
@@ -43,6 +43,15 @@ const (
RuleBenchmark Rule = "DEBUG_BENCHMARK"
)
+func (r Rule) Valid() error {
+ switch r {
+ case RuleAllow, RuleDeny, RuleChallenge, RuleWeigh, RuleBenchmark:
+ return nil
+ default:
+ return ErrUnknownAction
+ }
+}
+
const DefaultAlgorithm = "fast"
type BotConfig struct {
@@ -184,13 +193,18 @@ type ChallengeRules struct {
}
var (
- ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)")
- ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)")
+ ErrChallengeDifficultyTooLow = errors.New("config.ChallengeRules: difficulty is too low (must be >= 1)")
+ ErrChallengeDifficultyTooHigh = errors.New("config.ChallengeRules: difficulty is too high (must be <= 64)")
+ ErrChallengeMustHaveAlgorithm = errors.New("config.ChallengeRules: must have algorithm name set")
)
func (cr ChallengeRules) Valid() error {
var errs []error
+ if cr.Algorithm == "" {
+ errs = append(errs, ErrChallengeMustHaveAlgorithm)
+ }
+
if cr.Difficulty < 1 {
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
}
@@ -312,18 +326,19 @@ type fileConfig struct {
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
StatusCodes StatusCodes `json:"status_codes"`
+ Thresholds []Threshold `json:"thresholds"`
}
-func (c fileConfig) Valid() error {
+func (c *fileConfig) Valid() error {
var errs []error
if len(c.Bots) == 0 {
errs = append(errs, ErrNoBotRulesDefined)
}
- for _, b := range c.Bots {
+ for i, b := range c.Bots {
if err := b.Valid(); err != nil {
- errs = append(errs, err)
+ errs = append(errs, fmt.Errorf("bot %d: %w", i, err))
}
}
@@ -331,6 +346,16 @@ func (c fileConfig) Valid() error {
errs = append(errs, err)
}
+ if len(c.Thresholds) == 0 {
+ errs = append(errs, ErrNoThresholdRulesDefined)
+ }
+
+ for i, t := range c.Thresholds {
+ if err := t.Valid(); err != nil {
+ errs = append(errs, fmt.Errorf("threshold %d: %w", i, err))
+ }
+ }
+
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
@@ -339,11 +364,14 @@ func (c fileConfig) Valid() error {
}
func Load(fin io.Reader, fname string) (*Config, error) {
- var c fileConfig
- c.StatusCodes = StatusCodes{
- Challenge: http.StatusOK,
- Deny: http.StatusOK,
+ c := &fileConfig{
+ StatusCodes: StatusCodes{
+ Challenge: http.StatusOK,
+ Deny: http.StatusOK,
+ },
+ Thresholds: DefaultThresholds,
}
+
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
@@ -379,6 +407,15 @@ func Load(fin io.Reader, fname string) (*Config, error) {
}
}
+ for _, t := range c.Thresholds {
+ if err := t.Valid(); err != nil {
+ validationErrs = append(validationErrs, err)
+ continue
+ }
+
+ result.Thresholds = append(result.Thresholds, t)
+ }
+
if len(validationErrs) > 0 {
return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
}
@@ -388,6 +425,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {
type Config struct {
Bots []BotConfig
+ Thresholds []Threshold
DNSBL bool
StatusCodes StatusCodes
}
diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go
index 0a77e04..730b3d6 100644
--- a/lib/policy/config/config_test.go
+++ b/lib/policy/config/config_test.go
@@ -8,7 +8,6 @@ import (
"testing"
"github.com/TecharoHQ/anubis/data"
- "k8s.io/apimachinery/pkg/util/yaml"
)
func p[V any](v V) *V { return &v }
@@ -313,12 +312,8 @@ func TestConfigValidBad(t *testing.T) {
}
defer fin.Close()
- var c fileConfig
- if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
- t.Fatalf("can't decode file: %v", err)
- }
-
- if err := c.Valid(); err == nil {
+ _, err = Load(fin, filepath.Join("testdata", "bad", st.Name()))
+ if err == nil {
t.Fatal("validation should have failed but didn't somehow")
} else {
t.Log(err)
diff --git a/lib/policy/config/expressionorlist.go b/lib/policy/config/expressionorlist.go
index 68dafd2..b4e64c4 100644
--- a/lib/policy/config/expressionorlist.go
+++ b/lib/policy/config/expressionorlist.go
@@ -3,7 +3,9 @@ package config
import (
"encoding/json"
"errors"
+ "fmt"
"slices"
+ "strings"
)
var (
@@ -18,6 +20,32 @@ type ExpressionOrList struct {
Any []string `json:"any,omitempty" yaml:"any,omitempty"`
}
+func (eol ExpressionOrList) String() string {
+ switch {
+ case len(eol.Expression) != 0:
+ return eol.Expression
+ case len(eol.All) != 0:
+ var sb strings.Builder
+ for i, pred := range eol.All {
+ if i != 0 {
+ fmt.Fprintf(&sb, " && ")
+ }
+ fmt.Fprintf(&sb, "( %s )", pred)
+ }
+ return sb.String()
+ case len(eol.Any) != 0:
+ var sb strings.Builder
+ for i, pred := range eol.Any {
+ if i != 0 {
+ fmt.Fprintf(&sb, " || ")
+ }
+ fmt.Fprintf(&sb, "( %s )", pred)
+ }
+ return sb.String()
+ }
+ panic("this should not happen")
+}
+
func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
if eol.Expression != rhs.Expression {
return false
diff --git a/lib/policy/config/expressionorlist_test.go b/lib/policy/config/expressionorlist_test.go
index 8d0c843..a09baf3 100644
--- a/lib/policy/config/expressionorlist_test.go
+++ b/lib/policy/config/expressionorlist_test.go
@@ -213,3 +213,54 @@ func TestExpressionOrListUnmarshalJSON(t *testing.T) {
})
}
}
+
+func TestExpressionOrListString(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ in ExpressionOrList
+ out string
+ }{
+ {
+ name: "single expression",
+ in: ExpressionOrList{
+ Expression: "true",
+ },
+ out: "true",
+ },
+ {
+ name: "all",
+ in: ExpressionOrList{
+ All: []string{"true"},
+ },
+ out: "( true )",
+ },
+ {
+ name: "all with &&",
+ in: ExpressionOrList{
+ All: []string{"true", "true"},
+ },
+ out: "( true ) && ( true )",
+ },
+ {
+ name: "any",
+ in: ExpressionOrList{
+ Any: []string{"true"},
+ },
+ out: "( true )",
+ },
+ {
+ name: "any with ||",
+ in: ExpressionOrList{
+ Any: []string{"true", "true"},
+ },
+ out: "( true ) || ( true )",
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ result := tt.in.String()
+ if result != tt.out {
+ t.Errorf("wanted %q, got: %q", tt.out, result)
+ }
+ })
+ }
+}
diff --git a/lib/policy/config/testdata/bad/unparseable.json b/lib/policy/config/testdata/bad/unparseable.json
new file mode 100644
index 0000000..ff30235
--- /dev/null
+++ b/lib/policy/config/testdata/bad/unparseable.json
@@ -0,0 +1 @@
+}
\ No newline at end of file
diff --git a/lib/policy/config/testdata/bad/unparseable.yaml b/lib/policy/config/testdata/bad/unparseable.yaml
new file mode 100644
index 0000000..5c34318
--- /dev/null
+++ b/lib/policy/config/testdata/bad/unparseable.yaml
@@ -0,0 +1 @@
+}
diff --git a/lib/policy/config/testdata/good/no-thresholds.yaml b/lib/policy/config/testdata/good/no-thresholds.yaml
new file mode 100644
index 0000000..bf981fd
--- /dev/null
+++ b/lib/policy/config/testdata/good/no-thresholds.yaml
@@ -0,0 +1,8 @@
+bots:
+ - name: simple-weight-adjust
+ action: WEIGH
+ user_agent_regex: Mozilla
+ weight:
+ adjust: 5
+
+thresholds: []
diff --git a/lib/policy/config/testdata/good/thresholds.yaml b/lib/policy/config/testdata/good/thresholds.yaml
new file mode 100644
index 0000000..75e1788
--- /dev/null
+++ b/lib/policy/config/testdata/good/thresholds.yaml
@@ -0,0 +1,38 @@
+bots:
+ - name: simple-weight-adjust
+ action: WEIGH
+ user_agent_regex: Mozilla
+ weight:
+ adjust: 5
+
+thresholds:
+ - name: minimal-suspicion
+ expression: weight < 0
+ action: ALLOW
+ - name: mild-suspicion
+ expression:
+ all:
+ - weight >= 0
+ - weight < 10
+ action: CHALLENGE
+ challenge:
+ algorithm: metarefresh
+ difficulty: 1
+ report_as: 1
+ - name: moderate-suspicion
+ expression:
+ all:
+ - weight >= 10
+ - weight < 20
+ action: CHALLENGE
+ challenge:
+ algorithm: fast
+ difficulty: 2
+ report_as: 2
+ - name: extreme-suspicion
+ expression: weight >= 20
+ action: CHALLENGE
+ challenge:
+ algorithm: fast
+ difficulty: 4
+ report_as: 4
diff --git a/lib/policy/config/threshold.go b/lib/policy/config/threshold.go
new file mode 100644
index 0000000..d9a0ed0
--- /dev/null
+++ b/lib/policy/config/threshold.go
@@ -0,0 +1,80 @@
+package config
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/TecharoHQ/anubis"
+)
+
+var (
+ ErrNoThresholdRulesDefined = errors.New("config: no thresholds defined")
+ ErrThresholdMustHaveName = errors.New("config.Threshold: must set name")
+ ErrThresholdMustHaveExpression = errors.New("config.Threshold: must set expression")
+ ErrThresholdChallengeMustHaveChallenge = errors.New("config.Threshold: a threshold with the CHALLENGE action must have challenge set")
+ ErrThresholdCannotHaveWeighAction = errors.New("config.Threshold: a threshold cannot have the WEIGH action")
+
+ DefaultThresholds = []Threshold{
+ {
+ Name: "legacy-anubis-behaviour",
+ Expression: &ExpressionOrList{
+ Expression: "weight > 0",
+ },
+ Action: RuleChallenge,
+ Challenge: &ChallengeRules{
+ Algorithm: "fast",
+ Difficulty: anubis.DefaultDifficulty,
+ ReportAs: anubis.DefaultDifficulty,
+ },
+ },
+ }
+)
+
+type Threshold struct {
+ Name string `json:"name" yaml:"name"`
+ Expression *ExpressionOrList `json:"expression" yaml:"expression"`
+ Action Rule `json:"action" yaml:"action"`
+ Challenge *ChallengeRules `json:"challenge" yaml:"challenge"`
+}
+
+func (t Threshold) Valid() error {
+ var errs []error
+
+ if len(t.Name) == 0 {
+ errs = append(errs, ErrThresholdMustHaveName)
+ }
+
+ if t.Expression == nil {
+ errs = append(errs, ErrThresholdMustHaveExpression)
+ }
+
+ if t.Expression != nil {
+ if err := t.Expression.Valid(); err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if err := t.Action.Valid(); err != nil {
+ errs = append(errs, err)
+ }
+
+ if t.Action == RuleWeigh {
+ errs = append(errs, ErrThresholdCannotHaveWeighAction)
+ }
+
+ if t.Action == RuleChallenge && t.Challenge == nil {
+ errs = append(errs, ErrThresholdChallengeMustHaveChallenge)
+ }
+
+ if t.Challenge != nil {
+ if err := t.Challenge.Valid(); err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if len(errs) != 0 {
+ return fmt.Errorf("config: threshold entry for %q is not valid:\n%w", t.Name, errors.Join(errs...))
+ }
+
+ return nil
+}
diff --git a/lib/policy/config/threshold_test.go b/lib/policy/config/threshold_test.go
new file mode 100644
index 0000000..cfade61
--- /dev/null
+++ b/lib/policy/config/threshold_test.go
@@ -0,0 +1,92 @@
+package config
+
+import (
+ "errors"
+ "fmt"
+ "testing"
+)
+
+func TestThresholdValid(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ input *Threshold
+ err error
+ }{
+ {
+ name: "basic allow",
+ input: &Threshold{
+ Name: "basic-allow",
+ Expression: &ExpressionOrList{Expression: "true"},
+ Action: RuleAllow,
+ },
+ err: nil,
+ },
+ {
+ name: "basic challenge",
+ input: &Threshold{
+ Name: "basic-challenge",
+ Expression: &ExpressionOrList{Expression: "true"},
+ Action: RuleChallenge,
+ Challenge: &ChallengeRules{
+ Algorithm: "fast",
+ Difficulty: 1,
+ ReportAs: 1,
+ },
+ },
+ err: nil,
+ },
+ {
+ name: "no name",
+ input: &Threshold{},
+ err: ErrThresholdMustHaveName,
+ },
+ {
+ name: "no expression",
+ input: &Threshold{},
+ err: ErrThresholdMustHaveExpression,
+ },
+ {
+ name: "invalid expression",
+ input: &Threshold{
+ Expression: &ExpressionOrList{},
+ },
+ err: ErrExpressionEmpty,
+ },
+ {
+ name: "invalid action",
+ input: &Threshold{},
+ err: ErrUnknownAction,
+ },
+ {
+ name: "challenge action but no challenge",
+ input: &Threshold{
+ Action: RuleChallenge,
+ },
+ err: ErrThresholdChallengeMustHaveChallenge,
+ },
+ {
+ name: "challenge invalid",
+ input: &Threshold{
+ Action: RuleChallenge,
+ Challenge: &ChallengeRules{Difficulty: 0, ReportAs: 0},
+ },
+ err: ErrChallengeDifficultyTooLow,
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ if err := tt.input.Valid(); !errors.Is(err, tt.err) {
+ t.Errorf("wanted error %v, got: %v", tt.err, err)
+ }
+ })
+ }
+}
+
+func TestDefaultThresholdsValid(t *testing.T) {
+ for i, th := range DefaultThresholds {
+ t.Run(fmt.Sprintf("%d %s", i, th.Name), func(t *testing.T) {
+ if err := th.Valid(); err != nil {
+ t.Errorf("threshold invalid: %v", err)
+ }
+ })
+ }
+}
diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go
index 474fd9e..6f46377 100644
--- a/lib/policy/expressions/environment.go
+++ b/lib/policy/expressions/environment.go
@@ -9,20 +9,12 @@ import (
"github.com/google/cel-go/ext"
)
-// NewEnvironment creates a new CEL environment, this is the set of
+// BotEnvironment creates a new CEL environment, this is the set of
// variables and functions that are passed into the CEL scope so that
// Anubis can fail loudly and early when something is invalid instead
// of blowing up at runtime.
-func NewEnvironment() (*cel.Env, error) {
- return cel.NewEnv(
- ext.Strings(
- ext.StringsLocale("en_US"),
- ext.StringsValidateFormatCalls(true),
- ),
-
- // default all timestamps to UTC
- cel.DefaultUTCTimeZone(true),
-
+func BotEnvironment() (*cel.Env, error) {
+ return New(
// Variables exposed to CEL programs:
cel.Variable("remoteAddress", cel.StringType),
cel.Variable("host", cel.StringType),
@@ -31,8 +23,27 @@ func NewEnvironment() (*cel.Env, error) {
cel.Variable("path", cel.StringType),
cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)),
cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)),
+ )
+}
- // Functions exposed to CEL programs:
+// ThresholdEnvironment creates the CEL environment for threshold expressions, exposing only the integer variable weight.
+func ThresholdEnvironment() (*cel.Env, error) {
+ return New(
+ cel.Variable("weight", cel.IntType),
+ )
+}
+
+func New(opts ...cel.EnvOption) (*cel.Env, error) {
+ args := []cel.EnvOption{
+ ext.Strings(
+ ext.StringsLocale("en_US"),
+ ext.StringsValidateFormatCalls(true),
+ ),
+
+ // default all timestamps to UTC
+ cel.DefaultUTCTimeZone(true),
+
+ // Functions exposed to all CEL programs:
cel.Function("randInt",
cel.Overload("randInt_int",
[]*cel.Type{cel.IntType},
@@ -47,12 +58,25 @@ func NewEnvironment() (*cel.Env, error) {
}),
),
),
- )
+ }
+
+ args = append(args, opts...)
+ return cel.NewEnv(args...)
}
// Compile takes CEL environment and syntax tree then emits an optimized
// Program for execution.
-func Compile(env *cel.Env, ast *cel.Ast) (cel.Program, error) {
+func Compile(env *cel.Env, src string) (cel.Program, error) {
+ intermediate, iss := env.Compile(src)
+ if iss != nil {
+ return nil, iss.Err()
+ }
+
+ ast, iss := env.Check(intermediate)
+ if iss != nil {
+ return nil, iss.Err()
+ }
+
return env.Program(
ast,
cel.EvalOptions(
diff --git a/lib/policy/expressions/join.go b/lib/policy/expressions/join.go
deleted file mode 100644
index 46f2ed5..0000000
--- a/lib/policy/expressions/join.go
+++ /dev/null
@@ -1,104 +0,0 @@
-package expressions
-
-import (
- "errors"
- "fmt"
- "strings"
-
- "github.com/google/cel-go/cel"
-)
-
-// JoinOperator is a type wrapper for and/or operators.
-//
-// This is a separate type so that validation can be done at the type level.
-type JoinOperator string
-
-// Possible values for JoinOperator
-const (
- JoinAnd JoinOperator = "&&"
- JoinOr JoinOperator = "||"
-)
-
-// Valid ensures that JoinOperator is semantically valid.
-func (jo JoinOperator) Valid() error {
- switch jo {
- case JoinAnd, JoinOr:
- return nil
- default:
- return ErrWrongJoinOperator
- }
-}
-
-var (
- ErrWrongJoinOperator = errors.New("expressions: invalid join operator")
- ErrNoExpressions = errors.New("expressions: cannot join zero expressions")
- ErrCantCompile = errors.New("expressions: can't compile one expression")
-)
-
-// JoinClauses joins a list of compiled clauses into one big if statement.
-//
-// Imagine the following two clauses:
-//
-// ball.color == "red"
-// ball.shape == "round"
-//
-// JoinClauses would emit one "joined" clause such as:
-//
-// ( ball.color == "red" ) && ( ball.shape == "round" )
-func JoinClauses(env *cel.Env, operator JoinOperator, clauses ...*cel.Ast) (*cel.Ast, error) {
- if err := operator.Valid(); err != nil {
- return nil, fmt.Errorf("%w: wanted && or ||, got: %q", err, operator)
- }
-
- switch len(clauses) {
- case 0:
- return nil, ErrNoExpressions
- case 1:
- return clauses[0], nil
- }
-
- var exprs []string
- var errs []error
-
- for _, clause := range clauses {
- clauseStr, err := cel.AstToString(clause)
- if err != nil {
- errs = append(errs, err)
- continue
- }
-
- exprs = append(exprs, "( "+clauseStr+" )")
- }
-
- if len(errs) != 0 {
- return nil, fmt.Errorf("errors while decompiling statements: %w", errors.Join(errs...))
- }
-
- statement := strings.Join(exprs, " "+string(operator)+" ")
- result, iss := env.Compile(statement)
- if iss != nil {
- return nil, iss.Err()
- }
-
- return result, nil
-}
-
-func Join(env *cel.Env, operator JoinOperator, clauses ...string) (*cel.Ast, error) {
- var statements []*cel.Ast
- var errs []error
-
- for _, clause := range clauses {
- stmt, iss := env.Compile(clause)
- if iss != nil && iss.Err() != nil {
- errs = append(errs, fmt.Errorf("%w: %q gave: %w", ErrCantCompile, clause, iss.Err()))
- continue
- }
- statements = append(statements, stmt)
- }
-
- if len(errs) != 0 {
- return nil, fmt.Errorf("errors while joining clauses: %w", errors.Join(errs...))
- }
-
- return JoinClauses(env, operator, statements...)
-}
diff --git a/lib/policy/expressions/join_test.go b/lib/policy/expressions/join_test.go
deleted file mode 100644
index be393c6..0000000
--- a/lib/policy/expressions/join_test.go
+++ /dev/null
@@ -1,90 +0,0 @@
-package expressions
-
-import (
- "errors"
- "testing"
-
- "github.com/google/cel-go/cel"
-)
-
-func TestJoin(t *testing.T) {
- env, err := NewEnvironment()
- if err != nil {
- t.Fatal(err)
- }
-
- for _, tt := range []struct {
- err error
- name string
- op JoinOperator
- resultStr string
- clauses []string
- }{
- {
- name: "no-clauses",
- clauses: []string{},
- op: JoinAnd,
- err: ErrNoExpressions,
- },
- {
- name: "one-clause-identity",
- clauses: []string{`remoteAddress == "8.8.8.8"`},
- op: JoinAnd,
- err: nil,
- resultStr: `remoteAddress == "8.8.8.8"`,
- },
- {
- name: "multi-clause-and",
- clauses: []string{
- `remoteAddress == "8.8.8.8"`,
- `host == "anubis.techaro.lol"`,
- },
- op: JoinAnd,
- err: nil,
- resultStr: `remoteAddress == "8.8.8.8" && host == "anubis.techaro.lol"`,
- },
- {
- name: "multi-clause-or",
- clauses: []string{
- `remoteAddress == "8.8.8.8"`,
- `host == "anubis.techaro.lol"`,
- },
- op: JoinOr,
- err: nil,
- resultStr: `remoteAddress == "8.8.8.8" || host == "anubis.techaro.lol"`,
- },
- {
- name: "git-user-agent",
- clauses: []string{
- `userAgent.startsWith("git/") || userAgent.contains("libgit")`,
- `"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"`,
- },
- op: JoinAnd,
- err: nil,
- resultStr: `(userAgent.startsWith("git/") || userAgent.contains("libgit")) && "Git-Protocol" in headers &&
-headers["Git-Protocol"] == "version=2"`,
- },
- } {
- t.Run(tt.name, func(t *testing.T) {
- result, err := Join(env, tt.op, tt.clauses...)
- if !errors.Is(err, tt.err) {
- t.Errorf("wanted error %v but got: %v", tt.err, err)
- }
-
- if tt.err != nil {
- return
- }
-
- program, err := cel.AstToString(result)
- if err != nil {
- t.Fatalf("can't decompile program: %v", err)
- }
-
- if tt.resultStr != program {
- t.Logf("wanted: %s", tt.resultStr)
- t.Logf("got: %s", program)
- t.Error("program did not compile as expected")
- }
- })
- }
-}
diff --git a/lib/policy/policy.go b/lib/policy/policy.go
index aed30d1..9723ef2 100644
--- a/lib/policy/policy.go
+++ b/lib/policy/policy.go
@@ -6,6 +6,7 @@ import (
"fmt"
"io"
"log/slog"
+ "sync/atomic"
"github.com/TecharoHQ/anubis/internal/thoth"
"github.com/TecharoHQ/anubis/lib/policy/checker"
@@ -21,12 +22,14 @@ var (
}, []string{"rule", "action"})
ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid")
+ warnedAboutThresholds = &atomic.Bool{}
)
type ParsedConfig struct {
orig *config.Config
Bots []Bot
+ Thresholds []*Threshold
DNSBL bool
DefaultDifficulty int
StatusCodes config.StatusCodes
@@ -150,6 +153,26 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
result.Bots = append(result.Bots, parsedBot)
}
+ for _, t := range c.Thresholds {
+ if t.Name == "legacy-anubis-behaviour" && t.Expression.String() == "weight > 0" {
+ if !warnedAboutThresholds.Load() {
+ slog.Warn("configuration file does not contain thresholds, see docs for details on how to upgrade", "fname", fname, "docs_url", "https://anubis.techaro.lol/docs/admin/configuration/thresholds/")
+ warnedAboutThresholds.Store(true)
+ }
+
+ t.Challenge.Difficulty = defaultDifficulty
+ t.Challenge.ReportAs = defaultDifficulty
+ }
+
+ threshold, err := ParsedThresholdFromConfig(t)
+ if err != nil {
+ validationErrs = append(validationErrs, fmt.Errorf("can't compile threshold config for %s: %w", t.Name, err))
+ continue
+ }
+
+ result.Thresholds = append(result.Thresholds, threshold)
+ }
+
if len(validationErrs) > 0 {
return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, errors.Join(validationErrs...))
}
diff --git a/lib/policy/thresholds.go b/lib/policy/thresholds.go
new file mode 100644
index 0000000..1f77f63
--- /dev/null
+++ b/lib/policy/thresholds.go
@@ -0,0 +1,47 @@
+package policy
+
+import (
+ "github.com/TecharoHQ/anubis/lib/policy/config"
+ "github.com/TecharoHQ/anubis/lib/policy/expressions"
+ "github.com/google/cel-go/cel"
+)
+
+type Threshold struct {
+ config.Threshold
+ Program cel.Program
+}
+
+func ParsedThresholdFromConfig(t config.Threshold) (*Threshold, error) {
+ result := &Threshold{
+ Threshold: t,
+ }
+
+ env, err := expressions.ThresholdEnvironment()
+ if err != nil {
+ return nil, err
+ }
+
+ program, err := expressions.Compile(env, t.Expression.String())
+ if err != nil {
+ return nil, err
+ }
+
+ result.Program = program
+
+ return result, nil
+}
+
+type ThresholdRequest struct {
+ Weight int
+}
+
+func (tr *ThresholdRequest) Parent() cel.Activation { return nil }
+
+func (tr *ThresholdRequest) ResolveName(name string) (any, bool) {
+ switch name {
+ case "weight":
+ return tr.Weight, true
+ default:
+ return nil, false
+ }
+}
diff --git a/lib/testdata/aggressive_403.yaml b/lib/testdata/aggressive_403.yaml
index facafd6..ff96651 100644
--- a/lib/testdata/aggressive_403.yaml
+++ b/lib/testdata/aggressive_403.yaml
@@ -1,12 +1,12 @@
bots:
-- name: deny
- user_agent_regex: DENY
- action: DENY
+ - name: deny
+ user_agent_regex: DENY
+ action: DENY
-- name: challenge
- user_agent_regex: CHALLENGE
- action: CHALLENGE
+ - name: challenge
+ user_agent_regex: CHALLENGE
+ action: CHALLENGE
status_codes:
CHALLENGE: 401
- DENY: 403
\ No newline at end of file
+ DENY: 403