diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 13df330..1736147 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -11,44 +11,44 @@ ## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from. bots: -# Pathological bots to deny -- # This correlates to data/bots/deny-pathological.yaml in the source tree - # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml - import: (data)/bots/_deny-pathological.yaml -- import: (data)/bots/aggressive-brazilian-scrapers.yaml + # Pathological bots to deny + - # This correlates to data/bots/deny-pathological.yaml in the source tree + # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml + import: (data)/bots/_deny-pathological.yaml + - import: (data)/bots/aggressive-brazilian-scrapers.yaml -# Enforce https://github.com/ai-robots-txt/ai.robots.txt -- import: (data)/bots/ai-robots-txt.yaml + # Enforce https://github.com/ai-robots-txt/ai.robots.txt + - import: (data)/bots/ai-robots-txt.yaml -# Search engine crawlers to allow, defaults to: -# - Google (so they don't try to bypass Anubis) -# - Bing -# - DuckDuckGo -# - Qwant -# - The Internet Archive -# - Kagi -# - Marginalia -# - Mojeek -- import: (data)/crawlers/_allow-good.yaml + # Search engine crawlers to allow, defaults to: + # - Google (so they don't try to bypass Anubis) + # - Bing + # - DuckDuckGo + # - Qwant + # - The Internet Archive + # - Kagi + # - Marginalia + # - Mojeek + - import: (data)/crawlers/_allow-good.yaml -# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt) -- import: (data)/common/keep-internet-working.yaml + # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt) + - import: (data)/common/keep-internet-working.yaml -# # Punish any bot with "bot" in the user-agent string -# # This is known to have a high false-positive rate, use at your own risk -# - name: generic-bot-catchall -# user_agent_regex: 
(?i:bot|crawler) -# action: CHALLENGE -# challenge: -# difficulty: 16 # impossible -# report_as: 4 # lie to the operator -# algorithm: slow # intentionally waste CPU cycles and time + # # Punish any bot with "bot" in the user-agent string + # # This is known to have a high false-positive rate, use at your own risk + # - name: generic-bot-catchall + # user_agent_regex: (?i:bot|crawler) + # action: CHALLENGE + # challenge: + # difficulty: 16 # impossible + # report_as: 4 # lie to the operator + # algorithm: slow # intentionally waste CPU cycles and time -# Generic catchall rule -- name: generic-browser - user_agent_regex: >- - Mozilla|Opera - action: CHALLENGE + # Generic catchall rule + - name: generic-browser + user_agent_regex: >- + Mozilla|Opera + action: CHALLENGE dnsbl: false @@ -58,4 +58,4 @@ dnsbl: false # will stop sending requests once they get it. status_codes: CHALLENGE: 200 - DENY: 200 \ No newline at end of file + DENY: 200 diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx index 4e23ab8..0786c22 100644 --- a/docs/docs/admin/configuration/expressions.mdx +++ b/docs/docs/admin/configuration/expressions.mdx @@ -143,7 +143,29 @@ Anubis would return a challenge because all of those conditions are true. ## Functions exposed to Anubis expressions -There are currently no functions from the Anubis runtime exposed to expressions. This will change in the future. +Anubis expressions can be augmented with the following functions: + +### `randInt` + +```ts +function randInt(n: int): int; +``` + +randInt returns a randomly selected integer value in the range of `[0,n)`. `n` must be greater than zero. This is a thin wrapper around [Go's math/rand/v2#IntN](https://pkg.go.dev/math/rand/v2#IntN). Be careful with this as it may cause inconsistent behavior for genuine users. 
+ +This is best applied when doing explicit block rules, eg: + +```yaml +# Denies LightPanda about 75% of the time on average +- name: deny-lightpanda-sometimes + action: DENY + expression: + all: + - userAgent.matches("LightPanda") + - randInt(16) >= 4 +``` + +It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand. ## Life advice diff --git a/lib/policy/config/testdata/good/entropy.yaml b/lib/policy/config/testdata/good/entropy.yaml new file mode 100644 index 0000000..80110c1 --- /dev/null +++ b/lib/policy/config/testdata/good/entropy.yaml @@ -0,0 +1,8 @@ +bots: + - name: total-randomness + action: ALLOW + expression: + all: + - '"Accept" in headers' + - headers["Accept"].contains("text/html") + - randInt(1) == 0 diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index f0ea4fd..474fd9e 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -1,7 +1,11 @@ package expressions import ( + "math/rand/v2" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" "github.com/google/cel-go/ext" ) @@ -29,6 +33,20 @@ func NewEnvironment() (*cel.Env, error) { cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)), // Functions exposed to CEL programs: + cel.Function("randInt", + cel.Overload("randInt_int", + []*cel.Type{cel.IntType}, + cel.IntType, + cel.UnaryBinding(func(val ref.Val) ref.Val { + n, ok := val.(types.Int) + if !ok { + return types.ValOrErr(val, "value is not an integer, but is %T", val) + } + + return types.Int(rand.IntN(int(n))) + }), + ), + ), ) }