mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 17:59:24 -04:00
feat(expressions): add randInt function to allow making rules nondeterministic (#578)
This seems counter-intuitive at first glance, but let me cook. One of the problems with Anubis is that the rule matching is super deterministic. This means that attackers can figure out what patterns they are hitting and change things to bypass them. The randInt function lets you have rulesets behave nondeterministically. This is a very easy way to hang yourself, but can be great to psychologically mess with scraper operators. Consider this rule: ```yaml - name: deny-lightpanda-sometimes action: DENY expression: all: - userAgent.matches("LightPanda") - randInt(16) >= 4 ``` It would match about 75% of the time. Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
669671bd46
commit
22c47f40d1
@ -11,44 +11,44 @@
|
||||
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
||||
|
||||
bots:
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
|
||||
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
|
||||
- import: (data)/bots/ai-robots-txt.yaml
|
||||
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
|
||||
- import: (data)/bots/ai-robots-txt.yaml
|
||||
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
# - The Internet Archive
|
||||
# - Kagi
|
||||
# - Marginalia
|
||||
# - Mojeek
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
# - The Internet Archive
|
||||
# - Kagi
|
||||
# - Marginalia
|
||||
# - Mojeek
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
|
||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
|
||||
# # Punish any bot with "bot" in the user-agent string
|
||||
# # This is known to have a high false-positive rate, use at your own risk
|
||||
# - name: generic-bot-catchall
|
||||
# user_agent_regex: (?i:bot|crawler)
|
||||
# action: CHALLENGE
|
||||
# challenge:
|
||||
# difficulty: 16 # impossible
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
# # Punish any bot with "bot" in the user-agent string
|
||||
# # This is known to have a high false-positive rate, use at your own risk
|
||||
# - name: generic-bot-catchall
|
||||
# user_agent_regex: (?i:bot|crawler)
|
||||
# action: CHALLENGE
|
||||
# challenge:
|
||||
# difficulty: 16 # impossible
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: CHALLENGE
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: CHALLENGE
|
||||
|
||||
dnsbl: false
|
||||
|
||||
@ -58,4 +58,4 @@ dnsbl: false
|
||||
# will stop sending requests once they get it.
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
DENY: 200
|
||||
|
@ -143,7 +143,29 @@ Anubis would return a challenge because all of those conditions are true.
|
||||
|
||||
## Functions exposed to Anubis expressions
|
||||
|
||||
There are currently no functions from the Anubis runtime exposed to expressions. This will change in the future.
|
||||
Anubis expressions can be augmented with the following functions:
|
||||
|
||||
### `randInt`
|
||||
|
||||
```ts
|
||||
function randInt(n: int): int;
|
||||
```
|
||||
|
||||
randInt returns a randomly selected integer value in the range of `[0,n)`. This is a thin wrapper around [Go's math/rand#Intn](https://pkg.go.dev/math/rand#Intn). Be careful with this as it may cause inconsistent behavior for genuine users.
|
||||
|
||||
This is best applied when doing explicit block rules, eg:
|
||||
|
||||
```yaml
|
||||
# Denies LightPanda about 75% of the time on average
|
||||
- name: deny-lightpanda-sometimes
|
||||
action: DENY
|
||||
expression:
|
||||
all:
|
||||
- userAgent.matches("LightPanda")
|
||||
- randInt(16) >= 4
|
||||
```
|
||||
|
||||
It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand.
|
||||
|
||||
## Life advice
|
||||
|
||||
|
8
lib/policy/config/testdata/good/entropy.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/entropy.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
bots:
|
||||
- name: total-randomness
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- '"Accept" in headers'
|
||||
- headers["Accept"].contains("text/html")
|
||||
- randInt(1) == 0
|
@ -1,7 +1,11 @@
|
||||
package expressions
|
||||
|
||||
import (
|
||||
"math/rand/v2"
|
||||
|
||||
"github.com/google/cel-go/cel"
|
||||
"github.com/google/cel-go/common/types"
|
||||
"github.com/google/cel-go/common/types/ref"
|
||||
"github.com/google/cel-go/ext"
|
||||
)
|
||||
|
||||
@ -29,6 +33,20 @@ func NewEnvironment() (*cel.Env, error) {
|
||||
cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)),
|
||||
|
||||
// Functions exposed to CEL programs:
|
||||
cel.Function("randInt",
|
||||
cel.Overload("randInt_int",
|
||||
[]*cel.Type{cel.IntType},
|
||||
cel.IntType,
|
||||
cel.UnaryBinding(func(val ref.Val) ref.Val {
|
||||
n, ok := val.(types.Int)
|
||||
if !ok {
|
||||
return types.ValOrErr(val, "value is not an integer, but is %T", val)
|
||||
}
|
||||
|
||||
return types.Int(rand.IntN(int(n)))
|
||||
}),
|
||||
),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user