diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go index add0fec..938f0c8 100644 --- a/cmd/anubis/main.go +++ b/cmd/anubis/main.go @@ -65,6 +65,7 @@ var ( ogCacheConsiderHost = flag.Bool("og-cache-consider-host", false, "enable or disable the use of the host in the Open Graph tag cache") extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder") webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals") + valkeyURL = flag.String("valkey-url", "", "Valkey URL for Anubis' state layer") ) func keyFromHex(value string) (ed25519.PrivateKey, error) { diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 13df330..c0a54fe 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -48,7 +48,16 @@ bots: - name: generic-browser user_agent_regex: >- Mozilla|Opera - action: CHALLENGE + action: WEIGH + weight: + adjust: 5 + +- name: high-pass-rate + pass_rate: + rate: 0.8 + action: WEIGH + weight: + adjust: -15 dnsbl: false diff --git a/data/bots/aggressive-brazilian-scrapers.yaml b/data/bots/aggressive-brazilian-scrapers.yaml index 140811a..60b38a7 100644 --- a/data/bots/aggressive-brazilian-scrapers.yaml +++ b/data/bots/aggressive-brazilian-scrapers.yaml @@ -1,5 +1,5 @@ -- name: deny-aggressive-brazilian-scrapers - action: DENY +- name: aggressive-brazilian-scrapers + action: WEIGH expression: any: # Internet Explorer should be out of support @@ -18,11 +18,9 @@ - userAgent.contains("Win 9x") # Amazon does not have an Alexa Toolbar. 
# Headless browser user agents. Each match adjusts the request's weight by 5
# instead of denying it outright.
- name: lightpanda
  user_agent_regex: ^LightPanda/.*$
  action: WEIGH
  weight:
    adjust: 5
- name: headless-chrome
  user_agent_regex: HeadlessChrome
  action: WEIGH
  weight:
    adjust: 5
- name: headless-chromium
  user_agent_regex: HeadlessChromium
  # The action must be stated explicitly; a rule carrying only a weight has no
  # action for the policy engine to apply.
  action: WEIGH
  weight:
    adjust: 5
github.com/redis/go-redis/v9 v9.8.0 github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a github.com/yl2chen/cidranger v1.0.2 golang.org/x/net v0.40.0 @@ -41,6 +42,7 @@ require ( github.com/cloudflare/circl v1.6.0 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/deckarep/golang-set/v2 v2.7.0 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dlclark/regexp2 v1.11.4 // indirect github.com/dop251/goja v0.0.0-20250309171923-bcd7cc6bf64c // indirect github.com/emirpasic/gods v1.18.1 // indirect diff --git a/go.sum b/go.sum index 56e204b..cc295fa 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,10 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb h1:m935MPodAbYS46DG4pJSv7WO+VECIWUQ7OJYSoTrMh4= github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb/go.mod h1:PkYb9DJNAwrSvRx5DYA+gUcOIgTGVMNkfSCbZM8cWpI= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/caarlos0/testfs v0.4.4 h1:3PHvzHi5Lt+g332CiShwS8ogTgS3HjrmzZxCm6JCDr8= github.com/caarlos0/testfs v0.4.4/go.mod h1:bRN55zgG4XCUVVHZCeU+/Tz1Q6AxEJOEJTliBy+1DMk= github.com/cavaliergopher/cpio v1.0.1 h1:KQFSeKmZhv0cr+kawA3a0xTQCU4QxXF1vhU7P7av2KM= @@ -73,6 +77,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deckarep/golang-set/v2 v2.7.0 h1:gIloKvD7yH2oip4VLhsv3JyLLFnC0Y2mlusgcvJYW5k= 
github.com/deckarep/golang-set/v2 v2.7.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dop251/goja v0.0.0-20250309171923-bcd7cc6bf64c h1:mxWGS0YyquJ/ikZOjSrRjjFIbUqIP9ojyYQ+QZTU3Rg= @@ -224,6 +230,8 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/v9 v9.8.0 h1:q3nRvjrlge/6UD7eTu/DSg2uYiU2mCL0G/uzBWqhicI= +github.com/redis/go-redis/v9 v9.8.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= diff --git a/internal/store/valkey/valkey.go b/internal/store/valkey/valkey.go new file mode 100644 index 0000000..ac95118 --- /dev/null +++ b/internal/store/valkey/valkey.go @@ -0,0 +1,86 @@ +package valkey + +import ( + "context" + "errors" + "fmt" + "strconv" + "strings" + + valkey "github.com/redis/go-redis/v9" +) + +type Store struct { + rdb *valkey.Client +} + +func New(rdb *valkey.Client) *Store { + return &Store{rdb: rdb} +} + +func (s *Store) Increment(ctx context.Context, segments []string) error { + key := fmt.Sprintf("anubis:%s", 
strings.Join(segments, ":")) + if err := s.rdb.Incr(ctx, key).Err(); err != nil { + return err + } + + return nil +} + +func (s *Store) GetInt(ctx context.Context, segments []string) (int, error) { + key := fmt.Sprintf("anubis:%s", strings.Join(segments, ":")) + numStr, err := s.rdb.Get(ctx, key).Result() + if err != nil { + return 0, err + } + + num, err := strconv.Atoi(numStr) + if err != nil { + return 0, err + } + + return num, nil +} + +func (s *Store) MultiGetInt(ctx context.Context, segments [][]string) ([]int, error) { + var keys []string + for _, segment := range segments { + key := fmt.Sprintf("anubis:%s", strings.Join(segment, ":")) + keys = append(keys, key) + } + + values, err := s.rdb.MGet(ctx, keys...).Result() + if err != nil { + return nil, err + } + + var errs []error + + result := make([]int, len(values)) + for i, val := range values { + if val == nil { + result[i] = 0 + errs = append(errs, fmt.Errorf("can't get key %s: value is null", keys[i])) + continue + } + + switch v := val.(type) { + case string: + num, err := strconv.Atoi(v) + if err != nil { + errs = append(errs, fmt.Errorf("can't parse key %s: %w", keys[i], err)) + continue + } + + result[i] = num + default: + errs = append(errs, fmt.Errorf("can't parse key %s: wanted type string but got type %T", keys[i], val)) + } + } + + if len(errs) != 0 { + return nil, fmt.Errorf("can't read from valkey: %w", errors.Join(errs...)) + } + + return result, nil +} diff --git a/lib/anubis.go b/lib/anubis.go index 4ca584e..02bcabd 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -26,6 +26,7 @@ import ( "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/internal/dnsbl" "github.com/TecharoHQ/anubis/internal/ogtags" + "github.com/TecharoHQ/anubis/internal/store/valkey" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/config" ) @@ -68,6 +69,7 @@ type Server struct { pub ed25519.PublicKey opts Options cookieName string + store *valkey.Store } func (s *Server) 
challengeFor(r *http.Request, difficulty int) string { @@ -233,6 +235,10 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) { lg = lg.With("check_result", cr) challenge := s.challengeFor(r, rule.Challenge.Difficulty) + if s.store != nil { + s.store.Increment(r.Context(), []string{"pass_rate", "User-Agent", r.UserAgent(), "challenges_issued"}) + } + err = encoder.Encode(struct { Rules *config.ChallengeRules `json:"rules"` Challenge string `json:"challenge"` @@ -325,6 +331,9 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { s.ClearCookie(w) lg.Debug("hash does not match", "got", response, "want", calculated) s.respondWithStatus(w, r, "invalid response", http.StatusForbidden) + if s.store != nil { + s.store.Increment(r.Context(), []string{"pass_rate", "User-Agent", r.UserAgent(), "fail"}) + } failedValidations.Inc() return } @@ -334,6 +343,9 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { s.ClearCookie(w) lg.Debug("difficulty check failed", "response", response, "difficulty", rule.Challenge.Difficulty) s.respondWithStatus(w, r, "invalid response", http.StatusForbidden) + if s.store != nil { + s.store.Increment(r.Context(), []string{"pass_rate", "User-Agent", r.UserAgent(), "fail"}) + } failedValidations.Inc() return } @@ -370,6 +382,10 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { Path: cookiePath, }) + if s.store != nil { + s.store.Increment(r.Context(), []string{"pass_rate", "User-Agent", r.UserAgent(), "pass"}) + } + challengesValidated.Inc() lg.Debug("challenge passed, redirecting to app") http.Redirect(w, r, redir, http.StatusFound) @@ -399,6 +415,8 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) return decaymap.Zilch[policy.CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host) } + weight := 0 + for _, b := range s.policy.Bots { match, err := b.Rules.Check(r) if err != nil { @@ -406,10 +424,27 
@@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) } if match { - return cr("bot/"+b.Name, b.Action), &b, nil + switch b.Action { + case config.RuleDeny, config.RuleAllow, config.RuleBenchmark: + return cr("bot/"+b.Name, b.Action), &b, nil + case config.RuleChallenge: + weight += 5 + case config.RuleWeigh: + weight += b.Weight.Adjust + } } } + if weight < 0 { + return cr("weight/okay", config.RuleAllow), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: config.AlgorithmFast, + }, + }, nil + } + return cr("default/allow", config.RuleAllow), &policy.Bot{ Challenge: &config.ChallengeRules{ Difficulty: s.policy.DefaultDifficulty, diff --git a/lib/http.go b/lib/http.go index 27c1dad..7a6cb02 100644 --- a/lib/http.go +++ b/lib/http.go @@ -65,6 +65,10 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic return } + if s.store != nil { + s.store.Increment(r.Context(), []string{"pass_rate", "User-Agent", r.UserAgent(), "challenges_issued"}) + } + handler := internal.NoStoreCache(templ.Handler( component, templ.WithStatus(s.opts.Policy.StatusCodes.Challenge), diff --git a/lib/policy/bot.go b/lib/policy/bot.go index 3e7a63a..6107524 100644 --- a/lib/policy/bot.go +++ b/lib/policy/bot.go @@ -12,6 +12,7 @@ type Bot struct { Challenge *config.ChallengeRules Name string Action config.Rule + Weight *config.Weight } func (b Bot) Hash() string { diff --git a/lib/policy/checkresult.go b/lib/policy/checkresult.go index c84f326..31737dd 100644 --- a/lib/policy/checkresult.go +++ b/lib/policy/checkresult.go @@ -7,12 +7,15 @@ import ( ) type CheckResult struct { - Name string - Rule config.Rule + Name string + Rule config.Rule + Weight int } func (cr CheckResult) LogValue() slog.Value { return slog.GroupValue( slog.String("name", cr.Name), - slog.String("rule", string(cr.Rule))) + slog.String("rule", string(cr.Rule)), + 
slog.Int("weight", cr.Weight), + ) } diff --git a/lib/policy/passratechecker.go b/lib/policy/passratechecker.go new file mode 100644 index 0000000..f9c97b3 --- /dev/null +++ b/lib/policy/passratechecker.go @@ -0,0 +1,47 @@ +package policy + +import ( + "fmt" + "net/http" + + "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/store/valkey" +) + +type PassRateChecker struct { + store *valkey.Store + header string + rate float64 +} + +func NewPassRateChecker(store *valkey.Store, rate float64) Checker { + return &PassRateChecker{ + store: store, + rate: rate, + header: "User-Agent", + } +} + +func (prc *PassRateChecker) Hash() string { + return internal.SHA256sum(fmt.Sprintf("pass rate checker::%s", prc.header)) +} + +func (prc *PassRateChecker) Check(r *http.Request) (bool, error) { + data, err := prc.store.MultiGetInt(r.Context(), [][]string{ + {"pass_rate", prc.header, r.Header.Get(prc.header), "pass"}, + {"pass_rate", prc.header, r.Header.Get(prc.header), "challenges_issued"}, + {"pass_rate", prc.header, r.Header.Get(prc.header), "fail"}, + }) + if err != nil { + return false, err + } + + passCount, challengeCount, failCount := data[0], data[1], data[2] + passRate := float64(passCount-failCount) / float64(challengeCount) + + if passRate >= prc.rate { + return true, nil + } + + return false, nil +} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 1dfeafb..4675849 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -116,6 +116,10 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon } } + if b.Weight != nil { + parsedBot.Weight = b.Weight + } + parsedBot.Rules = cl result.Bots = append(result.Bots, parsedBot)