Merge branch 'main' into Xe/anubis-custom-logger

Signed-off-by: Xe Iaso <xe.iaso@techaro.lol>
This commit is contained in:
Xe Iaso 2025-07-28 10:59:16 -04:00 committed by GitHub
commit ebeead900d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 321 additions and 115 deletions

View File

@ -21,7 +21,7 @@ jobs:
persist-credentials: false
- name: Install the latest version of uv
uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3
- name: Run zizmor 🌈
run: uvx zizmor --format sarif . > results.sarif
@ -29,7 +29,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4
with:
sarif_file: results.sarif
category: zizmor

View File

@ -1,3 +1,4 @@
- import: (data)/bots/cloudflare-workers.yaml
- import: (data)/bots/headless-browsers.yaml
- import: (data)/bots/us-ai-scraper.yaml
- import: (data)/bots/custom-async-http-client.yaml

View File

@ -0,0 +1,5 @@
- name: "custom-async-http-client"
user_agent_regex: "Custom-AsyncHttpClient"
action: WEIGH
weight:
adjust: 10

View File

@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Downstream consumers can change the default [log/slog#Logger](https://pkg.go.dev/log/slog#Logger) instance that Anubis uses by setting `opts.Logger` to your slog instance of choice ([#864](https://github.com/TecharoHQ/anubis/issues/864)).
- The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package.
- [Custom-AsyncHttpClient](https://github.com/AsyncHttpClient/async-http-client)'s default User-Agent has an increased weight by default ([#852](https://github.com/TecharoHQ/anubis/issues/852)).
- The [`segments`](./admin/configuration/expressions.mdx#segments) function was added for splitting a path into its slash-separated segments.
- When issuing a challenge, Anubis stores information about that challenge into the store. That stored information is later used to validate challenge responses. This works around nondeterminism in bot rules. ([#917](https://github.com/TecharoHQ/anubis/issues/917))
## v1.21.3: Minfilia Warde - Echo 3

View File

@ -232,6 +232,39 @@ This is best applied when doing explicit block rules, eg:
It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand.
### `segments`
Available in `bot` expressions.
```ts
function segments(path: string): string[];
```
`segments` returns the number of slash-separated path segments, ignoring the leading slash. Here is what it will return with some common paths:
| Input | Output |
| :----------------------- | :--------------------- |
| `segments("/")` | `[""]` |
| `segments("/foo/bar")` | `["foo", "bar"] ` |
| `segments("/users/xe/")` | `["users", "xe", ""] ` |
:::note
If the path ends with a `/`, then the last element of the result will be an empty string. This is because `/users/xe` and `/users/xe/` are semantically different paths.
:::
This is useful if you want to write rules that allow requests that have no query parameters only if they have less than two path segments:
```yaml
- name: two-path-segments-no-query
action: ALLOW
expression:
all:
- size(query) == 0
- size(segments(path)) < 2
```
## Life advice
Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this.

View File

@ -28,6 +28,9 @@ func InitSlog(level string) {
func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger {
return base.With(
"host", r.Host,
"method", r.Method,
"path", r.URL.Path,
"user_agent", r.UserAgent(),
"accept_language", r.Header.Get("Accept-Language"),
"priority", r.Header.Get("Priority"),

View File

@ -91,41 +91,39 @@ func (s *Server) getTokenKeyfunc() jwt.Keyfunc {
}
}
func (s *Server) challengeFor(r *http.Request) (*challenge.Challenge, error) {
func (s *Server) getChallenge(r *http.Request) (*challenge.Challenge, error) {
ckies := r.CookiesNamed(anubis.TestCookieName)
if len(ckies) == 0 {
return s.issueChallenge(r.Context(), r)
return nil, store.ErrNotFound
}
j := store.JSON[challenge.Challenge]{Underlying: s.store}
ckie := ckies[0]
chall, err := j.Get(r.Context(), "challenge:"+ckie.Value)
if err != nil {
if errors.Is(err, store.ErrNotFound) {
return s.issueChallenge(r.Context(), r)
}
return nil, err
}
return &chall, nil
return &chall, err
}
func (s *Server) issueChallenge(ctx context.Context, r *http.Request) (*challenge.Challenge, error) {
func (s *Server) issueChallenge(ctx context.Context, r *http.Request, lg *slog.Logger, cr policy.CheckResult, rule *policy.Bot) (*challenge.Challenge, error) {
if cr.Rule != config.RuleChallenge {
slog.Error("this should be impossible, asked to issue a challenge but the rule is not a challenge rule", "cr", cr, "rule", rule)
//return nil, errors.New("[unexpected] this codepath should be impossible, asked to issue a challenge for a non-challenge rule")
}
id, err := uuid.NewV7()
if err != nil {
return nil, err
}
var randomData = make([]byte, 256)
var randomData = make([]byte, 64)
if _, err := rand.Read(randomData); err != nil {
return nil, err
}
chall := challenge.Challenge{
ID: id.String(),
Method: rule.Challenge.Algorithm,
RandomData: fmt.Sprintf("%x", randomData),
IssuedAt: time.Now(),
Metadata: map[string]string{
@ -139,6 +137,8 @@ func (s *Server) issueChallenge(ctx context.Context, r *http.Request) (*challeng
return nil, err
}
lg.Info("new challenge issued", "challenge", id.String())
return &chall, err
}
@ -186,21 +186,21 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
if err != nil {
lg.Debug("cookie not found", "path", r.URL.Path)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
if err := ckie.Valid(); err != nil {
lg.Debug("cookie is invalid", "err", err)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
if time.Now().After(ckie.Expires) && !ckie.Expires.IsZero() {
lg.Debug("cookie expired", "path", r.URL.Path)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
@ -209,7 +209,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
if err != nil || !token.Valid {
lg.Debug("invalid token", "path", r.URL.Path, "err", err)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
@ -217,7 +217,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
if !ok {
lg.Debug("invalid token claims type", "path", r.URL.Path)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
@ -225,14 +225,14 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
if !ok {
lg.Debug("policyRule claim is not a string")
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
if policyRule != rule.Hash() {
lg.Debug("user originally passed with a different rule, issuing new challenge", "old", policyRule, "new", rule.Name)
s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host})
s.RenderIndex(w, r, rule, httpStatusOnly)
s.RenderIndex(w, r, cr, rule, httpStatusOnly)
return
}
@ -347,7 +347,7 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) {
}
lg = lg.With("check_result", cr)
chall, err := s.challengeFor(r)
chall, err := s.issueChallenge(r.Context(), r, lg, cr, rule)
if err != nil {
lg.Error("failed to fetch or issue challenge", "err", err)
w.WriteHeader(http.StatusInternalServerError)
@ -437,19 +437,21 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
}
lg = lg.With("check_result", cr)
impl, ok := challenge.Get(rule.Challenge.Algorithm)
chall, err := s.getChallenge(r)
if err != nil {
lg.Error("getChallenge failed", "err", err)
s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm))
return
}
impl, ok := challenge.Get(chall.Method)
if !ok {
lg.Error("check failed", "err", err)
s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm))
return
}
chall, err := s.challengeFor(r)
if err != nil {
lg.Error("check failed", "err", err)
s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm))
return
}
lg = lg.With("challenge", chall.ID)
in := &challenge.ValidateInput{
Challenge: chall,
@ -467,9 +469,13 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
case errors.As(err, &cerr):
switch {
case errors.Is(err, challenge.ErrFailed):
lg.Error("challenge failed", "err", err)
s.respondWithStatus(w, r, cerr.PublicReason, cerr.StatusCode)
return
case errors.Is(err, challenge.ErrInvalidFormat), errors.Is(err, challenge.ErrMissingField):
lg.Error("invalid challenge format", "err", err)
s.respondWithError(w, r, cerr.PublicReason)
return
}
}
}

View File

@ -5,6 +5,7 @@ import "time"
// Challenge is the metadata about a single challenge issuance.
type Challenge struct {
ID string `json:"id"` // UUID identifying the challenge
Method string `json:"method"` // Challenge method
RandomData string `json:"randomData"` // The random data the client processes
IssuedAt time.Time `json:"issuedAt"` // When the challenge was issued
Metadata map[string]string `json:"metadata"` // Challenge metadata such as IP address and user agent

View File

@ -111,7 +111,7 @@ func randomChance(n int) bool {
return rand.Intn(n) == 0
}
func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *policy.Bot, returnHTTPStatusOnly bool) {
func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, rule *policy.Bot, returnHTTPStatusOnly bool) {
localizer := localization.GetLocalizer(r)
if returnHTTPStatusOnly {
@ -129,14 +129,16 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
}
challengesIssued.WithLabelValues("embedded").Add(1)
chall, err := s.challengeFor(r)
chall, err := s.issueChallenge(r.Context(), r, lg, cr, rule)
if err != nil {
lg.Error("can't get challenge", "err", "err")
lg.Error("can't get challenge", "err", err)
s.ClearCookie(w, CookieOpts{Name: anubis.TestCookieName, Host: r.Host})
s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm))
return
}
lg = lg.With("challenge", chall.ID)
var ogTags map[string]string = nil
if s.opts.OpenGraph.Enabled {
var err error
@ -154,7 +156,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
Expiry: 30 * time.Minute,
})
impl, ok := challenge.Get(rule.Challenge.Algorithm)
impl, ok := challenge.Get(chall.Method)
if !ok {
lg.Error("check failed", "err", "can't get algorithm", "algorithm", rule.Challenge.Algorithm)
s.ClearCookie(w, CookieOpts{Name: anubis.TestCookieName, Host: r.Host})

View File

@ -2,6 +2,7 @@ package expressions
import (
"math/rand/v2"
"strings"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
@ -54,6 +55,28 @@ func BotEnvironment() (*cel.Env, error) {
}),
),
),
cel.Function("segments",
cel.Overload("segments_string_list_string",
[]*cel.Type{cel.StringType},
cel.ListType(cel.StringType),
cel.UnaryBinding(func(path ref.Val) ref.Val {
pathStrType, ok := path.(types.String)
if !ok {
return types.ValOrErr(path, "path is not a string, but is %T", path)
}
pathStr := string(pathStrType)
if !strings.HasPrefix(pathStr, "/") {
return types.ValOrErr(path, "path does not start with /")
}
pathList := strings.Split(string(pathStr), "/")[1:]
return types.NewStringList(types.DefaultTypeAdapter, pathList)
}),
),
),
)
}

View File

@ -12,99 +12,228 @@ func TestBotEnvironment(t *testing.T) {
t.Fatalf("failed to create bot environment: %v", err)
}
tests := []struct {
name string
expression string
headers map[string]string
expected types.Bool
description string
}{
{
name: "missing-header",
expression: `missingHeader(headers, "Missing-Header")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
t.Run("missingHeader", func(t *testing.T) {
tests := []struct {
name string
expression string
headers map[string]string
expected types.Bool
description string
}{
{
name: "missing-header",
expression: `missingHeader(headers, "Missing-Header")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
},
expected: types.Bool(true),
description: "should return true when header is missing",
},
expected: types.Bool(true),
description: "should return true when header is missing",
},
{
name: "existing-header",
expression: `missingHeader(headers, "User-Agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
{
name: "existing-header",
expression: `missingHeader(headers, "User-Agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
},
expected: types.Bool(false),
description: "should return false when header exists",
},
expected: types.Bool(false),
description: "should return false when header exists",
},
{
name: "case-sensitive",
expression: `missingHeader(headers, "user-agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
{
name: "case-sensitive",
expression: `missingHeader(headers, "user-agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
},
expected: types.Bool(true),
description: "should be case-sensitive (user-agent != User-Agent)",
},
expected: types.Bool(true),
description: "should be case-sensitive (user-agent != User-Agent)",
},
{
name: "empty-headers",
expression: `missingHeader(headers, "Any-Header")`,
headers: map[string]string{},
expected: types.Bool(true),
description: "should return true for any header when map is empty",
},
{
name: "real-world-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "curl/7.68.0",
"Accept": "*/*",
"Host": "example.com",
{
name: "empty-headers",
expression: `missingHeader(headers, "Any-Header")`,
headers: map[string]string{},
expected: types.Bool(true),
description: "should return true for any header when map is empty",
},
expected: types.Bool(true),
description: "should detect missing browser-specific headers from bots",
},
{
name: "browser-with-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
{
name: "real-world-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "curl/7.68.0",
"Accept": "*/*",
"Host": "example.com",
},
expected: types.Bool(true),
description: "should detect missing browser-specific headers from bots",
},
expected: types.Bool(false),
description: "should return false when browser sends Sec-Ch-Ua header",
},
}
{
name: "browser-with-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
},
expected: types.Bool(false),
description: "should return false when browser sends Sec-Ch-Ua header",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
result, _, err := prog.Eval(map[string]interface{}{
"headers": tt.headers,
result, _, err := prog.Eval(map[string]interface{}{
"headers": tt.headers,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
t.Run("function-compilation", func(t *testing.T) {
src := `missingHeader(headers, "Test-Header")`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
}
})
}
})
t.Run("function-compilation", func(t *testing.T) {
src := `missingHeader(headers, "Test-Header")`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
t.Run("segments", func(t *testing.T) {
for _, tt := range []struct {
name string
description string
expression string
path string
expected types.Bool
}{
{
name: "simple",
description: "/ should have one path segment",
expression: `size(segments(path)) == 1`,
path: "/",
expected: types.Bool(true),
},
{
name: "two segments without trailing slash",
description: "/user/foo should have two segments",
expression: `size(segments(path)) == 2`,
path: "/user/foo",
expected: types.Bool(true),
},
{
name: "at least two segments",
description: "/foo/bar/ should have at least two path segments",
expression: `size(segments(path)) >= 2`,
path: "/foo/bar/",
expected: types.Bool(true),
},
{
name: "at most two segments",
description: "/foo/bar/ does not have less than two path segments",
expression: `size(segments(path)) < 2`,
path: "/foo/bar/",
expected: types.Bool(false),
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
result, _, err := prog.Eval(map[string]interface{}{
"path": tt.path,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
})
}
t.Run("invalid", func(t *testing.T) {
for _, tt := range []struct {
name string
description string
expression string
env any
wantFailCompile bool
wantFailEval bool
}{
{
name: "segments of headers",
description: "headers are not a path list",
expression: `segments(headers)`,
env: map[string]any{
"headers": map[string]string{
"foo": "bar",
},
},
wantFailCompile: true,
},
{
name: "invalid path type",
description: "a path should be a sting",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": 4,
},
wantFailEval: true,
},
{
name: "invalid path",
description: "a path should start with a leading slash",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": "foo",
},
wantFailEval: true,
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
if !tt.wantFailCompile {
t.Log(tt.description)
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
} else {
return
}
}
_, _, err = prog.Eval(tt.env)
if err == nil {
t.Log(tt.description)
t.Fatal("wanted an error but got none")
}
t.Log(err)
})
}
})
t.Run("function-compilation", func(t *testing.T) {
src := `size(segments(path)) <= 2`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
}
})
})
}