Merge branch 'main' into Xe/default-rules/customasynchttpclient

Signed-off-by: Xe Iaso <xe.iaso@techaro.lol>
This commit is contained in:
Xe Iaso 2025-07-26 20:36:43 -04:00 committed by GitHub
commit cf44607f8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 266 additions and 80 deletions

View File

@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package. - The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package.
- [Custom-AsyncHttpClient](https://github.com/AsyncHttpClient/async-http-client)'s default User-Agent has an increased weight by default ([#852](https://github.com/TecharoHQ/anubis/issues/852)). - [Custom-AsyncHttpClient](https://github.com/AsyncHttpClient/async-http-client)'s default User-Agent has an increased weight by default ([#852](https://github.com/TecharoHQ/anubis/issues/852)).
- The [`segments`](./admin/configuration/expressions.mdx#segments) function was added for splitting a path into its slash-separated segments.
## v1.21.3: Minfilia Warde - Echo 3 ## v1.21.3: Minfilia Warde - Echo 3

View File

@ -232,6 +232,39 @@ This is best applied when doing explicit block rules, eg:
It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand. It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand.
### `segments`
Available in `bot` expressions.
```ts
function segments(path: string): string[];
```
`segments` splits a path into its slash-separated segments and returns them as a list of strings, ignoring the leading slash. Here is what it will return with some common paths:
| Input | Output |
| :----------------------- | :--------------------- |
| `segments("/")` | `[""]` |
| `segments("/foo/bar")` | `["foo", "bar"]` |
| `segments("/users/xe/")` | `["users", "xe", ""]` |
:::note
If the path ends with a `/`, then the last element of the result will be an empty string. This is because `/users/xe` and `/users/xe/` are semantically different paths.
:::
This is useful if you want to write a rule that allows requests with no query parameters only when they have fewer than two path segments:
```yaml
- name: two-path-segments-no-query
  action: ALLOW
  expression:
    all:
      - size(query) == 0
      - size(segments(path)) < 2
```
## Life advice ## Life advice
Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this. Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this.

View File

@ -2,6 +2,7 @@ package expressions
import ( import (
"math/rand/v2" "math/rand/v2"
"strings"
"github.com/google/cel-go/cel" "github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types"
@ -54,6 +55,28 @@ func BotEnvironment() (*cel.Env, error) {
}), }),
), ),
), ),
// segments(path) splits a request path into its slash-separated parts,
// dropping the leading slash, e.g. "/foo/bar" -> ["foo", "bar"].
cel.Function("segments",
	cel.Overload("segments_string_list_string",
		[]*cel.Type{cel.StringType},
		cel.ListType(cel.StringType),
		cel.UnaryBinding(func(path ref.Val) ref.Val {
			// The overload is declared for strings, but a dynamically typed
			// input can still carry another value at evaluation time.
			raw, isString := path.(types.String)
			if !isString {
				return types.ValOrErr(path, "path is not a string, but is %T", path)
			}

			// Only paths with a leading slash are accepted; CutPrefix both
			// checks for the slash and removes it in one step.
			trimmed, hasSlash := strings.CutPrefix(string(raw), "/")
			if !hasSlash {
				return types.ValOrErr(path, "path does not start with /")
			}

			// "/" yields [""], and a trailing slash yields a trailing ""
			// element, because Split keeps empty fields.
			return types.NewStringList(types.DefaultTypeAdapter, strings.Split(trimmed, "/"))
		}),
	),
),
) )
} }

View File

@ -12,99 +12,228 @@ func TestBotEnvironment(t *testing.T) {
t.Fatalf("failed to create bot environment: %v", err) t.Fatalf("failed to create bot environment: %v", err)
} }
tests := []struct { t.Run("missingHeader", func(t *testing.T) {
name string tests := []struct {
expression string name string
headers map[string]string expression string
expected types.Bool headers map[string]string
description string expected types.Bool
}{ description string
{ }{
name: "missing-header", {
expression: `missingHeader(headers, "Missing-Header")`, name: "missing-header",
headers: map[string]string{ expression: `missingHeader(headers, "Missing-Header")`,
"User-Agent": "test-agent", headers: map[string]string{
"Content-Type": "application/json", "User-Agent": "test-agent",
"Content-Type": "application/json",
},
expected: types.Bool(true),
description: "should return true when header is missing",
}, },
expected: types.Bool(true), {
description: "should return true when header is missing", name: "existing-header",
}, expression: `missingHeader(headers, "User-Agent")`,
{ headers: map[string]string{
name: "existing-header", "User-Agent": "test-agent",
expression: `missingHeader(headers, "User-Agent")`, "Content-Type": "application/json",
headers: map[string]string{ },
"User-Agent": "test-agent", expected: types.Bool(false),
"Content-Type": "application/json", description: "should return false when header exists",
}, },
expected: types.Bool(false), {
description: "should return false when header exists", name: "case-sensitive",
}, expression: `missingHeader(headers, "user-agent")`,
{ headers: map[string]string{
name: "case-sensitive", "User-Agent": "test-agent",
expression: `missingHeader(headers, "user-agent")`, },
headers: map[string]string{ expected: types.Bool(true),
"User-Agent": "test-agent", description: "should be case-sensitive (user-agent != User-Agent)",
}, },
expected: types.Bool(true), {
description: "should be case-sensitive (user-agent != User-Agent)", name: "empty-headers",
}, expression: `missingHeader(headers, "Any-Header")`,
{ headers: map[string]string{},
name: "empty-headers", expected: types.Bool(true),
expression: `missingHeader(headers, "Any-Header")`, description: "should return true for any header when map is empty",
headers: map[string]string{},
expected: types.Bool(true),
description: "should return true for any header when map is empty",
},
{
name: "real-world-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "curl/7.68.0",
"Accept": "*/*",
"Host": "example.com",
}, },
expected: types.Bool(true), {
description: "should detect missing browser-specific headers from bots", name: "real-world-sec-ch-ua",
}, expression: `missingHeader(headers, "Sec-Ch-Ua")`,
{ headers: map[string]string{
name: "browser-with-sec-ch-ua", "User-Agent": "curl/7.68.0",
expression: `missingHeader(headers, "Sec-Ch-Ua")`, "Accept": "*/*",
headers: map[string]string{ "Host": "example.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", },
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`, expected: types.Bool(true),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", description: "should detect missing browser-specific headers from bots",
}, },
expected: types.Bool(false), {
description: "should return false when browser sends Sec-Ch-Ua header", name: "browser-with-sec-ch-ua",
}, expression: `missingHeader(headers, "Sec-Ch-Ua")`,
} headers: map[string]string{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
},
expected: types.Bool(false),
description: "should return false when browser sends Sec-Ch-Ua header",
},
}
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression) prog, err := Compile(env, tt.expression)
if err != nil { if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err) t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
} }
result, _, err := prog.Eval(map[string]interface{}{ result, _, err := prog.Eval(map[string]interface{}{
"headers": tt.headers, "headers": tt.headers,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
}) })
if err != nil { }
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected { t.Run("function-compilation", func(t *testing.T) {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result) src := `missingHeader(headers, "Test-Header")`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
} }
}) })
} })
t.Run("function-compilation", func(t *testing.T) { t.Run("segments", func(t *testing.T) {
src := `missingHeader(headers, "Test-Header")` for _, tt := range []struct {
_, err := Compile(env, src) name string
if err != nil { description string
t.Fatalf("failed to compile missingHeader expression: %v", err) expression string
path string
expected types.Bool
}{
{
name: "simple",
description: "/ should have one path segment",
expression: `size(segments(path)) == 1`,
path: "/",
expected: types.Bool(true),
},
{
name: "two segments without trailing slash",
description: "/user/foo should have two segments",
expression: `size(segments(path)) == 2`,
path: "/user/foo",
expected: types.Bool(true),
},
{
name: "at least two segments",
description: "/foo/bar/ should have at least two path segments",
expression: `size(segments(path)) >= 2`,
path: "/foo/bar/",
expected: types.Bool(true),
},
{
name: "at most two segments",
description: "/foo/bar/ does not have less than two path segments",
expression: `size(segments(path)) < 2`,
path: "/foo/bar/",
expected: types.Bool(false),
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
result, _, err := prog.Eval(map[string]interface{}{
"path": tt.path,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
})
} }
t.Run("invalid", func(t *testing.T) {
for _, tt := range []struct {
name string
description string
expression string
env any
wantFailCompile bool
wantFailEval bool
}{
{
name: "segments of headers",
description: "headers are not a path list",
expression: `segments(headers)`,
env: map[string]any{
"headers": map[string]string{
"foo": "bar",
},
},
wantFailCompile: true,
},
{
name: "invalid path type",
description: "a path should be a sting",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": 4,
},
wantFailEval: true,
},
{
name: "invalid path",
description: "a path should start with a leading slash",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": "foo",
},
wantFailEval: true,
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
if !tt.wantFailCompile {
t.Log(tt.description)
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
} else {
return
}
}
_, _, err = prog.Eval(tt.env)
if err == nil {
t.Log(tt.description)
t.Fatal("wanted an error but got none")
}
t.Log(err)
})
}
})
t.Run("function-compilation", func(t *testing.T) {
src := `size(segments(path)) <= 2`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
}
})
}) })
} }