feat(expressions): add segments function to break path into segments (#916)

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-07-25 16:21:08 -04:00 committed by GitHub
parent bf42014ac3
commit a735770c93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 266 additions and 80 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- This changes the project to: -->
- The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package.
- The [`segments`](./admin/configuration/expressions.mdx#segments) function was added for splitting a path into its slash-separated segments.
## v1.21.3: Minfilia Warde - Echo 3

View File

@ -232,6 +232,39 @@ This is best applied when doing explicit block rules, eg:
It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand.
### `segments`
Available in `bot` expressions.
```ts
function segments(path: string): string[];
```
`segments` returns the number of slash-separated path segments, ignoring the leading slash. Here is what it will return with some common paths:
| Input | Output |
| :----------------------- | :--------------------- |
| `segments("/")` | `[""]` |
| `segments("/foo/bar")` | `["foo", "bar"] ` |
| `segments("/users/xe/")` | `["users", "xe", ""] ` |
:::note
If the path ends with a `/`, then the last element of the result will be an empty string. This is because `/users/xe` and `/users/xe/` are semantically different paths.
:::
This is useful if you want to write rules that allow requests that have no query parameters only if they have less than two path segments:
```yaml
- name: two-path-segments-no-query
action: ALLOW
expression:
all:
- size(query) == 0
- size(segments(path)) < 2
```
## Life advice
Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this.

View File

@ -2,6 +2,7 @@ package expressions
import (
"math/rand/v2"
"strings"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
@ -54,6 +55,28 @@ func BotEnvironment() (*cel.Env, error) {
}),
),
),
cel.Function("segments",
cel.Overload("segments_string_list_string",
[]*cel.Type{cel.StringType},
cel.ListType(cel.StringType),
cel.UnaryBinding(func(path ref.Val) ref.Val {
pathStrType, ok := path.(types.String)
if !ok {
return types.ValOrErr(path, "path is not a string, but is %T", path)
}
pathStr := string(pathStrType)
if !strings.HasPrefix(pathStr, "/") {
return types.ValOrErr(path, "path does not start with /")
}
pathList := strings.Split(string(pathStr), "/")[1:]
return types.NewStringList(types.DefaultTypeAdapter, pathList)
}),
),
),
)
}

View File

@ -12,99 +12,228 @@ func TestBotEnvironment(t *testing.T) {
t.Fatalf("failed to create bot environment: %v", err)
}
tests := []struct {
name string
expression string
headers map[string]string
expected types.Bool
description string
}{
{
name: "missing-header",
expression: `missingHeader(headers, "Missing-Header")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
t.Run("missingHeader", func(t *testing.T) {
tests := []struct {
name string
expression string
headers map[string]string
expected types.Bool
description string
}{
{
name: "missing-header",
expression: `missingHeader(headers, "Missing-Header")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
},
expected: types.Bool(true),
description: "should return true when header is missing",
},
expected: types.Bool(true),
description: "should return true when header is missing",
},
{
name: "existing-header",
expression: `missingHeader(headers, "User-Agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
{
name: "existing-header",
expression: `missingHeader(headers, "User-Agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
"Content-Type": "application/json",
},
expected: types.Bool(false),
description: "should return false when header exists",
},
expected: types.Bool(false),
description: "should return false when header exists",
},
{
name: "case-sensitive",
expression: `missingHeader(headers, "user-agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
{
name: "case-sensitive",
expression: `missingHeader(headers, "user-agent")`,
headers: map[string]string{
"User-Agent": "test-agent",
},
expected: types.Bool(true),
description: "should be case-sensitive (user-agent != User-Agent)",
},
expected: types.Bool(true),
description: "should be case-sensitive (user-agent != User-Agent)",
},
{
name: "empty-headers",
expression: `missingHeader(headers, "Any-Header")`,
headers: map[string]string{},
expected: types.Bool(true),
description: "should return true for any header when map is empty",
},
{
name: "real-world-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "curl/7.68.0",
"Accept": "*/*",
"Host": "example.com",
{
name: "empty-headers",
expression: `missingHeader(headers, "Any-Header")`,
headers: map[string]string{},
expected: types.Bool(true),
description: "should return true for any header when map is empty",
},
expected: types.Bool(true),
description: "should detect missing browser-specific headers from bots",
},
{
name: "browser-with-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
{
name: "real-world-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "curl/7.68.0",
"Accept": "*/*",
"Host": "example.com",
},
expected: types.Bool(true),
description: "should detect missing browser-specific headers from bots",
},
expected: types.Bool(false),
description: "should return false when browser sends Sec-Ch-Ua header",
},
}
{
name: "browser-with-sec-ch-ua",
expression: `missingHeader(headers, "Sec-Ch-Ua")`,
headers: map[string]string{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
},
expected: types.Bool(false),
description: "should return false when browser sends Sec-Ch-Ua header",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
result, _, err := prog.Eval(map[string]interface{}{
"headers": tt.headers,
result, _, err := prog.Eval(map[string]interface{}{
"headers": tt.headers,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
t.Run("function-compilation", func(t *testing.T) {
src := `missingHeader(headers, "Test-Header")`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
}
})
}
})
t.Run("function-compilation", func(t *testing.T) {
src := `missingHeader(headers, "Test-Header")`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
t.Run("segments", func(t *testing.T) {
for _, tt := range []struct {
name string
description string
expression string
path string
expected types.Bool
}{
{
name: "simple",
description: "/ should have one path segment",
expression: `size(segments(path)) == 1`,
path: "/",
expected: types.Bool(true),
},
{
name: "two segments without trailing slash",
description: "/user/foo should have two segments",
expression: `size(segments(path)) == 2`,
path: "/user/foo",
expected: types.Bool(true),
},
{
name: "at least two segments",
description: "/foo/bar/ should have at least two path segments",
expression: `size(segments(path)) >= 2`,
path: "/foo/bar/",
expected: types.Bool(true),
},
{
name: "at most two segments",
description: "/foo/bar/ does not have less than two path segments",
expression: `size(segments(path)) < 2`,
path: "/foo/bar/",
expected: types.Bool(false),
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
}
result, _, err := prog.Eval(map[string]interface{}{
"path": tt.path,
})
if err != nil {
t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err)
}
if result != tt.expected {
t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result)
}
})
}
t.Run("invalid", func(t *testing.T) {
for _, tt := range []struct {
name string
description string
expression string
env any
wantFailCompile bool
wantFailEval bool
}{
{
name: "segments of headers",
description: "headers are not a path list",
expression: `segments(headers)`,
env: map[string]any{
"headers": map[string]string{
"foo": "bar",
},
},
wantFailCompile: true,
},
{
name: "invalid path type",
description: "a path should be a sting",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": 4,
},
wantFailEval: true,
},
{
name: "invalid path",
description: "a path should start with a leading slash",
expression: `size(segments(path)) != 0`,
env: map[string]any{
"path": "foo",
},
wantFailEval: true,
},
} {
t.Run(tt.name, func(t *testing.T) {
prog, err := Compile(env, tt.expression)
if err != nil {
if !tt.wantFailCompile {
t.Log(tt.description)
t.Fatalf("failed to compile expression %q: %v", tt.expression, err)
} else {
return
}
}
_, _, err = prog.Eval(tt.env)
if err == nil {
t.Log(tt.description)
t.Fatal("wanted an error but got none")
}
t.Log(err)
})
}
})
t.Run("function-compilation", func(t *testing.T) {
src := `size(segments(path)) <= 2`
_, err := Compile(env, src)
if err != nil {
t.Fatalf("failed to compile missingHeader expression: %v", err)
}
})
})
}