From a735770c93aa9fa4c748fefbba108fc7a8a7c067 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 25 Jul 2025 16:21:08 -0400 Subject: [PATCH] feat(expressions): add segments function to break path into segments (#916) Signed-off-by: Xe Iaso --- docs/docs/CHANGELOG.md | 1 + docs/docs/admin/configuration/expressions.mdx | 33 ++ lib/policy/expressions/environment.go | 23 ++ lib/policy/expressions/environment_test.go | 289 +++++++++++++----- 4 files changed, 266 insertions(+), 80 deletions(-) diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index bea36f1..5970c61 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package. +- The [`segments`](./admin/configuration/expressions.mdx#segments) function was added for splitting a path into its slash-separated segments. ## v1.21.3: Minfilia Warde - Echo 3 diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx index 49d0e05..6cb7954 100644 --- a/docs/docs/admin/configuration/expressions.mdx +++ b/docs/docs/admin/configuration/expressions.mdx @@ -232,6 +232,39 @@ This is best applied when doing explicit block rules, eg: It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand. +### `segments` + +Available in `bot` expressions. + +```ts +function segments(path: string): string[]; +``` + +`segments` returns the list of slash-separated path segments, ignoring the leading slash. 
Here is what it will return with some common paths: + +| Input | Output | +| :----------------------- | :--------------------- | +| `segments("/")` | `[""]` | +| `segments("/foo/bar")` | `["foo", "bar"]` | +| `segments("/users/xe/")` | `["users", "xe", ""]` | + +:::note + +If the path ends with a `/`, then the last element of the result will be an empty string. This is because `/users/xe` and `/users/xe/` are semantically different paths. + +::: + +This is useful if you want to write rules that allow requests that have no query parameters only if they have fewer than two path segments: + +```yaml +- name: two-path-segments-no-query + action: ALLOW + expression: + all: + - size(query) == 0 + - size(segments(path)) < 2 +``` + ## Life advice Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this. 
diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 14b57be..27f298c 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -2,6 +2,7 @@ package expressions import ( "math/rand/v2" + "strings" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" @@ -54,6 +55,28 @@ func BotEnvironment() (*cel.Env, error) { }), ), ), + + cel.Function("segments", + cel.Overload("segments_string_list_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.StringType), + cel.UnaryBinding(func(path ref.Val) ref.Val { + pathStrType, ok := path.(types.String) + if !ok { + return types.ValOrErr(path, "path is not a string, but is %T", path) + } + + pathStr := string(pathStrType) + if !strings.HasPrefix(pathStr, "/") { + return types.ValOrErr(path, "path does not start with /") + } + + pathList := strings.Split(string(pathStr), "/")[1:] + + return types.NewStringList(types.DefaultTypeAdapter, pathList) + }), + ), + ), ) } diff --git a/lib/policy/expressions/environment_test.go b/lib/policy/expressions/environment_test.go index 9878e1c..4e7d796 100644 --- a/lib/policy/expressions/environment_test.go +++ b/lib/policy/expressions/environment_test.go @@ -12,99 +12,228 @@ func TestBotEnvironment(t *testing.T) { t.Fatalf("failed to create bot environment: %v", err) } - tests := []struct { - name string - expression string - headers map[string]string - expected types.Bool - description string - }{ - { - name: "missing-header", - expression: `missingHeader(headers, "Missing-Header")`, - headers: map[string]string{ - "User-Agent": "test-agent", - "Content-Type": "application/json", + t.Run("missingHeader", func(t *testing.T) { + tests := []struct { + name string + expression string + headers map[string]string + expected types.Bool + description string + }{ + { + name: "missing-header", + expression: `missingHeader(headers, "Missing-Header")`, + headers: map[string]string{ + "User-Agent": 
"test-agent", + "Content-Type": "application/json", + }, + expected: types.Bool(true), + description: "should return true when header is missing", }, - expected: types.Bool(true), - description: "should return true when header is missing", - }, - { - name: "existing-header", - expression: `missingHeader(headers, "User-Agent")`, - headers: map[string]string{ - "User-Agent": "test-agent", - "Content-Type": "application/json", + { + name: "existing-header", + expression: `missingHeader(headers, "User-Agent")`, + headers: map[string]string{ + "User-Agent": "test-agent", + "Content-Type": "application/json", + }, + expected: types.Bool(false), + description: "should return false when header exists", }, - expected: types.Bool(false), - description: "should return false when header exists", - }, - { - name: "case-sensitive", - expression: `missingHeader(headers, "user-agent")`, - headers: map[string]string{ - "User-Agent": "test-agent", + { + name: "case-sensitive", + expression: `missingHeader(headers, "user-agent")`, + headers: map[string]string{ + "User-Agent": "test-agent", + }, + expected: types.Bool(true), + description: "should be case-sensitive (user-agent != User-Agent)", }, - expected: types.Bool(true), - description: "should be case-sensitive (user-agent != User-Agent)", - }, - { - name: "empty-headers", - expression: `missingHeader(headers, "Any-Header")`, - headers: map[string]string{}, - expected: types.Bool(true), - description: "should return true for any header when map is empty", - }, - { - name: "real-world-sec-ch-ua", - expression: `missingHeader(headers, "Sec-Ch-Ua")`, - headers: map[string]string{ - "User-Agent": "curl/7.68.0", - "Accept": "*/*", - "Host": "example.com", + { + name: "empty-headers", + expression: `missingHeader(headers, "Any-Header")`, + headers: map[string]string{}, + expected: types.Bool(true), + description: "should return true for any header when map is empty", }, - expected: types.Bool(true), - description: "should detect 
missing browser-specific headers from bots", - }, - { - name: "browser-with-sec-ch-ua", - expression: `missingHeader(headers, "Sec-Ch-Ua")`, - headers: map[string]string{ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", - "Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`, - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + { + name: "real-world-sec-ch-ua", + expression: `missingHeader(headers, "Sec-Ch-Ua")`, + headers: map[string]string{ + "User-Agent": "curl/7.68.0", + "Accept": "*/*", + "Host": "example.com", + }, + expected: types.Bool(true), + description: "should detect missing browser-specific headers from bots", }, - expected: types.Bool(false), - description: "should return false when browser sends Sec-Ch-Ua header", - }, - } + { + name: "browser-with-sec-ch-ua", + expression: `missingHeader(headers, "Sec-Ch-Ua")`, + headers: map[string]string{ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + }, + expected: types.Bool(false), + description: "should return false when browser sends Sec-Ch-Ua header", + }, + } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - prog, err := Compile(env, tt.expression) - if err != nil { - t.Fatalf("failed to compile expression %q: %v", tt.expression, err) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } - result, _, err := prog.Eval(map[string]interface{}{ - "headers": tt.headers, + result, _, err := prog.Eval(map[string]interface{}{ + "headers": tt.headers, + }) + if err != nil { + t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) + } + + if result != tt.expected { + t.Errorf("%s: expected %v, got %v", 
tt.description, tt.expected, result) + } }) - if err != nil { - t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) - } + } - if result != tt.expected { - t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result) + t.Run("function-compilation", func(t *testing.T) { + src := `missingHeader(headers, "Test-Header")` + _, err := Compile(env, src) + if err != nil { + t.Fatalf("failed to compile missingHeader expression: %v", err) } }) - } + }) - t.Run("function-compilation", func(t *testing.T) { - src := `missingHeader(headers, "Test-Header")` - _, err := Compile(env, src) - if err != nil { - t.Fatalf("failed to compile missingHeader expression: %v", err) + t.Run("segments", func(t *testing.T) { + for _, tt := range []struct { + name string + description string + expression string + path string + expected types.Bool + }{ + { + name: "simple", + description: "/ should have one path segment", + expression: `size(segments(path)) == 1`, + path: "/", + expected: types.Bool(true), + }, + { + name: "two segments without trailing slash", + description: "/user/foo should have two segments", + expression: `size(segments(path)) == 2`, + path: "/user/foo", + expected: types.Bool(true), + }, + { + name: "at least two segments", + description: "/foo/bar/ should have at least two path segments", + expression: `size(segments(path)) >= 2`, + path: "/foo/bar/", + expected: types.Bool(true), + }, + { + name: "at most two segments", + description: "/foo/bar/ does not have less than two path segments", + expression: `size(segments(path)) < 2`, + path: "/foo/bar/", + expected: types.Bool(false), + }, + } { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } + + result, _, err := prog.Eval(map[string]interface{}{ + "path": tt.path, + }) + if err != nil { + t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) + } + + if result 
!= tt.expected { + t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result) + } + }) } + + t.Run("invalid", func(t *testing.T) { + for _, tt := range []struct { + name string + description string + expression string + env any + wantFailCompile bool + wantFailEval bool + }{ + { + name: "segments of headers", + description: "headers are not a path list", + expression: `segments(headers)`, + env: map[string]any{ + "headers": map[string]string{ + "foo": "bar", + }, + }, + wantFailCompile: true, + }, + { + name: "invalid path type", + description: "a path should be a sting", + expression: `size(segments(path)) != 0`, + env: map[string]any{ + "path": 4, + }, + wantFailEval: true, + }, + { + name: "invalid path", + description: "a path should start with a leading slash", + expression: `size(segments(path)) != 0`, + env: map[string]any{ + "path": "foo", + }, + wantFailEval: true, + }, + } { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + if !tt.wantFailCompile { + t.Log(tt.description) + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } else { + return + } + } + + _, _, err = prog.Eval(tt.env) + + if err == nil { + t.Log(tt.description) + t.Fatal("wanted an error but got none") + } + + t.Log(err) + }) + } + }) + + t.Run("function-compilation", func(t *testing.T) { + src := `size(segments(path)) <= 2` + _, err := Compile(env, src) + if err != nil { + t.Fatalf("failed to compile missingHeader expression: %v", err) + } + }) }) }