diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7a4f095 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + "github.copilot.enable": { + "*": false, + "plaintext": false, + "markdown": false, + "mdx": false, + "json": false, + "scminput": false, + "yaml": false, + "go": false, + "zig": false, + "javascript": false, + "properties": false + } +} diff --git a/data/apps/allow-api-routes.yaml b/data/apps/allow-api-routes.yaml new file mode 100644 index 0000000..0cc3e3b --- /dev/null +++ b/data/apps/allow-api-routes.yaml @@ -0,0 +1,6 @@ +- name: allow-api-routes + action: ALLOW + expression: + all: + - '!(method == "HEAD" || method == "GET")' + - path.startsWith("/api/") \ No newline at end of file diff --git a/data/botPolicies.json b/data/botPolicies.json index 7af1d90..5f24e99 100644 --- a/data/botPolicies.json +++ b/data/botPolicies.json @@ -9,6 +9,9 @@ { "import": "(data)/crawlers/_allow-good.yaml" }, + { + "import": "(data)/bots/aggressive-brazilian-scrapers.yaml" + }, { "import": "(data)/common/keep-internet-working.yaml" }, @@ -23,4 +26,4 @@ "CHALLENGE": 200, "DENY": 200 } -} +} \ No newline at end of file diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index e688491..d0f35e6 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -15,6 +15,7 @@ bots: - # This correlates to data/bots/deny-pathological.yaml in the source tree # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml import: (data)/bots/_deny-pathological.yaml +- import: (data)/bots/aggressive-brazilian-scrapers.yaml # Enforce https://github.com/ai-robots-txt/ai.robots.txt - import: (data)/bots/ai-robots-txt.yaml diff --git a/data/bots/aggressive-brazilian-scrapers.yaml b/data/bots/aggressive-brazilian-scrapers.yaml new file mode 100644 index 0000000..140811a --- /dev/null +++ b/data/bots/aggressive-brazilian-scrapers.yaml @@ -0,0 +1,28 @@ +- name: deny-aggressive-brazilian-scrapers + action: DENY 
+ expression: + any: + # Internet Explorer should be out of support + - userAgent.contains("MSIE") + # Trident is the Internet Explorer browser engine + - userAgent.contains("Trident") + # Opera is a fork of chrome now + - userAgent.contains("Presto") + # Windows CE is discontinued + - userAgent.contains("Windows CE") + # Windows 95 is discontinued + - userAgent.contains("Windows 95") + # Windows 98 is discontinued + - userAgent.contains("Windows 98") + # Windows 9.x is discontinued + - userAgent.contains("Win 9x") + # Amazon does not have an Alexa Toolbar. + - userAgent.contains("Alexa Toolbar") +- name: challenge-aggressive-brazilian-scrapers + action: CHALLENGE + expression: + any: + # This is not released, even Windows 11 calls itself Windows 10 + - userAgent.contains("Windows NT 11.0") + # iPods are not in common use + - userAgent.contains("iPod") \ No newline at end of file diff --git a/data/bots/irc-bots/archlinux-phrik.yaml b/data/bots/irc-bots/archlinux-phrik.yaml new file mode 100644 index 0000000..ecbc8cb --- /dev/null +++ b/data/bots/irc-bots/archlinux-phrik.yaml @@ -0,0 +1,9 @@ +# phrik in the Arch Linux IRC channels +- name: archlinux-phrik + action: ALLOW + expression: + all: + - remoteAddress == "159.69.213.214" + - userAgent == "Mozilla/5.0 (compatible; utils.web Limnoria module)" + - '"X-Http-Version" in headers' + - headers["X-Http-Version"] == "HTTP/1.1" \ No newline at end of file diff --git a/data/bots/irc-bots/gentoo-chat.yaml b/data/bots/irc-bots/gentoo-chat.yaml new file mode 100644 index 0000000..92f2a4f --- /dev/null +++ b/data/bots/irc-bots/gentoo-chat.yaml @@ -0,0 +1,9 @@ +# chat in the gentoo IRC channels +- name: gentoo-chat + action: ALLOW + expression: + all: + - remoteAddress == "45.76.166.57" + - userAgent == "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" + - '"X-Http-Version" in headers' + - headers["X-Http-Version"] == "HTTP/1.1" \ No newline at end of file diff --git a/data/clients/git.yaml 
b/data/clients/git.yaml new file mode 100644 index 0000000..4fba66b --- /dev/null +++ b/data/clients/git.yaml @@ -0,0 +1,14 @@ +- name: allow-git-clients + action: ALLOW + expression: + all: + - > + ( + userAgent.startsWith("git/") || + userAgent.contains("libgit") || + userAgent.startsWith("go-git") || + userAgent.startsWith("JGit/") || + userAgent.startsWith("JGit-") + ) + - '"Git-Protocol" in headers' + - headers["Git-Protocol"] == "version=2" \ No newline at end of file diff --git a/data/clients/go-get.yaml b/data/clients/go-get.yaml new file mode 100644 index 0000000..701bd5d --- /dev/null +++ b/data/clients/go-get.yaml @@ -0,0 +1,7 @@ +- name: go-get + action: ALLOW + expression: + all: + - userAgent.startsWith("Go-http-client/") + - '"go-get" in query' + - query["go-get"] == "1" \ No newline at end of file diff --git a/data/common/allow-api-like.yaml b/data/common/allow-api-like.yaml new file mode 100644 index 0000000..0cc3e3b --- /dev/null +++ b/data/common/allow-api-like.yaml @@ -0,0 +1,6 @@ +- name: allow-api-routes + action: ALLOW + expression: + all: + - '!(method == "HEAD" || method == "GET")' + - path.startsWith("/api/") \ No newline at end of file diff --git a/data/common/json-api.yaml b/data/common/json-api.yaml new file mode 100644 index 0000000..b5b51d6 --- /dev/null +++ b/data/common/json-api.yaml @@ -0,0 +1,7 @@ +- name: allow-api-requests + action: ALLOW + expression: + all: + - '"Accept" in headers' + - 'headers["Accept"] == "application/json"' + - 'path.startsWith("/api/")' \ No newline at end of file diff --git a/data/common/keep-internet-working.yaml b/data/common/keep-internet-working.yaml index 8270ef4..d72f5ab 100644 --- a/data/common/keep-internet-working.yaml +++ b/data/common/keep-internet-working.yaml @@ -7,4 +7,7 @@ action: ALLOW - name: robots-txt path_regex: ^/robots.txt$ + action: ALLOW +- name: sitemap + path_regex: ^/sitemap.xml$ action: ALLOW \ No newline at end of file diff --git a/data/common/rfc-violations.yaml 
b/data/common/rfc-violations.yaml new file mode 100644 index 0000000..6f93360 --- /dev/null +++ b/data/common/rfc-violations.yaml @@ -0,0 +1,3 @@ +- name: no-user-agent-string + action: DENY + expression: userAgent == "" \ No newline at end of file diff --git a/data/embed.go b/data/embed.go index 3e5278f..849c75f 100644 --- a/data/embed.go +++ b/data/embed.go @@ -3,6 +3,6 @@ package data import "embed" var ( - //go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:common all:crawlers + //go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers BotPolicies embed.FS ) diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index b00bd7c..5868589 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -10,11 +10,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -- Use CSS variables to deduplicate styles +- Use CSS variables to deduplicate styles - Fixed native packages not containing the stdlib and botPolicies.yaml - Change import syntax to allow multi-level imports - Changed the startup logging to use JSON formatting as all the other logs do. +- Added the ability to do [expression matching with CEL](./admin/configuration/expressions.mdx) ## v1.17.1: Asahi sas Brutus: Echo 1 diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx new file mode 100644 index 0000000..7e24ea5 --- /dev/null +++ b/docs/docs/admin/configuration/expressions.mdx @@ -0,0 +1,150 @@ +# Expression-based rule matching + +Most of the Anubis matchers let you match individual parts of a request and only those parts in isolation. In order to defend a service in depth, you often need the ability to match against multiple aspects of a request. 
Anubis implements [Common Expression Language (CEL)](https://cel.dev) to let administrators define these more advanced rules. This allows you to tailor your approach for the individual services you are protecting. + +As an example, here is a rule that lets you allow JSON API requests through Anubis: + +```yaml +- name: allow-api-requests + action: ALLOW + expression: + all: + - '"Accept" in headers' + - 'headers["Accept"] == "application/json"' + - 'path.startsWith("/api/")' +``` + +This is an advanced feature and as such it is easy to get yourself in trouble with it. Use this with care. + +## Common Expression Language (CEL) + +CEL is an expression language made by Google as a part of their access control lists system. As programs grow more complicated and users have the need to express more complicated security requirements, they often want the ability to just run a small bit of code to check things for themselves. CEL expressions are built for this. They are implicitly sandboxed so that they cannot affect the system they are running in and are also designed to evaluate as fast as humanly possible. + +Imagine a CEL expression as the contents of an `if` statement in JavaScript or the `WHERE` clause in SQL. Consider this example expression: + +```python +userAgent == "" +``` + +This is roughly equivalent to the following in JavaScript: + +```js +if (userAgent == "") { + // Do something +} +``` + +Using these expressions, you can define more elaborate rules as facts and circumstances demand. For more information about the syntax and grammar of CEL, take a look at [the language specification](https://github.com/google/cel-spec/blob/master/doc/langdef.md). + +## How Anubis uses CEL + +Anubis uses CEL to let administrators create complicated filter rules. 
Anubis has several modes of using CEL: + +- Validating requests against single expressions +- Validating multiple expressions and ensuring at least one of them is true (`any`) +- Validating multiple expressions and ensuring all of them are true (`all`) + +The common pattern is that every Anubis expression returns `true`, `false`, or raises an error. + +### Single expressions + +A single expression returns either `true` or `false`. If the expression returns `true`, then the action specified in the rule will be taken. If it returns `false`, Anubis will move on to the next rule. + +For example, consider this rule: + +```yaml +- name: no-user-agent-string + action: DENY + expression: userAgent == "" +``` + +For this rule, if a request comes in without a [`User-Agent` string](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) set, Anubis will deny the request and return an error page. + +### `any` blocks + +An `any` block contains a list of expressions. If any expression in the list returns `true`, then the action specified in the rule will be taken. If all expressions in that list return `false`, Anubis will move on to the next rule. + +For example, consider this rule: + +```yaml +- name: known-banned-user + action: DENY + expression: + any: + - remoteAddress == "8.8.8.8" + - remoteAddress == "1.1.1.1" +``` + +For this rule, if a request comes in from `8.8.8.8` or `1.1.1.1`, Anubis will deny the request and return an error page. + +### `all` blocks + +An `all` block contains a list of expressions. If all expressions in the list return `true`, then the action specified in the rule will be taken. If any of the expressions in the list returns `false`, Anubis will move on to the next rule. 
+ +For example, consider this rule: + +```yaml +- name: go-get + action: ALLOW + expression: + all: + - userAgent.startsWith("Go-http-client/") + - '"go-get" in query' + - query["go-get"] == "1" +``` + +For this rule, if a request comes in matching [the signature of the `go get` command](https://pkg.go.dev/cmd/go#hdr-Remote_import_paths), Anubis will allow it through to the target. + +## Variables exposed to Anubis expressions + +Anubis exposes the following variables to expressions: + +| Name | Type | Explanation | Example | +| :-------------- | :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- | +| `headers` | `map[string, string]` | The [headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers) of the request being processed. | `{"User-Agent": "Mozilla/5.0 Gecko/20100101 Firefox/137.0"}` | +| `host` | `string` | The [HTTP hostname](https://web.dev/articles/url-parts#host) the request is targeted to. | `anubis.techaro.lol` | +| `method` | `string` | The [HTTP method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Methods) in the request being processed. | `GET`, `POST`, `DELETE`, etc. | +| `path` | `string` | The [path](https://web.dev/articles/url-parts#pathname) of the request being processed. | `/`, `/api/memes/create` | +| `query` | `map[string, string]` | The [query parameters](https://web.dev/articles/url-parts#query) of the request being processed. | `?foo=bar` -> `{"foo": "bar"}` | +| `remoteAddress` | `string` | The IP address of the client. | `1.1.1.1` | +| `userAgent` | `string` | The [`User-Agent`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) string in the request being processed. 
| `Mozilla/5.0 Gecko/20100101 Firefox/137.0` | + +Of note: in many languages when you look up a key in a map and there is nothing there, the language will return some "falsy" value like `undefined` in JavaScript, `None` in Python, or the zero value of the type in Go. In CEL, if you try to look up a value that does not exist, execution of the expression will fail and Anubis will return an error. + +In order to avoid this, make sure the header or query parameter you are testing is present in the request with an `all` block like this: + +```yaml +- name: challenge-wiki-history-page + action: CHALLENGE + all: + - 'path == "/index.php"' + - '"title" in query' + - '"action" in query' + - 'query["action"] == "history"' +``` + +This rule throws a challenge if and only if all of the following conditions are true: + +- The URL path is `/index.php` +- The URL query string contains a `title` value +- The URL query string contains an `action` value +- The URL query string's `action` value is `"history"` + +So given an HTTP request like this: + +```text +GET /index.php?title=Index&action=history HTTP/1.1 +User-Agent: Mozilla/5.0 Gecko/20100101 Firefox/137.0 +Host: wiki.int.techaro.lol +X-Real-Ip: 8.8.8.8 +``` + +Anubis would return a challenge because all of those conditions are true. + +## Functions exposed to Anubis expressions + +There are currently no functions from the Anubis runtime exposed to expressions. This will change in the future. + +## Life advice + +Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this. 
diff --git a/go.mod b/go.mod index b934937..f3b1ced 100644 --- a/go.mod +++ b/go.mod @@ -6,22 +6,25 @@ require ( github.com/a-h/templ v0.3.857 github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456 github.com/golang-jwt/jwt/v5 v5.2.2 + github.com/google/cel-go v0.25.0 github.com/playwright-community/playwright-go v0.5101.0 github.com/prometheus/client_golang v1.22.0 github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a github.com/yl2chen/cidranger v1.0.2 golang.org/x/net v0.39.0 + k8s.io/apimachinery v0.32.3 ) require ( + cel.dev/expr v0.23.1 // indirect github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect github.com/a-h/parse v0.0.0-20250122154542-74294addb73e // indirect github.com/andybalholm/brotli v1.1.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cli/browser v1.3.0 // indirect - github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect @@ -30,22 +33,26 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-jose/go-jose/v3 v3.0.4 // indirect github.com/go-stack/stack v1.8.1 // indirect + github.com/kr/text v0.2.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/natefinch/atomic v1.0.1 // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/stoewer/go-strcase v1.2.0 // indirect 
+ golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 // indirect golang.org/x/mod v0.24.0 // indirect golang.org/x/sync v0.13.0 // indirect golang.org/x/sys v0.32.0 // indirect + golang.org/x/text v0.24.0 // indirect golang.org/x/tools v0.32.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect google.golang.org/protobuf v1.36.5 // indirect honnef.co/go/tools v0.6.1 // indirect - k8s.io/apimachinery v0.32.3 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 316a972..40904c0 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg= +cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c h1:pxW6RcqyfI9/kWtOwnv/G+AzdKuy2ZrqINhenH4HyNs= github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/a-h/parse v0.0.0-20250122154542-74294addb73e h1:HjVbSQHy+dnlS6C3XajZ69NYAb5jbGNfHanvm1+iYlo= @@ -6,6 +8,8 @@ github.com/a-h/templ v0.3.857 h1:6EqcJuGZW4OL+2iZ3MD+NnIcG7nGkaQeF2Zq5kf9ZGg= github.com/a-h/templ v0.3.857/go.mod h1:qhrhAkRFubE7khxLZHsBFHfX+gWwVNKbzKeF9GlPV4M= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod 
h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= @@ -14,6 +18,7 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cli/browser v1.3.0 h1:LejqCrpWr+1pRqmEPDGnTZOjsMe7sehifLynZJuqJpo= github.com/cli/browser v1.3.0/go.mod h1:HH8s+fOAxjhQoBUAsKuPCbqUuxZDhQ2/aD+SzsEfBTk= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -37,11 +42,17 @@ github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/google/cel-go v0.25.0 h1:jsFw9Fhn+3y2kBbltZR4VEz5xKkcIFRPDnuEzAGv5GY= +github.com/google/cel-go v0.25.0/go.mod h1:hjEb6r5SuOSlhCHmFoLzu8HGCERvIsDAbxDAyNU/MmI= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod 
h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= @@ -68,10 +79,15 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a h1:iLcLb5Fwwz7g/DLK89F+uQBDeAhHhwdzB5fSlVdhGcM= github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a/go.mod h1:wozgYq9WEBQBaIJe4YZ0qTSFAMxmcwBhQH0fO0R34Z0= +github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU= +github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= @@ -81,6 +97,8 @@ github.com/yuin/goldmark 
v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ= +golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 h1:1P7xPZEwZMoBoz0Yze5Nx2/4pxj6nw9ZqHWXqP0iRgQ= golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -97,8 +115,6 @@ golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= -golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -124,18 +140,24 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= -golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU= golang.org/x/tools v0.32.0/go.mod h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 
v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/lib/anubis_test.go b/lib/anubis_test.go index 2c401f5..cf7e152 100644 --- a/lib/anubis_test.go +++ b/lib/anubis_test.go @@ -14,6 +14,7 @@ import ( "github.com/TecharoHQ/anubis/data" "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/lib/policy" + "github.com/TecharoHQ/anubis/lib/policy/config" ) func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig { @@ -84,7 +85,7 @@ func TestCVE2025_24369(t *testing.T) { Next: http.NewServeMux(), Policy: pol, - CookieDomain: "local.cetacean.club", + CookieDomain: ".local.cetacean.club", CookiePartitioned: true, CookieName: t.Name(), }) @@ -541,3 +542,42 @@ func TestCustomStatusCodes(t *testing.T) { }) } } + +func TestCloudflareWorkersRule(t *testing.T) { + for _, variant := range []string{"cel", "header"} { + t.Run(variant, func(t *testing.T) { + pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml") + + h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, "OK") + }) + + s, err := New(Options{ + Next: h, + Policy: pol, + ServeRobotsTXT: true, + }) + if err != nil { + t.Fatalf("can't construct libanubis.Server: %v", err) + } + + t.Run("no-cf-worker-header", func(t *testing.T) { + req, err := http.NewRequest(http.MethodGet, "/", nil) + if err != nil { + t.Fatal(err) + } + + req.Header.Add("X-Real-Ip", "127.0.0.1") + + cr, _, err := s.check(req) + if err != nil { + t.Fatal(err) + } + + if cr.Rule != config.RuleAllow { + t.Errorf("rule is wrong, wanted %s, got: %s", config.RuleAllow, cr.Rule) + } + }) + }) + } +} diff --git a/lib/policy/celchecker.go b/lib/policy/celchecker.go new file mode 100644 
index 0000000..38b81e1 --- /dev/null +++ b/lib/policy/celchecker.go @@ -0,0 +1,108 @@ +package policy + +import ( + "fmt" + "net/http" + + "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/policy/expressions" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/common/types" +) + +type CELChecker struct { + src string + program cel.Program +} + +func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) { + env, err := expressions.NewEnvironment() + if err != nil { + return nil, err + } + + var src string + var ast *cel.Ast + + if cfg.Expression != "" { + src = cfg.Expression + var iss *cel.Issues + interm, iss := env.Compile(src) + if iss != nil { + return nil, iss.Err() + } + + ast, iss = env.Check(interm) + if iss != nil { + return nil, iss.Err() + } + } + + if len(cfg.All) != 0 { + ast, err = expressions.Join(env, expressions.JoinAnd, cfg.All...) + } + + if len(cfg.Any) != 0 { + ast, err = expressions.Join(env, expressions.JoinOr, cfg.Any...) 
+ } + + if err != nil { + return nil, err + } + + program, err := expressions.Compile(env, ast) + if err != nil { + return nil, fmt.Errorf("can't compile CEL program: %w", err) + } + + return &CELChecker{ + src: src, + program: program, + }, nil +} + +func (cc *CELChecker) Hash() string { + return internal.SHA256sum(cc.src) +} + +func (cc *CELChecker) Check(r *http.Request) (bool, error) { + result, _, err := cc.program.ContextEval(r.Context(), &CELRequest{r}) + + if err != nil { + return false, err + } + + if val, ok := result.(types.Bool); ok { + return bool(val), nil + } + + return false, nil +} + +type CELRequest struct { + *http.Request +} + +func (cr *CELRequest) Parent() cel.Activation { return nil } + +func (cr *CELRequest) ResolveName(name string) (any, bool) { + switch name { + case "remoteAddress": + return cr.Header.Get("X-Real-Ip"), true + case "host": + return cr.Host, true + case "method": + return cr.Method, true + case "userAgent": + return cr.UserAgent(), true + case "path": + return cr.URL.Path, true + case "query": + return expressions.URLValues{Values: cr.URL.Query()}, true + case "headers": + return expressions.HTTPHeaders{Header: cr.Header}, true + default: + return nil, false + } +} diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 2c51401..8a49667 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -55,9 +55,11 @@ type BotConfig struct { UserAgentRegex *string `json:"user_agent_regex"` PathRegex *string `json:"path_regex"` HeadersRegex map[string]string `json:"headers_regex"` - Action Rule `json:"action"` RemoteAddr []string `json:"remote_addresses"` - Challenge *ChallengeRules `json:"challenge,omitempty"` + Expression *ExpressionOrList `json:"expression"` + + Action Rule `json:"action"` + Challenge *ChallengeRules `json:"challenge,omitempty"` } func (b BotConfig) Zero() bool { @@ -85,7 +87,12 @@ func (b BotConfig) Valid() error { errs = append(errs, ErrBotMustHaveName) } - if 
b.UserAgentRegex == nil && b.PathRegex == nil && len(b.RemoteAddr) == 0 && len(b.HeadersRegex) == 0 { + allFieldsEmpty := b.UserAgentRegex == nil && + b.PathRegex == nil && + len(b.RemoteAddr) == 0 && + len(b.HeadersRegex) == 0 + + if allFieldsEmpty && b.Expression == nil { errs = append(errs, ErrBotMustHaveUserAgentOrPath) } @@ -137,6 +144,12 @@ func (b BotConfig) Valid() error { } } + if b.Expression != nil { + if err := b.Expression.Valid(); err != nil { + errs = append(errs, err) + } + } + switch b.Action { case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny: // okay diff --git a/lib/policy/config/expressionorlist.go b/lib/policy/config/expressionorlist.go new file mode 100644 index 0000000..7b07a35 --- /dev/null +++ b/lib/policy/config/expressionorlist.go @@ -0,0 +1,62 @@ +package config + +import ( + "encoding/json" + "errors" + "slices" +) + +var ( + ErrExpressionOrListMustBeStringOrObject = errors.New("config: this must be a string or an object") + ErrExpressionEmpty = errors.New("config: this expression is empty") + ErrExpressionCantHaveBoth = errors.New("config: expression block can't contain multiple expression types") +) + +type ExpressionOrList struct { + Expression string `json:"-"` + All []string `json:"all"` + Any []string `json:"any"` +} + +func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool { + if eol.Expression != rhs.Expression { + return false + } + + if !slices.Equal(eol.All, rhs.All) { + return false + } + + if !slices.Equal(eol.Any, rhs.Any) { + return false + } + + return true +} + +func (eol *ExpressionOrList) UnmarshalJSON(data []byte) error { + switch string(data[0]) { + case `"`: // string + return json.Unmarshal(data, &eol.Expression) + case "{": // object + type RawExpressionOrList ExpressionOrList + var val RawExpressionOrList + if err := json.Unmarshal(data, &val); err != nil { + return err + } + eol.All = val.All + eol.Any = val.Any + + return nil + } + + return ErrExpressionOrListMustBeStringOrObject +} + +func (eol 
*ExpressionOrList) Valid() error { + if len(eol.All) != 0 && len(eol.Any) != 0 { + return ErrExpressionCantHaveBoth + } + + return nil +} diff --git a/lib/policy/config/expressionorlist_test.go b/lib/policy/config/expressionorlist_test.go new file mode 100644 index 0000000..ea4319e --- /dev/null +++ b/lib/policy/config/expressionorlist_test.go @@ -0,0 +1,73 @@ +package config + +import ( + "encoding/json" + "errors" + "testing" +) + +func TestExpressionOrListUnmarshal(t *testing.T) { + for _, tt := range []struct { + name string + inp string + err error + validErr error + result *ExpressionOrList + }{ + { + name: "simple", + inp: `"\"User-Agent\" in headers"`, + result: &ExpressionOrList{ + Expression: `"User-Agent" in headers`, + }, + }, + { + name: "object-and", + inp: `{ + "all": ["\"User-Agent\" in headers"] + }`, + result: &ExpressionOrList{ + All: []string{ + `"User-Agent" in headers`, + }, + }, + }, + { + name: "object-or", + inp: `{ + "any": ["\"User-Agent\" in headers"] + }`, + result: &ExpressionOrList{ + Any: []string{ + `"User-Agent" in headers`, + }, + }, + }, + { + name: "both-or-and", + inp: `{ + "all": ["\"User-Agent\" in headers"], + "any": ["\"User-Agent\" in headers"] + }`, + validErr: ErrExpressionCantHaveBoth, + }, + } { + t.Run(tt.name, func(t *testing.T) { + var eol ExpressionOrList + + if err := json.Unmarshal([]byte(tt.inp), &eol); !errors.Is(err, tt.err) { + t.Errorf("wanted unmarshal error: %v but got: %v", tt.err, err) + } + + if tt.result != nil && !eol.Equal(tt.result) { + t.Logf("wanted: %#v", tt.result) + t.Logf("got: %#v", &eol) + t.Fatal("parsed expression is not what was expected") + } + + if err := eol.Valid(); !errors.Is(err, tt.validErr) { + t.Errorf("wanted validation error: %v but got: %v", tt.err, err) + } + }) + } +} diff --git a/lib/policy/config/testdata/bad/multiple_expression_types.json b/lib/policy/config/testdata/bad/multiple_expression_types.json new file mode 100644 index 0000000..8b85276 --- /dev/null +++ 
b/lib/policy/config/testdata/bad/multiple_expression_types.json @@ -0,0 +1,17 @@ +{ + "bots": [ + { + "name": "multiple-expression-types", + "action": "ALLOW", + "expression": { + "all": [ + "userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")", + "\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\"\n" + ], + "any": [ + "userAgent.startsWith(\"evilbot/\")" + ] + } + } + ] +} \ No newline at end of file diff --git a/lib/policy/config/testdata/bad/multiple_expression_types.yaml b/lib/policy/config/testdata/bad/multiple_expression_types.yaml new file mode 100644 index 0000000..f7aa546 --- /dev/null +++ b/lib/policy/config/testdata/bad/multiple_expression_types.yaml @@ -0,0 +1,10 @@ +bots: +- name: multiple-expression-types + action: ALLOW + expression: + all: + - userAgent.startsWith("git/") || userAgent.contains("libgit") + - > + "Git-Protocol" in headers && headers["Git-Protocol"] == "version=2" + any: + - userAgent.startsWith("evilbot/") diff --git a/lib/policy/config/testdata/good/git_client.json b/lib/policy/config/testdata/good/git_client.json new file mode 100644 index 0000000..68a2b3e --- /dev/null +++ b/lib/policy/config/testdata/good/git_client.json @@ -0,0 +1,14 @@ +{ + "bots": [ + { + "name": "allow-git-clients", + "action": "ALLOW", + "expression": { + "all": [ + "userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")", + "\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\"" + ] + } + } + ] +} \ No newline at end of file diff --git a/lib/policy/config/testdata/good/git_client.yaml b/lib/policy/config/testdata/good/git_client.yaml new file mode 100644 index 0000000..44aa2da --- /dev/null +++ b/lib/policy/config/testdata/good/git_client.yaml @@ -0,0 +1,8 @@ +bots: +- name: allow-git-clients + action: ALLOW + expression: + all: + - userAgent.startsWith("git/") || userAgent.contains("libgit") + - > + "Git-Protocol" in headers && headers["Git-Protocol"] == "version=2" diff --git 
a/lib/policy/expressions/README.md b/lib/policy/expressions/README.md new file mode 100644 index 0000000..e61fe7b --- /dev/null +++ b/lib/policy/expressions/README.md @@ -0,0 +1,3 @@ +# Expressions support + +The expressions support is based on ideas from [go-away](https://git.gammaspectra.live/git/go-away) but with different opinions about how things should be done. diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go new file mode 100644 index 0000000..f0ea4fd --- /dev/null +++ b/lib/policy/expressions/environment.go @@ -0,0 +1,45 @@ +package expressions + +import ( + "github.com/google/cel-go/cel" + "github.com/google/cel-go/ext" +) + +// NewEnvironment creates a new CEL environment, this is the set of +// variables and functions that are passed into the CEL scope so that +// Anubis can fail loudly and early when something is invalid instead +// of blowing up at runtime. +func NewEnvironment() (*cel.Env, error) { + return cel.NewEnv( + ext.Strings( + ext.StringsLocale("en_US"), + ext.StringsValidateFormatCalls(true), + ), + + // default all timestamps to UTC + cel.DefaultUTCTimeZone(true), + + // Variables exposed to CEL programs: + cel.Variable("remoteAddress", cel.StringType), + cel.Variable("host", cel.StringType), + cel.Variable("method", cel.StringType), + cel.Variable("userAgent", cel.StringType), + cel.Variable("path", cel.StringType), + cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)), + cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)), + + // Functions exposed to CEL programs: + ) +} + +// Compile takes CEL environment and syntax tree then emits an optimized +// Program for execution. 
+func Compile(env *cel.Env, ast *cel.Ast) (cel.Program, error) { + return env.Program( + ast, + cel.EvalOptions( + // optimize regular expressions right now instead of on the fly + cel.OptOptimize, + ), + ) +} diff --git a/lib/policy/expressions/http_headers.go b/lib/policy/expressions/http_headers.go new file mode 100644 index 0000000..57fcc84 --- /dev/null +++ b/lib/policy/expressions/http_headers.go @@ -0,0 +1,75 @@ +package expressions + +import ( + "net/http" + "reflect" + "strings" + + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/common/types/traits" +) + +// HTTPHeaders is a type wrapper to expose HTTP headers into CEL programs. +type HTTPHeaders struct { + http.Header +} + +func (h HTTPHeaders) ConvertToNative(typeDesc reflect.Type) (any, error) { + return nil, ErrNotImplemented +} + +func (h HTTPHeaders) ConvertToType(typeVal ref.Type) ref.Val { + switch typeVal { + case types.MapType: + return h + case types.TypeType: + return types.MapType + } + + return types.NewErr("can't convert from %q to %q", types.MapType, typeVal) +} + +func (h HTTPHeaders) Equal(other ref.Val) ref.Val { + return types.Bool(false) // We don't want to compare header maps +} + +func (h HTTPHeaders) Type() ref.Type { + return types.MapType +} + +func (h HTTPHeaders) Value() any { return h } + +func (h HTTPHeaders) Find(key ref.Val) (ref.Val, bool) { + k, ok := key.(types.String) + if !ok { + return nil, false + } + + if _, ok := h.Header[string(k)]; !ok { + return nil, false + } + + return types.String(strings.Join(h.Header.Values(string(k)), ",")), true +} + +func (h HTTPHeaders) Contains(key ref.Val) ref.Val { + _, ok := h.Find(key) + return types.Bool(ok) +} + +func (h HTTPHeaders) Get(key ref.Val) ref.Val { + result, ok := h.Find(key) + if !ok { + return types.ValOrErr(result, "no such key: %v", key) + } + return result +} + +func (h HTTPHeaders) Iterator() traits.Iterator { panic("TODO(Xe): implement me") } + 
+func (h HTTPHeaders) IsZeroValue() bool { + return len(h.Header) == 0 +} + +func (h HTTPHeaders) Size() ref.Val { return types.Int(len(h.Header)) } diff --git a/lib/policy/expressions/http_headers_test.go b/lib/policy/expressions/http_headers_test.go new file mode 100644 index 0000000..d56f65c --- /dev/null +++ b/lib/policy/expressions/http_headers_test.go @@ -0,0 +1,52 @@ +package expressions + +import ( + "net/http" + "testing" + + "github.com/google/cel-go/common/types" +) + +func TestHTTPHeaders(t *testing.T) { + headers := HTTPHeaders{ + Header: http.Header{ + "Content-Type": {"application/json"}, + "Cf-Worker": {"true"}, + "User-Agent": {"Go-http-client/2"}, + }, + } + + t.Run("contains-existing-header", func(t *testing.T) { + resp := headers.Contains(types.String("User-Agent")) + if !bool(resp.(types.Bool)) { + t.Fatal("headers does not contain User-Agent") + } + }) + + t.Run("not-contains-missing-header", func(t *testing.T) { + resp := headers.Contains(types.String("Xxx-Random-Header")) + if bool(resp.(types.Bool)) { + t.Fatal("headers does not contain User-Agent") + } + }) + + t.Run("get-existing-header", func(t *testing.T) { + val := headers.Get(types.String("User-Agent")) + switch val.(type) { + case types.String: + // ok + default: + t.Fatalf("result was wrong type %T", val) + } + }) + + t.Run("not-get-missing-header", func(t *testing.T) { + val := headers.Get(types.String("Xxx-Random-Header")) + switch val.(type) { + case *types.Err: + // ok + default: + t.Fatalf("result was wrong type %T", val) + } + }) +} diff --git a/lib/policy/expressions/join.go b/lib/policy/expressions/join.go new file mode 100644 index 0000000..46f2ed5 --- /dev/null +++ b/lib/policy/expressions/join.go @@ -0,0 +1,104 @@ +package expressions + +import ( + "errors" + "fmt" + "strings" + + "github.com/google/cel-go/cel" +) + +// JoinOperator is a type wrapper for and/or operators. +// +// This is a separate type so that validation can be done at the type level. 
+type JoinOperator string + +// Possible values for JoinOperator +const ( + JoinAnd JoinOperator = "&&" + JoinOr JoinOperator = "||" +) + +// Valid ensures that JoinOperator is semantically valid. +func (jo JoinOperator) Valid() error { + switch jo { + case JoinAnd, JoinOr: + return nil + default: + return ErrWrongJoinOperator + } +} + +var ( + ErrWrongJoinOperator = errors.New("expressions: invalid join operator") + ErrNoExpressions = errors.New("expressions: cannot join zero expressions") + ErrCantCompile = errors.New("expressions: can't compile one expression") +) + +// JoinClauses joins a list of compiled clauses into one big if statement. +// +// Imagine the following two clauses: +// +// ball.color == "red" +// ball.shape == "round" +// +// JoinClauses would emit one "joined" clause such as: +// +// ( ball.color == "red" ) && ( ball.shape == "round" ) +func JoinClauses(env *cel.Env, operator JoinOperator, clauses ...*cel.Ast) (*cel.Ast, error) { + if err := operator.Valid(); err != nil { + return nil, fmt.Errorf("%w: wanted && or ||, got: %q", err, operator) + } + + switch len(clauses) { + case 0: + return nil, ErrNoExpressions + case 1: + return clauses[0], nil + } + + var exprs []string + var errs []error + + for _, clause := range clauses { + clauseStr, err := cel.AstToString(clause) + if err != nil { + errs = append(errs, err) + continue + } + + exprs = append(exprs, "( "+clauseStr+" )") + } + + if len(errs) != 0 { + return nil, fmt.Errorf("errors while decompiling statements: %w", errors.Join(errs...)) + } + + statement := strings.Join(exprs, " "+string(operator)+" ") + result, iss := env.Compile(statement) + if iss != nil { + return nil, iss.Err() + } + + return result, nil +} + +func Join(env *cel.Env, operator JoinOperator, clauses ...string) (*cel.Ast, error) { + var statements []*cel.Ast + var errs []error + + for _, clause := range clauses { + stmt, iss := env.Compile(clause) + if iss != nil && iss.Err() != nil { + errs = append(errs, 
fmt.Errorf("%w: %q gave: %w", ErrCantCompile, clause, iss.Err())) + continue + } + statements = append(statements, stmt) + } + + if len(errs) != 0 { + return nil, fmt.Errorf("errors while joining clauses: %w", errors.Join(errs...)) + } + + return JoinClauses(env, operator, statements...) +} diff --git a/lib/policy/expressions/join_test.go b/lib/policy/expressions/join_test.go new file mode 100644 index 0000000..5eff9bd --- /dev/null +++ b/lib/policy/expressions/join_test.go @@ -0,0 +1,90 @@ +package expressions + +import ( + "errors" + "testing" + + "github.com/google/cel-go/cel" +) + +func TestJoin(t *testing.T) { + env, err := NewEnvironment() + if err != nil { + t.Fatal(err) + } + + for _, tt := range []struct { + name string + clauses []string + op JoinOperator + err error + resultStr string + }{ + { + name: "no-clauses", + clauses: []string{}, + op: JoinAnd, + err: ErrNoExpressions, + }, + { + name: "one-clause-identity", + clauses: []string{`remoteAddress == "8.8.8.8"`}, + op: JoinAnd, + err: nil, + resultStr: `remoteAddress == "8.8.8.8"`, + }, + { + name: "multi-clause-and", + clauses: []string{ + `remoteAddress == "8.8.8.8"`, + `host == "anubis.techaro.lol"`, + }, + op: JoinAnd, + err: nil, + resultStr: `remoteAddress == "8.8.8.8" && host == "anubis.techaro.lol"`, + }, + { + name: "multi-clause-or", + clauses: []string{ + `remoteAddress == "8.8.8.8"`, + `host == "anubis.techaro.lol"`, + }, + op: JoinOr, + err: nil, + resultStr: `remoteAddress == "8.8.8.8" || host == "anubis.techaro.lol"`, + }, + { + name: "git-user-agent", + clauses: []string{ + `userAgent.startsWith("git/") || userAgent.contains("libgit")`, + `"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"`, + }, + op: JoinAnd, + err: nil, + resultStr: `(userAgent.startsWith("git/") || userAgent.contains("libgit")) && "Git-Protocol" in headers && +headers["Git-Protocol"] == "version=2"`, + }, + } { + t.Run(tt.name, func(t *testing.T) { + result, err := Join(env, tt.op, tt.clauses...) 
+ if !errors.Is(err, tt.err) { + t.Errorf("wanted error %v but got: %v", tt.err, err) + } + + if tt.err != nil { + return + } + + program, err := cel.AstToString(result) + if err != nil { + t.Fatalf("can't decompile program: %v", err) + } + + if tt.resultStr != program { + t.Logf("wanted: %s", tt.resultStr) + t.Logf("got: %s", program) + t.Error("program did not compile as expected") + } + }) + } +} diff --git a/lib/policy/expressions/url_values.go b/lib/policy/expressions/url_values.go new file mode 100644 index 0000000..a4c6351 --- /dev/null +++ b/lib/policy/expressions/url_values.go @@ -0,0 +1,78 @@ +package expressions + +import ( + "errors" + "net/url" + "reflect" + "strings" + + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/common/types/traits" +) + +var ErrNotImplemented = errors.New("expressions: not implemented") + +// URLValues is a type wrapper to expose url.Values into CEL programs. +type URLValues struct { + url.Values +} + +func (u URLValues) ConvertToNative(typeDesc reflect.Type) (any, error) { + return nil, ErrNotImplemented +} + +func (u URLValues) ConvertToType(typeVal ref.Type) ref.Val { + switch typeVal { + case types.MapType: + return u + case types.TypeType: + return types.MapType + } + + return types.NewErr("can't convert from %q to %q", types.MapType, typeVal) +} + +func (u URLValues) Equal(other ref.Val) ref.Val { + return types.Bool(false) // We don't want to compare header maps +} + +func (u URLValues) Type() ref.Type { + return types.MapType +} + +func (u URLValues) Value() any { return u } + +func (u URLValues) Find(key ref.Val) (ref.Val, bool) { + k, ok := key.(types.String) + if !ok { + return nil, false + } + + if _, ok := u.Values[string(k)]; !ok { + return nil, false + } + + return types.String(strings.Join(u.Values[string(k)], ",")), true +} + +func (u URLValues) Contains(key ref.Val) ref.Val { + _, ok := u.Find(key) + return types.Bool(ok) +} + +func (u 
URLValues) Get(key ref.Val) ref.Val { + result, ok := u.Find(key) + if !ok { + return types.ValOrErr(result, "no such key: %v", key) + } + return result +} + +func (u URLValues) Iterator() traits.Iterator { panic("TODO(Xe): implement me") } + +func (u URLValues) IsZeroValue() bool { + return len(u.Values) == 0 +} + +func (u URLValues) Size() ref.Val { return types.Int(len(u.Values)) } diff --git a/lib/policy/expressions/url_values_test.go b/lib/policy/expressions/url_values_test.go new file mode 100644 index 0000000..49d27b7 --- /dev/null +++ b/lib/policy/expressions/url_values_test.go @@ -0,0 +1,50 @@ +package expressions + +import ( + "net/url" + "testing" + + "github.com/google/cel-go/common/types" +) + +func TestURLValues(t *testing.T) { + headers := URLValues{ + Values: url.Values{ + "format": {"json"}, + }, + } + + t.Run("contains-existing-key", func(t *testing.T) { + resp := headers.Contains(types.String("format")) + if !bool(resp.(types.Bool)) { + t.Fatal("headers does not contain User-Agent") + } + }) + + t.Run("not-contains-missing-key", func(t *testing.T) { + resp := headers.Contains(types.String("not-there")) + if bool(resp.(types.Bool)) { + t.Fatal("headers does not contain User-Agent") + } + }) + + t.Run("get-existing-key", func(t *testing.T) { + val := headers.Get(types.String("format")) + switch val.(type) { + case types.String: + // ok + default: + t.Fatalf("result was wrong type %T", val) + } + }) + + t.Run("not-get-missing-key", func(t *testing.T) { + val := headers.Get(types.String("not-there")) + switch val.(type) { + case *types.Err: + // ok + default: + t.Fatalf("result was wrong type %T", val) + } + }) +} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index d5d1188..1dfeafb 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -94,6 +94,15 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon } } + if b.Expression != nil { + c, err := NewCELChecker(b.Expression) + if err != nil { + 
validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s expressions: %w", b.Name, err)) + } else { + cl = append(cl, c) + } + } + if b.Challenge == nil { parsedBot.Challenge = &config.ChallengeRules{ Difficulty: defaultDifficulty, diff --git a/lib/testdata/cloudflare-workers-cel.yaml b/lib/testdata/cloudflare-workers-cel.yaml new file mode 100644 index 0000000..123b634 --- /dev/null +++ b/lib/testdata/cloudflare-workers-cel.yaml @@ -0,0 +1,4 @@ +bots: +- name: cloudflare-workers + expression: '"Cf-Worker" in headers' + action: DENY \ No newline at end of file diff --git a/lib/testdata/cloudflare-workers-header.yaml b/lib/testdata/cloudflare-workers-header.yaml new file mode 100644 index 0000000..89bc069 --- /dev/null +++ b/lib/testdata/cloudflare-workers-header.yaml @@ -0,0 +1,5 @@ +bots: +- name: cloudflare-workers + headers_regex: + CF-Worker: .* + action: DENY \ No newline at end of file diff --git a/yeetfile.js b/yeetfile.js index b6ba804..8068d9c 100644 --- a/yeetfile.js +++ b/yeetfile.js @@ -27,6 +27,7 @@ $`npm run assets`; $`mkdir -p ${doc}/data`; $`cp -a data/apps ${doc}/data/apps`; $`cp -a data/bots ${doc}/data/bots`; + $`cp -a data/clients ${doc}/data/clients`; $`cp -a data/common ${doc}/data/common`; $`cp -a data/crawlers ${doc}/data/crawlers`; },