diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml index 9cfa4f0..eaf13a1 100644 --- a/.github/workflows/zizmor.yml +++ b/.github/workflows/zizmor.yml @@ -21,7 +21,7 @@ jobs: persist-credentials: false - name: Install the latest version of uv - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 - name: Run zizmor 🌈 run: uvx zizmor --format sarif . > results.sarif @@ -29,7 +29,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload SARIF file - uses: github/codeql-action/upload-sarif@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2 + uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4 with: sarif_file: results.sarif category: zizmor diff --git a/data/bots/_deny-pathological.yaml b/data/bots/_deny-pathological.yaml index 09d4bfc..ba64a04 100644 --- a/data/bots/_deny-pathological.yaml +++ b/data/bots/_deny-pathological.yaml @@ -1,3 +1,4 @@ - import: (data)/bots/cloudflare-workers.yaml - import: (data)/bots/headless-browsers.yaml -- import: (data)/bots/us-ai-scraper.yaml \ No newline at end of file +- import: (data)/bots/us-ai-scraper.yaml +- import: (data)/bots/custom-async-http-client.yaml diff --git a/data/bots/custom-async-http-client.yaml b/data/bots/custom-async-http-client.yaml new file mode 100644 index 0000000..d42d2d8 --- /dev/null +++ b/data/bots/custom-async-http-client.yaml @@ -0,0 +1,5 @@ +- name: "custom-async-http-client" + user_agent_regex: "Custom-AsyncHttpClient" + action: WEIGH + weight: + adjust: 10 diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 97e12ee..e9e8da4 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Downstream consumers can change the default [log/slog#Logger](https://pkg.go.dev/log/slog#Logger) instance that Anubis uses by setting 
`opts.Logger` to your slog instance of choice ([#864](https://github.com/TecharoHQ/anubis/issues/864)). - The [Thoth client](https://anubis.techaro.lol/docs/admin/thoth) is now public in the repo instead of being an internal package. +- [Custom-AsyncHttpClient](https://github.com/AsyncHttpClient/async-http-client)'s default User-Agent has an increased weight by default ([#852](https://github.com/TecharoHQ/anubis/issues/852)). +- The [`segments`](./admin/configuration/expressions.mdx#segments) function was added for splitting a path into its slash-separated segments. +- When issuing a challenge, Anubis stores information about that challenge into the store. That stored information is later used to validate challenge responses. This works around nondeterminism in bot rules. ([#917](https://github.com/TecharoHQ/anubis/issues/917)) ## v1.21.3: Minfilia Warde - Echo 3 diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx index 49d0e05..6cb7954 100644 --- a/docs/docs/admin/configuration/expressions.mdx +++ b/docs/docs/admin/configuration/expressions.mdx @@ -232,6 +232,39 @@ This is best applied when doing explicit block rules, eg: It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand. +### `segments` + +Available in `bot` expressions. + +```ts +function segments(path: string): string[]; +``` + +`segments` returns the list of slash-separated path segments, ignoring the leading slash. Here is what it will return with some common paths: + +| Input | Output | +| :----------------------- | :--------------------- | +| `segments("/")` | `[""]` | +| `segments("/foo/bar")` | `["foo", "bar"] ` | +| `segments("/users/xe/")` | `["users", "xe", ""] ` | + +:::note + +If the path ends with a `/`, then the last element of the result will be an empty string. 
This is because `/users/xe` and `/users/xe/` are semantically different paths. + +::: + +This is useful if you want to write rules that allow requests that have no query parameters only if they have less than two path segments: + +```yaml +- name: two-path-segments-no-query + action: ALLOW + expression: + all: + - size(query) == 0 + - size(segments(path)) < 2 +``` + ## Life advice Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this. diff --git a/internal/log.go b/internal/log.go index b503b11..1f9e0e7 100644 --- a/internal/log.go +++ b/internal/log.go @@ -28,6 +28,9 @@ func InitSlog(level string) { func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger { return base.With( + "host", r.Host, + "method", r.Method, + "path", r.URL.Path, "user_agent", r.UserAgent(), "accept_language", r.Header.Get("Accept-Language"), "priority", r.Header.Get("Priority"), diff --git a/lib/anubis.go b/lib/anubis.go index 736f797..790985f 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -91,41 +91,39 @@ func (s *Server) getTokenKeyfunc() jwt.Keyfunc { } } -func (s *Server) challengeFor(r *http.Request) (*challenge.Challenge, error) { +func (s *Server) getChallenge(r *http.Request) (*challenge.Challenge, error) { ckies := r.CookiesNamed(anubis.TestCookieName) - if len(ckies) == 0 { - return s.issueChallenge(r.Context(), r) + return nil, store.ErrNotFound } j := store.JSON[challenge.Challenge]{Underlying: s.store} ckie := ckies[0] chall, err := j.Get(r.Context(), "challenge:"+ckie.Value) - if err != nil { - if errors.Is(err, store.ErrNotFound) { - return s.issueChallenge(r.Context(), r) - } - return nil, err - } - - return 
&chall, nil + return &chall, err } -func (s *Server) issueChallenge(ctx context.Context, r *http.Request) (*challenge.Challenge, error) { +func (s *Server) issueChallenge(ctx context.Context, r *http.Request, lg *slog.Logger, cr policy.CheckResult, rule *policy.Bot) (*challenge.Challenge, error) { + if cr.Rule != config.RuleChallenge { + slog.Error("this should be impossible, asked to issue a challenge but the rule is not a challenge rule", "cr", cr, "rule", rule) + //return nil, errors.New("[unexpected] this codepath should be impossible, asked to issue a challenge for a non-challenge rule") + } + id, err := uuid.NewV7() if err != nil { return nil, err } - var randomData = make([]byte, 256) + var randomData = make([]byte, 64) if _, err := rand.Read(randomData); err != nil { return nil, err } chall := challenge.Challenge{ ID: id.String(), + Method: rule.Challenge.Algorithm, RandomData: fmt.Sprintf("%x", randomData), IssuedAt: time.Now(), Metadata: map[string]string{ @@ -139,6 +137,8 @@ func (s *Server) issueChallenge(ctx context.Context, r *http.Request) (*challeng return nil, err } + lg.Info("new challenge issued", "challenge", id.String()) + return &chall, err } @@ -186,21 +186,21 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS if err != nil { lg.Debug("cookie not found", "path", r.URL.Path) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } if err := ckie.Valid(); err != nil { lg.Debug("cookie is invalid", "err", err) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } if time.Now().After(ckie.Expires) && !ckie.Expires.IsZero() { lg.Debug("cookie expired", "path", r.URL.Path) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, 
httpStatusOnly) return } @@ -209,7 +209,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS if err != nil || !token.Valid { lg.Debug("invalid token", "path", r.URL.Path, "err", err) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } @@ -217,7 +217,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS if !ok { lg.Debug("invalid token claims type", "path", r.URL.Path) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } @@ -225,14 +225,14 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS if !ok { lg.Debug("policyRule claim is not a string") s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } if policyRule != rule.Hash() { lg.Debug("user originally passed with a different rule, issuing new challenge", "old", policyRule, "new", rule.Name) s.ClearCookie(w, CookieOpts{Path: cookiePath, Host: r.Host}) - s.RenderIndex(w, r, rule, httpStatusOnly) + s.RenderIndex(w, r, cr, rule, httpStatusOnly) return } @@ -347,7 +347,7 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) { } lg = lg.With("check_result", cr) - chall, err := s.challengeFor(r) + chall, err := s.issueChallenge(r.Context(), r, lg, cr, rule) if err != nil { lg.Error("failed to fetch or issue challenge", "err", err) w.WriteHeader(http.StatusInternalServerError) @@ -437,19 +437,21 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { } lg = lg.With("check_result", cr) - impl, ok := challenge.Get(rule.Challenge.Algorithm) + chall, err := s.getChallenge(r) + if err != nil { + lg.Error("getChallenge failed", "err", err) + s.respondWithError(w, r, 
fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) + return + } + + impl, ok := challenge.Get(chall.Method) if !ok { lg.Error("check failed", "err", err) s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) return } - chall, err := s.challengeFor(r) - if err != nil { - lg.Error("check failed", "err", err) - s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) - return - } + lg = lg.With("challenge", chall.ID) in := &challenge.ValidateInput{ Challenge: chall, @@ -467,9 +469,13 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { case errors.As(err, &cerr): switch { case errors.Is(err, challenge.ErrFailed): + lg.Error("challenge failed", "err", err) s.respondWithStatus(w, r, cerr.PublicReason, cerr.StatusCode) + return case errors.Is(err, challenge.ErrInvalidFormat), errors.Is(err, challenge.ErrMissingField): + lg.Error("invalid challenge format", "err", err) s.respondWithError(w, r, cerr.PublicReason) + return } } } diff --git a/lib/challenge/challenge.go b/lib/challenge/challenge.go index 4c975c8..1200e33 100644 --- a/lib/challenge/challenge.go +++ b/lib/challenge/challenge.go @@ -5,6 +5,7 @@ import "time" // Challenge is the metadata about a single challenge issuance. 
type Challenge struct { ID string `json:"id"` // UUID identifying the challenge + Method string `json:"method"` // Challenge method RandomData string `json:"randomData"` // The random data the client processes IssuedAt time.Time `json:"issuedAt"` // When the challenge was issued Metadata map[string]string `json:"metadata"` // Challenge metadata such as IP address and user agent diff --git a/lib/http.go b/lib/http.go index 3e29de9..4f8ed02 100644 --- a/lib/http.go +++ b/lib/http.go @@ -111,7 +111,7 @@ func randomChance(n int) bool { return rand.Intn(n) == 0 } -func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *policy.Bot, returnHTTPStatusOnly bool) { +func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, rule *policy.Bot, returnHTTPStatusOnly bool) { localizer := localization.GetLocalizer(r) if returnHTTPStatusOnly { @@ -129,14 +129,16 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic } challengesIssued.WithLabelValues("embedded").Add(1) - chall, err := s.challengeFor(r) + chall, err := s.issueChallenge(r.Context(), r, lg, cr, rule) if err != nil { - lg.Error("can't get challenge", "err", "err") + lg.Error("can't get challenge", "err", err) s.ClearCookie(w, CookieOpts{Name: anubis.TestCookieName, Host: r.Host}) s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) return } + lg = lg.With("challenge", chall.ID) + var ogTags map[string]string = nil if s.opts.OpenGraph.Enabled { var err error @@ -154,7 +156,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic Expiry: 30 * time.Minute, }) - impl, ok := challenge.Get(rule.Challenge.Algorithm) + impl, ok := challenge.Get(chall.Method) if !ok { lg.Error("check failed", "err", "can't get algorithm", "algorithm", rule.Challenge.Algorithm) s.ClearCookie(w, CookieOpts{Name: anubis.TestCookieName, Host: r.Host}) diff --git 
a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 14b57be..27f298c 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -2,6 +2,7 @@ package expressions import ( "math/rand/v2" + "strings" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" @@ -54,6 +55,28 @@ func BotEnvironment() (*cel.Env, error) { }), ), ), + + cel.Function("segments", + cel.Overload("segments_string_list_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.StringType), + cel.UnaryBinding(func(path ref.Val) ref.Val { + pathStrType, ok := path.(types.String) + if !ok { + return types.ValOrErr(path, "path is not a string, but is %T", path) + } + + pathStr := string(pathStrType) + if !strings.HasPrefix(pathStr, "/") { + return types.ValOrErr(path, "path does not start with /") + } + + pathList := strings.Split(string(pathStr), "/")[1:] + + return types.NewStringList(types.DefaultTypeAdapter, pathList) + }), + ), + ), ) } diff --git a/lib/policy/expressions/environment_test.go b/lib/policy/expressions/environment_test.go index 9878e1c..4e7d796 100644 --- a/lib/policy/expressions/environment_test.go +++ b/lib/policy/expressions/environment_test.go @@ -12,99 +12,228 @@ func TestBotEnvironment(t *testing.T) { t.Fatalf("failed to create bot environment: %v", err) } - tests := []struct { - name string - expression string - headers map[string]string - expected types.Bool - description string - }{ - { - name: "missing-header", - expression: `missingHeader(headers, "Missing-Header")`, - headers: map[string]string{ - "User-Agent": "test-agent", - "Content-Type": "application/json", + t.Run("missingHeader", func(t *testing.T) { + tests := []struct { + name string + expression string + headers map[string]string + expected types.Bool + description string + }{ + { + name: "missing-header", + expression: `missingHeader(headers, "Missing-Header")`, + headers: map[string]string{ + "User-Agent": "test-agent", + 
"Content-Type": "application/json", + }, + expected: types.Bool(true), + description: "should return true when header is missing", }, - expected: types.Bool(true), - description: "should return true when header is missing", - }, - { - name: "existing-header", - expression: `missingHeader(headers, "User-Agent")`, - headers: map[string]string{ - "User-Agent": "test-agent", - "Content-Type": "application/json", + { + name: "existing-header", + expression: `missingHeader(headers, "User-Agent")`, + headers: map[string]string{ + "User-Agent": "test-agent", + "Content-Type": "application/json", + }, + expected: types.Bool(false), + description: "should return false when header exists", }, - expected: types.Bool(false), - description: "should return false when header exists", - }, - { - name: "case-sensitive", - expression: `missingHeader(headers, "user-agent")`, - headers: map[string]string{ - "User-Agent": "test-agent", + { + name: "case-sensitive", + expression: `missingHeader(headers, "user-agent")`, + headers: map[string]string{ + "User-Agent": "test-agent", + }, + expected: types.Bool(true), + description: "should be case-sensitive (user-agent != User-Agent)", }, - expected: types.Bool(true), - description: "should be case-sensitive (user-agent != User-Agent)", - }, - { - name: "empty-headers", - expression: `missingHeader(headers, "Any-Header")`, - headers: map[string]string{}, - expected: types.Bool(true), - description: "should return true for any header when map is empty", - }, - { - name: "real-world-sec-ch-ua", - expression: `missingHeader(headers, "Sec-Ch-Ua")`, - headers: map[string]string{ - "User-Agent": "curl/7.68.0", - "Accept": "*/*", - "Host": "example.com", + { + name: "empty-headers", + expression: `missingHeader(headers, "Any-Header")`, + headers: map[string]string{}, + expected: types.Bool(true), + description: "should return true for any header when map is empty", }, - expected: types.Bool(true), - description: "should detect missing 
browser-specific headers from bots", - }, - { - name: "browser-with-sec-ch-ua", - expression: `missingHeader(headers, "Sec-Ch-Ua")`, - headers: map[string]string{ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", - "Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`, - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + { + name: "real-world-sec-ch-ua", + expression: `missingHeader(headers, "Sec-Ch-Ua")`, + headers: map[string]string{ + "User-Agent": "curl/7.68.0", + "Accept": "*/*", + "Host": "example.com", + }, + expected: types.Bool(true), + description: "should detect missing browser-specific headers from bots", }, - expected: types.Bool(false), - description: "should return false when browser sends Sec-Ch-Ua header", - }, - } + { + name: "browser-with-sec-ch-ua", + expression: `missingHeader(headers, "Sec-Ch-Ua")`, + headers: map[string]string{ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Sec-Ch-Ua": `"Chrome"; v="91", "Not A Brand"; v="99"`, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + }, + expected: types.Bool(false), + description: "should return false when browser sends Sec-Ch-Ua header", + }, + } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - prog, err := Compile(env, tt.expression) - if err != nil { - t.Fatalf("failed to compile expression %q: %v", tt.expression, err) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } - result, _, err := prog.Eval(map[string]interface{}{ - "headers": tt.headers, + result, _, err := prog.Eval(map[string]interface{}{ + "headers": tt.headers, + }) + if err != nil { + t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) + } + + if result != tt.expected { + t.Errorf("%s: expected %v, got %v", 
tt.description, tt.expected, result) + } }) - if err != nil { - t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) - } + } - if result != tt.expected { - t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result) + t.Run("function-compilation", func(t *testing.T) { + src := `missingHeader(headers, "Test-Header")` + _, err := Compile(env, src) + if err != nil { + t.Fatalf("failed to compile missingHeader expression: %v", err) } }) - } + }) - t.Run("function-compilation", func(t *testing.T) { - src := `missingHeader(headers, "Test-Header")` - _, err := Compile(env, src) - if err != nil { - t.Fatalf("failed to compile missingHeader expression: %v", err) + t.Run("segments", func(t *testing.T) { + for _, tt := range []struct { + name string + description string + expression string + path string + expected types.Bool + }{ + { + name: "simple", + description: "/ should have one path segment", + expression: `size(segments(path)) == 1`, + path: "/", + expected: types.Bool(true), + }, + { + name: "two segments without trailing slash", + description: "/user/foo should have two segments", + expression: `size(segments(path)) == 2`, + path: "/user/foo", + expected: types.Bool(true), + }, + { + name: "at least two segments", + description: "/foo/bar/ should have at least two path segments", + expression: `size(segments(path)) >= 2`, + path: "/foo/bar/", + expected: types.Bool(true), + }, + { + name: "at most two segments", + description: "/foo/bar/ does not have less than two path segments", + expression: `size(segments(path)) < 2`, + path: "/foo/bar/", + expected: types.Bool(false), + }, + } { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } + + result, _, err := prog.Eval(map[string]interface{}{ + "path": tt.path, + }) + if err != nil { + t.Fatalf("failed to evaluate expression %q: %v", tt.expression, err) + } + + if result 
!= tt.expected { + t.Errorf("%s: expected %v, got %v", tt.description, tt.expected, result) + } + }) } + + t.Run("invalid", func(t *testing.T) { + for _, tt := range []struct { + name string + description string + expression string + env any + wantFailCompile bool + wantFailEval bool + }{ + { + name: "segments of headers", + description: "headers are not a path list", + expression: `segments(headers)`, + env: map[string]any{ + "headers": map[string]string{ + "foo": "bar", + }, + }, + wantFailCompile: true, + }, + { + name: "invalid path type", + description: "a path should be a sting", + expression: `size(segments(path)) != 0`, + env: map[string]any{ + "path": 4, + }, + wantFailEval: true, + }, + { + name: "invalid path", + description: "a path should start with a leading slash", + expression: `size(segments(path)) != 0`, + env: map[string]any{ + "path": "foo", + }, + wantFailEval: true, + }, + } { + t.Run(tt.name, func(t *testing.T) { + prog, err := Compile(env, tt.expression) + if err != nil { + if !tt.wantFailCompile { + t.Log(tt.description) + t.Fatalf("failed to compile expression %q: %v", tt.expression, err) + } else { + return + } + } + + _, _, err = prog.Eval(tt.env) + + if err == nil { + t.Log(tt.description) + t.Fatal("wanted an error but got none") + } + + t.Log(err) + }) + } + }) + + t.Run("function-compilation", func(t *testing.T) { + src := `size(segments(path)) <= 2` + _, err := Compile(env, src) + if err != nil { + t.Fatalf("failed to compile missingHeader expression: %v", err) + } + }) }) }