feat(config): add ability to customize HTTP status codes Anubis returns (#393)

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-04-29 15:13:44 -04:00 committed by GitHub
parent 2935bd4aa7
commit 74d330cec5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 242 additions and 9 deletions

View File

@ -48,3 +48,11 @@ bots:
action: CHALLENGE
dnsbl: false
# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really really want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
DENY: 200

View File

@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
- Added the ability to [customize Anubis' HTTP status codes](./admin/configuration/custom-status-codes.mdx) ([#355](https://github.com/TecharoHQ/anubis/issues/355))
## v1.17.0: Asahi sas Brutus
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))

View File

@ -0,0 +1,19 @@
# Custom status codes for Anubis errors
Out of the box, Anubis will reply with `HTTP 200` for challenge and denial pages. This is intentional: when aggressive AI scrapers are faced with a non-200 response, they will hammer the page over and over until they get a 200 response, so replying with 200 makes them stop retrying. This default may not be desirable for every deployment, so Anubis lets you customize which HTTP status codes are returned when it serves challenge and denial pages.
This is configured in the `status_codes` block of your [bot policy file](../policies.mdx):
```yaml
status_codes:
CHALLENGE: 200
DENY: 200
```
To match Cloudflare's behavior, use a configuration like this:
```yaml
status_codes:
CHALLENGE: 403
DENY: 403
```

View File

@ -170,7 +170,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
hash := rule.Hash()
lg.Debug("rule hash", "hash", hash)
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), http.StatusOK)
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), s.policy.StatusCodes.Deny)
return true
case config.RuleChallenge:
lg.Debug("challenge requested")
@ -202,7 +202,7 @@ func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string,
if resp != dnsbl.AllGood {
lg.Info("DNSBL hit", "status", resp.String())
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), http.StatusOK)
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), s.policy.StatusCodes.Deny)
return true
}
}

View File

@ -393,3 +393,48 @@ func TestBasePrefix(t *testing.T) {
})
}
}
// TestCustomStatusCodes checks that the status codes configured in the policy
// file's status_codes block are the ones actually sent to clients.
//
// The policy loaded from ./testdata/aggressive_403.yaml is expected to match
// rules on the User-Agent values used as keys of statusMap below, so each
// subtest sends one request with that User-Agent and compares the response
// status code against the mapped value. The "ALLOW" entry is expected to reach
// the wrapped handler, which always answers 200.
func TestCustomStatusCodes(t *testing.T) {
	h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		t.Log(r.UserAgent())
		w.WriteHeader(http.StatusOK)
		fmt.Fprintln(w, "OK")
	})

	// User-Agent sent by the subtest -> status code the client must observe.
	statusMap := map[string]int{
		"ALLOW":     200,
		"CHALLENGE": 401,
		"DENY":      403,
	}

	pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
	pol.DefaultDifficulty = 4

	srv := spawnAnubis(t, Options{
		Next:   h,
		Policy: pol,
	})

	ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
	defer ts.Close()

	for userAgent, statusCode := range statusMap {
		t.Run(userAgent, func(t *testing.T) {
			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, ts.URL, nil)
			if err != nil {
				t.Fatal(err)
			}

			req.Header.Set("User-Agent", userAgent)

			resp, err := ts.Client().Do(req)
			if err != nil {
				t.Fatal(err)
			}
			// Close the body so the connection is released instead of leaking
			// for the remainder of the test run.
			defer resp.Body.Close()

			if resp.StatusCode != statusCode {
				t.Errorf("wanted status code %d but got: %d", statusCode, resp.StatusCode)
			}
		})
	}
}

View File

@ -67,7 +67,10 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
return
}
handler := internal.NoStoreCache(templ.Handler(component))
handler := internal.NoStoreCache(templ.Handler(
component,
templ.WithStatus(s.opts.Policy.StatusCodes.Challenge),
))
handler.ServeHTTP(w, r)
}

View File

@ -6,6 +6,7 @@ import (
"io"
"io/fs"
"net"
"net/http"
"os"
"regexp"
"strings"
@ -28,6 +29,7 @@ var (
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
ErrStatusCodeNotValid = errors.New("config.StatusCode: status code not valid, must be between 100 and 599")
)
type Rule string
@ -262,9 +264,33 @@ func (boi *BotOrImport) Valid() error {
return ErrMustSetBotOrImportRules
}
// StatusCodes holds the HTTP status codes sent with challenge and denial
// responses. It is decoded from the `status_codes` block of the policy file.
type StatusCodes struct {
	// Challenge is the status code used for challenge pages.
	Challenge int `json:"CHALLENGE"`
	// Deny is the status code used for denial pages.
	Deny int `json:"DENY"`
}

// Valid reports whether both status codes lie in the inclusive range 100-599.
//
// It returns nil when both codes are acceptable. Otherwise it returns an error
// that wraps ErrStatusCodeNotValid once per offending field, so callers can
// detect the condition with errors.Is. An unset (zero) code is rejected as
// well because it falls below 100.
func (sc StatusCodes) Valid() error {
	var errs []error

	// The bounds must be combined with ||: a value cannot be below 100 and
	// above 599 at the same time, so an && check would never reject anything.
	if sc.Challenge < 100 || sc.Challenge > 599 {
		errs = append(errs, fmt.Errorf("%w: challenge is %d", ErrStatusCodeNotValid, sc.Challenge))
	}

	if sc.Deny < 100 || sc.Deny > 599 {
		errs = append(errs, fmt.Errorf("%w: deny is %d", ErrStatusCodeNotValid, sc.Deny))
	}

	if len(errs) != 0 {
		return fmt.Errorf("status codes not valid:\n%w", errors.Join(errs...))
	}

	return nil
}
// fileConfig is the structure Load decodes the policy file into.
type fileConfig struct {
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
// StatusCodes is read from the `status_codes` key of the policy file.
StatusCodes StatusCodes `json:"status_codes"`
}
func (c fileConfig) Valid() error {
@ -280,6 +306,10 @@ func (c fileConfig) Valid() error {
}
}
if err := c.StatusCodes.Valid(); err != nil {
errs = append(errs, err)
}
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
@ -289,6 +319,10 @@ func (c fileConfig) Valid() error {
func Load(fin io.Reader, fname string) (*Config, error) {
var c fileConfig
c.StatusCodes = StatusCodes{
Challenge: http.StatusOK,
Deny: http.StatusOK,
}
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
@ -298,7 +332,8 @@ func Load(fin io.Reader, fname string) (*Config, error) {
}
result := &Config{
DNSBL: c.DNSBL,
DNSBL: c.DNSBL,
StatusCodes: c.StatusCodes,
}
var validationErrs []error
@ -331,8 +366,9 @@ func Load(fin io.Reader, fname string) (*Config, error) {
}
// Config is the value Load returns for a policy file.
type Config struct {
Bots []BotConfig
DNSBL bool
Bots []BotConfig
DNSBL bool
// StatusCodes is copied by Load from the decoded policy file.
StatusCodes StatusCodes
}
func (c Config) Valid() error {

View File

@ -0,0 +1,13 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 0,
"DENY": 0
}
}

View File

@ -0,0 +1,8 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 0
DENY: 0

View File

@ -0,0 +1,13 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 200,
"DENY": 200
}
}

View File

@ -0,0 +1,8 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 200
DENY: 200

View File

@ -0,0 +1,13 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 403,
"DENY": 403
}
}

View File

@ -0,0 +1,8 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 403
DENY: 403

View File

@ -24,11 +24,13 @@ type ParsedConfig struct {
Bots []Bot
DNSBL bool
DefaultDifficulty int
StatusCodes config.StatusCodes
}
// NewParsedConfig returns a ParsedConfig that keeps a reference to orig and
// carries over its StatusCodes. The remaining fields are left at their zero
// values.
func NewParsedConfig(orig *config.Config) *ParsedConfig {
return &ParsedConfig{
orig: orig,
orig: orig,
StatusCodes: orig.StatusCodes,
}
}

12
lib/testdata/aggressive_403.yaml vendored Normal file
View File

@ -0,0 +1,12 @@
bots:
- name: deny
user_agent_regex: DENY
action: DENY
- name: challenge
user_agent_regex: CHALLENGE
action: CHALLENGE
status_codes:
CHALLENGE: 401
DENY: 403

View File

@ -0,0 +1,12 @@
bots:
- name: deny
user_agent_regex: DENY
action: DENY
- name: challenge
user_agent_regex: CHALLENGE
action: CHALLENGE
status_codes:
CHALLENGE: 401
DENY: 403

View File

@ -37,6 +37,7 @@ go run ../cmd/unixhttpd &
go tool anubis \
--bind=./anubis.sock \
--bind-network=unix \
--policy-fname=../anubis_configs/aggressive_403.yaml \
--target=unix://$(pwd)/unixhttpd.sock &
# A simple TLS terminator that forwards to Anubis, which will forward to

View File

@ -0,0 +1,30 @@
/**
 * Request the /reqmeta endpoint with the given User-Agent header.
 *
 * @param {string} userAgent value to send in the User-Agent request header
 * @returns {Promise<number>} the HTTP status code of the response
 */
async function testWithUserAgent(userAgent) {
  const response = await fetch("https://relayd.local.cetacean.club:3004/reqmeta", {
    headers: {
      "User-Agent": userAgent,
    },
  });

  return response.status;
}
// Status code each User-Agent is expected to receive.
const expected = {
  allow: 200,
  challenge: 401,
  deny: 403,
};

// Probe one User-Agent at a time. Keys are inserted in the same order as in
// `expected` so the JSON comparison below is order-stable.
const codes = {};
for (const [key, userAgent] of [
  ["allow", "ALLOW"],
  ["challenge", "CHALLENGE"],
  ["deny", "DENY"],
]) {
  codes[key] = await testWithUserAgent(userAgent);
}

console.log("ALLOW: ", codes.allow);
console.log("CHALLENGE:", codes.challenge);
console.log("DENY: ", codes.deny);

// Fail loudly when any observed status code differs from the expectation.
const got = JSON.stringify(codes);
const want = JSON.stringify(expected);
if (got !== want) {
  throw new Error(`wanted ${want}, got: ${got}`);
}