mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 01:38:14 -04:00
feat(config): add ability to customize HTTP status codes Anubis returns (#393)
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
2935bd4aa7
commit
74d330cec5
@ -48,3 +48,11 @@ bots:
|
||||
action: CHALLENGE
|
||||
|
||||
dnsbl: false
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really really want an HTTP 200 and
|
||||
# will stop sending requests once they get it.
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
- Added the ability to [customize Anubis' HTTP status codes](./admin/configuration/custom-status-codes.mdx) ([#355](https://github.com/TecharoHQ/anubis/issues/355))
|
||||
|
||||
## v1.17.0: Asahi sas Brutus
|
||||
|
||||
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))
|
||||
|
19
docs/docs/admin/configuration/custom-status-codes.mdx
Normal file
19
docs/docs/admin/configuration/custom-status-codes.mdx
Normal file
@ -0,0 +1,19 @@
|
||||
# Custom status codes for Anubis errors
|
||||
|
||||
Out of the box, Anubis will reply with `HTTP 200` for challenge and denial pages. This is intended to make AI scrapers have a hard time with your website because when they are faced with a non-200 response, they will hammer the page over and over until they get a 200 response. This behavior may not be desirable, as such Anubis lets you customize what HTTP status codes are returned when Anubis throws challenge and denial pages.
|
||||
|
||||
This is configured in the `status_codes` block of your [bot policy file](../policies.mdx):
|
||||
|
||||
```yaml
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
```
|
||||
|
||||
To match CloudFlare's behavior, use a configuration like this:
|
||||
|
||||
```yaml
|
||||
status_codes:
|
||||
CHALLENGE: 403
|
||||
DENY: 403
|
||||
```
|
@ -170,7 +170,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
|
||||
hash := rule.Hash()
|
||||
|
||||
lg.Debug("rule hash", "hash", hash)
|
||||
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), http.StatusOK)
|
||||
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), s.policy.StatusCodes.Deny)
|
||||
return true
|
||||
case config.RuleChallenge:
|
||||
lg.Debug("challenge requested")
|
||||
@ -202,7 +202,7 @@ func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string,
|
||||
|
||||
if resp != dnsbl.AllGood {
|
||||
lg.Info("DNSBL hit", "status", resp.String())
|
||||
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), http.StatusOK)
|
||||
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), s.policy.StatusCodes.Deny)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -393,3 +393,48 @@ func TestBasePrefix(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCustomStatusCodes(t *testing.T) {
|
||||
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Log(r.UserAgent())
|
||||
w.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintln(w, "OK")
|
||||
})
|
||||
|
||||
statusMap := map[string]int{
|
||||
"ALLOW": 200,
|
||||
"CHALLENGE": 401,
|
||||
"DENY": 403,
|
||||
}
|
||||
|
||||
pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
|
||||
pol.DefaultDifficulty = 4
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: h,
|
||||
Policy: pol,
|
||||
})
|
||||
|
||||
ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
|
||||
defer ts.Close()
|
||||
|
||||
for userAgent, statusCode := range statusMap {
|
||||
t.Run(userAgent, func(t *testing.T) {
|
||||
req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, ts.URL, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := ts.Client().Do(req)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != statusCode {
|
||||
t.Errorf("wanted status code %d but got: %d", statusCode, resp.StatusCode)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -67,7 +67,10 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
|
||||
return
|
||||
}
|
||||
|
||||
handler := internal.NoStoreCache(templ.Handler(component))
|
||||
handler := internal.NoStoreCache(templ.Handler(
|
||||
component,
|
||||
templ.WithStatus(s.opts.Policy.StatusCodes.Challenge),
|
||||
))
|
||||
handler.ServeHTTP(w, r)
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
"io"
|
||||
"io/fs"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
@ -28,6 +29,7 @@ var (
|
||||
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
|
||||
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
|
||||
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
|
||||
ErrStatusCodeNotValid = errors.New("config.StatusCode: status code not valid, must be between 100 and 599")
|
||||
)
|
||||
|
||||
type Rule string
|
||||
@ -262,9 +264,33 @@ func (boi *BotOrImport) Valid() error {
|
||||
return ErrMustSetBotOrImportRules
|
||||
}
|
||||
|
||||
type StatusCodes struct {
|
||||
Challenge int `json:"CHALLENGE"`
|
||||
Deny int `json:"DENY"`
|
||||
}
|
||||
|
||||
func (sc StatusCodes) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if sc.Challenge == 0 || (sc.Challenge < 100 && sc.Challenge >= 599) {
|
||||
errs = append(errs, fmt.Errorf("%w: challenge is %d", ErrStatusCodeNotValid, sc.Challenge))
|
||||
}
|
||||
|
||||
if sc.Deny == 0 || (sc.Deny < 100 && sc.Deny >= 599) {
|
||||
errs = append(errs, fmt.Errorf("%w: deny is %d", ErrStatusCodeNotValid, sc.Deny))
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("status codes not valid:\n%w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type fileConfig struct {
|
||||
Bots []BotOrImport `json:"bots"`
|
||||
DNSBL bool `json:"dnsbl"`
|
||||
Bots []BotOrImport `json:"bots"`
|
||||
DNSBL bool `json:"dnsbl"`
|
||||
StatusCodes StatusCodes `json:"status_codes"`
|
||||
}
|
||||
|
||||
func (c fileConfig) Valid() error {
|
||||
@ -280,6 +306,10 @@ func (c fileConfig) Valid() error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := c.StatusCodes.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
|
||||
}
|
||||
@ -289,6 +319,10 @@ func (c fileConfig) Valid() error {
|
||||
|
||||
func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
var c fileConfig
|
||||
c.StatusCodes = StatusCodes{
|
||||
Challenge: http.StatusOK,
|
||||
Deny: http.StatusOK,
|
||||
}
|
||||
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
|
||||
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
|
||||
}
|
||||
@ -298,7 +332,8 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
}
|
||||
|
||||
result := &Config{
|
||||
DNSBL: c.DNSBL,
|
||||
DNSBL: c.DNSBL,
|
||||
StatusCodes: c.StatusCodes,
|
||||
}
|
||||
|
||||
var validationErrs []error
|
||||
@ -331,8 +366,9 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Bots []BotConfig
|
||||
DNSBL bool
|
||||
Bots []BotConfig
|
||||
DNSBL bool
|
||||
StatusCodes StatusCodes
|
||||
}
|
||||
|
||||
func (c Config) Valid() error {
|
||||
|
13
lib/policy/config/testdata/bad/status-codes-0.json
vendored
Normal file
13
lib/policy/config/testdata/bad/status-codes-0.json
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"bots": [
|
||||
{
|
||||
"name": "everything",
|
||||
"user_agent_regex": ".*",
|
||||
"action": "DENY"
|
||||
}
|
||||
],
|
||||
"status_codes": {
|
||||
"CHALLENGE": 0,
|
||||
"DENY": 0
|
||||
}
|
||||
}
|
8
lib/policy/config/testdata/bad/status-codes-0.yaml
vendored
Normal file
8
lib/policy/config/testdata/bad/status-codes-0.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
bots:
|
||||
- name: everything
|
||||
user_agent_regex: .*
|
||||
action: DENY
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 0
|
||||
DENY: 0
|
13
lib/policy/config/testdata/good/status-codes-paranoid.json
vendored
Normal file
13
lib/policy/config/testdata/good/status-codes-paranoid.json
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"bots": [
|
||||
{
|
||||
"name": "everything",
|
||||
"user_agent_regex": ".*",
|
||||
"action": "DENY"
|
||||
}
|
||||
],
|
||||
"status_codes": {
|
||||
"CHALLENGE": 200,
|
||||
"DENY": 200
|
||||
}
|
||||
}
|
8
lib/policy/config/testdata/good/status-codes-paranoid.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/status-codes-paranoid.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
bots:
|
||||
- name: everything
|
||||
user_agent_regex: .*
|
||||
action: DENY
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
13
lib/policy/config/testdata/good/status-codes-rfc.json
vendored
Normal file
13
lib/policy/config/testdata/good/status-codes-rfc.json
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"bots": [
|
||||
{
|
||||
"name": "everything",
|
||||
"user_agent_regex": ".*",
|
||||
"action": "DENY"
|
||||
}
|
||||
],
|
||||
"status_codes": {
|
||||
"CHALLENGE": 403,
|
||||
"DENY": 403
|
||||
}
|
||||
}
|
8
lib/policy/config/testdata/good/status-codes-rfc.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/status-codes-rfc.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
bots:
|
||||
- name: everything
|
||||
user_agent_regex: .*
|
||||
action: DENY
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 403
|
||||
DENY: 403
|
@ -24,11 +24,13 @@ type ParsedConfig struct {
|
||||
Bots []Bot
|
||||
DNSBL bool
|
||||
DefaultDifficulty int
|
||||
StatusCodes config.StatusCodes
|
||||
}
|
||||
|
||||
func NewParsedConfig(orig *config.Config) *ParsedConfig {
|
||||
return &ParsedConfig{
|
||||
orig: orig,
|
||||
orig: orig,
|
||||
StatusCodes: orig.StatusCodes,
|
||||
}
|
||||
}
|
||||
|
||||
|
12
lib/testdata/aggressive_403.yaml
vendored
Normal file
12
lib/testdata/aggressive_403.yaml
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
bots:
|
||||
- name: deny
|
||||
user_agent_regex: DENY
|
||||
action: DENY
|
||||
|
||||
- name: challenge
|
||||
user_agent_regex: CHALLENGE
|
||||
action: CHALLENGE
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 401
|
||||
DENY: 403
|
12
test/anubis_configs/aggressive_403.yaml
Normal file
12
test/anubis_configs/aggressive_403.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
bots:
|
||||
- name: deny
|
||||
user_agent_regex: DENY
|
||||
action: DENY
|
||||
|
||||
- name: challenge
|
||||
user_agent_regex: CHALLENGE
|
||||
action: CHALLENGE
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 401
|
||||
DENY: 403
|
@ -37,6 +37,7 @@ go run ../cmd/unixhttpd &
|
||||
go tool anubis \
|
||||
--bind=./anubis.sock \
|
||||
--bind-network=unix \
|
||||
--policy-fname=../anubis_configs/aggressive_403.yaml \
|
||||
--target=unix://$(pwd)/unixhttpd.sock &
|
||||
|
||||
# A simple TLS terminator that forwards to Anubis, which will forward to
|
||||
|
30
test/unix-socket-xff/test.mjs
Normal file
30
test/unix-socket-xff/test.mjs
Normal file
@ -0,0 +1,30 @@
|
||||
async function testWithUserAgent(userAgent) {
|
||||
const statusCode =
|
||||
await fetch("https://relayd.local.cetacean.club:3004/reqmeta", {
|
||||
headers: {
|
||||
"User-Agent": userAgent,
|
||||
}
|
||||
})
|
||||
.then(resp => resp.status);
|
||||
return statusCode;
|
||||
}
|
||||
|
||||
const codes = {
|
||||
allow: await testWithUserAgent("ALLOW"),
|
||||
challenge: await testWithUserAgent("CHALLENGE"),
|
||||
deny: await testWithUserAgent("DENY")
|
||||
}
|
||||
|
||||
const expected = {
|
||||
allow: 200,
|
||||
challenge: 401,
|
||||
deny: 403,
|
||||
};
|
||||
|
||||
console.log("ALLOW: ", codes.allow);
|
||||
console.log("CHALLENGE:", codes.challenge);
|
||||
console.log("DENY: ", codes.deny);
|
||||
|
||||
if (JSON.stringify(codes) !== JSON.stringify(expected)) {
|
||||
throw new Error(`wanted ${JSON.stringify(expected)}, got: ${JSON.stringify(codes)}`);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user