mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-09-12 14:17:57 -04:00
feat(config): add ability to customize HTTP status codes Anubis returns (#393)
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
2935bd4aa7
commit
74d330cec5
@ -48,3 +48,11 @@ bots:
|
|||||||
action: CHALLENGE
|
action: CHALLENGE
|
||||||
|
|
||||||
dnsbl: false
|
dnsbl: false
|
||||||
|
|
||||||
|
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||||
|
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||||
|
# the most aggressive scraper bots seem to really really want an HTTP 200 and
|
||||||
|
# will stop sending requests once they get it.
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 200
|
||||||
|
DENY: 200
|
@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
- Added the ability to [customize Anubis' HTTP status codes](./admin/configuration/custom-status-codes.mdx) ([#355](https://github.com/TecharoHQ/anubis/issues/355))
|
||||||
|
|
||||||
## v1.17.0: Asahi sas Brutus
|
## v1.17.0: Asahi sas Brutus
|
||||||
|
|
||||||
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))
|
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))
|
||||||
|
19
docs/docs/admin/configuration/custom-status-codes.mdx
Normal file
19
docs/docs/admin/configuration/custom-status-codes.mdx
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Custom status codes for Anubis errors
|
||||||
|
|
||||||
|
Out of the box, Anubis will reply with `HTTP 200` for challenge and denial pages. This is intended to make AI scrapers have a harder time with your website: when they are faced with a non-200 response, they will hammer the page over and over until they get a 200 response. This behavior may not be desirable for every deployment, so Anubis lets you customize which HTTP status codes are returned when it serves challenge and denial pages.
|
||||||
|
|
||||||
|
This is configured in the `status_codes` block of your [bot policy file](../policies.mdx):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 200
|
||||||
|
DENY: 200
|
||||||
|
```
|
||||||
|
|
||||||
|
To match Cloudflare's behavior, use a configuration like this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 403
|
||||||
|
DENY: 403
|
||||||
|
```
|
@ -170,7 +170,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
|
|||||||
hash := rule.Hash()
|
hash := rule.Hash()
|
||||||
|
|
||||||
lg.Debug("rule hash", "hash", hash)
|
lg.Debug("rule hash", "hash", hash)
|
||||||
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), http.StatusOK)
|
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), s.policy.StatusCodes.Deny)
|
||||||
return true
|
return true
|
||||||
case config.RuleChallenge:
|
case config.RuleChallenge:
|
||||||
lg.Debug("challenge requested")
|
lg.Debug("challenge requested")
|
||||||
@ -202,7 +202,7 @@ func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string,
|
|||||||
|
|
||||||
if resp != dnsbl.AllGood {
|
if resp != dnsbl.AllGood {
|
||||||
lg.Info("DNSBL hit", "status", resp.String())
|
lg.Info("DNSBL hit", "status", resp.String())
|
||||||
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), http.StatusOK)
|
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), s.policy.StatusCodes.Deny)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -393,3 +393,48 @@ func TestBasePrefix(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCustomStatusCodes(t *testing.T) {
|
||||||
|
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
t.Log(r.UserAgent())
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
fmt.Fprintln(w, "OK")
|
||||||
|
})
|
||||||
|
|
||||||
|
statusMap := map[string]int{
|
||||||
|
"ALLOW": 200,
|
||||||
|
"CHALLENGE": 401,
|
||||||
|
"DENY": 403,
|
||||||
|
}
|
||||||
|
|
||||||
|
pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
|
||||||
|
pol.DefaultDifficulty = 4
|
||||||
|
|
||||||
|
srv := spawnAnubis(t, Options{
|
||||||
|
Next: h,
|
||||||
|
Policy: pol,
|
||||||
|
})
|
||||||
|
|
||||||
|
ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
for userAgent, statusCode := range statusMap {
|
||||||
|
t.Run(userAgent, func(t *testing.T) {
|
||||||
|
req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, ts.URL, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", userAgent)
|
||||||
|
|
||||||
|
resp, err := ts.Client().Do(req)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != statusCode {
|
||||||
|
t.Errorf("wanted status code %d but got: %d", statusCode, resp.StatusCode)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -67,7 +67,10 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
handler := internal.NoStoreCache(templ.Handler(component))
|
handler := internal.NoStoreCache(templ.Handler(
|
||||||
|
component,
|
||||||
|
templ.WithStatus(s.opts.Policy.StatusCodes.Challenge),
|
||||||
|
))
|
||||||
handler.ServeHTTP(w, r)
|
handler.ServeHTTP(w, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"net"
|
"net"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@ -28,6 +29,7 @@ var (
|
|||||||
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
|
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
|
||||||
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
|
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
|
||||||
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
|
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
|
||||||
|
ErrStatusCodeNotValid = errors.New("config.StatusCode: status code not valid, must be between 100 and 599")
|
||||||
)
|
)
|
||||||
|
|
||||||
type Rule string
|
type Rule string
|
||||||
@ -262,9 +264,33 @@ func (boi *BotOrImport) Valid() error {
|
|||||||
return ErrMustSetBotOrImportRules
|
return ErrMustSetBotOrImportRules
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type StatusCodes struct {
|
||||||
|
Challenge int `json:"CHALLENGE"`
|
||||||
|
Deny int `json:"DENY"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sc StatusCodes) Valid() error {
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
if sc.Challenge == 0 || (sc.Challenge < 100 && sc.Challenge >= 599) {
|
||||||
|
errs = append(errs, fmt.Errorf("%w: challenge is %d", ErrStatusCodeNotValid, sc.Challenge))
|
||||||
|
}
|
||||||
|
|
||||||
|
if sc.Deny == 0 || (sc.Deny < 100 && sc.Deny >= 599) {
|
||||||
|
errs = append(errs, fmt.Errorf("%w: deny is %d", ErrStatusCodeNotValid, sc.Deny))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) != 0 {
|
||||||
|
return fmt.Errorf("status codes not valid:\n%w", errors.Join(errs...))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type fileConfig struct {
|
type fileConfig struct {
|
||||||
Bots []BotOrImport `json:"bots"`
|
Bots []BotOrImport `json:"bots"`
|
||||||
DNSBL bool `json:"dnsbl"`
|
DNSBL bool `json:"dnsbl"`
|
||||||
|
StatusCodes StatusCodes `json:"status_codes"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c fileConfig) Valid() error {
|
func (c fileConfig) Valid() error {
|
||||||
@ -280,6 +306,10 @@ func (c fileConfig) Valid() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := c.StatusCodes.Valid(); err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
|
||||||
if len(errs) != 0 {
|
if len(errs) != 0 {
|
||||||
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
|
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
|
||||||
}
|
}
|
||||||
@ -289,6 +319,10 @@ func (c fileConfig) Valid() error {
|
|||||||
|
|
||||||
func Load(fin io.Reader, fname string) (*Config, error) {
|
func Load(fin io.Reader, fname string) (*Config, error) {
|
||||||
var c fileConfig
|
var c fileConfig
|
||||||
|
c.StatusCodes = StatusCodes{
|
||||||
|
Challenge: http.StatusOK,
|
||||||
|
Deny: http.StatusOK,
|
||||||
|
}
|
||||||
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
|
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
|
||||||
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
|
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
|
||||||
}
|
}
|
||||||
@ -299,6 +333,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
|||||||
|
|
||||||
result := &Config{
|
result := &Config{
|
||||||
DNSBL: c.DNSBL,
|
DNSBL: c.DNSBL,
|
||||||
|
StatusCodes: c.StatusCodes,
|
||||||
}
|
}
|
||||||
|
|
||||||
var validationErrs []error
|
var validationErrs []error
|
||||||
@ -333,6 +368,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
|||||||
type Config struct {
|
type Config struct {
|
||||||
Bots []BotConfig
|
Bots []BotConfig
|
||||||
DNSBL bool
|
DNSBL bool
|
||||||
|
StatusCodes StatusCodes
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Config) Valid() error {
|
func (c Config) Valid() error {
|
||||||
|
13
lib/policy/config/testdata/bad/status-codes-0.json
vendored
Normal file
13
lib/policy/config/testdata/bad/status-codes-0.json
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"bots": [
|
||||||
|
{
|
||||||
|
"name": "everything",
|
||||||
|
"user_agent_regex": ".*",
|
||||||
|
"action": "DENY"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"status_codes": {
|
||||||
|
"CHALLENGE": 0,
|
||||||
|
"DENY": 0
|
||||||
|
}
|
||||||
|
}
|
8
lib/policy/config/testdata/bad/status-codes-0.yaml
vendored
Normal file
8
lib/policy/config/testdata/bad/status-codes-0.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
bots:
|
||||||
|
- name: everything
|
||||||
|
user_agent_regex: .*
|
||||||
|
action: DENY
|
||||||
|
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 0
|
||||||
|
DENY: 0
|
13
lib/policy/config/testdata/good/status-codes-paranoid.json
vendored
Normal file
13
lib/policy/config/testdata/good/status-codes-paranoid.json
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"bots": [
|
||||||
|
{
|
||||||
|
"name": "everything",
|
||||||
|
"user_agent_regex": ".*",
|
||||||
|
"action": "DENY"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"status_codes": {
|
||||||
|
"CHALLENGE": 200,
|
||||||
|
"DENY": 200
|
||||||
|
}
|
||||||
|
}
|
8
lib/policy/config/testdata/good/status-codes-paranoid.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/status-codes-paranoid.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
bots:
|
||||||
|
- name: everything
|
||||||
|
user_agent_regex: .*
|
||||||
|
action: DENY
|
||||||
|
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 200
|
||||||
|
DENY: 200
|
13
lib/policy/config/testdata/good/status-codes-rfc.json
vendored
Normal file
13
lib/policy/config/testdata/good/status-codes-rfc.json
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"bots": [
|
||||||
|
{
|
||||||
|
"name": "everything",
|
||||||
|
"user_agent_regex": ".*",
|
||||||
|
"action": "DENY"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"status_codes": {
|
||||||
|
"CHALLENGE": 403,
|
||||||
|
"DENY": 403
|
||||||
|
}
|
||||||
|
}
|
8
lib/policy/config/testdata/good/status-codes-rfc.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/status-codes-rfc.yaml
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
bots:
|
||||||
|
- name: everything
|
||||||
|
user_agent_regex: .*
|
||||||
|
action: DENY
|
||||||
|
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 403
|
||||||
|
DENY: 403
|
@ -24,11 +24,13 @@ type ParsedConfig struct {
|
|||||||
Bots []Bot
|
Bots []Bot
|
||||||
DNSBL bool
|
DNSBL bool
|
||||||
DefaultDifficulty int
|
DefaultDifficulty int
|
||||||
|
StatusCodes config.StatusCodes
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewParsedConfig(orig *config.Config) *ParsedConfig {
|
func NewParsedConfig(orig *config.Config) *ParsedConfig {
|
||||||
return &ParsedConfig{
|
return &ParsedConfig{
|
||||||
orig: orig,
|
orig: orig,
|
||||||
|
StatusCodes: orig.StatusCodes,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
12
lib/testdata/aggressive_403.yaml
vendored
Normal file
12
lib/testdata/aggressive_403.yaml
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
bots:
|
||||||
|
- name: deny
|
||||||
|
user_agent_regex: DENY
|
||||||
|
action: DENY
|
||||||
|
|
||||||
|
- name: challenge
|
||||||
|
user_agent_regex: CHALLENGE
|
||||||
|
action: CHALLENGE
|
||||||
|
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 401
|
||||||
|
DENY: 403
|
12
test/anubis_configs/aggressive_403.yaml
Normal file
12
test/anubis_configs/aggressive_403.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
bots:
|
||||||
|
- name: deny
|
||||||
|
user_agent_regex: DENY
|
||||||
|
action: DENY
|
||||||
|
|
||||||
|
- name: challenge
|
||||||
|
user_agent_regex: CHALLENGE
|
||||||
|
action: CHALLENGE
|
||||||
|
|
||||||
|
status_codes:
|
||||||
|
CHALLENGE: 401
|
||||||
|
DENY: 403
|
@ -37,6 +37,7 @@ go run ../cmd/unixhttpd &
|
|||||||
go tool anubis \
|
go tool anubis \
|
||||||
--bind=./anubis.sock \
|
--bind=./anubis.sock \
|
||||||
--bind-network=unix \
|
--bind-network=unix \
|
||||||
|
--policy-fname=../anubis_configs/aggressive_403.yaml \
|
||||||
--target=unix://$(pwd)/unixhttpd.sock &
|
--target=unix://$(pwd)/unixhttpd.sock &
|
||||||
|
|
||||||
# A simple TLS terminator that forwards to Anubis, which will forward to
|
# A simple TLS terminator that forwards to Anubis, which will forward to
|
||||||
|
30
test/unix-socket-xff/test.mjs
Normal file
30
test/unix-socket-xff/test.mjs
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
// Sends a request to the test endpoint with the given User-Agent header and
// resolves to the HTTP status code of the response.
async function testWithUserAgent(userAgent) {
  const response = await fetch("https://relayd.local.cetacean.club:3004/reqmeta", {
    headers: { "User-Agent": userAgent },
  });
  return response.status;
}
|
||||||
|
|
||||||
|
// Status codes each user agent is expected to receive.
const expected = {
  allow: 200,
  challenge: 401,
  deny: 403,
};

// Probe the endpoint once per user agent, one request at a time.
const codes = {
  allow: await testWithUserAgent("ALLOW"),
  challenge: await testWithUserAgent("CHALLENGE"),
  deny: await testWithUserAgent("DENY"),
};

console.log("ALLOW: ", codes.allow);
console.log("CHALLENGE:", codes.challenge);
console.log("DENY: ", codes.deny);

// Both objects list their keys in the same order, so comparing the serialized
// forms is enough to detect any mismatch.
const wantJSON = JSON.stringify(expected);
const gotJSON = JSON.stringify(codes);

if (gotJSON !== wantJSON) {
  throw new Error(`wanted ${wantJSON}, got: ${gotJSON}`);
}
|
Loading…
x
Reference in New Issue
Block a user