mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 01:38:14 -04:00

* feat(internal): add Thoth client and simple ASN checker Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thoth): cached ip to asn checker Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: go mod tidy Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(thoth): minor testing fixups, ensure ASNChecker is Checker Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thoth): make ASNChecker instances Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thoth): add GeoIP checker Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thoth): store a thoth client in a context Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: refactor Checker type to its own package Signed-off-by: Xe Iaso <me@xeiaso.net> * test(thoth): add thoth mocking package, ignore context deadline exceeded errors Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thoth): pre-cache private ranges Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(lib/policy/config): enable thoth ASNs and GeoIP checker parsing Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(thoth): refactor to move checker creation to the checker files Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(policy): enable thoth checks Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(thothmock): test helper function for loading a mock thoth instance Signed-off-by: Xe Iaso <me@xeiaso.net> * feat: wire up Thoth, make thoth checks part of the default config Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(thoth): mend staticcheck errors Signed-off-by: Xe Iaso <me@xeiaso.net> * docs(admin): add Thoth docs Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(policy): update Thoth links in error messages Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: update CHANGELOG Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(docs/manifest): enable Thoth Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: add THOTH_INSECURE for contacting Thoth over plain TCP in extreme circumstances Signed-off-by: Xe Iaso 
<me@xeiaso.net> * test(thoth): use mock thoth when credentials aren't detected in the environment Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(cmd/anubis): better warnings for half-configured Thoth setups Signed-off-by: Xe Iaso <me@xeiaso.net> * docs(botpolicies): link to Thoth geoip docs Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
94 lines
3.2 KiB
YAML
94 lines
3.2 KiB
YAML
## Anubis has the ability to let you import snippets of configuration into the main
|
|
## configuration file. This allows you to break up your config into smaller parts
|
|
## that get logically assembled into one big file.
|
|
##
|
|
## Of note, a bot rule can either have inline bot configuration or import a
|
|
## bot config snippet. You cannot do both in a single bot rule.
|
|
##
|
|
## Import paths can either be prefixed with (data) to import from the common/shared
|
|
## rules in the data folder in the Anubis source tree or will point to absolute/relative
|
|
## paths in your filesystem. If you don't have access to the Anubis source tree, check
|
|
## /usr/share/docs/anubis/data or the tarball you extracted Anubis from.
|
|
|
|
bots:
|
|
# Pathological bots to deny
|
|
- # This correlates to data/bots/_deny-pathological.yaml in the source tree
|
|
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
|
|
import: (data)/bots/_deny-pathological.yaml
|
|
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
|
|
|
# Aggressively block AI/LLM related bots/agents by default
|
|
- import: (data)/meta/ai-block-aggressive.yaml
|
|
|
|
# Consider replacing the aggressive AI policy with more selective policies:
|
|
# - import: (data)/meta/ai-block-moderate.yaml
|
|
# - import: (data)/meta/ai-block-permissive.yaml
|
|
|
|
# Search engine crawlers to allow, defaults to:
|
|
# - Google (so they don't try to bypass Anubis)
|
|
# - Apple
|
|
# - Bing
|
|
# - DuckDuckGo
|
|
# - Qwant
|
|
# - The Internet Archive
|
|
# - Kagi
|
|
# - Marginalia
|
|
# - Mojeek
|
|
- import: (data)/crawlers/_allow-good.yaml
|
|
# Challenge Firefox AI previews
|
|
- import: (data)/clients/x-firefox-ai.yaml
|
|
|
|
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
|
- import: (data)/common/keep-internet-working.yaml
|
|
|
|
# # Punish any bot with "bot" in the user-agent string
|
|
# # This is known to have a high false-positive rate, use at your own risk
|
|
# - name: generic-bot-catchall
|
|
# user_agent_regex: (?i:bot|crawler)
|
|
# action: CHALLENGE
|
|
# challenge:
|
|
# difficulty: 16 # impossible
|
|
# report_as: 4 # lie to the operator
|
|
# algorithm: slow # intentionally waste CPU cycles and time
|
|
|
|
# Requires a subscription to Thoth to use, see
|
|
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
|
|
- name: countries-with-aggressive-scrapers
  # Matching requests have the weight adjustment below applied.
  action: WEIGH
  geoip:
    # Two-letter country codes matched by this rule.
    countries:
    - BR
    - CN
  weight:
    adjust: 10
|
|
|
|
# Requires a subscription to Thoth to use, see
|
|
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
|
|
- name: aggressive-asns-without-functional-abuse-contact
  # Matching requests have the weight adjustment below applied.
  action: WEIGH
  asns:
    # Autonomous system numbers matched by this rule.
    match:
    - 13335  # Cloudflare
    - 136907  # Huawei Cloud
    - 45102  # Alibaba Cloud
  weight:
    adjust: 10
|
|
|
|
# Generic catchall rule
|
|
- name: generic-browser
  # Matches any User-Agent containing "Mozilla" or "Opera".
  user_agent_regex: >-
    Mozilla|Opera
  # Matching requests have the weight adjustment below applied.
  action: WEIGH
  weight:
    adjust: 10
|
|
|
|
# NOTE(review): presumably toggles DNS blocklist (DNSBL) lookups, disabled
# here — confirm against the Anubis policy documentation.
dnsbl: false
|
|
|
|
# By default, send HTTP 200 back to clients that either get issued a challenge
|
|
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
|
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
|
# will stop sending requests once they get it.
|
|
status_codes:
  CHALLENGE: 200  # HTTP status sent when a client is issued a challenge
  DENY: 200  # HTTP status sent when a client is denied
|