mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 01:38:14 -04:00

* feat(lib/policy/expressions): add system load average to bot expression inputs This lets Anubis dynamically react to system load in order to increase and decrease the required level of scrutiny. High load? More scrutiny required. Low load? Less scrutiny required. * docs: spell system correctly Signed-off-by: Xe Iaso <me@xeiaso.net> * Update metadata check-spelling run (pull_request) for Xe/load-average Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * fix(default-config): don't enable low load average feature by default Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net> Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> Signed-off-by: Xe Iaso <xe.iaso@techaro.lol>
228 lines
8.2 KiB
YAML
228 lines
8.2 KiB
YAML
## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths can either be prefixed with (data) to import from the common/shared
## rules in the data folder in the Anubis source tree or will point to absolute/relative
## paths in your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.

bots:
  # Pathological bots to deny.
  # This correlates to data/bots/_deny-pathological.yaml in the source tree:
  # https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
  - import: (data)/bots/_deny-pathological.yaml
  - import: (data)/bots/aggressive-brazilian-scrapers.yaml

  # Aggressively block AI/LLM related bots/agents by default
  - import: (data)/meta/ai-block-aggressive.yaml

  # Consider replacing the aggressive AI policy with more selective policies:
  # - import: (data)/meta/ai-block-moderate.yaml
  # - import: (data)/meta/ai-block-permissive.yaml

  # Search engine crawlers to allow, defaults to:
  #   - Google (so they don't try to bypass Anubis)
  #   - Apple
  #   - Bing
  #   - DuckDuckGo
  #   - Qwant
  #   - The Internet Archive
  #   - Kagi
  #   - Marginalia
  #   - Mojeek
  - import: (data)/crawlers/_allow-good.yaml
  # Challenge Firefox AI previews
  - import: (data)/clients/x-firefox-ai.yaml

  # Allow common "keeping the internet working" routes
  # (well-known, favicon, robots.txt)
  - import: (data)/common/keep-internet-working.yaml

  # # Punish any bot with "bot" in the user-agent string
  # # This is known to have a high false-positive rate, use at your own risk
  # - name: generic-bot-catchall
  #   user_agent_regex: (?i:bot|crawler)
  #   action: CHALLENGE
  #   challenge:
  #     difficulty: 16 # impossible
  #     report_as: 4 # lie to the operator
  #     algorithm: slow # intentionally waste CPU cycles and time

  # Requires a subscription to Thoth to use, see
  # https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
  - name: countries-with-aggressive-scrapers
    action: WEIGH
    geoip:
      # Country codes are quoted so that a two-letter code such as NO can
      # never be read as a boolean by a YAML 1.1 parser.
      countries:
        - 'BR'
        - 'CN'
    weight:
      adjust: 10

  # Requires a subscription to Thoth to use, see
  # https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
  - name: aggressive-asns-without-functional-abuse-contact
    action: WEIGH
    asns:
      match:
        - 13335 # Cloudflare
        - 136907 # Huawei Cloud
        - 45102 # Alibaba Cloud
    weight:
      adjust: 10

  ## System load based checks.
  # If the system is under high load, add weight.
  - name: high-load-average
    action: WEIGH
    expression: load_1m >= 10.0 # make sure to end the load comparison in a .0
    weight:
      adjust: 20

  ## If your backend service is running on the same operating system as Anubis,
  ## you can uncomment this rule to make the challenge easier when the system is
  ## under low load: it removes weight while the 15-minute load average is low.
  # - name: low-load-average
  #   action: WEIGH
  #   expression: load_15m <= 4.0 # make sure to end the load comparison in a .0
  #   weight:
  #     adjust: -10

  # Generic catchall rule
  - name: generic-browser
    user_agent_regex: >-
      Mozilla|Opera
    action: WEIGH
    weight:
      adjust: 10

# DNSBL checks are switched off.
# NOTE(review): presumably DNS blocklist lookups; confirm against the Anubis docs.
dnsbl: false

# Example imprint (impressum) configuration, commented out by default:
# impressum:
#   # Displayed at the bottom of every page rendered by Anubis.
#   footer: >-
#     This website is hosted by Zombocom. If you have any complaints or notes
#     about the service, please contact
#     <a href="mailto:contact@domainhere.example">contact@domainhere.example</a>
#     and we will assist you as soon as possible.
#
#   # The imprint page that will be linked to at the footer of every Anubis page.
#   page:
#     # The HTML <title> of the page
#     title: Imprint and Privacy Policy
#     # The HTML contents of the page. The exact contents of this page can
#     # and will vary by locale. Please consult with a lawyer if you are not
#     # sure what to put here
#     body: >-
#       <p>Last updated: June 2025</p>
#
#       <h2>Information that is gathered from visitors</h2>
#
#       <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
#
#       <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
#
#       <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
#
#       <!-- ... -->

# Open Graph passthrough settings. Full documentation:
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
openGraph:
  # Turns Open Graph passthrough on or off.
  enabled: false
  # When true, the HTTP host becomes part of the cache key, which allows
  # metadata for several HTTP hosts to be cached at once.
  considerHost: false
  # Lifetime of cached OpenGraph metadata held in memory.
  ttl: 24h
  # # When set, these opengraph values are returned instead of being looked
  # # up with the target service.
  # #
  # # Correlates to properties in https://ogp.me/
  # override:
  #   # og:title is required, it is the title of the website
  #   "og:title": "Techaro Anubis"
  #   "og:description": >-
  #     Anubis is a Web AI Firewall Utility that helps you fight the bots
  #     away so that you can maintain uptime at work!
  #   "description": >-
  #     Anubis is a Web AI Firewall Utility that helps you fight the bots
  #     away so that you can maintain uptime at work!

# Clients that are issued a challenge or a denial receive HTTP 200 by
# default. It looks odd, but it is load-bearing: the most aggressive scraper
# bots seem to really, really want an HTTP 200 and will stop sending requests
# once they get it.
status_codes:
  CHALLENGE: 200
  DENY: 200

# Temporary data can be kept in one of a few storage backends. The storage
# backends section of the docs has the details:
#
# https://anubis.techaro.lol/docs/admin/policies#storage-backends
store:
  backend: memory
  parameters: {}

# Weight thresholds that decide when an individual challenge is triggered.
# Any CHALLENGE will take precedence over this.
#
# Each threshold accepts four configuration options:
#
#   - name: the name that is reported down the stack and used for metrics
#   - expression: a CEL expression; the request weight is available in the
#     variable weight
#   - action: the Anubis action to apply, similar to in a bot policy
#   - challenge: which challenge to send to the user, similar to in a bot
#     policy
#
# More information:
# https://anubis.techaro.lol/docs/admin/configuration/thresholds
thresholds:
  # These are the thresholds Anubis ships with by default.

  # This client is likely fine, its soul is lighter than a feather.
  - name: minimal-suspicion
    expression: weight <= 0 # a feather weighs zero units
    action: ALLOW # Allow the traffic through

  # Clients whose weight was partly reduced through custom rules get a
  # lightweight challenge.
  - name: mild-suspicion
    expression:
      all:
        - weight > 0
        - weight < 10
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
      algorithm: metarefresh
      difficulty: 1
      report_as: 1

  # Browser-like clients that either gained points from custom rules or
  # report as a standard browser.
  - name: moderate-suspicion
    expression:
      all:
        - weight >= 10
        - weight < 20
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
      algorithm: fast
      difficulty: 2 # two leading zeros, very fast for most clients
      report_as: 2

  # Browser-like clients that gained many points from custom rules.
  - name: extreme-suspicion
    expression: weight >= 20
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
      algorithm: fast
      difficulty: 4
      report_as: 4