mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 17:59:24 -04:00
chore(docs/manifest): try no-js challenge to see how it impacts false positive rate
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
4ac59c3a79
commit
8eff57fcb6
72
docs/manifest/cfg/anubis/botPolicies.yaml
Normal file
72
docs/manifest/cfg/anubis/botPolicies.yaml
Normal file
@ -0,0 +1,72 @@
|
||||
## Anubis has the ability to let you import snippets of configuration into the main
|
||||
## configuration file. This allows you to break up your config into smaller parts
|
||||
## that get logically assembled into one big file.
|
||||
##
|
||||
## Of note, a bot rule can either have inline bot configuration or import a
|
||||
## bot config snippet. You cannot do both in a single bot rule.
|
||||
##
|
||||
## Import paths can either be prefixed with (data) to import from the common/shared
|
||||
## rules in the data folder in the Anubis source tree or will point to absolute/relative
|
||||
## paths in your filesystem. If you don't have access to the Anubis source tree, check
|
||||
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
||||
|
||||
bots:
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
|
||||
# Aggressively block AI/LLM related bots/agents by default
|
||||
- import: (data)/meta/ai-block-aggressive.yaml
|
||||
|
||||
# Consider replacing the aggressive AI policy with more selective policies:
|
||||
# - import: (data)/meta/ai-block-moderate.yaml
|
||||
# - import: (data)/meta/ai-block-permissive.yaml
|
||||
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Apple
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
# - The Internet Archive
|
||||
# - Kagi
|
||||
# - Marginalia
|
||||
# - Mojeek
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
# Challenge Firefox AI previews
|
||||
- import: (data)/clients/x-firefox-ai.yaml
|
||||
|
||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
|
||||
# # Punish any bot with "bot" in the user-agent string
|
||||
# # This is known to have a high false-positive rate, use at your own risk
|
||||
# - name: generic-bot-catchall
|
||||
# user_agent_regex: (?i:bot|crawler)
|
||||
# action: CHALLENGE
|
||||
# challenge:
|
||||
# difficulty: 16 # impossible
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
difficulty: 1 # Number of seconds to wait before refreshing the page
|
||||
report_as: 4 # Unused by this challenge method
|
||||
algorithm: metarefresh # Specify a non-JS challenge method
|
||||
|
||||
dnsbl: false
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
||||
# will stop sending requests once they get it.
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
@ -11,48 +11,58 @@ spec:
|
||||
labels:
|
||||
app: anubis-docs
|
||||
spec:
|
||||
volumes:
|
||||
- name: anubis
|
||||
configMap:
|
||||
name: anubis-cfg
|
||||
containers:
|
||||
- name: anubis-docs
|
||||
image: ghcr.io/techarohq/anubis/docs:main
|
||||
imagePullPolicy: Always
|
||||
resources:
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
ports:
|
||||
- containerPort: 80
|
||||
- name: anubis
|
||||
image: ghcr.io/techarohq/anubis:main
|
||||
imagePullPolicy: Always
|
||||
env:
|
||||
- name: "BIND"
|
||||
value: ":8081"
|
||||
- name: "DIFFICULTY"
|
||||
value: "4"
|
||||
- name: "METRICS_BIND"
|
||||
value: ":9090"
|
||||
- name: "POLICY_FNAME"
|
||||
value: ""
|
||||
- name: "SERVE_ROBOTS_TXT"
|
||||
value: "false"
|
||||
- name: "TARGET"
|
||||
value: "http://localhost:80"
|
||||
# - name: "SLOG_LEVEL"
|
||||
# value: "debug"
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 128Mi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 128Mi
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
runAsNonRoot: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
- name: anubis-docs
|
||||
image: ghcr.io/techarohq/anubis/docs:main
|
||||
imagePullPolicy: Always
|
||||
resources:
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 128Mi
|
||||
ports:
|
||||
- containerPort: 80
|
||||
- name: anubis
|
||||
image: ghcr.io/techarohq/anubis:main
|
||||
imagePullPolicy: Always
|
||||
env:
|
||||
- name: "BIND"
|
||||
value: ":8081"
|
||||
- name: "DIFFICULTY"
|
||||
value: "4"
|
||||
- name: "METRICS_BIND"
|
||||
value: ":9090"
|
||||
- name: "POLICY_FNAME"
|
||||
value: "/xe/cfg/anubis/botPolicies.yaml"
|
||||
- name: "SERVE_ROBOTS_TXT"
|
||||
value: "false"
|
||||
- name: "TARGET"
|
||||
value: "http://localhost:80"
|
||||
# - name: "SLOG_LEVEL"
|
||||
# value: "debug"
|
||||
volumeMounts:
|
||||
- name: anubis
|
||||
mountPath: /xe/cfg/anubis
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 128Mi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 128Mi
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
runAsNonRoot: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
@ -2,4 +2,10 @@ resources:
|
||||
- deployment.yaml
|
||||
- ingress.yaml
|
||||
- onionservice.yaml
|
||||
- service.yaml
|
||||
- service.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: anubis-cfg
|
||||
behavior: create
|
||||
files:
|
||||
- ./cfg/anubis/botPolicies.yaml
|
||||
|
Loading…
x
Reference in New Issue
Block a user