diff --git a/docs/manifest/cfg/anubis/botPolicies.yaml b/docs/manifest/cfg/anubis/botPolicies.yaml
new file mode 100644
index 0000000..e259dfd
--- /dev/null
+++ b/docs/manifest/cfg/anubis/botPolicies.yaml
@@ -0,0 +1,72 @@
+## Anubis can import snippets of configuration into the main configuration
+## file. This allows you to break up your config into smaller parts that get
+## logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the
+## common/shared rules in the data folder of the Anubis source tree, or they
+## can be absolute/relative paths on your filesystem. If you don't have the
+## source tree, check /usr/share/docs/anubis/data or the Anubis tarball.
+
+bots:
+  # Pathological bots to deny
+  - # This correlates to data/bots/_deny-pathological.yaml in the source tree
+    # https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
+    import: (data)/bots/_deny-pathological.yaml
+  - import: (data)/bots/aggressive-brazilian-scrapers.yaml
+
+  # Aggressively block AI/LLM related bots/agents by default
+  - import: (data)/meta/ai-block-aggressive.yaml
+
+  # Consider replacing the aggressive AI policy with more selective policies:
+  # - import: (data)/meta/ai-block-moderate.yaml
+  # - import: (data)/meta/ai-block-permissive.yaml
+
+  # Search engine crawlers to allow, defaults to:
+  #   - Google (so they don't try to bypass Anubis)
+  #   - Apple
+  #   - Bing
+  #   - DuckDuckGo
+  #   - Qwant
+  #   - The Internet Archive
+  #   - Kagi
+  #   - Marginalia
+  #   - Mojeek
+  - import: (data)/crawlers/_allow-good.yaml
+  # Challenge Firefox AI previews
+  - import: (data)/clients/x-firefox-ai.yaml
+
+  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+  - import: (data)/common/keep-internet-working.yaml
+
+  # # Punish any bot with "bot" in the user-agent string
+  # # This is known to have a high false-positive rate, use at your own risk
+  # - name: generic-bot-catchall
+  #   user_agent_regex: (?i:bot|crawler)
+  #   action: CHALLENGE
+  #   challenge:
+  #     difficulty: 16  # impossible
+  #     report_as: 4    # lie to the operator
+  #     algorithm: slow # intentionally waste CPU cycles and time
+
+  # Generic catchall rule
+  - name: generic-browser
+    user_agent_regex: >-
+      Mozilla|Opera
+    action: CHALLENGE
+    challenge:
+      difficulty: 1 # Number of seconds to wait before refreshing the page
+      report_as: 4 # Unused by this challenge method
+      algorithm: metarefresh # Specify a non-JS challenge method
+
+dnsbl: false
+
+# By default, send HTTP 200 back to clients that either get issued a challenge
+# or a denial. This seems weird, but it is load-bearing: the most aggressive
+# scraper bots seem to really, really want an HTTP 200 and will stop sending
+# requests once they get it.
+status_codes:
+  CHALLENGE: 200
+  DENY: 200
diff --git a/docs/manifest/deployment.yaml b/docs/manifest/deployment.yaml
index 4abb94b..a22c186 100644
--- a/docs/manifest/deployment.yaml
+++ b/docs/manifest/deployment.yaml
@@ -11,48 +11,58 @@ spec:
       labels:
         app: anubis-docs
     spec:
+      volumes:
+        - name: anubis
+          configMap:
+            name: anubis-cfg
       containers:
-      - name: anubis-docs
-        image: ghcr.io/techarohq/anubis/docs:main
-        imagePullPolicy: Always
-        resources:
-          limits:
-            memory: "128Mi"
-            cpu: "500m"
-        ports:
-        - containerPort: 80
-      - name: anubis
-        image: ghcr.io/techarohq/anubis:main
-        imagePullPolicy: Always
-        env:
-        - name: "BIND"
-          value: ":8081"
-        - name: "DIFFICULTY"
-          value: "4"
-        - name: "METRICS_BIND"
-          value: ":9090"
-        - name: "POLICY_FNAME"
-          value: ""
-        - name: "SERVE_ROBOTS_TXT"
-          value: "false"
-        - name: "TARGET"
-          value: "http://localhost:80"
-        # - name: "SLOG_LEVEL"
-        #   value: "debug"
-        resources:
-          limits:
-            cpu: 500m
-            memory: 128Mi
-          requests:
-            cpu: 250m
-            memory: 128Mi
-        securityContext:
-          runAsUser: 1000
-          runAsGroup: 1000
-          runAsNonRoot: true
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          seccompProfile:
-            type: RuntimeDefault
+        - name: anubis-docs
+          image: ghcr.io/techarohq/anubis/docs:main
+          imagePullPolicy: Always
+          resources:
+            limits:
+              memory: "128Mi"
+              cpu: "500m"
+            requests:
+              cpu: 250m
+              memory: 128Mi
+          ports:
+            - containerPort: 80
+        - name: anubis
+          image: ghcr.io/techarohq/anubis:main
+          imagePullPolicy: Always
+          env:
+            - name: "BIND"
+              value: ":8081"
+            - name: "DIFFICULTY"
+              value: "4"
+            - name: "METRICS_BIND"
+              value: ":9090"
+            - name: "POLICY_FNAME"
+              value: "/xe/cfg/anubis/botPolicies.yaml"
+            - name: "SERVE_ROBOTS_TXT"
+              value: "false"
+            - name: "TARGET"
+              value: "http://localhost:80"
+            # - name: "SLOG_LEVEL"
+            #   value: "debug"
+          volumeMounts:
+            - name: anubis
+              mountPath: /xe/cfg/anubis
+          resources:
+            limits:
+              cpu: 500m
+              memory: 128Mi
+            requests:
+              cpu: 250m
+              memory: 128Mi
+          securityContext:
+            runAsUser: 1000
+            runAsGroup: 1000
+            runAsNonRoot: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            seccompProfile:
+              type: RuntimeDefault
diff --git a/docs/manifest/kustomization.yaml b/docs/manifest/kustomization.yaml
index 8f9b88e..a06219a 100644
--- a/docs/manifest/kustomization.yaml
+++ b/docs/manifest/kustomization.yaml
@@ -2,4 +2,10 @@ resources:
   - deployment.yaml
   - ingress.yaml
   - onionservice.yaml
-  - service.yaml
\ No newline at end of file
+  - service.yaml
+
+configMapGenerator:
+  - name: anubis-cfg
+    behavior: create
+    files:
+      - ./cfg/anubis/botPolicies.yaml
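
Note: kustomize's configMapGenerator appends a content hash to the generated ConfigMap's name and rewrites the Deployment's volume reference to match, so any edit to botPolicies.yaml yields a new ConfigMap name and triggers a rolling update of the pods. A minimal sketch of the object that "kustomize build docs/manifest" renders for this generator; the hash suffix below is hypothetical:

apiVersion: v1
kind: ConfigMap
metadata:
  name: anubis-cfg-9c5m2tk4f8  # hypothetical suffix; kustomize derives it from the file contents
data:
  botPolicies.yaml: |
    ## Anubis can import snippets of configuration into the main configuration
    ## file. [...]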