mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-09-17 00:33:54 -04:00
Merge branch 'main' into feat/tr-locale
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
commit
bd45eca74e
7
.github/actions/spelling/expect.txt
vendored
7
.github/actions/spelling/expect.txt
vendored
@ -20,11 +20,13 @@ bdba
|
||||
berr
|
||||
bingbot
|
||||
bitcoin
|
||||
bitrate
|
||||
blogging
|
||||
Bluesky
|
||||
blueskybot
|
||||
boi
|
||||
botnet
|
||||
botstopper
|
||||
BPort
|
||||
Brightbot
|
||||
broked
|
||||
@ -52,6 +54,7 @@ ckie
|
||||
cloudflare
|
||||
Codespaces
|
||||
confd
|
||||
connnection
|
||||
containerbuild
|
||||
coreutils
|
||||
Cotoyogi
|
||||
@ -93,6 +96,7 @@ facebookgo
|
||||
Factset
|
||||
fastcgi
|
||||
fediverse
|
||||
ffprobe
|
||||
finfos
|
||||
Firecrawl
|
||||
flagenv
|
||||
@ -199,6 +203,7 @@ omgilibot
|
||||
openai
|
||||
opengraph
|
||||
openrc
|
||||
oswald
|
||||
pag
|
||||
palemoon
|
||||
Pangu
|
||||
@ -272,6 +277,8 @@ SVCNAME
|
||||
tagline
|
||||
tarballs
|
||||
tarrif
|
||||
tbn
|
||||
tbr
|
||||
techaro
|
||||
techarohq
|
||||
templ
|
||||
|
@ -7,5 +7,5 @@
|
||||
# Warning: May contain user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
|
||||
- name: "ai-catchall"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|CCBot|Claude-Web|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|GoogleOther|GoogleOther-Image|GoogleOther-Video|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|Claude-Web|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|GoogleOther|GoogleOther-Image|GoogleOther-Video|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
action: DENY
|
||||
|
@ -1,6 +1,8 @@
|
||||
# Warning: Contains user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
|
||||
# Note: Blocks human-directed/non-training user agents
|
||||
#
|
||||
# CCBot is allowed because if Common Crawl is allowed, then scrapers don't need to scrape to get the data.
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
action: DENY
|
||||
|
@ -6,4 +6,5 @@
|
||||
- import: (data)/crawlers/internet-archive.yaml
|
||||
- import: (data)/crawlers/kagibot.yaml
|
||||
- import: (data)/crawlers/marginalia.yaml
|
||||
- import: (data)/crawlers/mojeekbot.yaml
|
||||
- import: (data)/crawlers/mojeekbot.yaml
|
||||
- import: (data)/crawlers/commoncrawl.yaml
|
||||
|
12
data/crawlers/commoncrawl.yaml
Normal file
12
data/crawlers/commoncrawl.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
- name: common-crawl
|
||||
user_agent_regex: CCBot
|
||||
action: ALLOW
|
||||
# https://index.commoncrawl.org/ccbot.json
|
||||
remote_addresses:
|
||||
[
|
||||
"2600:1f28:365:80b0::/60",
|
||||
"18.97.9.168/29",
|
||||
"18.97.14.80/29",
|
||||
"18.97.14.88/30",
|
||||
"98.85.178.216/32",
|
||||
]
|
@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Remove the "Success" interstitial after a proof of work challenge is concluded.
|
||||
- Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742))
|
||||
- Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751))
|
||||
- Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape
|
||||
|
||||
### Potentially breaking changes
|
||||
|
||||
|
215
docs/docs/admin/botstopper.mdx
Normal file
215
docs/docs/admin/botstopper.mdx
Normal file
@ -0,0 +1,215 @@
|
||||
---
|
||||
title: "Commercial support and an unbranded version"
|
||||
---
|
||||
|
||||
If you want to use Anubis but organizational policies prevent you from using the branding that the open source project ships, we offer a commercial version of Anubis named BotStopper. BotStopper builds off of the open source core of Anubis and offers organizations more control over the branding, including but not limited to:
|
||||
|
||||
- Custom images for different states of the challenge process (in process, success, failure)
|
||||
- Custom CSS and fonts
|
||||
- Custom titles for the challenge and error pages
|
||||
- "Anubis" replaced with "BotStopper" across the UI
|
||||
- A private bug tracker for issues
|
||||
|
||||
In the near future this will expand to:
|
||||
|
||||
- A private challenge implementation that does advanced fingerprinting to check if the client is a genuine browser or not
|
||||
- Advanced fingerprinting via [Thoth-based advanced checks](./thoth.mdx)
|
||||
|
||||
In order to sign up for BotStopper, please do one of the following:
|
||||
|
||||
- Sign up [on GitHub Sponsors](https://github.com/sponsors/Xe) at the $50 per month tier or higher
|
||||
- Email [sales@techaro.lol](mailto:sales@techaro.lol) with your requirements for invoicing, please note that custom invoicing will cost more than using GitHub Sponsors for understandable overhead reasons
|
||||
|
||||
## Installation
|
||||
|
||||
Install BotStopper like you would Anubis, but replace the image reference. EG:
|
||||
|
||||
```diff
|
||||
-ghcr.io/techarohq/anubis:latest
|
||||
+ghcr.io/techarohq/botstopper/anubis:latest
|
||||
```
|
||||
|
||||
### Binary packages
|
||||
|
||||
Binary packages are available [in the GitHub Releases page](https://github.com/TecharoHQ/botstopper/releases), the main difference is that the package name is `techaro-botstopper`, the systemd service is `techaro-botstopper@your-instance.service`, the binary is `/usr/bin/botstopper`, and the configuration is in `/etc/techaro-botstopper`. All other instructions in the [native package install guide](./native-install.mdx) apply.
|
||||
|
||||
### Docker / Podman
|
||||
|
||||
In order to pull the BotStopper image, you need to [authenticate with GitHub's Container Registry](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-to-the-container-registry).
|
||||
|
||||
```text
|
||||
docker login ghcr.io -u your-username --password-stdin
|
||||
```
|
||||
|
||||
Then you can use the image as normal.
|
||||
|
||||
### Kubernetes
|
||||
|
||||
If you are using Kubernetes, you will need to create an image pull secret:
|
||||
|
||||
```text
|
||||
kubectl create secret docker-registry \
|
||||
techarohq-botstopper \
|
||||
--docker-server ghcr.io \
|
||||
--docker-username your-username \
|
||||
--docker-password your-access-token \
|
||||
--docker-email your@email.address
|
||||
```
|
||||
|
||||
Then attach it to your Deployment:
|
||||
|
||||
```diff
|
||||
spec:
|
||||
securityContext:
|
||||
fsGroup: 1000
|
||||
+ imagePullSecrets:
|
||||
+ - name: techarohq-botstopper
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Docker compose
|
||||
|
||||
Follow [the upstream Docker compose directions](https://anubis.techaro.lol/docs/admin/environments/docker-compose) with the following additional options:
|
||||
|
||||
```diff
|
||||
anubis:
|
||||
image: ghcr.io/techarohq/botstopper/anubis:latest
|
||||
environment:
|
||||
BIND: ":8080"
|
||||
DIFFICULTY: "4"
|
||||
METRICS_BIND: ":9090"
|
||||
SERVE_ROBOTS_TXT: "true"
|
||||
TARGET: "http://nginx"
|
||||
OG_PASSTHROUGH: "true"
|
||||
OG_EXPIRY_TIME: "24h"
|
||||
|
||||
+ # botstopper config here
|
||||
+ CHALLENGE_TITLE: "Doing math for your connnection!"
|
||||
+ ERROR_TITLE: "Something went wrong!"
|
||||
+ OVERLAY_FOLDER: /assets
|
||||
+ volumes:
|
||||
+ - "./your_folder:/assets"
|
||||
```
|
||||
|
||||
#### Example
|
||||
|
||||
There is an example in [docker-compose.yaml](https://github.com/TecharoHQ/botstopper/blob/main/docker-compose.yaml). Start the example with `docker compose up`:
|
||||
|
||||
```text
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
And then open [https://botstopper.local.cetacean.club:8443](https://botstopper.local.cetacean.club:8443) in your browser.
|
||||
|
||||
> [!NOTE]
|
||||
> This uses locally signed sacrificial TLS certificates stored in `./demo/pki`. Your browser will rightly reject these. Here is what the example looks like:
|
||||
>
|
||||
> 
|
||||
|
||||
## Custom images and CSS
|
||||
|
||||
Anubis uses an internal filesystem that contains CSS, JavaScript, and images. The BotStopper variant of Anubis lets you specify an overlay folder with the environment variable `OVERLAY_FOLDER`. The contents of this folder will be overlaid on top of Anubis' internal filesystem, allowing you to easily customize the images and CSS.
|
||||
|
||||
Your directory tree should look like this, assuming your data is in `./your_folder`:
|
||||
|
||||
```text
|
||||
./your_folder
|
||||
└── static
|
||||
├── css
|
||||
│ └── custom.css
|
||||
└── img
|
||||
├── happy.webp
|
||||
├── pensive.webp
|
||||
└── reject.webp
|
||||
```
|
||||
|
||||
For an example directory tree using some off-the-shelf images the Tango icon set, see the [testdata](https://github.com/TecharoHQ/botstopper/tree/main/testdata/static/img) folder.
|
||||
|
||||
### Custom CSS
|
||||
|
||||
CSS customization is done mainly with CSS variables. View [the example custom CSS file](https://github.com/TecharoHQ/botstopper/blob/main/testdata/static/css/custom.css) for more information about what can be customized.
|
||||
|
||||
### Custom fonts
|
||||
|
||||
If you want to add custom fonts, copy the `woff2` files alongside your `custom.css` file and then include them with the [`@font-face` CSS at-rule](https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face):
|
||||
|
||||
```css
|
||||
@font-face {
|
||||
font-family: "Oswald";
|
||||
font-style: normal;
|
||||
font-weight: 200 900;
|
||||
font-display: swap;
|
||||
src: url("./fonts/oswald.woff2") format("woff2");
|
||||
}
|
||||
```
|
||||
|
||||
Then adjust your CSS variables accordingly:
|
||||
|
||||
```css
|
||||
:root {
|
||||
--body-sans-font: Oswald, sans-serif;
|
||||
--body-preformatted-font: monospace;
|
||||
--body-title-font: serif;
|
||||
}
|
||||
```
|
||||
|
||||
To convert `.ttf` fonts to [Web-optimized woff2 fonts](https://www.w3.org/TR/WOFF2/), use the `woff2_compress` command from the `woff2` or `woff2-tools` package:
|
||||
|
||||
```console
|
||||
$ woff2_compress oswald.ttf
|
||||
Processing oswald.ttf => oswald.woff2
|
||||
Compressed 159517 to 70469.
|
||||
```
|
||||
|
||||
Then you can import and use it as normal.
|
||||
|
||||
### Customizing images
|
||||
|
||||
Anubis uses three images to visually communicate the state of the program. These are:
|
||||
|
||||
| Image name | Intended message | Example |
|
||||
| :------------- | :----------------------------------------------- | :-------------------------------- |
|
||||
| `happy.webp` | You have passed validation, all is good |  |
|
||||
| `pensive.webp` | Checking is running, hold steady until it's done |  |
|
||||
| `reject.webp` | Something went wrong, this is a terminal state |  |
|
||||
|
||||
To make your own images at the optimal quality, use the following ffmpeg command:
|
||||
|
||||
```text
|
||||
ffmpeg -i /path/to/image -vf scale=-1:384 happy.webp
|
||||
```
|
||||
|
||||
`ffprobe` should report something like this on the generated images:
|
||||
|
||||
```text
|
||||
Input #0, webp_pipe, from 'happy.webp':
|
||||
Duration: N/A, bitrate: N/A
|
||||
Stream #0:0: Video: webp, none, 25 fps, 25 tbr, 25 tbn
|
||||
```
|
||||
|
||||
In testing 384 by 384 pixels gives the best balance between filesize, quality, and clarity.
|
||||
|
||||
```text
|
||||
$ du -hs *
|
||||
4.0K happy.webp
|
||||
12K pensive.webp
|
||||
8.0K reject.webp
|
||||
```
|
||||
|
||||
## Customizing messages
|
||||
|
||||
You can customize messages using the following environment variables:
|
||||
|
||||
| Message | Environment variable | Default |
|
||||
| :------------------- | :------------------- | :----------------------------------------- |
|
||||
| Challenge page title | `CHALLENGE_TITLE` | `Ensuring the security of your connection` |
|
||||
| Error page title | `ERROR_TITLE` | `Error` |
|
||||
|
||||
For example:
|
||||
|
||||
```sh
|
||||
# /etc/techaro-botstopper/gitea.env
|
||||
CHALLENGE_TITLE="Wait a moment please!"
|
||||
ERROR_TITLE="Client error"
|
||||
```
|
@ -11,6 +11,7 @@
|
||||
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
||||
|
||||
bots:
|
||||
- import: (data)/crawlers/commoncrawl.yaml
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
|
BIN
docs/static/img/botstopper/example-screenshot.webp
vendored
Normal file
BIN
docs/static/img/botstopper/example-screenshot.webp
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
BIN
docs/static/img/botstopper/happy.webp
vendored
Normal file
BIN
docs/static/img/botstopper/happy.webp
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.5 KiB |
BIN
docs/static/img/botstopper/pensive.webp
vendored
Normal file
BIN
docs/static/img/botstopper/pensive.webp
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.0 KiB |
BIN
docs/static/img/botstopper/reject.webp
vendored
Normal file
BIN
docs/static/img/botstopper/reject.webp
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.1 KiB |
@ -9,7 +9,6 @@ User-agent: Applebot-Extended
|
||||
User-agent: bedrockbot
|
||||
User-agent: Brightbot 1.0
|
||||
User-agent: Bytespider
|
||||
User-agent: CCBot
|
||||
User-agent: ChatGPT-User
|
||||
User-agent: Claude-SearchBot
|
||||
User-agent: Claude-User
|
||||
|
Loading…
x
Reference in New Issue
Block a user