From 281b6c5c00cde223737aadc8a6aa2a364abc2ffd Mon Sep 17 00:00:00 2001 From: Dryusdan Date: Sun, 8 Jun 2025 20:54:47 +0200 Subject: [PATCH] Bump ai.robots.txt to v1.34 (#632) --- data/bots/ai-robots-txt.yaml | 2 +- docs/docs/CHANGELOG.md | 5 +++-- web/static/robots.txt | 11 ++++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/data/bots/ai-robots-txt.yaml b/data/bots/ai-robots-txt.yaml index e515201..16df8cb 100644 --- a/data/bots/ai-robots-txt.yaml +++ b/data/bots/ai-robots-txt.yaml @@ -2,5 +2,5 @@ # Note: Blocks human-directed/non-training user agents - name: "ai-robots-txt" user_agent_regex: >- - AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot + AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot action: DENY diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 73243f2..3278abe 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -10,13 +10,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -- Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in - development +- Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in + development - Refactor challenge presentation logic to use a challenge registry - Allow challenge implementations to register HTTP routes - Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95)) +- Bump AI-robots.txt to version 1.34 ## v1.19.1: Jenomis cen Lexentale - Echo 1 diff --git a/web/static/robots.txt b/web/static/robots.txt index 71fb1f1..1b0f841 100644 --- a/web/static/robots.txt +++ b/web/static/robots.txt @@ -2,9 +2,11 @@ User-agent: AI2Bot User-agent: Ai2Bot-Dolma User-agent: aiHitBot User-agent: Amazonbot +User-agent: Andibot User-agent: anthropic-ai User-agent: Applebot User-agent: Applebot-Extended +User-agent: bedrockbot User-agent: Brightbot 1.0 User-agent: Bytespider User-agent: CCBot @@ -33,7 +35,6 @@ User-agent: iaskspider/2.0 User-agent: ICC-Crawler User-agent: ImagesiftBot User-agent: img2dataset -User-agent: imgproxy User-agent: ISSCyberRiskCrawler User-agent: Kangaroo Bot User-agent: meta-externalagent @@ -47,10 +48,16 @@ User-agent: omgili User-agent: omgilibot User-agent: Operator User-agent: PanguBot +User-agent: Panscient +User-agent: panscient.com User-agent: Perplexity-User User-agent: PerplexityBot User-agent: PetalBot +User-agent: PhindBot User-agent: QualifiedBot +User-agent: QuillBot +User-agent: quillbot.com +User-agent: SBIntuitionsBot User-agent: Scrapy User-agent: SemrushBot-OCOB User-agent: SemrushBot-SWA @@ -60,6 +67,8 @@ User-agent: Timpibot User-agent: VelenPublicWebCrawler User-agent: Webzio-Extended User-agent: wpbot +User-agent: YandexAdditional +User-agent: YandexAdditionalBot User-agent: YouBot Disallow: /