diff --git a/data/bots/ai-robots-txt.yaml b/data/bots/ai-robots-txt.yaml index 5beb090..bcbacc3 100644 --- a/data/bots/ai-robots-txt.yaml +++ b/data/bots/ai-robots-txt.yaml @@ -1,4 +1,4 @@ - name: "ai-robots-txt" user_agent_regex: >- - AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot + AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot action: DENY diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index f23650e..702f4ac 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated the nonce value in the challenge JWT cookie to be a string instead of a number - Rename cookies in response to user feedback - Ensure cookie renaming is consistent across configuration options +- Bump AI-robots.txt to version 1.30 (add QualifiedBot) - Add `RuntimeDirectory` to systemd unit settings so native packages can listen over unix sockets ## v1.18.0: Varis zos Galvus @@ -44,7 +45,7 @@ Or as complicated as: expression: all: - >- - ( + ( userAgent.startsWith("git/") || userAgent.contains("libgit") || userAgent.startsWith("go-git") || diff --git a/web/static/robots.txt b/web/static/robots.txt index e9dd073..5c4c748 100644 --- a/web/static/robots.txt +++ b/web/static/robots.txt @@ -46,6 +46,7 @@ User-agent: PanguBot User-agent: Perplexity-User User-agent: PerplexityBot User-agent: PetalBot +User-agent: QualifiedBot User-agent: Scrapy User-agent: SemrushBot-OCOB User-agent: SemrushBot-SWA