mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 09:48:08 -04:00
Bump AI-robots.txt rules to version 1.29 (#383)
This commit is contained in:
parent
b0f0913ea2
commit
76514f9f32
@ -1,4 +1,4 @@
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
|
||||
action: DENY
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
|
||||
action: DENY
|
||||
|
@ -1,9 +1,11 @@
|
||||
User-agent: AI2Bot
|
||||
User-agent: Ai2Bot-Dolma
|
||||
User-agent: aiHitBot
|
||||
User-agent: Amazonbot
|
||||
User-agent: anthropic-ai
|
||||
User-agent: Applebot
|
||||
User-agent: Applebot-Extended
|
||||
User-agent: Brightbot 1.0
|
||||
User-agent: Bytespider
|
||||
User-agent: CCBot
|
||||
User-agent: ChatGPT-User
|
||||
@ -11,9 +13,13 @@ User-agent: Claude-Web
|
||||
User-agent: ClaudeBot
|
||||
User-agent: cohere-ai
|
||||
User-agent: cohere-training-data-crawler
|
||||
User-agent: Cotoyogi
|
||||
User-agent: Crawlspace
|
||||
User-agent: Diffbot
|
||||
User-agent: DuckAssistBot
|
||||
User-agent: FacebookBot
|
||||
User-agent: Factset_spyderbot
|
||||
User-agent: FirecrawlAgent
|
||||
User-agent: FriendlyCrawler
|
||||
User-agent: Google-Extended
|
||||
User-agent: GoogleOther
|
||||
@ -24,19 +30,27 @@ User-agent: iaskspider/2.0
|
||||
User-agent: ICC-Crawler
|
||||
User-agent: ImagesiftBot
|
||||
User-agent: img2dataset
|
||||
User-agent: imgproxy
|
||||
User-agent: ISSCyberRiskCrawler
|
||||
User-agent: Kangaroo Bot
|
||||
User-agent: meta-externalagent
|
||||
User-agent: Meta-ExternalAgent
|
||||
User-agent: meta-externalfetcher
|
||||
User-agent: Meta-ExternalFetcher
|
||||
User-agent: NovaAct
|
||||
User-agent: OAI-SearchBot
|
||||
User-agent: omgili
|
||||
User-agent: omgilibot
|
||||
User-agent: Operator
|
||||
User-agent: PanguBot
|
||||
User-agent: Perplexity-User
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PetalBot
|
||||
User-agent: Scrapy
|
||||
User-agent: SemrushBot
|
||||
User-agent: SemrushBot-OCOB
|
||||
User-agent: SemrushBot-SWA
|
||||
User-agent: Sidetrade indexer bot
|
||||
User-agent: TikTokSpider
|
||||
User-agent: Timpibot
|
||||
User-agent: VelenPublicWebCrawler
|
||||
User-agent: Webzio-Extended
|
||||
@ -44,4 +58,4 @@ User-agent: YouBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
Disallow: /
|
||||
|
Loading…
x
Reference in New Issue
Block a user