mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 17:59:24 -04:00
Add Applebot definition (#589)
* Add Applebot definition Adds Apple's search indexing bot, and allowlists it by default. Allowlisted by default because it is equivalent to Googlebot/Bingbot. Remove Applebot from `ai-robots-txt.yaml` for the same reasons. Remove `Applebot-Extended` from `ai-robots-txt.yaml` as it has no effect. * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net> Co-authored-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
fbbab5a035
commit
68a71c6a99
2
.github/actions/spelling/expect.txt
vendored
2
.github/actions/spelling/expect.txt
vendored
@ -6,6 +6,7 @@ amazonbot
|
||||
anthro
|
||||
anubis
|
||||
anubistest
|
||||
Applebot
|
||||
archlinux
|
||||
badregexes
|
||||
berr
|
||||
@ -113,6 +114,7 @@ keypair
|
||||
KHTML
|
||||
kinda
|
||||
KUBECONFIG
|
||||
lcj
|
||||
ldflags
|
||||
letsencrypt
|
||||
Lexentale
|
||||
|
@ -22,6 +22,7 @@ bots:
|
||||
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Apple
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
|
@ -1,4 +1,6 @@
|
||||
# Warning: Contains user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
|
||||
# Note: Blocks human-directed/non-training user agents
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
action: DENY
|
||||
|
@ -1,4 +1,5 @@
|
||||
- import: (data)/crawlers/googlebot.yaml
|
||||
- import: (data)/crawlers/applebot.yaml
|
||||
- import: (data)/crawlers/bingbot.yaml
|
||||
- import: (data)/crawlers/duckduckbot.yaml
|
||||
- import: (data)/crawlers/qwantbot.yaml
|
||||
|
20
data/crawlers/applebot.yaml
Normal file
20
data/crawlers/applebot.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
# Indexing for search and Siri
|
||||
# https://support.apple.com/en-us/119829
|
||||
- name: applebot
|
||||
user_agent_regex: Applebot
|
||||
action: ALLOW
|
||||
# https://search.developer.apple.com/applebot.json
|
||||
remote_addresses: [
|
||||
"17.241.208.160/27",
|
||||
"17.241.193.160/27",
|
||||
"17.241.200.160/27",
|
||||
"17.22.237.0/24",
|
||||
"17.22.245.0/24",
|
||||
"17.22.253.0/24",
|
||||
"17.241.75.0/24",
|
||||
"17.241.219.0/24",
|
||||
"17.241.227.0/24",
|
||||
"17.246.15.0/24",
|
||||
"17.246.19.0/24",
|
||||
"17.246.23.0/24",
|
||||
]
|
@ -40,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added OpenRC init.d script.
|
||||
- Added `--version` flag.
|
||||
- Added `anubis_proxied_requests_total` metric to count proxied requests.
|
||||
- Add `Applebot` as "good" web crawler
|
||||
|
||||
## v1.18.0: Varis zos Galvus
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user