mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 17:59:24 -04:00
Opt-in policies for OpenAI and MistralAI bots (#590)
* Define OpenAI bot ALLOW policies

  Allows OpenAI bots to be allowlisted at the choice of the Anubis administrator. None are enabled by default.

* Define MistralAI bot ALLOW policy

* chore: spelling
This commit is contained in:
parent
ba00cdacd2
commit
0d9ebebff6
3
.github/actions/spelling/expect.txt
vendored
3
.github/actions/spelling/expect.txt
vendored
@ -85,6 +85,7 @@ goodbot
|
|||||||
googlebot
|
googlebot
|
||||||
govulncheck
|
govulncheck
|
||||||
GPG
|
GPG
|
||||||
|
GPT
|
||||||
grw
|
grw
|
||||||
Hashcash
|
Hashcash
|
||||||
hashrate
|
hashrate
|
||||||
@ -136,6 +137,7 @@ memes
|
|||||||
metrix
|
metrix
|
||||||
mimi
|
mimi
|
||||||
minica
|
minica
|
||||||
|
mistralai
|
||||||
Mojeek
|
Mojeek
|
||||||
mojeekbot
|
mojeekbot
|
||||||
mozilla
|
mozilla
|
||||||
@ -146,6 +148,7 @@ NONINFRINGEMENT
|
|||||||
nosleep
|
nosleep
|
||||||
ogtags
|
ogtags
|
||||||
onionservice
|
onionservice
|
||||||
|
openai
|
||||||
openrc
|
openrc
|
||||||
pag
|
pag
|
||||||
parseable
|
parseable
|
||||||
|
10
data/clients/mistral-mistralai-user.yaml
Normal file
10
data/clients/mistral-mistralai-user.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Acts on behalf of user requests
|
||||||
|
# https://docs.mistral.ai/robots/
|
||||||
|
- name: mistral-mistralai-user
|
||||||
|
user_agent_regex: MistralAI-User/.+; \+https\://docs\.mistral\.ai/robots
|
||||||
|
action: ALLOW
|
||||||
|
# https://mistral.ai/mistralai-user-ips.json
|
||||||
|
remote_addresses: [
|
||||||
|
"20.240.160.161/32",
|
||||||
|
"20.240.160.1/32",
|
||||||
|
]
|
93
data/clients/openai-chatgpt-user.yaml
Normal file
93
data/clients/openai-chatgpt-user.yaml
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
# Acts on behalf of user requests
|
||||||
|
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||||
|
- name: openai-chatgpt-user
|
||||||
|
user_agent_regex: ChatGPT-User/.+; \+https\://openai\.com/bot
|
||||||
|
action: ALLOW
|
||||||
|
# https://openai.com/chatgpt-user.json
|
||||||
|
# curl 'https://openai.com/chatgpt-user.json' | jq '.prefixes[].ipv4Prefix' | sed 's/$/,/'
|
||||||
|
remote_addresses: [
|
||||||
|
"13.65.138.112/28",
|
||||||
|
"23.98.179.16/28",
|
||||||
|
"13.65.138.96/28",
|
||||||
|
"172.183.222.128/28",
|
||||||
|
"20.102.212.144/28",
|
||||||
|
"40.116.73.208/28",
|
||||||
|
"172.183.143.224/28",
|
||||||
|
"52.190.190.16/28",
|
||||||
|
"13.83.237.176/28",
|
||||||
|
"51.8.155.64/28",
|
||||||
|
"74.249.86.176/28",
|
||||||
|
"51.8.155.48/28",
|
||||||
|
"20.55.229.144/28",
|
||||||
|
"135.237.131.208/28",
|
||||||
|
"135.237.133.48/28",
|
||||||
|
"51.8.155.112/28",
|
||||||
|
"135.237.133.112/28",
|
||||||
|
"52.159.249.96/28",
|
||||||
|
"52.190.137.16/28",
|
||||||
|
"52.255.111.112/28",
|
||||||
|
"40.84.181.32/28",
|
||||||
|
"172.178.141.112/28",
|
||||||
|
"52.190.142.64/28",
|
||||||
|
"172.178.140.144/28",
|
||||||
|
"52.190.137.144/28",
|
||||||
|
"172.178.141.128/28",
|
||||||
|
"57.154.187.32/28",
|
||||||
|
"4.196.118.112/28",
|
||||||
|
"20.193.50.32/28",
|
||||||
|
"20.215.188.192/28",
|
||||||
|
"20.215.214.16/28",
|
||||||
|
"4.197.22.112/28",
|
||||||
|
"4.197.115.112/28",
|
||||||
|
"172.213.21.16/28",
|
||||||
|
"172.213.11.144/28",
|
||||||
|
"172.213.12.112/28",
|
||||||
|
"172.213.21.144/28",
|
||||||
|
"20.90.7.144/28",
|
||||||
|
"57.154.175.0/28",
|
||||||
|
"57.154.174.112/28",
|
||||||
|
"52.236.94.144/28",
|
||||||
|
"137.135.191.176/28",
|
||||||
|
"23.98.186.192/28",
|
||||||
|
"23.98.186.96/28",
|
||||||
|
"23.98.186.176/28",
|
||||||
|
"23.98.186.64/28",
|
||||||
|
"68.221.67.192/28",
|
||||||
|
"68.221.67.160/28",
|
||||||
|
"13.83.167.128/28",
|
||||||
|
"20.228.106.176/28",
|
||||||
|
"52.159.227.32/28",
|
||||||
|
"68.220.57.64/28",
|
||||||
|
"172.213.21.112/28",
|
||||||
|
"68.221.67.224/28",
|
||||||
|
"68.221.75.16/28",
|
||||||
|
"20.97.189.96/28",
|
||||||
|
"52.252.113.240/28",
|
||||||
|
"52.230.163.32/28",
|
||||||
|
"172.212.159.64/28",
|
||||||
|
"52.255.111.80/28",
|
||||||
|
"52.255.111.0/28",
|
||||||
|
"4.151.241.240/28",
|
||||||
|
"52.255.111.32/28",
|
||||||
|
"52.255.111.48/28",
|
||||||
|
"52.255.111.16/28",
|
||||||
|
"52.230.164.176/28",
|
||||||
|
"52.176.139.176/28",
|
||||||
|
"52.173.234.16/28",
|
||||||
|
"4.151.71.176/28",
|
||||||
|
"4.151.119.48/28",
|
||||||
|
"52.255.109.112/28",
|
||||||
|
"52.255.109.80/28",
|
||||||
|
"20.161.75.208/28",
|
||||||
|
"68.154.28.96/28",
|
||||||
|
"52.255.109.128/28",
|
||||||
|
"52.225.75.208/28",
|
||||||
|
"52.190.139.48/28",
|
||||||
|
"68.221.67.240/28",
|
||||||
|
"52.156.77.144/28",
|
||||||
|
"52.148.129.32/28",
|
||||||
|
"40.84.221.208/28",
|
||||||
|
"104.210.139.224/28",
|
||||||
|
"40.84.221.224/28",
|
||||||
|
"104.210.139.192/28",
|
||||||
|
]
|
16
data/crawlers/openai-gptbot.yaml
Normal file
16
data/crawlers/openai-gptbot.yaml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Collects AI training data
|
||||||
|
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||||
|
- name: openai-gptbot
|
||||||
|
user_agent_regex: GPTBot/1\.1; \+https\://openai\.com/gptbot
|
||||||
|
action: ALLOW
|
||||||
|
# https://openai.com/gptbot.json
|
||||||
|
remote_addresses: [
|
||||||
|
"52.230.152.0/24",
|
||||||
|
"20.171.206.0/24",
|
||||||
|
"20.171.207.0/24",
|
||||||
|
"4.227.36.0/25",
|
||||||
|
"20.125.66.80/28",
|
||||||
|
"172.182.204.0/24",
|
||||||
|
"172.182.214.0/24",
|
||||||
|
"172.182.215.0/24",
|
||||||
|
]
|
13
data/crawlers/openai-searchbot.yaml
Normal file
13
data/crawlers/openai-searchbot.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Indexing for search, does not collect training data
|
||||||
|
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||||
|
- name: openai-searchbot
|
||||||
|
user_agent_regex: OAI-SearchBot/1\.0; \+https\://openai\.com/searchbot
|
||||||
|
action: ALLOW
|
||||||
|
# https://openai.com/searchbot.json
|
||||||
|
remote_addresses: [
|
||||||
|
"20.42.10.176/28",
|
||||||
|
"172.203.190.128/28",
|
||||||
|
"104.210.140.128/28",
|
||||||
|
"51.8.102.0/24",
|
||||||
|
"135.234.64.0/24"
|
||||||
|
]
|
Loading…
x
Reference in New Issue
Block a user