mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-14 12:38:45 +00:00
chore(default-config): allowlist common crawl
This may seem strange, but allowlisting Common Crawl means that scrapers have less incentive to scrape, because they can just grab the data from Common Crawl instead of scraping it again.
This commit is contained in:
12
data/crawlers/commoncrawl.yaml
Normal file
12
data/crawlers/commoncrawl.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# Allowlist rule for Common Crawl's crawler (CCBot).
- name: common-crawl
  # Pattern tested against the request's User-Agent header.
  user_agent_regex: CCBot
  action: ALLOW
  # Address ranges for CCBot; presumably taken from the list published at
  # https://index.commoncrawl.org/ccbot.json (verify when refreshing).
  remote_addresses:
    - "2600:1f28:365:80b0::/60"
    - "18.97.9.168/29"
    - "18.97.14.80/29"
    - "18.97.14.88/30"
    - "98.85.178.216/32"
|
||||
Reference in New Issue
Block a user