mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-20 07:06:40 +00:00
7c0996448a
This may seem strange, but allowlisting common crawl means that scrapers have less incentive to scrape because they can just grab the data from common crawl instead of scraping it again.
13 lines
271 B
YAML
13 lines
271 B
YAML
# Allow Common Crawl's crawler. Allowlisting CCBot reduces the incentive for
# other scrapers to hit this site directly, since they can fetch the data
# from the Common Crawl corpus instead.
- name: common-crawl
  user_agent_regex: CCBot
  action: ALLOW
  # Published CCBot source ranges: https://index.commoncrawl.org/ccbot.json
  # NOTE(review): these ranges change over time — re-sync against that URL
  # periodically.
  remote_addresses:
    - "2600:1f28:365:80b0::/60"
    - "18.97.9.168/29"
    - "18.97.14.80/29"
    - "18.97.14.88/30"
    - "98.85.178.216/32"