mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-17 05:44:57 +00:00
feat: add various robots.txt and YAML configurations for user agent handling and crawl delays
This commit is contained in:
36
cmd/robots2policy/testdata/blacklist.yaml
vendored
Normal file
36
cmd/robots2policy/testdata/blacklist.yaml
vendored
Normal file
@@ -0,0 +1,36 @@
- name: robots-txt-policy-crawl-delay-1
  action: WEIGH
  expression:
    single: "true"
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin")
- name: robots-txt-policy-blacklist-3
  action: DENY
  expression:
    single: userAgent.contains("BadBot")
- name: robots-txt-policy-crawl-delay-4
  action: WEIGH
  expression:
    single: userAgent.contains("SpamBot")
  weight:
    adjust: 3
- name: robots-txt-policy-blacklist-5
  action: DENY
  expression:
    single: userAgent.contains("SpamBot")
- name: robots-txt-policy-crawl-delay-6
  action: WEIGH
  expression:
    single: userAgent.contains("Googlebot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-7
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search")
Reference in New Issue
Block a user