mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-13 03:58:45 +00:00
feat: add various robots.txt and YAML configurations for user agent handling and crawl delays
This commit is contained in:
80
cmd/robots2policy/testdata/complex.yaml
vendored
Normal file
80
cmd/robots2policy/testdata/complex.yaml
vendored
Normal file
# Test fixture for robots2policy: expected Anubis policy output for a "complex"
# robots.txt containing multiple user agents, crawl delays, disallow rules,
# regex-style wildcards, and fully blacklisted bots.
#
# Conventions visible in this data:
#   - crawl-delay entries become WEIGH actions with `weight.adjust: 3`
#   - disallow entries become CHALLENGE actions
#   - fully disallowed (blacklisted) agents become DENY actions
#   - expressions are CEL-style; `single` holds one condition, `all` requires
#     every listed condition to match
- name: robots-txt-policy-crawl-delay-1
  action: WEIGH
  expression:
    single: "true"
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-3
  action: CHALLENGE
  expression:
    single: path.startsWith("/private/")
- name: robots-txt-policy-disallow-4
  action: CHALLENGE
  expression:
    single: path.startsWith("/api/internal/")
- name: robots-txt-policy-crawl-delay-5
  action: WEIGH
  expression:
    single: userAgent.contains("Googlebot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-6
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search/")
- name: robots-txt-policy-crawl-delay-7
  action: WEIGH
  expression:
    single: userAgent.contains("Bingbot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-8
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/search/")
- name: robots-txt-policy-disallow-9
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/admin/")
- name: robots-txt-policy-blacklist-10
  action: DENY
  expression:
    single: userAgent.contains("BadBot")
- name: robots-txt-policy-crawl-delay-11
  action: WEIGH
  expression:
    single: userAgent.contains("SeoBot")
  weight:
    adjust: 3
- name: robots-txt-policy-blacklist-12
  action: DENY
  expression:
    single: userAgent.contains("SeoBot")
- name: robots-txt-policy-disallow-13
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/.*/admin")
- name: robots-txt-policy-disallow-14
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      # double backslash: YAML double-quoted scalar escape for a literal `\.`
      - path.matches("^/temp.*\\.html")
- name: robots-txt-policy-disallow-15
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/file.\\.log")
Reference in New Issue
Block a user