fix: handle multiple user agents

This commit is contained in:
Jason Cameron
2025-07-26 20:27:00 -04:00
parent a735770c93
commit 291ed2a084
5 changed files with 269 additions and 140 deletions

View File

@@ -3,28 +3,28 @@
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 3
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-2
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-3
- action: WEIGH
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-crawl-delay-4
weight:
adjust: 3
- action: DENY
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-blacklist-5
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-6
name: robots-txt-policy-crawl-delay-2
weight:
adjust: 3
- action: WEIGH
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-crawl-delay-3
weight:
adjust: 3
- action: DENY
expression:
any:
- userAgent.contains("BadBot")
- userAgent.contains("SpamBot")
name: robots-txt-policy-blacklist-4
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search")
name: robots-txt-policy-disallow-7
- userAgent.contains("Googlebot")
- path.startsWith("/search")
name: robots-txt-policy-disallow-6

View File

@@ -1,71 +1,68 @@
- action: WEIGH
expression: "true"
expression: userAgent.contains("Bingbot")
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 5
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.startsWith("/private/")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.startsWith("/api/internal/")
name: robots-txt-policy-disallow-4
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-5
name: robots-txt-policy-crawl-delay-2
weight:
adjust: 5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-6
- action: WEIGH
expression: userAgent.contains("Bingbot")
name: robots-txt-policy-crawl-delay-7
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-crawl-delay-3
weight:
adjust: 5
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-4
weight:
adjust: 5
- action: DENY
expression:
any:
- userAgent.contains("BadBot")
- userAgent.contains("SeoBot")
name: robots-txt-policy-blacklist-5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/search/")
- userAgent.contains("TestBot")
- path.matches("^/.*/admin")
name: robots-txt-policy-disallow-6
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/admin/")
name: robots-txt-policy-disallow-8
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/admin/")
expression: path.startsWith("/api/internal/")
name: robots-txt-policy-disallow-9
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-10
- action: WEIGH
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-crawl-delay-11
weight:
adjust: 5
- action: DENY
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-blacklist-12
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/.*/admin")
- userAgent.contains("TestBot")
- path.matches("^/file.\\.log")
name: robots-txt-policy-disallow-10
- action: CHALLENGE
expression: path.startsWith("/private/")
name: robots-txt-policy-disallow-11
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-13
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html")
- userAgent.contains("Bingbot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-14
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/file.\\.log")
- userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html")
name: robots-txt-policy-disallow-15

View File

@@ -1,12 +1,12 @@
[
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/admin/\")",
"name": "robots-txt-policy-disallow-1"
"name": "robots-txt-policy-disallow-1",
"action": "CHALLENGE"
},
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/private\")",
"name": "robots-txt-policy-disallow-2"
"name": "robots-txt-policy-disallow-2",
"action": "CHALLENGE"
}
]

View File

@@ -1,12 +1,12 @@
- action: CHALLENGE
expression: path.matches("^/search.*")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.matches("^/.*/private")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.matches("^/admin/.*.action=delete")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.matches("^/file.\\.txt")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.matches("^/admin/.*.action=delete")
expression: path.matches("^/search.*")
name: robots-txt-policy-disallow-4