chore(data): reformat some things for expressions

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso
2025-04-25 16:04:15 -04:00
parent 92a3e5ba81
commit 80bd7c563b
6 changed files with 73 additions and 3 deletions

View File

@@ -12,6 +12,12 @@
{
"import": "(data)/bots/us-ai-scraper.yaml"
},
{
"import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
},
{
"import": "(data)/clients/curl-impersonate.yaml"
},
{
"import": "(data)/crawlers/googlebot.yaml"
},
@@ -46,4 +52,4 @@
}
],
"dnsbl": false
}
}

View File

@@ -17,6 +17,8 @@ bots:
- import: (data)/bots/cloudflare-workers.yaml
- import: (data)/bots/headless-browsers.yaml
- import: (data)/bots/us-ai-scraper.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
- import: (data)/clients/curl-impersonate.yaml
# Search engines to allow
- import: (data)/crawlers/googlebot.yaml

View File

@@ -0,0 +1,30 @@
# Deny requests whose User-Agent contains any of the substrings below. Each one
# names a browser, engine, device, or OS release that is discontinued, so the
# entries are treated as markers of a fabricated User-Agent.
- name: deny-aggressive-brazilian-scrapers
  action: DENY
  expression:
    any:
      # Internet Explorer is out of support
      - userAgent.contains("MSIE")
      # Trident is the Internet Explorer browser engine
      - userAgent.contains("Trident")
      # iPods are not in common use
      - userAgent.contains("iPod")
      # Presto was Opera's own engine; Opera is Chromium-based now
      - userAgent.contains("Presto")
      # Windows CE is discontinued
      - userAgent.contains("Windows CE")
      # Windows 95 is discontinued
      - userAgent.contains("Windows 95")
      # Windows 98 is discontinued
      - userAgent.contains("Windows 98")
      # Windows 9.x is discontinued
      - userAgent.contains("Win 9x")
      # Amazon does not have an Alexa Toolbar
      - userAgent.contains("Alexa Toolbar")
# Challenge (rather than deny) requests whose User-Agent matches any of the
# expressions below.
- name: challenge-aggressive-brazilian-scrapers
  action: CHALLENGE
  expression:
    any:
      # This is not released, even Windows 11 calls itself Windows 10
      - userAgent.contains("Windows NT 11.0")
      # Matches a User-Agent that contains "Safari" but not "Chrome".
      # Chrome-family browsers send both tokens, so this selects clients
      # presenting themselves as Safari proper.
      # NOTE(review): genuine Safari matches this expression too, so real
      # Safari users will be challenged — confirm that is intended.
      - (!userAgent.contains("Chrome") && userAgent.contains("Safari"))

View File

@@ -1,4 +1,4 @@
# Deny any request that carries a Cf-Worker header. Only the presence of the
# header is tested; its value is not inspected.
- name: cloudflare-workers
  expression: >-
    "Cf-Worker" in headers
  action: DENY

View File

@@ -0,0 +1,32 @@
# Challenge clients whose Sec-Ch-Ua header is exactly equal to one of the
# values listed below.
# NOTE(review): presumably these are the fixed Sec-Ch-Ua values sent by
# curl-impersonate's browser profiles — confirm against that project before
# adding or removing entries.
#
# Every header value is itself full of double quotes, so each one is written
# as a single-quoted CEL string literal. Without the outer quotes the
# `;v="..."` tail sits outside any string and the comparison is not a valid
# CEL expression.
- name: curl-impersonate
  action: CHALLENGE
  expression:
    any:
      # Values that advertise "Google Chrome"
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Google Chrome";v="107", "Chromium";v="107", "Not=A?Brand";v="24"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"'
      # Values that advertise "Microsoft Edge"
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Microsoft Edge";v="101"'
      - >-
        "Sec-Ch-Ua" in headers &&
        headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="99", "Microsoft Edge";v="99"'