diff --git a/data/botPolicies.json b/data/botPolicies.json
index 160bbf0f..9e7ded29 100644
--- a/data/botPolicies.json
+++ b/data/botPolicies.json
@@ -12,6 +12,12 @@
     {
       "import": "(data)/bots/us-ai-scraper.yaml"
     },
+    {
+      "import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
+    },
+    {
+      "import": "(data)/clients/curl-impersonate.yaml"
+    },
     {
       "import": "(data)/crawlers/googlebot.yaml"
     },
@@ -46,4 +52,4 @@
     }
   ],
   "dnsbl": false
-}
\ No newline at end of file
+}
diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml
index 51af499e..12319b39 100644
--- a/data/botPolicies.yaml
+++ b/data/botPolicies.yaml
@@ -17,6 +17,8 @@ bots:
 - import: (data)/bots/cloudflare-workers.yaml
 - import: (data)/bots/headless-browsers.yaml
 - import: (data)/bots/us-ai-scraper.yaml
+- import: (data)/bots/aggressive-brazilian-scrapers.yaml
+- import: (data)/clients/curl-impersonate.yaml
 
 # Search engines to allow
 - import: (data)/crawlers/googlebot.yaml
diff --git a/data/bots/aggressive-brazilian-scrapers.yaml b/data/bots/aggressive-brazilian-scrapers.yaml
new file mode 100644
index 00000000..6b38d540
--- /dev/null
+++ b/data/bots/aggressive-brazilian-scrapers.yaml
@@ -0,0 +1,30 @@
+- name: deny-aggressive-brazilian-scrapers
+  action: DENY
+  expression:
+    any:
+    # Internet Explorer should be out of support
+    - userAgent.contains("MSIE")
+    # Trident is the Internet Explorer browser engine
+    - userAgent.contains("Trident")
+    # iPods are not in common use
+    - userAgent.contains("iPod")
+    # Presto was Opera's own engine before Opera switched to Chromium
+    - userAgent.contains("Presto")
+    # Windows CE is discontinued
+    - userAgent.contains("Windows CE")
+    # Windows 95 is discontinued
+    - userAgent.contains("Windows 95")
+    # Windows 98 is discontinued
+    - userAgent.contains("Windows 98")
+    # Windows 9.x is discontinued
+    - userAgent.contains("Win 9x")
+    # Amazon does not have an Alexa Toolbar.
+    - userAgent.contains("Alexa Toolbar")
+- name: challenge-aggressive-brazilian-scrapers
+  action: CHALLENGE
+  expression:
+    any:
+    # This is not released, even Windows 11 calls itself Windows 10
+    - userAgent.contains("Windows NT 11.0")
+    # Chrome-based User-Agents also contain "Safari"; this matches clients presenting a Safari token without a Chrome token
+    - (!userAgent.contains("Chrome") && userAgent.contains("Safari"))
diff --git a/data/bots/cloudflare-workers.yaml b/data/bots/cloudflare-workers.yaml
index 3fe051be..03059d87 100644
--- a/data/bots/cloudflare-workers.yaml
+++ b/data/bots/cloudflare-workers.yaml
@@ -1,4 +1,4 @@
 - name: cloudflare-workers
-  headers_regex:
-    CF-Worker: .*
+  expression: >
+    "Cf-Worker" in headers
   action: DENY
\ No newline at end of file
diff --git a/data/clients/curl-impersonate.yaml b/data/clients/curl-impersonate.yaml
new file mode 100644
index 00000000..dbb21160
--- /dev/null
+++ b/data/clients/curl-impersonate.yaml
@@ -0,0 +1,35 @@
+# Challenge requests whose Sec-Ch-Ua header exactly equals one of the values listed below.
+# NOTE(review): presumably these are the fixed values sent by curl-impersonate's browser profiles; verify against upstream.
+# Each value is a single-quoted CEL string because the header value itself contains double quotes.
+- name: curl-impersonate
+  action: CHALLENGE
+  expression:
+    any:
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="107", "Chromium";v="107", "Not=A?Brand";v="24"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Microsoft Edge";v="101"'
+    - >
+      "Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="99", "Microsoft Edge";v="99"'
diff --git a/data/apps/git-client.yaml b/data/clients/git.yaml
similarity index 100%
rename from data/apps/git-client.yaml
rename to data/clients/git.yaml