mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-14 04:28:49 +00:00
Compare commits
2 Commits
Xe/unflake
...
Xe/preact-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
db0a5809d3 | ||
|
|
ce1d877012 |
1
.github/workflows/smoke-tests.yml
vendored
1
.github/workflows/smoke-tests.yml
vendored
@@ -14,7 +14,6 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
test:
|
test:
|
||||||
- default-config-macro
|
|
||||||
- double_slash
|
- double_slash
|
||||||
- forced-language
|
- forced-language
|
||||||
- git-clone
|
- git-clone
|
||||||
|
|||||||
@@ -11,9 +11,6 @@
|
|||||||
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
||||||
|
|
||||||
bots:
|
bots:
|
||||||
# You can import the entire default config with this macro:
|
|
||||||
# - import: (data)/meta/default-config.yaml
|
|
||||||
|
|
||||||
# Pathological bots to deny
|
# Pathological bots to deny
|
||||||
- # This correlates to data/bots/_deny-pathological.yaml in the source tree
|
- # This correlates to data/bots/_deny-pathological.yaml in the source tree
|
||||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
|
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
|
||||||
@@ -96,44 +93,6 @@ bots:
|
|||||||
# weight:
|
# weight:
|
||||||
# adjust: -10
|
# adjust: -10
|
||||||
|
|
||||||
# Assert behaviour that only genuine browsers display. This ensures that Chrome
|
|
||||||
# or Firefox versions
|
|
||||||
- name: realistic-browser-catchall
|
|
||||||
expression:
|
|
||||||
all:
|
|
||||||
- '"User-Agent" in headers'
|
|
||||||
- '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
|
|
||||||
- '"Accept" in headers'
|
|
||||||
- '"Sec-Fetch-Dest" in headers'
|
|
||||||
- '"Sec-Fetch-Mode" in headers'
|
|
||||||
- '"Sec-Fetch-Site" in headers'
|
|
||||||
- '"Upgrade-Insecure-Requests" in headers'
|
|
||||||
- '"Accept-Encoding" in headers'
|
|
||||||
- '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
|
|
||||||
- '"Accept-Language" in headers'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: -10
|
|
||||||
|
|
||||||
# Chrome should behave like Chrome
|
|
||||||
- name: chrome-is-proper
|
|
||||||
expression:
|
|
||||||
all:
|
|
||||||
- userAgent.contains("Chrome")
|
|
||||||
- '"Sec-Ch-Ua" in headers'
|
|
||||||
- 'headers["Sec-Ch-Ua"].contains("Chromium")'
|
|
||||||
- '"Sec-Ch-Ua-Mobile" in headers'
|
|
||||||
- '"Sec-Ch-Ua-Platform" in headers'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: -5
|
|
||||||
|
|
||||||
- name: should-have-accept
|
|
||||||
expression: '!("Accept" in headers)'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: 5
|
|
||||||
|
|
||||||
# Generic catchall rule
|
# Generic catchall rule
|
||||||
- name: generic-browser
|
- name: generic-browser
|
||||||
user_agent_regex: >-
|
user_agent_regex: >-
|
||||||
|
|||||||
@@ -2,19 +2,13 @@
|
|||||||
action: ALLOW
|
action: ALLOW
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- >
|
- >
|
||||||
(
|
(
|
||||||
userAgent.startsWith("git/") ||
|
userAgent.startsWith("git/") ||
|
||||||
userAgent.contains("libgit") ||
|
userAgent.contains("libgit") ||
|
||||||
userAgent.startsWith("go-git") ||
|
userAgent.startsWith("go-git") ||
|
||||||
userAgent.startsWith("JGit/") ||
|
userAgent.startsWith("JGit/") ||
|
||||||
userAgent.startsWith("JGit-")
|
userAgent.startsWith("JGit-")
|
||||||
)
|
)
|
||||||
- '"Accept" in headers'
|
- '"Git-Protocol" in headers'
|
||||||
- headers["Accept"] == "*/*"
|
- headers["Git-Protocol"] == "version=2"
|
||||||
- '"Cache-Control" in headers'
|
|
||||||
- headers["Cache-Control"] == "no-cache"
|
|
||||||
- '"Pragma" in headers'
|
|
||||||
- headers["Pragma"] == "no-cache"
|
|
||||||
- '"Accept-Encoding" in headers'
|
|
||||||
- headers["Accept-Encoding"].contains("gzip")
|
|
||||||
@@ -1,127 +0,0 @@
|
|||||||
- # Pathological bots to deny
|
|
||||||
# This correlates to data/bots/_deny-pathological.yaml in the source tree
|
|
||||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
|
|
||||||
import: (data)/bots/_deny-pathological.yaml
|
|
||||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
|
||||||
|
|
||||||
# Aggressively block AI/LLM related bots/agents by default
|
|
||||||
- import: (data)/meta/ai-block-aggressive.yaml
|
|
||||||
|
|
||||||
# Consider replacing the aggressive AI policy with more selective policies:
|
|
||||||
# - import: (data)/meta/ai-block-moderate.yaml
|
|
||||||
# - import: (data)/meta/ai-block-permissive.yaml
|
|
||||||
|
|
||||||
# Search engine crawlers to allow, defaults to:
|
|
||||||
# - Google (so they don't try to bypass Anubis)
|
|
||||||
# - Apple
|
|
||||||
# - Bing
|
|
||||||
# - DuckDuckGo
|
|
||||||
# - Qwant
|
|
||||||
# - The Internet Archive
|
|
||||||
# - Kagi
|
|
||||||
# - Marginalia
|
|
||||||
# - Mojeek
|
|
||||||
- import: (data)/crawlers/_allow-good.yaml
|
|
||||||
# Challenge Firefox AI previews
|
|
||||||
- import: (data)/clients/x-firefox-ai.yaml
|
|
||||||
|
|
||||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
|
||||||
- import: (data)/common/keep-internet-working.yaml
|
|
||||||
|
|
||||||
# # Punish any bot with "bot" in the user-agent string
|
|
||||||
# # This is known to have a high false-positive rate, use at your own risk
|
|
||||||
# - name: generic-bot-catchall
|
|
||||||
# user_agent_regex: (?i:bot|crawler)
|
|
||||||
# action: CHALLENGE
|
|
||||||
# challenge:
|
|
||||||
# difficulty: 16 # impossible
|
|
||||||
# report_as: 4 # lie to the operator
|
|
||||||
# algorithm: slow # intentionally waste CPU cycles and time
|
|
||||||
|
|
||||||
# Requires a subscription to Thoth to use, see
|
|
||||||
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
|
|
||||||
- name: countries-with-aggressive-scrapers
|
|
||||||
action: WEIGH
|
|
||||||
geoip:
|
|
||||||
countries:
|
|
||||||
- BR
|
|
||||||
- CN
|
|
||||||
weight:
|
|
||||||
adjust: 10
|
|
||||||
|
|
||||||
# Requires a subscription to Thoth to use, see
|
|
||||||
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
|
|
||||||
- name: aggressive-asns-without-functional-abuse-contact
|
|
||||||
action: WEIGH
|
|
||||||
asns:
|
|
||||||
match:
|
|
||||||
- 13335 # Cloudflare
|
|
||||||
- 136907 # Huawei Cloud
|
|
||||||
- 45102 # Alibaba Cloud
|
|
||||||
weight:
|
|
||||||
adjust: 10
|
|
||||||
|
|
||||||
# ## System load based checks.
|
|
||||||
# # If the system is under high load, add weight.
|
|
||||||
# - name: high-load-average
|
|
||||||
# action: WEIGH
|
|
||||||
# expression: load_1m >= 10.0 # make sure to end the load comparison in a .0
|
|
||||||
# weight:
|
|
||||||
# adjust: 20
|
|
||||||
|
|
||||||
## If your backend service is running on the same operating system as Anubis,
|
|
||||||
## you can uncomment this rule to make the challenge easier when the system is
|
|
||||||
## under low load.
|
|
||||||
##
|
|
||||||
## If it is not, remove weight.
|
|
||||||
# - name: low-load-average
|
|
||||||
# action: WEIGH
|
|
||||||
# expression: load_15m <= 4.0 # make sure to end the load comparison in a .0
|
|
||||||
# weight:
|
|
||||||
# adjust: -10
|
|
||||||
|
|
||||||
# Assert behaviour that only genuine browsers display. This ensures that Chrome
|
|
||||||
# or Firefox versions
|
|
||||||
- name: realistic-browser-catchall
|
|
||||||
expression:
|
|
||||||
all:
|
|
||||||
- '"User-Agent" in headers'
|
|
||||||
- '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
|
|
||||||
- '"Accept" in headers'
|
|
||||||
- '"Sec-Fetch-Dest" in headers'
|
|
||||||
- '"Sec-Fetch-Mode" in headers'
|
|
||||||
- '"Sec-Fetch-Site" in headers'
|
|
||||||
- '"Upgrade-Insecure-Requests" in headers'
|
|
||||||
- '"Accept-Encoding" in headers'
|
|
||||||
- '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
|
|
||||||
- '"Accept-Language" in headers'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: -10
|
|
||||||
|
|
||||||
# Chrome should behave like Chrome
|
|
||||||
- name: chrome-is-proper
|
|
||||||
expression:
|
|
||||||
all:
|
|
||||||
- userAgent.contains("Chrome")
|
|
||||||
- '"Sec-Ch-Ua" in headers'
|
|
||||||
- 'headers["Sec-Ch-Ua"].contains("Chromium")'
|
|
||||||
- '"Sec-Ch-Ua-Mobile" in headers'
|
|
||||||
- '"Sec-Ch-Ua-Platform" in headers'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: -5
|
|
||||||
|
|
||||||
- name: should-have-accept
|
|
||||||
expression: '!("Accept" in headers)'
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: 5
|
|
||||||
|
|
||||||
# Generic catchall rule
|
|
||||||
- name: generic-browser
|
|
||||||
user_agent_regex: >-
|
|
||||||
Mozilla|Opera
|
|
||||||
action: WEIGH
|
|
||||||
weight:
|
|
||||||
adjust: 10
|
|
||||||
@@ -13,7 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
<!-- This changes the project to: -->
|
<!-- This changes the project to: -->
|
||||||
|
|
||||||
- Added `(data)/meta/default-config.yaml` for importing the entire default configuration at once.
|
|
||||||
- Add `-custom-real-ip-header` flag to get the original request IP from a different header than `x-real-ip`.
|
- Add `-custom-real-ip-header` flag to get the original request IP from a different header than `x-real-ip`.
|
||||||
- Add `contentLength` variable to bot expressions.
|
- Add `contentLength` variable to bot expressions.
|
||||||
- Add `COOKIE_SAME_SITE_MODE` to force anubis cookies SameSite value, and downgrade automatically from `None` to `Lax` if cookie is insecure.
|
- Add `COOKIE_SAME_SITE_MODE` to force anubis cookies SameSite value, and downgrade automatically from `None` to `Lax` if cookie is insecure.
|
||||||
@@ -30,8 +29,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
- Fixes concurrency problems with very old browsers ([#1082](https://github.com/TecharoHQ/anubis/issues/1082)).
|
- Fixes concurrency problems with very old browsers ([#1082](https://github.com/TecharoHQ/anubis/issues/1082)).
|
||||||
- Randomly use the Refresh header instead of the meta refresh tag in the metarefresh challenge.
|
- Randomly use the Refresh header instead of the meta refresh tag in the metarefresh challenge.
|
||||||
- Update OpenRC service to truncate the runtime directory before starting Anubis.
|
- Update OpenRC service to truncate the runtime directory before starting Anubis.
|
||||||
- Make the git client profile more strictly match how the git client behaves.
|
|
||||||
- Make the default configuration reward users using normal browsers.
|
|
||||||
- Allow multiple consecutive slashes in a row in application paths ([#754](https://github.com/TecharoHQ/anubis/issues/754)).
|
- Allow multiple consecutive slashes in a row in application paths ([#754](https://github.com/TecharoHQ/anubis/issues/754)).
|
||||||
- Add option to set `targetSNI` to special keyword 'auto' to indicate that it should be automatically set to the request Host name ([#424](https://github.com/TecharoHQ/anubis/issues/424)).
|
- Add option to set `targetSNI` to special keyword 'auto' to indicate that it should be automatically set to the request Host name ([#424](https://github.com/TecharoHQ/anubis/issues/424)).
|
||||||
- The Preact challenge has been removed from the default configuration. It will be deprecated in the future.
|
- The Preact challenge has been removed from the default configuration. It will be deprecated in the future.
|
||||||
|
|||||||
@@ -194,7 +194,6 @@ func (u *userAgentRoundTripper) RoundTrip(req *http.Request) (*http.Response, er
|
|||||||
// Only set if not already present
|
// Only set if not already present
|
||||||
req = req.Clone(req.Context()) // avoid mutating original request
|
req = req.Clone(req.Context()) // avoid mutating original request
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0")
|
req.Header.Set("User-Agent", "Mozilla/5.0")
|
||||||
req.Header.Set("Accept-Encoding", "gzip")
|
|
||||||
return u.rt.RoundTrip(req)
|
return u.rt.RoundTrip(req)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,82 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Script to verify that the 'bots' field in data/botPolicies.yaml
|
|
||||||
has the same semantic contents as data/meta/default-config.yaml.
|
|
||||||
|
|
||||||
CW: generated by AI
|
|
||||||
"""
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import difflib
|
|
||||||
|
|
||||||
def load_yaml(file_path):
    """Parse the YAML document at ``file_path`` and return the loaded data.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding of the machine running the check.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Exits:
        Prints an error message and terminates the process with status 1 if
        the file cannot be opened, decoded, or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: this helper is the script's error boundary, so
        # any failure to obtain the data is reported and aborts the run.
        print(f"Error loading {file_path}: {e}")
        sys.exit(1)
|
|
||||||
|
|
||||||
def normalize_yaml(data):
    """Return ``data`` untouched.

    This is an identity hook for the comparison step. No extra work is done
    here because the input is already-parsed YAML: comments were dropped when
    the document was loaded with ``safe_load``.
    """
    normalized = data
    return normalized
|
|
||||||
|
|
||||||
def get_repo_root():
    """Return the top-level directory of the current git repository.

    Runs ``git rev-parse --show-toplevel`` and returns its output with
    surrounding whitespace removed.

    Exits:
        Prints an error message and terminates the process with status 1 if
        the ``git`` executable cannot be found, or if git reports a failure
        (for example when the working directory is not inside a repository).
    """
    try:
        result = subprocess.run(
            ['git', 'rev-parse', '--show-toplevel'],
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError:
        # Raised when no git binary is on PATH; report it like every other
        # failure in this script instead of crashing with a traceback.
        print("Error: git executable not found")
        sys.exit(1)
    except subprocess.CalledProcessError:
        print("Error: Not in a git repository")
        sys.exit(1)
    return result.stdout.strip()
|
|
||||||
|
|
||||||
def main():
    """Check that botPolicies.yaml's ``bots`` list equals default-config.yaml.

    Both files are located relative to the git repository root, loaded, and
    compared as parsed data. The process always ends through ``sys.exit``:

    * status 0 with a SUCCESS message when the two structures are equal;
    * status 1 with a FAILURE message and a unified diff of their YAML dumps
      when they differ;
    * status 1 with an error message when botPolicies.yaml has no ``bots``
      field (including when the file is empty or is not a mapping).
    """
    repo_root = get_repo_root()

    # Both files live under data/ in the repository.
    bot_policies_path = os.path.join(repo_root, 'data', 'botPolicies.yaml')
    default_config_path = os.path.join(repo_root, 'data', 'meta', 'default-config.yaml')

    bot_policies = load_yaml(bot_policies_path)
    default_config = load_yaml(default_config_path)

    # An empty document parses to None and a non-mapping document to a list
    # or scalar; neither can carry a 'bots' key, so treat them as missing
    # instead of failing with a TypeError on the lookup.
    if not isinstance(bot_policies, dict) or 'bots' not in bot_policies:
        print("Error: 'bots' field not found in botPolicies.yaml")
        sys.exit(1)

    # default-config.yaml is the rule list itself, with no wrapping key.
    normalized_bots = normalize_yaml(bot_policies['bots'])
    normalized_default = normalize_yaml(default_config)

    if normalized_bots == normalized_default:
        print("SUCCESS: The 'bots' field in botPolicies.yaml matches the contents of default-config.yaml")
        sys.exit(0)

    print("FAILURE: The 'bots' field in botPolicies.yaml does not match the contents of default-config.yaml")
    print("\nDiff:")
    # Dump both sides back to block-style YAML so the diff is line-oriented.
    bots_yaml = yaml.dump(normalized_bots, default_flow_style=False)
    default_yaml = yaml.dump(normalized_default, default_flow_style=False)
    diff = difflib.unified_diff(
        bots_yaml.splitlines(keepends=True),
        default_yaml.splitlines(keepends=True),
        fromfile='bots field in botPolicies.yaml',
        tofile='default-config.yaml',
    )
    print(''.join(diff))
    sys.exit(1)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
#!/usr/bin/env bash

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines.
set -euo pipefail

# Run from the directory containing this script so the relative path to
# compare_bots.py below resolves regardless of the caller's working directory.
script_dir="$(dirname "$0")"
cd "${script_dir}"

# Fail early if the yaml module cannot be imported by python3.
python3 -c 'import yaml'

# Run the comparison; its exit status becomes this script's exit status.
python3 ./compare_bots.py
|
|
||||||
Reference in New Issue
Block a user