mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-09 18:18:49 +00:00
Compare commits
8 Commits
json/dup
...
v1.20.0-pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ecc716940e | ||
|
|
4948036f39 | ||
|
|
7aa732c700 | ||
|
|
226cf36bf7 | ||
|
|
1d5fa49eb0 | ||
|
|
97c1d4f353 | ||
|
|
244f1c505a | ||
|
|
ae4d3b0ce5 |
2
.github/actions/spelling/excludes.txt
vendored
2
.github/actions/spelling/excludes.txt
vendored
@@ -83,7 +83,9 @@
|
||||
^\Q.github/FUNDING.yml\E$
|
||||
^\Q.github/workflows/spelling.yml\E$
|
||||
^data/crawlers/
|
||||
^docs/blog/tags\.yml$
|
||||
^docs/manifest/.*$
|
||||
^docs/static/\.nojekyll$
|
||||
^lib/policy/config/testdata/bad/unparseable\.json$
|
||||
ignore$
|
||||
robots.txt
|
||||
|
||||
5
.github/actions/spelling/expect.txt
vendored
5
.github/actions/spelling/expect.txt
vendored
@@ -44,7 +44,6 @@ chall
|
||||
challengemozilla
|
||||
checkpath
|
||||
checkresult
|
||||
chen
|
||||
chibi
|
||||
cidranger
|
||||
ckie
|
||||
@@ -61,7 +60,6 @@ DDOS
|
||||
Debian
|
||||
debrpm
|
||||
decaymap
|
||||
decompiling
|
||||
Diffbot
|
||||
discordapp
|
||||
discordbot
|
||||
@@ -185,9 +183,11 @@ NONINFRINGEMENT
|
||||
nosleep
|
||||
OCOB
|
||||
ogtags
|
||||
ogtitle
|
||||
omgili
|
||||
omgilibot
|
||||
openai
|
||||
opengraph
|
||||
openrc
|
||||
pag
|
||||
palemoon
|
||||
@@ -300,6 +300,7 @@ xess
|
||||
xff
|
||||
XForwarded
|
||||
XNG
|
||||
XOB
|
||||
XReal
|
||||
yae
|
||||
YAMLTo
|
||||
|
||||
@@ -273,14 +273,6 @@
|
||||
# Most people only have two hands. Reword.
|
||||
\b(?i)on the third hand\b
|
||||
|
||||
# Should be `Open Graph`
|
||||
# unless talking about a specific Open Graph implementation:
|
||||
# - Java
|
||||
# - Node
|
||||
# - Py
|
||||
# - Ruby
|
||||
\bOpenGraph\b
|
||||
|
||||
# Should be `OpenShift`
|
||||
\bOpenshift\b
|
||||
|
||||
|
||||
@@ -331,22 +331,28 @@ func main() {
|
||||
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
|
||||
}
|
||||
|
||||
// If OpenGraph configuration values are not set in the config file, use the
|
||||
// values from flags / envvars.
|
||||
if !policy.OpenGraph.Enabled {
|
||||
policy.OpenGraph.Enabled = *ogPassthrough
|
||||
policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
|
||||
policy.OpenGraph.TimeToLive = *ogTimeToLive
|
||||
policy.OpenGraph.Override = map[string]string{}
|
||||
}
|
||||
|
||||
s, err := libanubis.New(libanubis.Options{
|
||||
BasePrefix: *basePrefix,
|
||||
StripBasePrefix: *stripBasePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
OGPassthrough: *ogPassthrough,
|
||||
OGTimeToLive: *ogTimeToLive,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
OGCacheConsidersHost: *ogCacheConsiderHost,
|
||||
BasePrefix: *basePrefix,
|
||||
StripBasePrefix: *stripBasePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
|
||||
@@ -56,7 +56,7 @@ bots:
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
counties:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
@@ -84,6 +84,30 @@ bots:
|
||||
|
||||
dnsbl: false
|
||||
|
||||
# Open Graph passthrough configuration, see here for more information:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
|
||||
openGraph:
|
||||
# Enables Open Graph passthrough
|
||||
enabled: false
|
||||
# Enables the use of the HTTP host in the cache key, this enables
|
||||
# caching metadata for multiple http hosts at once.
|
||||
considerHost: false
|
||||
# How long cached OpenGraph metadata should last in memory
|
||||
ttl: 24h
|
||||
# # If set, return these opengraph values instead of looking them up with
|
||||
# # the target service.
|
||||
# #
|
||||
# # Correlates to properties in https://ogp.me/
|
||||
# override:
|
||||
# # og:title is required, it is the title of the website
|
||||
# "og:title": "Techaro Anubis"
|
||||
# "og:description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
# "description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
||||
@@ -91,3 +115,57 @@ dnsbl: false
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
|
||||
# The weight thresholds for when to trigger individual challenges. Any
|
||||
# CHALLENGE will take precedence over this.
|
||||
#
|
||||
# A threshold has four configuration options:
|
||||
#
|
||||
# - name: the name that is reported down the stack and used for metrics
|
||||
# - expression: A CEL expression with the request weight in the variable
|
||||
# weight
|
||||
# - action: the Anubis action to apply, similar to in a bot policy
|
||||
# - challenge: which challenge to send to the user, similar to in a bot policy
|
||||
#
|
||||
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
|
||||
# information.
|
||||
thresholds:
|
||||
# By default Anubis ships with the following thresholds:
|
||||
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
|
||||
expression: weight < 0 # a feather weighs zero units
|
||||
action: ALLOW # Allow the traffic through
|
||||
# For clients that had some weight reduced through custom rules, give them a
|
||||
# lightweight challenge.
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
# For clients that are browser-like but have either gained points from custom rules or
|
||||
# report as a standard browser.
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 2 # two leading zeros, very fast for most clients
|
||||
report_as: 2
|
||||
# For clients that are browser like and have gained many points from custom rules
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
# Note: Blocks human-directed/non-training user agents
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
action: DENY
|
||||
|
||||
@@ -11,22 +11,136 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
- Replace cidranger with bart for IP range checking, improving IP matching performance by 3-20x with zero heap
|
||||
## v1.20.0: Thancred Waters
|
||||
|
||||
The big ticket items are as follows:
|
||||
|
||||
- Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
|
||||
- Implement request "weight", allowing administrators to customize the behaviour of Anubis based on specific criteria
|
||||
- Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206))
|
||||
- Add [custom weight thresholds](./admin/configuration/thresholds.mdx) via CEL ([#688](https://github.com/TecharoHQ/anubis/pull/688))
|
||||
- Move Open Graph configuration [to the policy file](./admin/configuration/open-graph.mdx)
|
||||
- Enable support for Open Graph metadata to be returned by default instead of doing lookups against the target
|
||||
- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))
|
||||
- Refactor challenge presentation logic to use a challenge registry
|
||||
- Allow challenge implementations to register HTTP routes
|
||||
|
||||
A lot of performance improvements have been made:
|
||||
|
||||
- Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations
|
||||
- Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
|
||||
- Replace cidranger with bart for IP range checking, improving IP matching performance by 3-20x with zero heap
|
||||
allocations
|
||||
|
||||
And some cleanups/refactors were added:
|
||||
|
||||
- Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in
|
||||
development
|
||||
- Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not
|
||||
- Requests can have their weight adjusted; if a request weighs zero or less, then it is allowed through
|
||||
- Refactor challenge presentation logic to use a challenge registry
|
||||
- Allow challenge implementations to register HTTP routes
|
||||
- Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
|
||||
- Bump AI-robots.txt to version 1.34
|
||||
- Bump AI-robots.txt to version 1.37
|
||||
- Make progress bar styling more compatible (UXP, etc)
|
||||
- Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
|
||||
- Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
|
||||
- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))
|
||||
- Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206))
|
||||
- Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations
|
||||
|
||||
Request weight is one of the biggest ticket features in Anubis. This enables Anubis to be much closer to a Web Application Firewall and when combined with custom thresholds allows administrators to have Anubis take advanced reactions. For more information about request weight, see [the request weight section](./admin/policies.mdx#request-weight) of the policy file documentation.
|
||||
|
||||
TL;DR: when you have one or more `WEIGH` rules like this:
|
||||
|
||||
```yaml
|
||||
bots:
|
||||
- name: gitea-session-token
|
||||
action: WEIGH
|
||||
expression:
|
||||
all:
|
||||
- '"Cookie" in headers'
|
||||
- headers["Cookie"].contains("i_love_gitea=")
|
||||
# Remove 5 weight points
|
||||
weight:
|
||||
adjust: -5
|
||||
```
|
||||
|
||||
You can configure custom thresholds like this:
|
||||
|
||||
```yaml
|
||||
thresholds:
|
||||
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
|
||||
expression: weight < 0 # a feather weighs zero units
|
||||
action: ALLOW # Allow the traffic through
|
||||
|
||||
# For clients that had some weight reduced through custom rules, give them a
|
||||
# lightweight challenge.
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
|
||||
# For clients that are browser-like but have either gained points from custom
|
||||
# rules or report as a standard browser.
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 2 # two leading zeros, very fast for most clients
|
||||
report_as: 2
|
||||
|
||||
# For clients that are browser like and have gained many points from custom
|
||||
# rules
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
```
|
||||
|
||||
These thresholds apply when no other `ALLOW`, `DENY`, or `CHALLENGE` rule matches the request. `WEIGH` rules add and remove request weight as needed:
|
||||
|
||||
```yaml
|
||||
bots:
|
||||
- name: gitea-session-token
|
||||
action: WEIGH
|
||||
expression:
|
||||
all:
|
||||
- '"Cookie" in headers'
|
||||
- headers["Cookie"].contains("i_love_gitea=")
|
||||
# Remove 5 weight points
|
||||
weight:
|
||||
adjust: -5
|
||||
|
||||
- name: bot-like-user-agent
|
||||
action: WEIGH
|
||||
expression: '"Bot" in userAgent'
|
||||
# Add 5 weight points
|
||||
weight:
|
||||
adjust: 5
|
||||
```
|
||||
|
||||
Of note: the default "generic browser" rule assigns 10 weight points:
|
||||
|
||||
```yaml
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
Adjust this as you see fit.
|
||||
|
||||
## v1.19.1: Jenomis cen Lexentale - Echo 1
|
||||
|
||||
@@ -162,7 +276,6 @@ Other changes:
|
||||
- Moved all CSS inline to the Xess package, changed colors to be CSS variables
|
||||
- Set or append to `X-Forwarded-For` header unless the remote connects over a loopback address [#328](https://github.com/TecharoHQ/anubis/issues/328)
|
||||
- Fixed mojeekbot user agent regex
|
||||
- Added support for running anubis behind a base path (e.g. `/myapp`)
|
||||
- Reduce Anubis' paranoia with user cookies ([#365](https://github.com/TecharoHQ/anubis/pull/365))
|
||||
- Added support for Open Graph passthrough while using unix sockets
|
||||
- The Open Graph subsystem now passes the HTTP `HOST` header through to the origin
|
||||
|
||||
@@ -9,12 +9,45 @@ This page provides detailed information on how to configure [Open Graph tag](htt
|
||||
|
||||
## Configuration Options
|
||||
|
||||
Open Graph settings are configured in the `openGraph` section of the [Policy File](../policies.mdx).
|
||||
|
||||
```yaml
|
||||
openGraph:
|
||||
# Enables Open Graph passthrough
|
||||
enabled: true
|
||||
# Enables the use of the HTTP host in the cache key, this enables
|
||||
# caching metadata for multiple http hosts at once.
|
||||
considerHost: true
|
||||
# How long cached OpenGraph metadata should last in memory
|
||||
ttl: 24h
|
||||
# If set, return these opengraph values instead of looking them up with
|
||||
# the target service.
|
||||
#
|
||||
# Correlates to properties in https://ogp.me/
|
||||
override:
|
||||
# og:title is required, it is the title of the website
|
||||
"og:title": "Techaro Anubis"
|
||||
"og:description": >-
|
||||
Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
away so that you can maintain uptime at work!
|
||||
"description": >-
|
||||
Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
away so that you can maintain uptime at work!
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Configuration flags / envvars (old)</summary>
|
||||
|
||||
Open Graph passthrough used to be configured with configuration flags / environment variables. References to these settings are maintained for backwards compatibility's sake.
|
||||
|
||||
| Name | Description | Type | Default | Example |
|
||||
| ------------------------ | --------------------------------------------------------- | -------- | ------- | ----------------------------- |
|
||||
| `OG_PASSTHROUGH` | Enables or disables the Open Graph tag passthrough system | Boolean | `true` | `OG_PASSTHROUGH=true` |
|
||||
| `OG_EXPIRY_TIME` | Configurable cache expiration time for Open Graph tags | Duration | `24h` | `OG_EXPIRY_TIME=1h` |
|
||||
| `OG_CACHE_CONSIDER_HOST` | Enables or disables the use of the host in the cache key | Boolean | `false` | `OG_CACHE_CONSIDER_HOST=true` |
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
To configure Open Graph tags, set the following options as environment variables, in an environment file, or as flags in your Anubis configuration:
|
||||
|
||||
140
docs/docs/admin/configuration/thresholds.mdx
Normal file
140
docs/docs/admin/configuration/thresholds.mdx
Normal file
@@ -0,0 +1,140 @@
|
||||
# Weight Threshold Configuration
|
||||
|
||||
Anubis offers the ability to assign "weight" to requests. This is a custom level of suspicion that rules can add to or remove from. For example, here's how you assign 10 weight points to anything that might be a browser:
|
||||
|
||||
```yaml
|
||||
# botPolicies.yaml
|
||||
|
||||
bots:
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
Thresholds let you take this per-request weight value and take actions in response to it. Thresholds are defined alongside your bot configuration in `botPolicies.yaml`.
|
||||
|
||||
:::note
|
||||
|
||||
Thresholds DO NOT apply when a request matches a bot rule with the CHALLENGE action. Thresholds only apply when requests don't match any terminal bot rules.
|
||||
|
||||
:::
|
||||
|
||||
```yaml
|
||||
# botPolicies.yaml
|
||||
|
||||
bots: ...
|
||||
|
||||
thresholds:
|
||||
- name: minimal-suspicion
|
||||
expression: weight < 0
|
||||
action: ALLOW
|
||||
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 2
|
||||
report_as: 2
|
||||
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
```
|
||||
|
||||
This defines a suite of 4 thresholds:
|
||||
|
||||
1. If the request weight is less than zero, allow it through.
|
||||
2. If the request weight is greater than or equal to zero, but less than ten: give it [a very lightweight challenge](./challenges/metarefresh.mdx).
|
||||
3. If the request weight is greater than or equal to ten, but less than twenty: give it [a slightly heavier challenge](./challenges/proof-of-work.mdx).
|
||||
4. Otherwise, give it [the heaviest challenge](./challenges/proof-of-work.mdx).
|
||||
|
||||
Thresholds can be configured with the following options:
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Description</th>
|
||||
<th>Example</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>`name`</td>
|
||||
<td>The human-readable name for this threshold.</td>
|
||||
<td>
|
||||
|
||||
```yaml
|
||||
name: extreme-suspicion
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>`expression`</td>
|
||||
<td>A [CEL](https://cel.dev/) expression taking the request weight and returning true or false</td>
|
||||
<td>
|
||||
|
||||
To check if the request weight is less than zero:
|
||||
|
||||
```yaml
|
||||
expression: weight < 0
|
||||
```
|
||||
|
||||
To check if it's at least 0 and less than 10:
|
||||
|
||||
```yaml
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>`action`</td>
|
||||
<td>The Anubis action to apply: `ALLOW`, `CHALLENGE`, or `DENY`</td>
|
||||
<td>
|
||||
|
||||
```yaml
|
||||
action: ALLOW
|
||||
```
|
||||
|
||||
If you set the CHALLENGE action, you must set challenge details:
|
||||
|
||||
```yaml
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
</table>
|
||||
@@ -4,7 +4,6 @@ title: Setting up Anubis
|
||||
|
||||
import RandomKey from "@site/src/components/RandomKey";
|
||||
|
||||
|
||||
Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting.
|
||||
|
||||
<center>
|
||||
@@ -30,7 +29,7 @@ TLS terminator)
|
||||
Anubis is shipped in the Docker repo [`ghcr.io/techarohq/anubis`](https://github.com/TecharoHQ/anubis/pkgs/container/anubis). The following tags exist for your convenience:
|
||||
|
||||
| Tag | Meaning |
|
||||
|:--------------------|:-----------------------------------------------------------------------------------------------------------------------------------|
|
||||
| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `latest` | The latest [tagged release](https://github.com/TecharoHQ/anubis/releases), if you are in doubt, start here. |
|
||||
| `v<version number>` | The Anubis image for [any given tagged release](https://github.com/TecharoHQ/anubis/tags) |
|
||||
| `main` | The current build on the `main` branch. Only use this if you need the latest and greatest features as they are merged into `main`. |
|
||||
@@ -43,12 +42,24 @@ Anubis has very minimal system requirements. I suspect that 128Mi of ram may be
|
||||
|
||||
For more detailed information on installing Anubis with native packages, please read [the native install directions](./native-install.mdx).
|
||||
|
||||
## Environment variables
|
||||
## Configuration
|
||||
|
||||
Anubis is configurable via environment variables and [the policy file](./policies.mdx). Most settings are currently exposed with environment variables but they are being slowly moved over to the policy file.
|
||||
|
||||
### Configuration via the policy file
|
||||
|
||||
Currently the following settings are configurable via the policy file:
|
||||
|
||||
- [Bot policies](./policies.mdx)
|
||||
- [Open Graph passthrough](./configuration/open-graph.mdx)
|
||||
- [Weight thresholds](./configuration/thresholds.mdx)
|
||||
|
||||
### Environment variables
|
||||
|
||||
Anubis uses these environment variables for configuration:
|
||||
|
||||
| Environment Variable | Default value | Explanation |
|
||||
|:-------------------------------|:------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| :----------------------------- | :---------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `BASE_PREFIX` | unset | If set, adds a global prefix to all Anubis endpoints. For example, setting this to `/myapp` would make Anubis accessible at `/myapp/` instead of `/`. This is useful when running Anubis behind a reverse proxy that routes based on path prefixes. |
|
||||
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
|
||||
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
|
||||
@@ -60,9 +71,9 @@ Anubis uses these environment variables for configuration:
|
||||
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. |
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. |
|
||||
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. |
|
||||
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. |
|
||||
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
|
||||
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example.com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
|
||||
| `SERVE_ROBOTS_TXT` | `false` | If set to `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
|
||||
@@ -138,6 +149,7 @@ STRIP_BASE_PREFIX=true
|
||||
```
|
||||
|
||||
With this configuration:
|
||||
|
||||
- A request to `/myapp/api/users` would be forwarded to your target service as `/api/users`
|
||||
- A request to `/myapp/` would be forwarded as `/`
|
||||
|
||||
|
||||
@@ -261,17 +261,11 @@ Anubis rules can also add or remove "weight" from requests, allowing administrat
|
||||
adjust: -5
|
||||
```
|
||||
|
||||
This would remove five weight points from the request, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx).
|
||||
This would remove five weight points from the request, which would make Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) in the default configuration.
|
||||
|
||||
### Weight Thresholds
|
||||
|
||||
Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch, for now here's how Anubis configures the weight thresholds:
|
||||
|
||||
| Weight Expression | Action |
|
||||
| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `weight < 0` (weight is less than 0) | Allow the request through. |
|
||||
| `weight < 10` (weight is less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. |
|
||||
| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. |
|
||||
For more information on configuring weight thresholds, see [Weight Threshold Configuration](./configuration/thresholds.mdx).
|
||||
|
||||
### Advice
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ For example, to add 10 weight points to requests from Brazil and China:
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
counties:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
|
||||
@@ -44,6 +44,7 @@ This page contains a non-exhaustive list with all websites using Anubis.
|
||||
- https://squirreljme.cc/
|
||||
- https://gitlab.postmarketos.org/
|
||||
- https://wiki.koha-community.org/
|
||||
- https://extensions.typo3.org/
|
||||
- <details>
|
||||
<summary>FreeCAD</summary>
|
||||
- https://forum.freecad.org/
|
||||
|
||||
@@ -13,6 +13,10 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
|
||||
return nil, errors.New("nil URL provided, cannot fetch OG tags")
|
||||
}
|
||||
|
||||
if len(c.ogOverride) != 0 {
|
||||
return c.ogOverride, nil
|
||||
}
|
||||
|
||||
target := c.getTarget(url)
|
||||
cacheKey := c.generateCacheKey(target, originalHost)
|
||||
|
||||
|
||||
@@ -7,10 +7,49 @@ import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestCacheReturnsDefault(t *testing.T) {
|
||||
want := map[string]string{
|
||||
"og:title": "Foo bar",
|
||||
"og:description": "The best website ever made!!!1!",
|
||||
}
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
Override: want,
|
||||
})
|
||||
|
||||
u, err := url.Parse("https://anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for k, v := range want {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
if got := result[k]; got != v {
|
||||
t.Logf("want: tags[%q] = %q", k, v)
|
||||
t.Logf("got: tags[%q] = %q", k, got)
|
||||
t.Error("invalid result from function")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckCache(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", true, time.Minute, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Set up test data
|
||||
urlStr := "http://example.com/page"
|
||||
@@ -69,7 +108,11 @@ func TestGetOGTags(t *testing.T) {
|
||||
defer ts.Close()
|
||||
|
||||
// Create an instance of OGTagCache with a short TTL for testing
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Parse the test server URL
|
||||
parsedURL, err := url.Parse(ts.URL)
|
||||
@@ -216,7 +259,11 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
loadCount = 0 // Reset load count for each test case
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: tc.ogCacheConsiderHost,
|
||||
})
|
||||
|
||||
for i, req := range tc.requests {
|
||||
ogTags, err := cache.GetOGTags(parsedURL, req.host)
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -80,7 +81,11 @@ func TestFetchHTMLDocument(t *testing.T) {
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
|
||||
|
||||
if tt.expectError {
|
||||
@@ -107,7 +112,11 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
|
||||
t.Skip("test requires theoretical network egress")
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestIntegrationGetOGTags(t *testing.T) {
|
||||
@@ -104,7 +106,11 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create cache instance
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Create URL for test
|
||||
testURL, _ := url.Parse(ts.URL)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -29,7 +30,7 @@ func BenchmarkGetTarget(b *testing.B) {
|
||||
|
||||
for _, tt := range tests {
|
||||
b.Run(tt.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(tt.target, false, 0, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{})
|
||||
urls := make([]*url.URL, len(tt.paths))
|
||||
for i, path := range tt.paths {
|
||||
u, _ := url.Parse(path)
|
||||
@@ -65,7 +66,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
||||
</head><body><div><p>Content</p></div></body></html>`,
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
docs := make([]*html.Node, len(htmlSamples))
|
||||
|
||||
for i, sample := range htmlSamples {
|
||||
@@ -83,7 +84,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
||||
|
||||
// Memory usage test
|
||||
func TestMemoryUsage(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Force GC and wait for it to complete
|
||||
runtime.GC()
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/decaymap"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -32,9 +33,10 @@ type OGTagCache struct {
|
||||
ogTimeToLive time.Duration
|
||||
ogCacheConsiderHost bool
|
||||
ogPassthrough bool
|
||||
ogOverride map[string]string
|
||||
}
|
||||
|
||||
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
|
||||
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
// Predefined approved tags and prefixes
|
||||
defaultApprovedTags := []string{"description", "keywords", "author"}
|
||||
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
|
||||
@@ -77,9 +79,10 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
|
||||
return &OGTagCache{
|
||||
cache: decaymap.New[string, map[string]string](),
|
||||
targetURL: parsedTargetURL,
|
||||
ogPassthrough: ogPassthrough,
|
||||
ogTimeToLive: ogTimeToLive,
|
||||
ogCacheConsiderHost: ogTagsConsiderHost,
|
||||
ogPassthrough: conf.Enabled,
|
||||
ogTimeToLive: conf.TimeToLive,
|
||||
ogCacheConsiderHost: conf.ConsiderHost,
|
||||
ogOverride: conf.Override,
|
||||
approvedTags: defaultApprovedTags,
|
||||
approvedPrefixes: defaultApprovedPrefixes,
|
||||
client: client,
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -45,7 +46,7 @@ func FuzzGetTarget(f *testing.F) {
|
||||
}
|
||||
|
||||
// Create cache - should not panic
|
||||
cache := NewOGTagCache(target, false, 0, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
|
||||
// Create URL
|
||||
u := &url.URL{
|
||||
@@ -129,7 +130,7 @@ func FuzzExtractOGTags(f *testing.F) {
|
||||
return
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
tags := cache.extractOGTags(doc)
|
||||
@@ -185,7 +186,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
cache := NewOGTagCache(target, false, 0, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
u := &url.URL{Path: path, RawQuery: query}
|
||||
|
||||
result := cache.getTarget(u)
|
||||
@@ -242,7 +243,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
|
||||
},
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
property, content := cache.extractMetaTagInfo(node)
|
||||
@@ -295,7 +296,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
|
||||
|
||||
for _, input := range inputs {
|
||||
b.Run(input.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(input.target, false, 0, false)
|
||||
cache := NewOGTagCache(input.target, config.OpenGraph{})
|
||||
u := &url.URL{Path: input.path, RawQuery: input.query}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@@ -13,6 +13,8 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestNewOGTagCache(t *testing.T) {
|
||||
@@ -38,7 +40,11 @@ func TestNewOGTagCache(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: tt.ogPassthrough,
|
||||
TimeToLive: tt.ogTimeToLive,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -74,7 +80,11 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
|
||||
socketPath := filepath.Join(tempDir, "test.sock")
|
||||
target := "unix://" + socketPath
|
||||
|
||||
cache := NewOGTagCache(target, true, 5*time.Minute, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: 5 * time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -155,7 +165,11 @@ func TestGetTarget(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, false, time.Minute, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
u := &url.URL{
|
||||
Path: tt.path,
|
||||
@@ -175,7 +189,9 @@ func TestGetTarget(t *testing.T) {
|
||||
func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
socketPath := filepath.Join(tempDir, "anubis-test.sock")
|
||||
// XXX(Xe): if this is named longer, macOS fails with `bind: invalid argument`
|
||||
// because the unix socket path is too long. I love computers.
|
||||
socketPath := filepath.Join(tempDir, "t")
|
||||
|
||||
// Ensure the socket does not exist initially
|
||||
_ = os.Remove(socketPath)
|
||||
@@ -222,7 +238,11 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
|
||||
// Create cache instance pointing to the Unix socket
|
||||
targetURL := "unix://" + socketPath
|
||||
cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(targetURL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Create a dummy URL for the request (path and query matter)
|
||||
testReqURL, _ := url.Parse("/some/page?query=1")
|
||||
|
||||
@@ -6,13 +6,18 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// TestExtractOGTags updated with correct expectations based on filtering logic
|
||||
func TestExtractOGTags(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
// Manually set approved tags/prefixes based on the user request for clarity
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
@@ -189,7 +194,11 @@ func TestIsOGMetaTag(t *testing.T) {
|
||||
|
||||
func TestExtractMetaTagInfo(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
"github.com/google/cel-go/common/types"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
@@ -411,12 +412,6 @@ func cr(name string, rule config.Rule, weight int) policy.CheckResult {
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
weightOkayStatic = policy.NewStaticHashChecker("weight/okay")
|
||||
weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion")
|
||||
weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion")
|
||||
)
|
||||
|
||||
// Check evaluates the list of rules, and returns the result
|
||||
func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) {
|
||||
host := r.Header.Get("X-Real-Ip")
|
||||
@@ -448,34 +443,25 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case weight <= 0:
|
||||
return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: config.DefaultAlgorithm,
|
||||
},
|
||||
Rules: weightOkayStatic,
|
||||
}, nil
|
||||
case weight > 0 && weight < 10:
|
||||
return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: "metarefresh",
|
||||
},
|
||||
Rules: weightMildSusStatic,
|
||||
}, nil
|
||||
case weight >= 10:
|
||||
return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: "fast",
|
||||
},
|
||||
Rules: weightVerySusStatic,
|
||||
}, nil
|
||||
for _, t := range s.policy.Thresholds {
|
||||
result, _, err := t.Program.ContextEval(r.Context(), &policy.ThresholdRequest{Weight: weight})
|
||||
if err != nil {
|
||||
slog.Error("error when evaluating threshold expression", "expression", t.Expression.String(), "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var matches bool
|
||||
|
||||
if val, ok := result.(types.Bool); ok {
|
||||
matches = bool(val)
|
||||
}
|
||||
|
||||
if matches {
|
||||
return cr("threshold/"+t.Name, t.Action, weight), &policy.Bot{
|
||||
Challenge: t.Challenge,
|
||||
Rules: &checker.List{},
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
return cr("default/allow", config.RuleAllow, weight), &policy.Bot{
|
||||
|
||||
@@ -24,12 +24,16 @@ func init() {
|
||||
internal.InitSlog("debug")
|
||||
}
|
||||
|
||||
func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
|
||||
func loadPolicies(t *testing.T, fname string, difficulty int) *policy.ParsedConfig {
|
||||
t.Helper()
|
||||
|
||||
ctx := thothmock.WithMockThoth(t)
|
||||
|
||||
anubisPolicy, err := LoadPoliciesOrDefault(ctx, fname, anubis.DefaultDifficulty)
|
||||
if fname == "" {
|
||||
fname = "./testdata/test_config.yaml"
|
||||
}
|
||||
|
||||
anubisPolicy, err := LoadPoliciesOrDefault(ctx, fname, difficulty)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -40,6 +44,10 @@ func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
|
||||
func spawnAnubis(t *testing.T, opts Options) *Server {
|
||||
t.Helper()
|
||||
|
||||
if opts.Policy == nil {
|
||||
opts.Policy = loadPolicies(t, "", 4)
|
||||
}
|
||||
|
||||
s, err := New(opts)
|
||||
if err != nil {
|
||||
t.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
@@ -176,8 +184,7 @@ func TestLoadPolicies(t *testing.T) {
|
||||
|
||||
// Regression test for CVE-2025-24369
|
||||
func TestCVE2025_24369(t *testing.T) {
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 4
|
||||
pol := loadPolicies(t, "", anubis.DefaultDifficulty)
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: http.NewServeMux(),
|
||||
@@ -200,8 +207,7 @@ func TestCVE2025_24369(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCookieCustomExpiration(t *testing.T) {
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 0
|
||||
pol := loadPolicies(t, "", 0)
|
||||
ckieExpiration := 10 * time.Minute
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
@@ -250,8 +256,7 @@ func TestCookieCustomExpiration(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCookieSettings(t *testing.T) {
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 0
|
||||
pol := loadPolicies(t, "", 0)
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: http.NewServeMux(),
|
||||
@@ -316,10 +321,7 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) {
|
||||
|
||||
for i := 1; i < 10; i++ {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
anubisPolicy, err := LoadPoliciesOrDefault(t.Context(), "", i)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
anubisPolicy := loadPolicies(t, "", i)
|
||||
|
||||
s, err := New(Options{
|
||||
Next: h,
|
||||
@@ -337,11 +339,13 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) {
|
||||
|
||||
req.Header.Add("X-Real-Ip", "127.0.0.1")
|
||||
|
||||
_, bot, err := s.check(req)
|
||||
cr, bot, err := s.check(req)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
t.Log(cr.Name)
|
||||
|
||||
if bot.Challenge.Difficulty != i {
|
||||
t.Errorf("Challenge.Difficulty is wrong, wanted %d, got: %d", i, bot.Challenge.Difficulty)
|
||||
}
|
||||
@@ -389,8 +393,7 @@ func TestBasePrefix(t *testing.T) {
|
||||
// Reset the global BasePrefix before each test
|
||||
anubis.BasePrefix = ""
|
||||
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 4
|
||||
pol := loadPolicies(t, "", 4)
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: h,
|
||||
@@ -518,8 +521,7 @@ func TestCustomStatusCodes(t *testing.T) {
|
||||
"DENY": 403,
|
||||
}
|
||||
|
||||
pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
|
||||
pol.DefaultDifficulty = 4
|
||||
pol := loadPolicies(t, "./testdata/aggressive_403.yaml", 4)
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: h,
|
||||
@@ -553,7 +555,7 @@ func TestCustomStatusCodes(t *testing.T) {
|
||||
func TestCloudflareWorkersRule(t *testing.T) {
|
||||
for _, variant := range []string{"cel", "header"} {
|
||||
t.Run(variant, func(t *testing.T) {
|
||||
pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml")
|
||||
pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml", 0)
|
||||
|
||||
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, "OK")
|
||||
@@ -609,8 +611,7 @@ func TestCloudflareWorkersRule(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRuleChange(t *testing.T) {
|
||||
pol := loadPolicies(t, "testdata/rule_change.yaml")
|
||||
pol.DefaultDifficulty = 0
|
||||
pol := loadPolicies(t, "testdata/rule_change.yaml", 0)
|
||||
ckieExpiration := 10 * time.Minute
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
|
||||
@@ -21,27 +21,26 @@ import (
|
||||
"github.com/TecharoHQ/anubis/internal/ogtags"
|
||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/web"
|
||||
"github.com/TecharoHQ/anubis/xess"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
Next http.Handler
|
||||
Policy *policy.ParsedConfig
|
||||
Target string
|
||||
CookieDomain string
|
||||
CookieName string
|
||||
BasePrefix string
|
||||
WebmasterEmail string
|
||||
RedirectDomains []string
|
||||
PrivateKey ed25519.PrivateKey
|
||||
CookieExpiration time.Duration
|
||||
OGTimeToLive time.Duration
|
||||
StripBasePrefix bool
|
||||
OGCacheConsidersHost bool
|
||||
OGPassthrough bool
|
||||
CookiePartitioned bool
|
||||
ServeRobotsTXT bool
|
||||
Next http.Handler
|
||||
Policy *policy.ParsedConfig
|
||||
Target string
|
||||
CookieDomain string
|
||||
CookieName string
|
||||
BasePrefix string
|
||||
WebmasterEmail string
|
||||
RedirectDomains []string
|
||||
PrivateKey ed25519.PrivateKey
|
||||
CookieExpiration time.Duration
|
||||
StripBasePrefix bool
|
||||
OpenGraph config.OpenGraph
|
||||
CookiePartitioned bool
|
||||
ServeRobotsTXT bool
|
||||
}
|
||||
|
||||
func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
|
||||
@@ -112,7 +111,7 @@ func New(opts Options) (*Server, error) {
|
||||
policy: opts.Policy,
|
||||
opts: opts,
|
||||
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph),
|
||||
cookieName: cookieName,
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ func TestBadConfigs(t *testing.T) {
|
||||
for _, st := range finfos {
|
||||
st := st
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err == nil {
|
||||
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "bad", st.Name()), anubis.DefaultDifficulty); err == nil {
|
||||
t.Fatal(err)
|
||||
} else {
|
||||
t.Log(err)
|
||||
|
||||
@@ -80,7 +80,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
|
||||
challengeStr := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
|
||||
var ogTags map[string]string = nil
|
||||
if s.opts.OGPassthrough {
|
||||
if s.opts.OpenGraph.Enabled {
|
||||
var err error
|
||||
ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host)
|
||||
if err != nil {
|
||||
|
||||
@@ -17,47 +17,18 @@ type CELChecker struct {
|
||||
}
|
||||
|
||||
func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) {
|
||||
env, err := expressions.NewEnvironment()
|
||||
env, err := expressions.BotEnvironment()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var src string
|
||||
var ast *cel.Ast
|
||||
|
||||
if cfg.Expression != "" {
|
||||
src = cfg.Expression
|
||||
var iss *cel.Issues
|
||||
intermediate, iss := env.Compile(src)
|
||||
if iss != nil {
|
||||
return nil, iss.Err()
|
||||
}
|
||||
|
||||
ast, iss = env.Check(intermediate)
|
||||
if iss != nil {
|
||||
return nil, iss.Err()
|
||||
}
|
||||
}
|
||||
|
||||
if len(cfg.All) != 0 {
|
||||
ast, err = expressions.Join(env, expressions.JoinAnd, cfg.All...)
|
||||
}
|
||||
|
||||
if len(cfg.Any) != 0 {
|
||||
ast, err = expressions.Join(env, expressions.JoinOr, cfg.Any...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
program, err := expressions.Compile(env, ast)
|
||||
program, err := expressions.Compile(env, cfg.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't compile CEL program: %w", err)
|
||||
}
|
||||
|
||||
return &CELChecker{
|
||||
src: src,
|
||||
src: cfg.String(),
|
||||
program: program,
|
||||
}, nil
|
||||
}
|
||||
|
||||
55
lib/policy/config/asn_test.go
Normal file
55
lib/policy/config/asn_test.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestASNsValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *ASNs
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic valid",
|
||||
input: &ASNs{
|
||||
Match: []uint32{13335}, // Cloudflare
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "private ASN",
|
||||
input: &ASNs{
|
||||
Match: []uint32{64513, 4206942069}, // 16 and 32 bit private ASN
|
||||
},
|
||||
err: ErrPrivateASN,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong validation error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivateASN(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
input uint32
|
||||
output bool
|
||||
}{
|
||||
{13335, false}, // Cloudflare
|
||||
{64513, true}, // 16 bit private ASN
|
||||
{4206942069, true}, // 32 bit private ASN
|
||||
} {
|
||||
t.Run(fmt.Sprint(tt.input, "->", tt.output), func(t *testing.T) {
|
||||
result := isPrivateASN(tt.input)
|
||||
if result != tt.output {
|
||||
t.Errorf("wanted isPrivateASN(%d) == %v, got: %v", tt.input, tt.output, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"k8s.io/apimachinery/pkg/util/yaml"
|
||||
@@ -43,6 +44,15 @@ const (
|
||||
RuleBenchmark Rule = "DEBUG_BENCHMARK"
|
||||
)
|
||||
|
||||
func (r Rule) Valid() error {
|
||||
switch r {
|
||||
case RuleAllow, RuleDeny, RuleChallenge, RuleWeigh, RuleBenchmark:
|
||||
return nil
|
||||
default:
|
||||
return ErrUnknownAction
|
||||
}
|
||||
}
|
||||
|
||||
const DefaultAlgorithm = "fast"
|
||||
|
||||
type BotConfig struct {
|
||||
@@ -184,13 +194,18 @@ type ChallengeRules struct {
|
||||
}
|
||||
|
||||
var (
|
||||
ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)")
|
||||
ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)")
|
||||
ErrChallengeDifficultyTooLow = errors.New("config.ChallengeRules: difficulty is too low (must be >= 1)")
|
||||
ErrChallengeDifficultyTooHigh = errors.New("config.ChallengeRules: difficulty is too high (must be <= 64)")
|
||||
ErrChallengeMustHaveAlgorithm = errors.New("config.ChallengeRules: must have algorithm name set")
|
||||
)
|
||||
|
||||
func (cr ChallengeRules) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if cr.Algorithm == "" {
|
||||
errs = append(errs, ErrChallengeMustHaveAlgorithm)
|
||||
}
|
||||
|
||||
if cr.Difficulty < 1 {
|
||||
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
|
||||
}
|
||||
@@ -309,20 +324,28 @@ func (sc StatusCodes) Valid() error {
|
||||
}
|
||||
|
||||
type fileConfig struct {
|
||||
Bots []BotOrImport `json:"bots"`
|
||||
DNSBL bool `json:"dnsbl"`
|
||||
StatusCodes StatusCodes `json:"status_codes"`
|
||||
Bots []BotOrImport `json:"bots"`
|
||||
DNSBL bool `json:"dnsbl"`
|
||||
OpenGraph openGraphFileConfig `json:"openGraph,omitempty"`
|
||||
StatusCodes StatusCodes `json:"status_codes"`
|
||||
Thresholds []Threshold `json:"thresholds"`
|
||||
}
|
||||
|
||||
func (c fileConfig) Valid() error {
|
||||
func (c *fileConfig) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if len(c.Bots) == 0 {
|
||||
errs = append(errs, ErrNoBotRulesDefined)
|
||||
}
|
||||
|
||||
for _, b := range c.Bots {
|
||||
for i, b := range c.Bots {
|
||||
if err := b.Valid(); err != nil {
|
||||
errs = append(errs, fmt.Errorf("bot %d: %w", i, err))
|
||||
}
|
||||
}
|
||||
|
||||
if c.OpenGraph.Enabled {
|
||||
if err := c.OpenGraph.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
@@ -331,6 +354,12 @@ func (c fileConfig) Valid() error {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
for i, t := range c.Thresholds {
|
||||
if err := t.Valid(); err != nil {
|
||||
errs = append(errs, fmt.Errorf("threshold %d: %w", i, err))
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
|
||||
}
|
||||
@@ -339,11 +368,13 @@ func (c fileConfig) Valid() error {
|
||||
}
|
||||
|
||||
func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
var c fileConfig
|
||||
c.StatusCodes = StatusCodes{
|
||||
Challenge: http.StatusOK,
|
||||
Deny: http.StatusOK,
|
||||
c := &fileConfig{
|
||||
StatusCodes: StatusCodes{
|
||||
Challenge: http.StatusOK,
|
||||
Deny: http.StatusOK,
|
||||
},
|
||||
}
|
||||
|
||||
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
|
||||
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
|
||||
}
|
||||
@@ -353,10 +384,21 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
}
|
||||
|
||||
result := &Config{
|
||||
DNSBL: c.DNSBL,
|
||||
DNSBL: c.DNSBL,
|
||||
OpenGraph: OpenGraph{
|
||||
Enabled: c.OpenGraph.Enabled,
|
||||
ConsiderHost: c.OpenGraph.ConsiderHost,
|
||||
Override: c.OpenGraph.Override,
|
||||
},
|
||||
StatusCodes: c.StatusCodes,
|
||||
}
|
||||
|
||||
if c.OpenGraph.TimeToLive != "" {
|
||||
// XXX(Xe): already validated in Valid()
|
||||
ogTTL, _ := time.ParseDuration(c.OpenGraph.TimeToLive)
|
||||
result.OpenGraph.TimeToLive = ogTTL
|
||||
}
|
||||
|
||||
var validationErrs []error
|
||||
|
||||
for _, boi := range c.Bots {
|
||||
@@ -379,6 +421,19 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if len(c.Thresholds) == 0 {
|
||||
c.Thresholds = DefaultThresholds
|
||||
}
|
||||
|
||||
for _, t := range c.Thresholds {
|
||||
if err := t.Valid(); err != nil {
|
||||
validationErrs = append(validationErrs, err)
|
||||
continue
|
||||
}
|
||||
|
||||
result.Thresholds = append(result.Thresholds, t)
|
||||
}
|
||||
|
||||
if len(validationErrs) > 0 {
|
||||
return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
|
||||
}
|
||||
@@ -388,7 +443,9 @@ func Load(fin io.Reader, fname string) (*Config, error) {
|
||||
|
||||
// Config is the parsed policy configuration that Load builds from a
// fileConfig and returns to its caller.
type Config struct {
|
||||
Bots []BotConfig
|
||||
// Thresholds holds the thresholds that passed validation in Load; Load
// substitutes DefaultThresholds when the file defines none.
Thresholds []Threshold
|
||||
DNSBL bool
|
||||
// OpenGraph is copied from the file's openGraph section; Load parses the
// file's TimeToLive string into a time.Duration.
OpenGraph OpenGraph
|
||||
StatusCodes StatusCodes
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"k8s.io/apimachinery/pkg/util/yaml"
|
||||
)
|
||||
|
||||
func p[V any](v V) *V { return &v }
|
||||
@@ -313,12 +312,8 @@ func TestConfigValidBad(t *testing.T) {
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
var c fileConfig
|
||||
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
|
||||
t.Fatalf("can't decode file: %v", err)
|
||||
}
|
||||
|
||||
if err := c.Valid(); err == nil {
|
||||
_, err = Load(fin, filepath.Join("testdata", "bad", st.Name()))
|
||||
if err == nil {
|
||||
t.Fatal("validation should have failed but didn't somehow")
|
||||
} else {
|
||||
t.Log(err)
|
||||
|
||||
@@ -3,7 +3,9 @@ package config
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -18,6 +20,32 @@ type ExpressionOrList struct {
|
||||
Any []string `json:"any,omitempty" yaml:"any,omitempty"`
|
||||
}
|
||||
|
||||
func (eol ExpressionOrList) String() string {
|
||||
switch {
|
||||
case len(eol.Expression) != 0:
|
||||
return eol.Expression
|
||||
case len(eol.All) != 0:
|
||||
var sb strings.Builder
|
||||
for i, pred := range eol.All {
|
||||
if i != 0 {
|
||||
fmt.Fprintf(&sb, " && ")
|
||||
}
|
||||
fmt.Fprintf(&sb, "( %s )", pred)
|
||||
}
|
||||
return sb.String()
|
||||
case len(eol.Any) != 0:
|
||||
var sb strings.Builder
|
||||
for i, pred := range eol.Any {
|
||||
if i != 0 {
|
||||
fmt.Fprintf(&sb, " || ")
|
||||
}
|
||||
fmt.Fprintf(&sb, "( %s )", pred)
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
panic("this should not happen")
|
||||
}
|
||||
|
||||
func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
|
||||
if eol.Expression != rhs.Expression {
|
||||
return false
|
||||
|
||||
@@ -213,3 +213,54 @@ func TestExpressionOrListUnmarshalJSON(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpressionOrListString(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
in ExpressionOrList
|
||||
out string
|
||||
}{
|
||||
{
|
||||
name: "single expression",
|
||||
in: ExpressionOrList{
|
||||
Expression: "true",
|
||||
},
|
||||
out: "true",
|
||||
},
|
||||
{
|
||||
name: "all",
|
||||
in: ExpressionOrList{
|
||||
All: []string{"true"},
|
||||
},
|
||||
out: "( true )",
|
||||
},
|
||||
{
|
||||
name: "all with &&",
|
||||
in: ExpressionOrList{
|
||||
All: []string{"true", "true"},
|
||||
},
|
||||
out: "( true ) && ( true )",
|
||||
},
|
||||
{
|
||||
name: "any",
|
||||
in: ExpressionOrList{
|
||||
All: []string{"true"},
|
||||
},
|
||||
out: "( true )",
|
||||
},
|
||||
{
|
||||
name: "any with ||",
|
||||
in: ExpressionOrList{
|
||||
Any: []string{"true", "true"},
|
||||
},
|
||||
out: "( true ) || ( true )",
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tt.in.String()
|
||||
if result != tt.out {
|
||||
t.Errorf("wanted %q, got: %q", tt.out, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
countryCodeRegexp = regexp.MustCompile(`^\w{2}$`)
|
||||
countryCodeRegexp = regexp.MustCompile(`^[a-zA-Z]{2}$`)
|
||||
|
||||
ErrNotCountryCode = errors.New("config.Bot: invalid country code")
|
||||
)
|
||||
|
||||
36
lib/policy/config/geoip_test.go
Normal file
36
lib/policy/config/geoip_test.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGeoIPValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *GeoIP
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic valid",
|
||||
input: &GeoIP{
|
||||
Countries: []string{"CA"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid country",
|
||||
input: &GeoIP{
|
||||
Countries: []string{"XOB"},
|
||||
},
|
||||
err: ErrNotCountryCode,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong validation error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
51
lib/policy/config/opengraph.go
Normal file
51
lib/policy/config/opengraph.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Sentinel errors reported by openGraphFileConfig.Valid; match them with
// errors.Is.
var (
|
||||
// ErrInvalidOpenGraphConfig is the umbrella error joined in front of every
// validation failure returned by Valid.
ErrInvalidOpenGraphConfig = errors.New("config.OpenGraph: invalid OpenGraph configuration")
|
||||
// ErrOpenGraphTTLDoesNotParse is wrapped when time.ParseDuration rejects the
// configured ttl string.
ErrOpenGraphTTLDoesNotParse = errors.New("config.OpenGraph: ttl does not parse as a Duration, see https://pkg.go.dev/time#ParseDuration (formatted like 5m -> 5 minutes, 2h -> 2 hours, etc)")
|
||||
// ErrOpenGraphMissingProperty is wrapped when a non-empty override map lacks
// a required tag (currently only "og:title").
ErrOpenGraphMissingProperty = errors.New("config.OpenGraph: default opengraph tags missing a property")
|
||||
)
|
||||
|
||||
// openGraphFileConfig is the on-disk form of the Open Graph settings, as it
// appears in a policy file. TimeToLive is kept as the raw string from the
// file; Valid checks that it parses with time.ParseDuration. Override is an
// optional map of Open Graph tag name to value.
//
// ConsiderHost uses the "considerHost" key for YAML as well as JSON; tagging
// it `yaml:"enabled"` would collide with the Enabled field.
type openGraphFileConfig struct {
	Enabled      bool              `json:"enabled" yaml:"enabled"`
	ConsiderHost bool              `json:"considerHost" yaml:"considerHost"`
	TimeToLive   string            `json:"ttl" yaml:"ttl"`
	Override     map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
}
|
||||
|
||||
// OpenGraph is the validated, in-memory form of the Open Graph settings. It
// mirrors openGraphFileConfig except that TimeToLive is a parsed
// time.Duration instead of the raw string from the policy file.
//
// ConsiderHost uses the "considerHost" key for YAML as well as JSON; tagging
// it `yaml:"enabled"` would collide with the Enabled field.
type OpenGraph struct {
	Enabled      bool              `json:"enabled" yaml:"enabled"`
	ConsiderHost bool              `json:"considerHost" yaml:"considerHost"`
	Override     map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
	TimeToLive   time.Duration     `json:"ttl" yaml:"ttl"`
}
|
||||
|
||||
func (og *openGraphFileConfig) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if _, err := time.ParseDuration(og.TimeToLive); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w: ParseDuration(%q) returned: %w", ErrOpenGraphTTLDoesNotParse, og.TimeToLive, err))
|
||||
}
|
||||
|
||||
if len(og.Override) != 0 {
|
||||
for _, tag := range []string{
|
||||
"og:title",
|
||||
} {
|
||||
if _, ok := og.Override[tag]; !ok {
|
||||
errs = append(errs, fmt.Errorf("%w: %s", ErrOpenGraphMissingProperty, tag))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return errors.Join(ErrInvalidOpenGraphConfig, errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
67
lib/policy/config/opengraph_test.go
Normal file
67
lib/policy/config/opengraph_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestOpenGraphFileConfigValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *openGraphFileConfig
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic happy path",
|
||||
input: &openGraphFileConfig{
|
||||
Enabled: true,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: "1h",
|
||||
Override: map[string]string{},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "basic happy path with default",
|
||||
input: &openGraphFileConfig{
|
||||
Enabled: true,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: "1h",
|
||||
Override: map[string]string{
|
||||
"og:title": "foobar",
|
||||
},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "invalid time duration",
|
||||
input: &openGraphFileConfig{
|
||||
Enabled: true,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: "taco",
|
||||
Override: map[string]string{},
|
||||
},
|
||||
err: ErrOpenGraphTTLDoesNotParse,
|
||||
},
|
||||
{
|
||||
name: "missing og:title in defaults",
|
||||
input: &openGraphFileConfig{
|
||||
Enabled: true,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: "1h",
|
||||
Override: map[string]string{
|
||||
"description": "foobar",
|
||||
},
|
||||
},
|
||||
err: ErrOpenGraphMissingProperty,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("wanted error: %v", tt.err)
|
||||
t.Logf("got error: %v", err)
|
||||
t.Error("validation failed")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
12
lib/policy/config/testdata/bad/opengraph_bad_ttl.yaml
vendored
Normal file
12
lib/policy/config/testdata/bad/opengraph_bad_ttl.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
bots:
|
||||
- name: everything
|
||||
user_agent_regex: .*
|
||||
action: DENY
|
||||
|
||||
openGraph:
|
||||
enabled: true
|
||||
considerHost: false
|
||||
ttl: taco
|
||||
default:
|
||||
"og:title": "Xe's magic land of fun"
|
||||
"og:description": "We're no strangers to love, you know the rules and so do I"
|
||||
11
lib/policy/config/testdata/bad/threshold-challenge-without-challenge.yaml
vendored
Normal file
11
lib/policy/config/testdata/bad/threshold-challenge-without-challenge.yaml
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
bots:
|
||||
- name: simple-weight-adjust
|
||||
action: WEIGH
|
||||
user_agent_regex: Mozilla
|
||||
weight:
|
||||
adjust: 5
|
||||
|
||||
thresholds:
|
||||
- name: extreme-suspicion
|
||||
expression: "true"
|
||||
action: WEIGH
|
||||
15
lib/policy/config/testdata/bad/thresholds.yaml
vendored
Normal file
15
lib/policy/config/testdata/bad/thresholds.yaml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
bots:
|
||||
- name: simple-weight-adjust
|
||||
action: WEIGH
|
||||
user_agent_regex: Mozilla
|
||||
weight:
|
||||
adjust: 5
|
||||
|
||||
thresholds:
|
||||
- name: extreme-suspicion
|
||||
expression: "true"
|
||||
action: WEIGH
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
1
lib/policy/config/testdata/bad/unparseable.json
vendored
Normal file
1
lib/policy/config/testdata/bad/unparseable.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
}
|
||||
1
lib/policy/config/testdata/bad/unparseable.yaml
vendored
Normal file
1
lib/policy/config/testdata/bad/unparseable.yaml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
}
|
||||
8
lib/policy/config/testdata/good/no-thresholds.yaml
vendored
Normal file
8
lib/policy/config/testdata/good/no-thresholds.yaml
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
bots:
|
||||
- name: simple-weight-adjust
|
||||
action: WEIGH
|
||||
user_agent_regex: Mozilla
|
||||
weight:
|
||||
adjust: 5
|
||||
|
||||
thresholds: []
|
||||
12
lib/policy/config/testdata/good/opengraph_all_good.yaml
vendored
Normal file
12
lib/policy/config/testdata/good/opengraph_all_good.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
bots:
|
||||
- name: everything
|
||||
user_agent_regex: .*
|
||||
action: DENY
|
||||
|
||||
openGraph:
|
||||
enabled: true
|
||||
considerHost: false
|
||||
ttl: 1h
|
||||
default:
|
||||
"og:title": "Xe's magic land of fun"
|
||||
"og:description": "We're no strangers to love, you know the rules and so do I"
|
||||
38
lib/policy/config/testdata/good/thresholds.yaml
vendored
Normal file
38
lib/policy/config/testdata/good/thresholds.yaml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
bots:
|
||||
- name: simple-weight-adjust
|
||||
action: WEIGH
|
||||
user_agent_regex: Mozilla
|
||||
weight:
|
||||
adjust: 5
|
||||
|
||||
thresholds:
|
||||
- name: minimal-suspicion
|
||||
expression: weight < 0
|
||||
action: ALLOW
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 2
|
||||
report_as: 2
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
80
lib/policy/config/threshold.go
Normal file
80
lib/policy/config/threshold.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNoThresholdRulesDefined = errors.New("config: no thresholds defined")
|
||||
ErrThresholdMustHaveName = errors.New("config.Threshold: must set name")
|
||||
ErrThresholdMustHaveExpression = errors.New("config.Threshold: must set expression")
|
||||
ErrThresholdChallengeMustHaveChallenge = errors.New("config.Threshold: a threshold with the CHALLENGE action must have challenge set")
|
||||
ErrThresholdCannotHaveWeighAction = errors.New("config.Threshold: a threshold cannot have the WEIGH action")
|
||||
|
||||
DefaultThresholds = []Threshold{
|
||||
{
|
||||
Name: "legacy-anubis-behaviour",
|
||||
Expression: &ExpressionOrList{
|
||||
Expression: "weight > 0",
|
||||
},
|
||||
Action: RuleChallenge,
|
||||
Challenge: &ChallengeRules{
|
||||
Algorithm: "fast",
|
||||
Difficulty: anubis.DefaultDifficulty,
|
||||
ReportAs: anubis.DefaultDifficulty,
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Threshold is one entry of the policy file's thresholds list: a named
// expression paired with the action to take for it. Valid enforces the
// per-field requirements noted below.
type Threshold struct {
|
||||
// Name must be non-empty.
Name string `json:"name" yaml:"name"`
|
||||
// Expression must be set and must itself be valid.
Expression *ExpressionOrList `json:"expression" yaml:"expression"`
|
||||
// Action must be a valid Rule other than WEIGH.
Action Rule `json:"action" yaml:"action"`
|
||||
// Challenge is required when Action is CHALLENGE and is validated whenever
// it is set.
Challenge *ChallengeRules `json:"challenge" yaml:"challenge"`
|
||||
}
|
||||
|
||||
func (t Threshold) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if len(t.Name) == 0 {
|
||||
errs = append(errs, ErrThresholdMustHaveName)
|
||||
}
|
||||
|
||||
if t.Expression == nil {
|
||||
errs = append(errs, ErrThresholdMustHaveExpression)
|
||||
}
|
||||
|
||||
if t.Expression != nil {
|
||||
if err := t.Expression.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := t.Action.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
if t.Action == RuleWeigh {
|
||||
errs = append(errs, ErrThresholdCannotHaveWeighAction)
|
||||
}
|
||||
|
||||
if t.Action == RuleChallenge && t.Challenge == nil {
|
||||
errs = append(errs, ErrThresholdChallengeMustHaveChallenge)
|
||||
}
|
||||
|
||||
if t.Challenge != nil {
|
||||
if err := t.Challenge.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config: threshold entry for %q is not valid:\n%w", t.Name, errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
111
lib/policy/config/threshold_test.go
Normal file
111
lib/policy/config/threshold_test.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestThresholdValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *Threshold
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic allow",
|
||||
input: &Threshold{
|
||||
Name: "basic-allow",
|
||||
Expression: &ExpressionOrList{Expression: "true"},
|
||||
Action: RuleAllow,
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "basic challenge",
|
||||
input: &Threshold{
|
||||
Name: "basic-challenge",
|
||||
Expression: &ExpressionOrList{Expression: "true"},
|
||||
Action: RuleChallenge,
|
||||
Challenge: &ChallengeRules{
|
||||
Algorithm: "fast",
|
||||
Difficulty: 1,
|
||||
ReportAs: 1,
|
||||
},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "no name",
|
||||
input: &Threshold{},
|
||||
err: ErrThresholdMustHaveName,
|
||||
},
|
||||
{
|
||||
name: "no expression",
|
||||
input: &Threshold{},
|
||||
err: ErrThresholdMustHaveName,
|
||||
},
|
||||
{
|
||||
name: "invalid expression",
|
||||
input: &Threshold{
|
||||
Expression: &ExpressionOrList{},
|
||||
},
|
||||
err: ErrExpressionEmpty,
|
||||
},
|
||||
{
|
||||
name: "invalid action",
|
||||
input: &Threshold{},
|
||||
err: ErrUnknownAction,
|
||||
},
|
||||
{
|
||||
name: "challenge action but no challenge",
|
||||
input: &Threshold{
|
||||
Action: RuleChallenge,
|
||||
},
|
||||
err: ErrThresholdChallengeMustHaveChallenge,
|
||||
},
|
||||
{
|
||||
name: "challenge invalid",
|
||||
input: &Threshold{
|
||||
Action: RuleChallenge,
|
||||
Challenge: &ChallengeRules{Difficulty: 0, ReportAs: 0},
|
||||
},
|
||||
err: ErrChallengeDifficultyTooLow,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Errorf("threshold is invalid: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultThresholdsValid(t *testing.T) {
|
||||
for i, th := range DefaultThresholds {
|
||||
t.Run(fmt.Sprintf("%d %s", i, th.Name), func(t *testing.T) {
|
||||
if err := th.Valid(); err != nil {
|
||||
t.Errorf("threshold invalid: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadActuallyLoadsThresholds(t *testing.T) {
|
||||
fin, err := os.Open(filepath.Join(".", "testdata", "good", "thresholds.yaml"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
c, err := Load(fin, fin.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(c.Thresholds) != 4 {
|
||||
t.Errorf("wanted 4 thresholds, got %d thresholds", len(c.Thresholds))
|
||||
}
|
||||
}
|
||||
@@ -9,20 +9,12 @@ import (
|
||||
"github.com/google/cel-go/ext"
|
||||
)
|
||||
|
||||
// NewEnvironment creates a new CEL environment, this is the set of
|
||||
// BotEnvironment creates a new CEL environment, this is the set of
|
||||
// variables and functions that are passed into the CEL scope so that
|
||||
// Anubis can fail loudly and early when something is invalid instead
|
||||
// of blowing up at runtime.
|
||||
func NewEnvironment() (*cel.Env, error) {
|
||||
return cel.NewEnv(
|
||||
ext.Strings(
|
||||
ext.StringsLocale("en_US"),
|
||||
ext.StringsValidateFormatCalls(true),
|
||||
),
|
||||
|
||||
// default all timestamps to UTC
|
||||
cel.DefaultUTCTimeZone(true),
|
||||
|
||||
func BotEnvironment() (*cel.Env, error) {
|
||||
return New(
|
||||
// Variables exposed to CEL programs:
|
||||
cel.Variable("remoteAddress", cel.StringType),
|
||||
cel.Variable("host", cel.StringType),
|
||||
@@ -31,8 +23,27 @@ func NewEnvironment() (*cel.Env, error) {
|
||||
cel.Variable("path", cel.StringType),
|
||||
cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)),
|
||||
cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)),
|
||||
)
|
||||
}
|
||||
|
||||
// Functions exposed to CEL programs:
|
||||
// ThresholdEnvironment creates a new CEL environment for threshold checking.
// On top of what New provides, it declares a single integer variable,
// "weight", for threshold expressions to use.
|
||||
func ThresholdEnvironment() (*cel.Env, error) {
|
||||
return New(
|
||||
cel.Variable("weight", cel.IntType),
|
||||
)
|
||||
}
|
||||
|
||||
func New(opts ...cel.EnvOption) (*cel.Env, error) {
|
||||
args := []cel.EnvOption{
|
||||
ext.Strings(
|
||||
ext.StringsLocale("en_US"),
|
||||
ext.StringsValidateFormatCalls(true),
|
||||
),
|
||||
|
||||
// default all timestamps to UTC
|
||||
cel.DefaultUTCTimeZone(true),
|
||||
|
||||
// Functions exposed to all CEL programs:
|
||||
cel.Function("randInt",
|
||||
cel.Overload("randInt_int",
|
||||
[]*cel.Type{cel.IntType},
|
||||
@@ -47,12 +58,25 @@ func NewEnvironment() (*cel.Env, error) {
|
||||
}),
|
||||
),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
args = append(args, opts...)
|
||||
return cel.NewEnv(args...)
|
||||
}
|
||||
|
||||
// Compile takes CEL environment and syntax tree then emits an optimized
|
||||
// Program for execution.
|
||||
func Compile(env *cel.Env, ast *cel.Ast) (cel.Program, error) {
|
||||
func Compile(env *cel.Env, src string) (cel.Program, error) {
|
||||
intermediate, iss := env.Compile(src)
|
||||
if iss != nil {
|
||||
return nil, iss.Err()
|
||||
}
|
||||
|
||||
ast, iss := env.Check(intermediate)
|
||||
if iss != nil {
|
||||
return nil, iss.Err()
|
||||
}
|
||||
|
||||
return env.Program(
|
||||
ast,
|
||||
cel.EvalOptions(
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
package expressions
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/google/cel-go/cel"
|
||||
)
|
||||
|
||||
// JoinOperator is a type wrapper for and/or operators.
|
||||
//
|
||||
// This is a separate type so that validation can be done at the type level.
|
||||
type JoinOperator string
|
||||
|
||||
// Possible values for JoinOperator
|
||||
const (
|
||||
JoinAnd JoinOperator = "&&"
|
||||
JoinOr JoinOperator = "||"
|
||||
)
|
||||
|
||||
// Valid ensures that JoinOperator is semantically valid.
|
||||
func (jo JoinOperator) Valid() error {
|
||||
switch jo {
|
||||
case JoinAnd, JoinOr:
|
||||
return nil
|
||||
default:
|
||||
return ErrWrongJoinOperator
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
ErrWrongJoinOperator = errors.New("expressions: invalid join operator")
|
||||
ErrNoExpressions = errors.New("expressions: cannot join zero expressions")
|
||||
ErrCantCompile = errors.New("expressions: can't compile one expression")
|
||||
)
|
||||
|
||||
// JoinClauses joins a list of compiled clauses into one big if statement.
|
||||
//
|
||||
// Imagine the following two clauses:
|
||||
//
|
||||
// ball.color == "red"
|
||||
// ball.shape == "round"
|
||||
//
|
||||
// JoinClauses would emit one "joined" clause such as:
|
||||
//
|
||||
// ( ball.color == "red" ) && ( ball.shape == "round" )
|
||||
func JoinClauses(env *cel.Env, operator JoinOperator, clauses ...*cel.Ast) (*cel.Ast, error) {
|
||||
if err := operator.Valid(); err != nil {
|
||||
return nil, fmt.Errorf("%w: wanted && or ||, got: %q", err, operator)
|
||||
}
|
||||
|
||||
switch len(clauses) {
|
||||
case 0:
|
||||
return nil, ErrNoExpressions
|
||||
case 1:
|
||||
return clauses[0], nil
|
||||
}
|
||||
|
||||
var exprs []string
|
||||
var errs []error
|
||||
|
||||
for _, clause := range clauses {
|
||||
clauseStr, err := cel.AstToString(clause)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
continue
|
||||
}
|
||||
|
||||
exprs = append(exprs, "( "+clauseStr+" )")
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, fmt.Errorf("errors while decompiling statements: %w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
statement := strings.Join(exprs, " "+string(operator)+" ")
|
||||
result, iss := env.Compile(statement)
|
||||
if iss != nil {
|
||||
return nil, iss.Err()
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func Join(env *cel.Env, operator JoinOperator, clauses ...string) (*cel.Ast, error) {
|
||||
var statements []*cel.Ast
|
||||
var errs []error
|
||||
|
||||
for _, clause := range clauses {
|
||||
stmt, iss := env.Compile(clause)
|
||||
if iss != nil && iss.Err() != nil {
|
||||
errs = append(errs, fmt.Errorf("%w: %q gave: %w", ErrCantCompile, clause, iss.Err()))
|
||||
continue
|
||||
}
|
||||
statements = append(statements, stmt)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, fmt.Errorf("errors while joining clauses: %w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return JoinClauses(env, operator, statements...)
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
package expressions
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/google/cel-go/cel"
|
||||
)
|
||||
|
||||
func TestJoin(t *testing.T) {
|
||||
env, err := NewEnvironment()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, tt := range []struct {
|
||||
err error
|
||||
name string
|
||||
op JoinOperator
|
||||
resultStr string
|
||||
clauses []string
|
||||
}{
|
||||
{
|
||||
name: "no-clauses",
|
||||
clauses: []string{},
|
||||
op: JoinAnd,
|
||||
err: ErrNoExpressions,
|
||||
},
|
||||
{
|
||||
name: "one-clause-identity",
|
||||
clauses: []string{`remoteAddress == "8.8.8.8"`},
|
||||
op: JoinAnd,
|
||||
err: nil,
|
||||
resultStr: `remoteAddress == "8.8.8.8"`,
|
||||
},
|
||||
{
|
||||
name: "multi-clause-and",
|
||||
clauses: []string{
|
||||
`remoteAddress == "8.8.8.8"`,
|
||||
`host == "anubis.techaro.lol"`,
|
||||
},
|
||||
op: JoinAnd,
|
||||
err: nil,
|
||||
resultStr: `remoteAddress == "8.8.8.8" && host == "anubis.techaro.lol"`,
|
||||
},
|
||||
{
|
||||
name: "multi-clause-or",
|
||||
clauses: []string{
|
||||
`remoteAddress == "8.8.8.8"`,
|
||||
`host == "anubis.techaro.lol"`,
|
||||
},
|
||||
op: JoinOr,
|
||||
err: nil,
|
||||
resultStr: `remoteAddress == "8.8.8.8" || host == "anubis.techaro.lol"`,
|
||||
},
|
||||
{
|
||||
name: "git-user-agent",
|
||||
clauses: []string{
|
||||
`userAgent.startsWith("git/") || userAgent.contains("libgit")`,
|
||||
`"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"`,
|
||||
},
|
||||
op: JoinAnd,
|
||||
err: nil,
|
||||
resultStr: `(userAgent.startsWith("git/") || userAgent.contains("libgit")) && "Git-Protocol" in headers &&
|
||||
headers["Git-Protocol"] == "version=2"`,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := Join(env, tt.op, tt.clauses...)
|
||||
if !errors.Is(err, tt.err) {
|
||||
t.Errorf("wanted error %v but got: %v", tt.err, err)
|
||||
}
|
||||
|
||||
if tt.err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
program, err := cel.AstToString(result)
|
||||
if err != nil {
|
||||
t.Fatalf("can't decompile program: %v", err)
|
||||
}
|
||||
|
||||
if tt.resultStr != program {
|
||||
t.Logf("wanted: %s", tt.resultStr)
|
||||
t.Logf("got: %s", program)
|
||||
t.Error("program did not compile as expected")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
@@ -21,13 +22,16 @@ var (
|
||||
}, []string{"rule", "action"})
|
||||
|
||||
ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid")
|
||||
warnedAboutThresholds = &atomic.Bool{}
|
||||
)
|
||||
|
||||
// ParsedConfig is the policy in the form built from a config.Config.
// NewParsedConfig seeds it with the original config plus its OpenGraph and
// StatusCodes values.
type ParsedConfig struct {
|
||||
// orig is the config.Config this value was created from.
orig *config.Config
|
||||
|
||||
Bots []Bot
|
||||
// NOTE(review): presumably the compiled thresholds appended by ParseConfig;
// confirm against the full function.
Thresholds []*Threshold
|
||||
DNSBL bool
|
||||
OpenGraph config.OpenGraph
|
||||
DefaultDifficulty int
|
||||
StatusCodes config.StatusCodes
|
||||
}
|
||||
@@ -35,6 +39,7 @@ type ParsedConfig struct {
|
||||
func NewParsedConfig(orig *config.Config) *ParsedConfig {
|
||||
return &ParsedConfig{
|
||||
orig: orig,
|
||||
OpenGraph: orig.OpenGraph,
|
||||
StatusCodes: orig.StatusCodes,
|
||||
}
|
||||
}
|
||||
@@ -150,6 +155,26 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
|
||||
result.Bots = append(result.Bots, parsedBot)
|
||||
}
|
||||
|
||||
for _, t := range c.Thresholds {
|
||||
if t.Name == "legacy-anubis-behaviour" && t.Expression.String() == "true" {
|
||||
if !warnedAboutThresholds.Load() {
|
||||
slog.Warn("configuration file does not contain thresholds, see docs for details on how to upgrade", "fname", fname, "docs_url", "https://anubis.techaro.lol/docs/admin/configuration/thresholds/")
|
||||
warnedAboutThresholds.Store(true)
|
||||
}
|
||||
|
||||
t.Challenge.Difficulty = defaultDifficulty
|
||||
t.Challenge.ReportAs = defaultDifficulty
|
||||
}
|
||||
|
||||
threshold, err := ParsedThresholdFromConfig(t)
|
||||
if err != nil {
|
||||
validationErrs = append(validationErrs, fmt.Errorf("can't compile threshold config for %s: %w", t.Name, err))
|
||||
continue
|
||||
}
|
||||
|
||||
result.Thresholds = append(result.Thresholds, threshold)
|
||||
}
|
||||
|
||||
if len(validationErrs) > 0 {
|
||||
return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, errors.Join(validationErrs...))
|
||||
}
|
||||
|
||||
47
lib/policy/thresholds.go
Normal file
47
lib/policy/thresholds.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/expressions"
|
||||
"github.com/google/cel-go/cel"
|
||||
)
|
||||
|
||||
// Threshold pairs a config.Threshold with the CEL program compiled from its
// expression (see ParsedThresholdFromConfig).
type Threshold struct {
|
||||
config.Threshold
|
||||
// Program is the compiled form of the embedded threshold's expression.
Program cel.Program
|
||||
}
|
||||
|
||||
func ParsedThresholdFromConfig(t config.Threshold) (*Threshold, error) {
|
||||
result := &Threshold{
|
||||
Threshold: t,
|
||||
}
|
||||
|
||||
env, err := expressions.ThresholdEnvironment()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
program, err := expressions.Compile(env, t.Expression.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result.Program = program
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// ThresholdRequest is the input a threshold expression is evaluated against.
type ThresholdRequest struct {
|
||||
// Weight is the value ResolveName returns for the "weight" variable.
Weight int
|
||||
}
|
||||
|
||||
// Parent always returns nil: a ThresholdRequest has no enclosing activation.
// NOTE(review): together with ResolveName this appears to make
// *ThresholdRequest satisfy cel.Activation; confirm against cel-go.
func (tr *ThresholdRequest) Parent() cel.Activation { return nil }
|
||||
|
||||
func (tr *ThresholdRequest) ResolveName(name string) (any, bool) {
|
||||
switch name {
|
||||
case "weight":
|
||||
return tr.Weight, true
|
||||
default:
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
14
lib/testdata/aggressive_403.yaml
vendored
14
lib/testdata/aggressive_403.yaml
vendored
@@ -1,12 +1,12 @@
|
||||
bots:
|
||||
- name: deny
|
||||
user_agent_regex: DENY
|
||||
action: DENY
|
||||
- name: deny
|
||||
user_agent_regex: DENY
|
||||
action: DENY
|
||||
|
||||
- name: challenge
|
||||
user_agent_regex: CHALLENGE
|
||||
action: CHALLENGE
|
||||
- name: challenge
|
||||
user_agent_regex: CHALLENGE
|
||||
action: CHALLENGE
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 401
|
||||
DENY: 403
|
||||
DENY: 403
|
||||
|
||||
38
lib/testdata/test_config.yaml
vendored
Normal file
38
lib/testdata/test_config.yaml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
bots:
|
||||
- import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
- import: (data)/meta/ai-block-aggressive.yaml
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
- import: (data)/clients/x-firefox-ai.yaml
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
adjust: 10
|
||||
- name: aggressive-asns-without-functional-abuse-contact
|
||||
action: WEIGH
|
||||
asns:
|
||||
match:
|
||||
- 13335 # Cloudflare
|
||||
- 136907 # Huawei Cloud
|
||||
- 45102 # Alibaba Cloud
|
||||
weight:
|
||||
adjust: 10
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
dnsbl: false
|
||||
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
|
||||
thresholds: []
|
||||
4
package-lock.json
generated
4
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@techaro/anubis",
|
||||
"version": "1.19.1",
|
||||
"version": "1.20.0-pre1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@techaro/anubis",
|
||||
"version": "1.19.1",
|
||||
"version": "1.20.0-pre1",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"cssnano": "^7.0.7",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@techaro/anubis",
|
||||
"version": "1.19.1",
|
||||
"version": "1.20.0-pre1",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
|
||||
@@ -21,7 +21,9 @@ User-agent: Cotoyogi
|
||||
User-agent: Crawlspace
|
||||
User-agent: Diffbot
|
||||
User-agent: DuckAssistBot
|
||||
User-agent: EchoboxBot
|
||||
User-agent: FacebookBot
|
||||
User-agent: facebookexternalhit
|
||||
User-agent: Factset_spyderbot
|
||||
User-agent: FirecrawlAgent
|
||||
User-agent: FriendlyCrawler
|
||||
@@ -42,6 +44,7 @@ User-agent: Meta-ExternalAgent
|
||||
User-agent: meta-externalfetcher
|
||||
User-agent: Meta-ExternalFetcher
|
||||
User-agent: MistralAI-User/1.0
|
||||
User-agent: MyCentralAIScraperBot
|
||||
User-agent: NovaAct
|
||||
User-agent: OAI-SearchBot
|
||||
User-agent: omgili
|
||||
@@ -54,12 +57,17 @@ User-agent: Perplexity-User
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PetalBot
|
||||
User-agent: PhindBot
|
||||
User-agent: Poseidon Research Crawler
|
||||
User-agent: QualifiedBot
|
||||
User-agent: QuillBot
|
||||
User-agent: quillbot.com
|
||||
User-agent: SBIntuitionsBot
|
||||
User-agent: Scrapy
|
||||
User-agent: SemrushBot
|
||||
User-agent: SemrushBot-BA
|
||||
User-agent: SemrushBot-CT
|
||||
User-agent: SemrushBot-OCOB
|
||||
User-agent: SemrushBot-SI
|
||||
User-agent: SemrushBot-SWA
|
||||
User-agent: Sidetrade indexer bot
|
||||
User-agent: TikTokSpider
|
||||
|
||||
Reference in New Issue
Block a user