Compare commits

..

20 Commits

Author SHA1 Message Date
Xe Iaso
b6c361c294 fix(default-config): isBrowser -> isBrowserLike
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 18:07:36 -04:00
Xe Iaso
e9e602976f docs(admin/configuration): add expressions docs outline
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 18:03:10 -04:00
Xe Iaso
33bb5803a8 docs(admin/environments): update apache/nginx docs
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 17:56:28 -04:00
Xe Iaso
ada7b3a179 docs(admin): add guide for making Anubis far less aggressive by default
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 17:56:06 -04:00
Xe Iaso
dfa7025afe test: add apache, caddy, and nginx smoke test environments
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 17:52:23 -04:00
Xe Iaso
884af5fd4c fix(data/common): use new isBrowserLike function for less aggressive challenge rule
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 17:51:58 -04:00
Xe Iaso
3fb8fa2009 feat(policy/expressions): add userAgent.isBrowserLike method
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 16:15:05 -04:00
Xe Iaso
b43df36f7d feat(policy): add system load average as checker input
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 15:44:12 -04:00
Xe Iaso
fd058964fa feat(data/apps): add API route allow rule for non-HEAD/GET
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 13:06:24 -04:00
Xe Iaso
fb20b36b18 feat(data/bots): add two example IRC bots
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 00:46:59 -04:00
Xe Iaso
84cba05167 fix(data): enable legacy rule for now, admins will have to opt-in
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 00:41:05 -04:00
Xe Iaso
9f988578a4 feat(data): add challenge-browser-like.yaml
This is a huge change to Anubis and will make it a lot more invisible at
the cost of requiring additional server configuration to make it happen.

If you add this bit of nginx config to your location block:

```nginx
proxy_set_header X-Http-Version $server_protocol;
```

And then adjust the bottom bot rule to this:

```yaml
- import: (data)/common/challenge-browser-like.yaml
```

Anubis will be way less aggressive than it was before. This will let
through any traffic that comes from a browser that actually is a browser
via some more advanced heuristics.

I think that this rule alone is the key feature of v1.18.0.

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-28 00:20:27 -04:00
Xe Iaso
ea4e5751ab test(lib): ensure that Cf-Worker via expression works
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:50:24 -04:00
Xe Iaso
4e1db3842e fix(policy/expressions): do not Contains missing keys
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:49:44 -04:00
Xe Iaso
029c79ba28 fix(lib/test): fix failing test and invalid cloudflare workers rule
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:50 -04:00
Xe Iaso
9f8ede7fe3 data/clients: fix curl-impersonate definition
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:50 -04:00
Xe Iaso
80bd7c563b chore(data): reformat some things for expressions
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:49 -04:00
Xe Iaso
92a3e5ba81 fix: rename and/or to all/any
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:49 -04:00
Xe Iaso
65cbc6922c data: add some examples of expression-based rules
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:49 -04:00
Xe Iaso
eae3a7b5e4 feat(lib/policy): add support for CEL checkers
This adds the ability for administrators to use Common Expression
Language[0] (CEL) for more advanced check logic than Anubis previously
offered.

These can be as simple as:

```yaml
- name: allow-api-routes
  action: ALLOW
  expression:
    and:
    - '!(method == "HEAD" || method == "GET")'
    - path.startsWith("/api/")
```

or get as complicated as:

```yaml
- name: allow-git-clients
  action: ALLOW
  expression:
    and:
    - userAgent.startsWith("git/") || userAgent.contains("libgit") || userAgent.startsWith("go-git") || userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")
    - >
      "Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"
```

Internally these are compiled and evaluated with cel-go[1]. This also
leaves room for extensibility should that be desired in the future. This
will intersect with #338 and eventually intersect with TLS fingerprints
as in #337.

[0]: https://cel.dev/
[1]: https://github.com/google/cel-go

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-04-27 22:25:49 -04:00
95 changed files with 1680 additions and 906 deletions

View File

@@ -1 +1 @@
1.17.1
1.17.0

View File

@@ -1,8 +1,6 @@
// Package anubis contains the version number of Anubis.
package anubis
import "time"
// Version is the current version of Anubis.
//
// This variable is set at build time using the -X linker flag. If not set,
@@ -13,9 +11,6 @@ var Version = "devel"
// access.
const CookieName = "within.website-x-cmd-anubis-auth"
// CookieDefaultExpirationTime is the amount of time before the cookie/JWT expires.
const CookieDefaultExpirationTime = 7 * 24 * time.Hour
// BasePrefix is a global prefix for all Anubis endpoints. Can be emptied to remove the prefix entirely.
var BasePrefix = ""

View File

@@ -43,7 +43,6 @@ var (
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
cookieExpiration = flag.Duration("cookie-expiration-time", anubis.CookieDefaultExpirationTime, "The amount of time the authorization cookie is valid for")
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
ed25519PrivateKeyHexFile = flag.String("ed25519-private-key-hex-file", "", "file name containing value for ed25519-private-key-hex")
@@ -58,9 +57,8 @@ var (
healthcheck = flag.Bool("healthcheck", false, "run a health check against Anubis")
useRemoteAddress = flag.Bool("use-remote-address", false, "read the client's IP address from the network request, useful for debugging and running Anubis on bare metal")
debugBenchmarkJS = flag.Bool("debug-benchmark-js", false, "respond to every request with a challenge for benchmarking hashrate")
ogPassthrough = flag.Bool("og-passthrough", true, "enable Open Graph tag passthrough")
ogPassthrough = flag.Bool("og-passthrough", false, "enable Open Graph tag passthrough")
ogTimeToLive = flag.Duration("og-expiry-time", 24*time.Hour, "Open Graph tag cache expiration time")
ogCacheConsiderHost = flag.Bool("og-cache-consider-host", false, "enable or disable the use of the host in the Open Graph tag cache")
extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder")
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
)
@@ -274,20 +272,18 @@ func main() {
}
s, err := libanubis.New(libanubis.Options{
BasePrefix: *basePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,
PrivateKey: priv,
CookieDomain: *cookieDomain,
CookieExpiration: *cookieExpiration,
CookiePartitioned: *cookiePartitioned,
OGPassthrough: *ogPassthrough,
OGTimeToLive: *ogTimeToLive,
RedirectDomains: redirectDomainsList,
Target: *target,
WebmasterEmail: *webmasterEmail,
OGCacheConsidersHost: *ogCacheConsiderHost,
BasePrefix: *basePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,
PrivateKey: priv,
CookieDomain: *cookieDomain,
CookiePartitioned: *cookiePartitioned,
OGPassthrough: *ogPassthrough,
OGTimeToLive: *ogTimeToLive,
RedirectDomains: redirectDomainsList,
Target: *target,
WebmasterEmail: *webmasterEmail,
})
if err != nil {
log.Fatalf("can't construct libanubis.Server: %v", err)
@@ -324,7 +320,6 @@ func main() {
"og-passthrough", *ogPassthrough,
"og-expiry-time", *ogTimeToLive,
"base-prefix", *basePrefix,
"cookie-expiration-time", *cookieExpiration,
)
go func() {

View File

@@ -0,0 +1,6 @@
- name: allow-api-routes
action: ALLOW
expression:
all:
- '!(method == "HEAD" || method == "GET")'
- path.startsWith("/api/")

View File

@@ -12,6 +12,12 @@
{
"import": "(data)/bots/us-ai-scraper.yaml"
},
{
"import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
},
{
"import": "(data)/clients/curl-impersonate.yaml"
},
{
"import": "(data)/crawlers/googlebot.yaml"
},
@@ -46,4 +52,4 @@
}
],
"dnsbl": false
}
}

View File

@@ -17,6 +17,8 @@ bots:
- import: (data)/bots/cloudflare-workers.yaml
- import: (data)/bots/headless-browsers.yaml
- import: (data)/bots/us-ai-scraper.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
- import: (data)/clients/curl-impersonate.yaml
# Search engines to allow
- import: (data)/crawlers/googlebot.yaml
@@ -41,18 +43,11 @@ bots:
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: CHALLENGE
# Challenge clients with "Mozilla" or "Opera" in their user-agent string
#- import: (data)/common/legacy-challenge-everything.yaml
- name: reject-browsers
action: DENY
expression: userAgent.isBrowserLike()
dnsbl: false
# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really really want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
DENY: 200

View File

@@ -0,0 +1,28 @@
- name: deny-aggressive-brazilian-scrapers
action: DENY
expression:
any:
# Internet Explorer should be out of support
- userAgent.contains("MSIE")
# Trident is the Internet Explorer browser engine
- userAgent.contains("Trident")
# Presto is Opera's discontinued pre-Chromium engine; modern Opera is Chromium-based
- userAgent.contains("Presto")
# Windows CE is discontinued
- userAgent.contains("Windows CE")
# Windows 95 is discontinued
- userAgent.contains("Windows 95")
# Windows 98 is discontinued
- userAgent.contains("Windows 98")
# Windows 9.x is discontinued
- userAgent.contains("Win 9x")
# Amazon does not have an Alexa Toolbar.
- userAgent.contains("Alexa Toolbar")
- name: challenge-aggressive-brazilian-scrapers
action: CHALLENGE
expression:
any:
# This is not released, even Windows 11 calls itself Windows 10
- userAgent.contains("Windows NT 11.0")
# iPods are not in common use
- userAgent.contains("iPod")

View File

@@ -1,4 +1,3 @@
- name: cloudflare-workers
headers_regex:
CF-Worker: .*
action: DENY
expression: '"Cf-Worker" in headers'
action: CHALLENGE

View File

@@ -0,0 +1,9 @@
# phrik in the Arch Linux IRC channels
- name: archlinux-phrik
action: ALLOW
expression:
all:
- remoteAddress == "159.69.213.214"
- userAgent == "Mozilla/5.0 (compatible; utils.web Limnoria module)"
- '"X-Http-Version" in headers'
- headers["X-Http-Version"] == "HTTP/1.1"

View File

@@ -0,0 +1,9 @@
# chat in the gentoo IRC channels
- name: gentoo-chat
action: ALLOW
expression:
all:
- remoteAddress == "45.76.166.57"
- userAgent == "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"
- '"X-Http-Version" in headers'
- headers["X-Http-Version"] == "HTTP/1.1"

View File

@@ -0,0 +1,32 @@
- name: curl-impersonate
action: CHALLENGE
expression:
any:
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="107", "Chromium";v="107", "Not=A?Brand";v="24"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="101", "Microsoft Edge";v="101"'
- >
"Sec-Ch-Ua" in headers && headers["Sec-Ch-Ua"] == '" Not A;Brand";v="99", "Chromium";v="99", "Microsoft Edge";v="99"'

14
data/clients/git.yaml Normal file
View File

@@ -0,0 +1,14 @@
- name: allow-git-clients
action: ALLOW
expression:
all:
- >
(
userAgent.startsWith("git/") ||
userAgent.contains("libgit") ||
userAgent.startsWith("go-git") ||
userAgent.startsWith("JGit/") ||
userAgent.startsWith("JGit-")
)
- '"Git-Protocol" in headers'
- headers["Git-Protocol"] == "version=2"

7
data/clients/go-get.yaml Normal file
View File

@@ -0,0 +1,7 @@
- name: go-get
action: ALLOW
expression:
all:
- userAgent.startsWith("Go-http-client/")
- '"go-get" in query'
- query["go-get"] == "1"

View File

@@ -0,0 +1,6 @@
- name: allow-api-routes
action: ALLOW
expression:
all:
- '!(method == "HEAD" || method == "GET")'
- path.startsWith("/api/")

View File

@@ -0,0 +1,10 @@
# Challenge anything with HTTP/1.1 that claims to be a browser
- name: challenge-lies-browser-but-http-1.1
action: CHALLENGE
expression:
all:
- '"X-Http-Version" in headers'
- headers["X-Http-Version"] == "HTTP/1.1"
- '"X-Forwarded-Proto" in headers'
- headers["X-Forwarded-Proto"] == "https"
- userAgent.isBrowserLike()

View File

@@ -7,4 +7,7 @@
action: ALLOW
- name: robots-txt
path_regex: ^/robots.txt$
action: ALLOW
- name: sitemap
path_regex: ^/sitemap.xml$
action: ALLOW

View File

@@ -0,0 +1,4 @@
# Generic catchall rule
- name: generic-browser
expression: userAgent.isBrowserLike()
action: CHALLENGE

View File

@@ -0,0 +1,3 @@
- name: no-user-agent-string
expression: userAgent == ""
action: DENY

View File

@@ -3,6 +3,6 @@ package data
import "embed"
var (
//go:embed botPolicies.yaml botPolicies.json apps bots common crawlers
//go:embed botPolicies.yaml botPolicies.json apps bots clients common crawlers
BotPolicies embed.FS
)

View File

@@ -11,12 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## v1.17.1: Asahi sas Brutus: Echo 1
- Added customization of authorization cookie expiration time with `--cookie-expiration-time` flag or envvar
- Updated the `OG_PASSTHROUGH` to be true by default, thereby allowing OpenGraph tags to be passed through by default
- Added the ability to [customize Anubis' HTTP status codes](./admin/configuration/custom-status-codes.mdx) ([#355](https://github.com/TecharoHQ/anubis/issues/355))
## v1.17.0: Asahi sas Brutus
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))
@@ -47,9 +41,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed mojeekbot user agent regex
- Added support for running anubis behind a base path (e.g. `/myapp`)
- Reduce Anubis' paranoia with user cookies ([#365](https://github.com/TecharoHQ/anubis/pull/365))
- Added support for Opengraph passthrough while using unix sockets
- The opengraph subsystem now passes the HTTP `HOST` header through to the origin
- Updated the `OG_PASSTHROUGH` to be true by default, thereby allowing OpenGraph tags to be passed through by default
## v1.16.0

View File

@@ -1,19 +0,0 @@
# Custom status codes for Anubis errors
Out of the box, Anubis will reply with `HTTP 200` for challenge and denial pages. This is intended to make AI scrapers have a hard time with your website because when they are faced with a non-200 response, they will hammer the page over and over until they get a 200 response. This behavior may not be desirable, as such Anubis lets you customize what HTTP status codes are returned when Anubis throws challenge and denial pages.
This is configured in the `status_codes` block of your [bot policy file](../policies.mdx):
```yaml
status_codes:
CHALLENGE: 200
DENY: 200
```
To match CloudFlare's behavior, use a configuration like this:
```yaml
status_codes:
CHALLENGE: 403
DENY: 403
```

View File

@@ -0,0 +1,25 @@
# Expression-based rule matching
- Anubis offers the ability to use [Common Expression Language (CEL)](https://cel.dev) for advanced rule matching
- A brief summary of CEL
- Imagine the rule as the contents of a function body in programming or the WHERE clause in SQL
- This is an advanced feature and it is easy to get yourself into trouble with it
- Link to the spec, mention docs are WIP
- Variables exposed to Anubis expressions
- `remoteAddress` -> string IP of client
- `host` -> string HTTP/TLS hostname
- `method` -> string HTTP method
- `userAgent` -> string User-Agent header
- `path` -> string HTTP request path
- `query` -> map[string]string URL key values
- `headers` -> map[string]string HTTP request headers
- Load average:
- `load_1m` -> system load in the last minute
- `load_5m` -> system load in the last 5 minutes
- `load_15m` -> system load in the last 15 minutes
- Functions exposed to Anubis expressions
- `userAgent.isBrowserLike` -> returns true if the userAgent is like a browser
- Life advice
- When in doubt, throw a CHALLENGE over a DENY. CHALLENGE makes it easier to renege
- Example usage
- [How to make Anubis much less aggressive](../less-aggressive.mdx)

View File

@@ -9,11 +9,10 @@ This page provides detailed information on how to configure [OpenGraph tag](http
## Configuration Options
| Name | Description | Type | Default | Example |
| ------------------------ | --------------------------------------------------------- | -------- | ------- | ----------------------------- |
| `OG_PASSTHROUGH` | Enables or disables the Open Graph tag passthrough system | Boolean | `true` | `OG_PASSTHROUGH=true` |
| `OG_EXPIRY_TIME` | Configurable cache expiration time for Open Graph tags | Duration | `24h` | `OG_EXPIRY_TIME=1h` |
| `OG_CACHE_CONSIDER_HOST` | Enables or disables the use of the host in the cache key | Boolean | `false` | `OG_CACHE_CONSIDER_HOST=true` |
| Name | Description | Type | Default | Example |
|------------------|-----------------------------------------------------------|----------|---------|-------------------------|
| `OG_PASSTHROUGH` | Enables or disables the Open Graph tag passthrough system | Boolean | `false` | `OG_PASSTHROUGH=true` |
| `OG_EXPIRY_TIME` | Configurable cache expiration time for Open Graph tags | Duration | `24h` | `OG_EXPIRY_TIME=1h` |
## Usage
@@ -22,7 +21,6 @@ To configure Open Graph tags, you can set the following environment variables, e
```sh
export OG_PASSTHROUGH=true
export OG_EXPIRY_TIME=1h
export OG_CACHE_CONSIDER_HOST=false
```
## Implementation Details
@@ -35,8 +33,6 @@ When `OG_PASSTHROUGH` is enabled, Anubis will:
The cache expiration time is controlled by `OG_EXPIRY_TIME`.
When `OG_CACHE_CONSIDER_HOST` is enabled, Anubis will include the host in the cache key for Open Graph tags. This ensures that tags are cached separately for different hosts.
## Example
Here is an example of how to configure Open Graph tags in your Anubis setup:
@@ -44,19 +40,8 @@ Here is an example of how to configure Open Graph tags in your Anubis setup:
```sh
export OG_PASSTHROUGH=true
export OG_EXPIRY_TIME=1h
export OG_CACHE_CONSIDER_HOST=false
```
With these settings, Anubis will cache Open Graph tags for 1 hour and pass them through to the challenge page, not considering the host in the cache key.
## When to Enable `OG_CACHE_CONSIDER_HOST`
In most cases, you would want to keep `OG_CACHE_CONSIDER_HOST` set to `false` to avoid unnecessary cache fragmentation. However, there are some scenarios where enabling this option can be beneficial:
1. **Multi-Tenant Applications**: If you are running a multi-tenant application where different tenants are hosted on different subdomains, enabling `OG_CACHE_CONSIDER_HOST` ensures that the Open Graph tags are cached separately for each tenant. This prevents one tenant's Open Graph tags from being served to another tenant's users.
2. **Different Content for Different Hosts**: If your application serves different content based on the host, enabling `OG_CACHE_CONSIDER_HOST` ensures that the correct Open Graph tags are cached and served for each host. This is useful for applications that have different branding or content for different domains or subdomains.
3. **Security and Privacy Concerns**: In some cases, you may want to ensure that Open Graph tags are not shared between different hosts for security or privacy reasons. Enabling `OG_CACHE_CONSIDER_HOST` ensures that the tags are cached separately for each host, preventing any potential leakage of information between hosts.
With these settings, Anubis will cache Open Graph tags for 1 hour and pass them through to the challenge page.
For more information, refer to the [installation guide](../installation).

View File

@@ -91,7 +91,8 @@ Assuming you are protecting `anubistest.techaro.lol`, you need the following ser
# These headers need to be set or else Anubis will
# throw an "admin misconfiguration" error.
RequestHeader set "X-Real-Ip" expr=%{REMOTE_ADDR}
RequestHeader set X-Forwarded-Proto "https"
RequestHeader set "X-Forwarded-Proto" "https"
RequestHeader set "X-Http-Version" "%{SERVER_PROTOCOL}s"
ProxyPreserveHost On

View File

@@ -59,8 +59,14 @@ server {
listen [::]:443 ssl http2;
location / {
# Anubis needs these headers to understand the connection
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Http-Version $server_protocol;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Request-Id $request_id;
proxy_set_header X-Forwarded-Host $host;
proxy_pass http://anubis;
}

View File

@@ -1,6 +1,6 @@
---
id: traefik
title: Traefik
title: Integrate Anubis with Traefik in a Docker Compose Environment
---

View File

@@ -55,7 +55,6 @@ Anubis uses these environment variables for configuration:
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
| `COOKIE_DOMAIN` | unset | The domain the Anubis challenge pass cookie should be set to. This should be set to the domain you bought from your registrar (EG: `techaro.lol` if your webapp is running on `anubis.techaro.lol`). See [here](https://stackoverflow.com/a/1063760) for more information. |
| `COOKIE_EXPIRATION_TIME` | `168h` | The amount of time the authorization cookie is valid for. |
| `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. |
| `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. See below for details. |
@@ -64,7 +63,6 @@ Anubis uses these environment variables for configuration:
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. |
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. |
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. |
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain. |
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |

View File

@@ -0,0 +1,97 @@
# How to make Anubis much less aggressive
Out of the box, Anubis has fairly paranoid defaults. It's designed to stop the bleeding now, so it defaults to a global "challenge everything" rule. This does work, but comes at significant user experience cost if users disable JavaScript or run plugins that interfere with JavaScript execution.
Anubis ships with a rule named `challenge-lies-browser-but-http-1.1` that changes the default behavior to fire much less often. This works on top of [expression support](./configuration/expressions.mdx) to allow you to block the worst of the bad while leaving normal users able to access the website. This requires integration with your HTTP load balancer.
You can import this rule by replacing the `generic-browser` rule with the following:
```yaml
- import: (data)/common/challenge-browser-like.yaml
```
## The new rule
Previously Anubis aggressively challenged everything that had "Mozilla" in its User-Agent string. The rule has been amended to this set of heuristics:
1. If the request headers contain `X-Http-Version`
1. AND if the request header `X-Http-Version` is `HTTP/1.1`
1. AND if the request headers contain `X-Forwarded-Proto`
1. AND if the request header `X-Forwarded-Proto` is `https`
1. AND if the request's User-Agent string is similar to that of a browser
1. THEN throw a challenge.
This means that users that are using up to date browsers will automatically get through without having to pass a challenge.
## Apache
Ensure [`mod_http2`](https://httpd.apache.org/docs/2.4/mod/mod_http2.html) is loaded.
Make sure that your HTTPS VirtualHost has the right settings for Anubis in place:
```python
# Enable HTTP/2 support so Anubis can issue challenges for HTTP/1.1 clients
Protocols h2 http/1.1
# These headers need to be set or else Anubis will
# throw an "admin misconfiguration" error.
# diff-add
RequestHeader set "X-Real-Ip" expr=%{REMOTE_ADDR}
# diff-add
RequestHeader set "X-Forwarded-Proto" "https"
# diff-add
RequestHeader set "X-Http-Version" "%{SERVER_PROTOCOL}s"
```
## Caddy
Make sure that your [`reverse_proxy` has the right headers configured](https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#headers):
```python
ellenjoe.int.within.lgbt {
# ...
# diff-remove
reverse_proxy http://localhost:3000
# diff-add
reverse_proxy http://localhost:3000 {
# diff-add
header_up X-Real-Ip {remote_host}
# diff-add
header_up X-Http-Version {http.request.proto}
# diff-add
}
# ...
}
```
## ingress-nginx
Edit your `ingress-nginx-controller` ConfigMap:
```yaml
data:
# ...
# diff-add
location-snippet: |
# diff-add
proxy_set_header X-Http-Version $server_protocol;
# diff-add
proxy_set_header X-Tls-Version $ssl_protocol;
```
## Nginx
Edit your `server` blocks to add the following headers:
```python
# diff-add
proxy_set_header Host $host;
# diff-add
proxy_set_header X-Real-IP $remote_addr;
# diff-add
proxy_set_header X-Http-Version $server_protocol;
```
## Traefik
This configuration is not currently supported with Traefik. A Traefik plugin is needed to add the right header.

View File

@@ -3,47 +3,17 @@ title: List of known browser extensions that can break Anubis
---
This page contains a list of all of the browser extensions that are known to break Anubis' functionality and their associated GitHub issues, along with instructions on how to work around the issue.
## [JShelter](https://jshelter.org/)
| Extension | JShelter |
| :----------- | :------------------------------------------------------------------------------------------------------------------------------------------------- |
| Website | [jshelter.org](https://jshelter.org/) |
| GitHub issue | https://github.com/TecharoHQ/anubis/issues/25 |
| Be aware of | [What are Web Workers, and what are the threats that I face?](https://jshelter.org/faq/#what-are-web-workers-and-what-are-the-threats-that-i-face) |
| Extension | JShelter |
| :----------- | :-------------------------------------------- |
| Website | [jshelter.org](https://jshelter.org/) |
| GitHub issue | https://github.com/TecharoHQ/anubis/issues/25 |
### Workaround steps (recommended):
1. Click on the JShelter badge icon (typically in the toolbar next to your navigation bar; if you cannot locate the icon, see [this question](https://jshelter.org/faq/#can-i-see-a-jshelter-badge-icon-next-to-my-navigation-bar-i-want-to-interact-with-the-extension-easily-and-avoid-going-through-settings)).
2. Expand JavaScript Shield settings by clicking on the `Modify` button.
3. Click on the `Detail tweaks of JS shield for this site` button.
4. Click and drag the `WebWorker` slider to the left until `Remove` is replaced by `Unprotected`.
5. Refresh the page, for example, by clicking on the `Refresh page` button at the top of the JShelter pop up window.
6. You might want to restore the Worker settings once you go through the challenge.
### Workaround steps (alternative if you do not want to dig in JShelter's pop up):
1. Click on the JShelter badge icon (typically in the toolbar next to your navigation bar; if you cannot locate the icon, see [this question](https://jshelter.org/faq/#can-i-see-a-jshelter-badge-icon-next-to-my-navigation-bar-i-want-to-interact-with-the-extension-easily-and-avoid-going-through-settings)).
2. Expand JavaScript Shield settings by clicking on the `Modify` button.
3. Choose "Turn JavaScript Shield off"
4. Refresh the page, for example, by clicking on the `Refresh page` button at the top of the JShelter pop up window.
:::note
Taking these actions will remove all protections of JavaScript Shield for all pages at the visited web site. You might want to review and amend your JavaScript Shield settings once you go through the challenge, based on your operational security model.
:::
### Workaround steps (alternative if you do not like JShelter's pop up):
Workaround steps:
1. Open JShelter extension settings
2. Click on JS Shield details
3. Enter in the domain for a website protected by Anubis
4. Choose "Turn JavaScript Shield off"
5. Hit "Add to list"
:::note
Taking these actions will remove all protections of JavaScript Shield for all pages at the visited web site. You might want to review and amend your JavaScript Shield settings once you go through the challenge, based on your operational security model.
:::

View File

@@ -29,18 +29,8 @@ This page contains a non-exhaustive list with all websites using Anubis.
- https://wiki.archlinux.org/
- https://git.devuan.org/
- https://hydra.nixos.org/
- https://hydra.nixos.org/
- https://codeberg.org/
- https://www.cfaarchive.org/
- https://forum.freecad.org/
- <details>
<summary>Sourceware</summary>
- https://sourceware.org/cgit
- https://sourceware.org/glibc/wiki
- https://builder.sourceware.org/testruns/
- https://patchwork.sourceware.org/
- https://gcc.gnu.org/bugzilla/
- https://gcc.gnu.org/cgit
</details>
- <details>
<summary>The United Nations</summary>
- https://policytoolbox.iiep.unesco.org/

18
go.mod
View File

@@ -6,46 +6,58 @@ require (
github.com/a-h/templ v0.3.857
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
github.com/golang-jwt/jwt/v5 v5.2.2
github.com/google/cel-go v0.25.0
github.com/playwright-community/playwright-go v0.5101.0
github.com/prometheus/client_golang v1.22.0
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
github.com/shirou/gopsutil/v4 v4.25.3
github.com/yl2chen/cidranger v1.0.2
golang.org/x/net v0.39.0
k8s.io/apimachinery v0.32.3
)
require (
cel.dev/expr v0.23.1 // indirect
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect
github.com/a-h/parse v0.0.0-20250122154542-74294addb73e // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cli/browser v1.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/deckarep/golang-set/v2 v2.6.0 // indirect
github.com/ebitengine/purego v0.8.2 // indirect
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/natefinch/atomic v1.0.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect
golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/tools v0.32.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/protobuf v1.36.5 // indirect
honnef.co/go/tools v0.6.1 // indirect
k8s.io/apimachinery v0.32.3 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)

42
go.sum
View File

@@ -1,3 +1,5 @@
cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg=
cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c h1:pxW6RcqyfI9/kWtOwnv/G+AzdKuy2ZrqINhenH4HyNs=
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/a-h/parse v0.0.0-20250122154542-74294addb73e h1:HjVbSQHy+dnlS6C3XajZ69NYAb5jbGNfHanvm1+iYlo=
@@ -6,6 +8,8 @@ github.com/a-h/templ v0.3.857 h1:6EqcJuGZW4OL+2iZ3MD+NnIcG7nGkaQeF2Zq5kf9ZGg=
github.com/a-h/templ v0.3.857/go.mod h1:qhrhAkRFubE7khxLZHsBFHfX+gWwVNKbzKeF9GlPV4M=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
@@ -14,11 +18,14 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cli/browser v1.3.0 h1:LejqCrpWr+1pRqmEPDGnTZOjsMe7sehifLynZJuqJpo=
github.com/cli/browser v1.3.0/go.mod h1:HH8s+fOAxjhQoBUAsKuPCbqUuxZDhQ2/aD+SzsEfBTk=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.6.0 h1:XfcQbWM1LlMB8BsJ8N9vW5ehnnPVIw0je80NsVHagjM=
github.com/deckarep/golang-set/v2 v2.6.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/ebitengine/purego v0.8.2 h1:jPPGWs2sZ1UgOSgD2bClL0MJIqu58nOmIcBuXr62z1I=
github.com/ebitengine/purego v0.8.2/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64=
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456 h1:CkmB2l68uhvRlwOTPrwnuitSxi/S3Cg4L5QYOcL9MBc=
@@ -33,15 +40,23 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/google/cel-go v0.25.0 h1:jsFw9Fhn+3y2kBbltZR4VEz5xKkcIFRPDnuEzAGv5GY=
github.com/google/cel-go v0.25.0/go.mod h1:hjEb6r5SuOSlhCHmFoLzu8HGCERvIsDAbxDAyNU/MmI=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
@@ -60,6 +75,8 @@ github.com/playwright-community/playwright-go v0.5101.0/go.mod h1:kBNWs/w2aJ2ZUp
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
@@ -68,19 +85,30 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a h1:iLcLb5Fwwz7g/DLK89F+uQBDeAhHhwdzB5fSlVdhGcM=
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a/go.mod h1:wozgYq9WEBQBaIJe4YZ0qTSFAMxmcwBhQH0fO0R34Z0=
github.com/shirou/gopsutil/v4 v4.25.3 h1:SeA68lsu8gLggyMbmCn8cmp97V1TI9ld9sVzAUcKcKE=
github.com/shirou/gopsutil/v4 v4.25.3/go.mod h1:xbuxyoZj+UsgnZrENu3lQivsngRR5BdjbJwf2fv4szA=
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 h1:1P7xPZEwZMoBoz0Yze5Nx2/4pxj6nw9ZqHWXqP0iRgQ=
golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
@@ -97,12 +125,12 @@ golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -124,18 +152,24 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU=
golang.org/x/tools v0.32.0/go.mod h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@@ -8,21 +8,18 @@ import (
)
// GetOGTags is the main function that retrieves Open Graph tags for a URL
func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) {
func (c *OGTagCache) GetOGTags(url *url.URL) (map[string]string, error) {
if url == nil {
return nil, errors.New("nil URL provided, cannot fetch OG tags")
}
target := c.getTarget(url)
cacheKey := c.generateCacheKey(target, originalHost)
urlStr := c.getTarget(url)
// Check cache first
if cachedTags := c.checkCache(cacheKey); cachedTags != nil {
if cachedTags := c.checkCache(urlStr); cachedTags != nil {
return cachedTags, nil
}
// Fetch HTML content, passing the original host
doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey)
// Fetch HTML content
doc, err := c.fetchHTMLDocument(urlStr)
if errors.Is(err, syscall.ECONNREFUSED) {
slog.Debug("Connection refused, returning empty tags")
return nil, nil
@@ -38,28 +35,17 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
ogTags := c.extractOGTags(doc)
// Store in cache
c.cache.Set(cacheKey, ogTags, c.ogTimeToLive)
c.cache.Set(urlStr, ogTags, c.ogTimeToLive)
return ogTags, nil
}
func (c *OGTagCache) generateCacheKey(target string, originalHost string) string {
var cacheKey string
if c.ogCacheConsiderHost {
cacheKey = target + "|" + originalHost
} else {
cacheKey = target
}
return cacheKey
}
// checkCache checks if we have the tags cached and returns them if so
func (c *OGTagCache) checkCache(cacheKey string) map[string]string {
if cachedTags, ok := c.cache.Get(cacheKey); ok {
func (c *OGTagCache) checkCache(urlStr string) map[string]string {
if cachedTags, ok := c.cache.Get(urlStr); ok {
slog.Debug("cache hit", "tags", cachedTags)
return cachedTags
}
slog.Debug("cache miss", "url", cacheKey)
slog.Debug("cache miss", "url", urlStr)
return nil
}

View File

@@ -4,13 +4,12 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"reflect"
"testing"
"time"
)
func TestCheckCache(t *testing.T) {
cache := NewOGTagCache("http://example.com", true, time.Minute, false)
cache := NewOGTagCache("http://example.com", true, time.Minute)
// Set up test data
urlStr := "http://example.com/page"
@@ -18,19 +17,18 @@ func TestCheckCache(t *testing.T) {
"og:title": "Test Title",
"og:description": "Test Description",
}
cacheKey := cache.generateCacheKey(urlStr, "example.com")
// Test cache miss
tags := cache.checkCache(cacheKey)
tags := cache.checkCache(urlStr)
if tags != nil {
t.Errorf("expected nil tags on cache miss, got %v", tags)
}
// Manually add to cache
cache.cache.Set(cacheKey, expectedTags, time.Minute)
cache.cache.Set(urlStr, expectedTags, time.Minute)
// Test cache hit
tags = cache.checkCache(cacheKey)
tags = cache.checkCache(urlStr)
if tags == nil {
t.Fatal("expected non-nil tags on cache hit, got nil")
}
@@ -69,7 +67,7 @@ func TestGetOGTags(t *testing.T) {
defer ts.Close()
// Create an instance of OGTagCache with a short TTL for testing
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
cache := NewOGTagCache(ts.URL, true, 1*time.Minute)
// Parse the test server URL
parsedURL, err := url.Parse(ts.URL)
@@ -78,8 +76,7 @@ func TestGetOGTags(t *testing.T) {
}
// Test fetching OG tags from the test server
// Pass the host from the parsed test server URL
ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err := cache.GetOGTags(parsedURL)
if err != nil {
t.Fatalf("failed to get OG tags: %v", err)
}
@@ -98,15 +95,13 @@ func TestGetOGTags(t *testing.T) {
}
// Test fetching OG tags from the cache
// Pass the host from the parsed test server URL
ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err = cache.GetOGTags(parsedURL)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
// Test fetching OG tags from the cache (3rd time)
// Pass the host from the parsed test server URL
newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
newOgTags, err := cache.GetOGTags(parsedURL)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
@@ -125,116 +120,3 @@ func TestGetOGTags(t *testing.T) {
}
}
// TestGetOGTagsWithHostConsideration tests the behavior of the cache with and without host consideration and for multiple hosts in a theoretical setup.
func TestGetOGTagsWithHostConsideration(t *testing.T) {
var loadCount int // Counter to track how many times the test route is loaded
// Create a test server
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
loadCount++ // Increment counter on each request to the server
w.Header().Set("Content-Type", "text/html")
w.Write([]byte(`
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="Test Title" />
<meta property="og:description" content="Test Description" />
</head>
<body><p>Content</p></body>
</html>
`))
}))
defer ts.Close()
parsedURL, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("failed to parse test server URL: %v", err)
}
expectedTags := map[string]string{
"og:title": "Test Title",
"og:description": "Test Description",
}
testCases := []struct {
name string
ogCacheConsiderHost bool
requests []struct {
host string
expectedLoadCount int // Expected load count *after* this request
}
}{
{
name: "Host Not Considered - Same Host",
ogCacheConsiderHost: false,
requests: []struct {
host string
expectedLoadCount int
}{
{"host1", 1}, // First request, miss
{"host1", 1}, // Second request, same host, hit (host ignored)
},
},
{
name: "Host Not Considered - Different Host",
ogCacheConsiderHost: false,
requests: []struct {
host string
expectedLoadCount int
}{
{"host1", 1}, // First request, miss
{"host2", 1}, // Second request, different host, hit (host ignored)
},
},
{
name: "Host Considered - Same Host",
ogCacheConsiderHost: true,
requests: []struct {
host string
expectedLoadCount int
}{
{"host1", 1}, // First request, miss
{"host1", 1}, // Second request, same host, hit
},
},
{
name: "Host Considered - Different Host",
ogCacheConsiderHost: true,
requests: []struct {
host string
expectedLoadCount int
}{
{"host1", 1}, // First request, miss
{"host2", 2}, // Second request, different host, miss
{"host2", 2}, // Third request, same as second, hit
{"host1", 2}, // Fourth request, same as first, hit
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
loadCount = 0 // Reset load count for each test case
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
for i, req := range tc.requests {
ogTags, err := cache.GetOGTags(parsedURL, req.host)
if err != nil {
t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err)
continue // Skip further checks for this request if error occurred
}
// Verify tags are correct (should always be the same in this setup)
if !reflect.DeepEqual(ogTags, expectedTags) {
t.Errorf("Request %d (host: %s): expected tags %v, got %v", i+1, req.host, expectedTags, ogTags)
}
// Verify the load count to check cache hit/miss behavior
if loadCount != req.expectedLoadCount {
t.Errorf("Request %d (host: %s): expected load count %d, got %d (cache hit/miss mismatch)", i+1, req.host, req.expectedLoadCount, loadCount)
}
}
})
}
}

View File

@@ -1,7 +1,6 @@
package ogtags
import (
"context"
"errors"
"fmt"
"golang.org/x/net/html"
@@ -17,35 +16,17 @@ var (
emptyMap = map[string]string{} // used to indicate an empty result in the cache. Can't use nil as it would be a cache miss.
)
// fetchHTMLDocumentWithCache fetches the HTML document from the given URL string,
// preserving the original host header.
func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil)
if err != nil {
return nil, fmt.Errorf("failed to create http request: %w", err)
}
// Set the Host header to the original host
if originalHost != "" {
req.Host = originalHost
}
// Add proxy headers
req.Header.Set("X-Forwarded-Proto", "https")
req.Header.Set("User-Agent", "Anubis-OGTag-Fetcher/1.0") // For tracking purposes
// Send the request
resp, err := c.client.Do(req)
func (c *OGTagCache) fetchHTMLDocument(urlStr string) (*html.Node, error) {
resp, err := c.client.Get(urlStr)
if err != nil {
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
slog.Debug("og: request timed out", "url", urlStr)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
c.cache.Set(urlStr, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
}
return nil, fmt.Errorf("http get failed: %w", err)
}
// Ensure the response body is closed
// this defer will call MaxBytesReader's Close, which closes the original body.
defer func(Body io.ReadCloser) {
err := Body.Close()
if err != nil {
@@ -55,17 +36,19 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
if resp.StatusCode != http.StatusOK {
slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
c.cache.Set(urlStr, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
return nil, fmt.Errorf("%w: page not found", ErrOgHandled)
}
// Check content type
ct := resp.Header.Get("Content-Type")
if ct == "" {
// assume non html body
return nil, fmt.Errorf("missing Content-Type header")
} else {
mediaType, _, err := mime.ParseMediaType(ct)
if err != nil {
// Malformed Content-Type header
slog.Debug("og: malformed Content-Type header", "url", urlStr, "contentType", ct)
return nil, fmt.Errorf("%w malformed Content-Type header: %w", ErrOgHandled, err)
}
@@ -76,16 +59,17 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
}
}
resp.Body = http.MaxBytesReader(nil, resp.Body, maxContentLength)
resp.Body = http.MaxBytesReader(nil, resp.Body, c.maxContentLength)
doc, err := html.Parse(resp.Body)
if err != nil {
// Check if the error is specifically because the limit was exceeded
var maxBytesErr *http.MaxBytesError
if errors.As(err, &maxBytesErr) {
slog.Debug("og: content exceeded max length", "url", urlStr, "limit", maxContentLength)
return nil, fmt.Errorf("content too large: exceeded %d bytes", maxContentLength)
slog.Debug("og: content exceeded max length", "url", urlStr, "limit", c.maxContentLength)
return nil, fmt.Errorf("content too large: exceeded %d bytes", c.maxContentLength)
}
// parsing error (e.g., malformed HTML)
return nil, fmt.Errorf("failed to parse HTML: %w", err)
}

View File

@@ -2,7 +2,6 @@ package ogtags
import (
"fmt"
"golang.org/x/net/html"
"io"
"net/http"
"net/http/httptest"
@@ -79,8 +78,8 @@ func TestFetchHTMLDocument(t *testing.T) {
}))
defer ts.Close()
cache := NewOGTagCache("", true, time.Minute, false)
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
cache := NewOGTagCache("", true, time.Minute)
doc, err := cache.fetchHTMLDocument(ts.URL)
if tt.expectError {
if err == nil {
@@ -106,9 +105,9 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
t.Skip("test requires theoretical network egress")
}
cache := NewOGTagCache("", true, time.Minute, false)
cache := NewOGTagCache("", true, time.Minute)
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example")
if err == nil {
t.Error("expected error for invalid URL, got nil")
@@ -118,9 +117,3 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
t.Error("expected nil document for invalid URL, got non-nil")
}
}
// fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call
func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) {
cacheKey := c.generateCacheKey(urlStr, originalHost)
return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey)
}

View File

@@ -104,7 +104,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Create cache instance
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
cache := NewOGTagCache(ts.URL, true, 1*time.Minute)
// Create URL for test
testURL, _ := url.Parse(ts.URL)
@@ -112,8 +112,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
testURL.RawQuery = tc.query
// Get OG tags
// Pass the host from the test URL
ogTags, err := cache.GetOGTags(testURL, testURL.Host)
ogTags, err := cache.GetOGTags(testURL)
// Check error expectation
if tc.expectError {
@@ -140,8 +139,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
}
// Test cache retrieval
// Pass the host from the test URL
cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host)
cachedOGTags, err := cache.GetOGTags(testURL)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}

View File

@@ -1,111 +1,51 @@
package ogtags
import (
"context"
"log/slog"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/TecharoHQ/anubis/decaymap"
)
const (
maxContentLength = 16 << 20 // 16 MiB in bytes, if there is a reasonable reason that you need more than this...Why?
httpTimeout = 5 * time.Second /*todo: make this configurable?*/
)
type OGTagCache struct {
cache *decaymap.Impl[string, map[string]string]
targetURL *url.URL
ogCacheConsiderHost bool
ogPassthrough bool
ogTimeToLive time.Duration
approvedTags []string
approvedPrefixes []string
client *http.Client
cache *decaymap.Impl[string, map[string]string]
target string
ogPassthrough bool
ogTimeToLive time.Duration
approvedTags []string
approvedPrefixes []string
client *http.Client
maxContentLength int64
}
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration) *OGTagCache {
// Predefined approved tags and prefixes
// In the future, these could come from configuration
defaultApprovedTags := []string{"description", "keywords", "author"}
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
var parsedTargetURL *url.URL
var err error
if target == "" {
// Default to localhost if target is empty
parsedTargetURL, _ = url.Parse("http://localhost")
} else {
parsedTargetURL, err = url.Parse(target)
if err != nil {
slog.Debug("og: failed to parse target URL, treating as non-unix", "target", target, "error", err)
// If parsing fails, treat it as a non-unix target for backward compatibility or default behavior
// For now, assume it's not a scheme issue but maybe an invalid char, etc.
// A simple string target might be intended if it's not a full URL.
parsedTargetURL = &url.URL{Scheme: "http", Host: target} // Assume http if scheme missing and host-like
if !strings.Contains(target, "://") && !strings.HasPrefix(target, "unix:") {
// If it looks like just a host/host:port (and not unix), prepend http:// (todo: is this bad...? Trace path to see if i can yell at user to do it right)
parsedTargetURL, _ = url.Parse("http://" + target) // fetch cares about scheme but anubis doesn't
}
}
}
client := &http.Client{
Timeout: httpTimeout,
Timeout: 5 * time.Second, /*make this configurable?*/
}
// Configure custom transport for Unix sockets
if parsedTargetURL.Scheme == "unix" {
socketPath := parsedTargetURL.Path // For unix scheme, path is the socket path
client.Transport = &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", socketPath)
},
}
}
const maxContentLength = 16 << 20 // 16 MiB in bytes
return &OGTagCache{
cache: decaymap.New[string, map[string]string](),
targetURL: parsedTargetURL, // Store the parsed URL
ogPassthrough: ogPassthrough,
ogTimeToLive: ogTimeToLive,
ogCacheConsiderHost: ogTagsConsiderHost, // todo: refactor to be a separate struct
approvedTags: defaultApprovedTags,
approvedPrefixes: defaultApprovedPrefixes,
client: client,
cache: decaymap.New[string, map[string]string](),
target: target,
ogPassthrough: ogPassthrough,
ogTimeToLive: ogTimeToLive,
approvedTags: defaultApprovedTags,
approvedPrefixes: defaultApprovedPrefixes,
client: client,
maxContentLength: maxContentLength,
}
}
// getTarget constructs the target URL string for fetching OG tags.
// For Unix sockets, it creates a "fake" HTTP URL that the custom dialer understands.
func (c *OGTagCache) getTarget(u *url.URL) string {
if c.targetURL.Scheme == "unix" {
// The custom dialer ignores the host, but we need a valid http URL structure.
// Use "unix" as a placeholder host. Path and Query from original request are appended.
fakeURL := &url.URL{
Scheme: "http", // Scheme must be http/https for client.Get
Host: "unix", // Arbitrary host, ignored by custom dialer
Path: u.Path,
RawQuery: u.RawQuery,
}
return fakeURL.String()
}
// For regular http/https targets
target := *c.targetURL // Make a copy
target.Path = u.Path
target.RawQuery = u.RawQuery
return target.String()
return c.target + u.Path
}
func (c *OGTagCache) Cleanup() {
if c.cache != nil {
c.cache.Cleanup()
}
c.cache.Cleanup()
}

View File

@@ -1,16 +1,7 @@
package ogtags
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
)
@@ -38,23 +29,14 @@ func TestNewOGTagCache(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive)
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
}
// Check the parsed targetURL, handling the default case for empty target
expectedURLStr := tt.target
if tt.target == "" {
// Default behavior when target is empty is now http://localhost
expectedURLStr = "http://localhost"
} else if !strings.Contains(tt.target, "://") && !strings.HasPrefix(tt.target, "unix:") {
// Handle case where target is just host or host:port (and not unix)
expectedURLStr = "http://" + tt.target
}
if cache.targetURL.String() != expectedURLStr {
t.Errorf("expected targetURL %s, got %s", expectedURLStr, cache.targetURL.String())
if cache.target != tt.target {
t.Errorf("expected target %s, got %s", tt.target, cache.target)
}
if cache.ogPassthrough != tt.ogPassthrough {
@@ -68,45 +50,6 @@ func TestNewOGTagCache(t *testing.T) {
}
}
// TestNewOGTagCache_UnixSocket specifically tests unix socket initialization
func TestNewOGTagCache_UnixSocket(t *testing.T) {
tempDir := t.TempDir()
socketPath := filepath.Join(tempDir, "test.sock")
target := "unix://" + socketPath
cache := NewOGTagCache(target, true, 5*time.Minute, false)
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
}
if cache.targetURL.Scheme != "unix" {
t.Errorf("expected targetURL scheme 'unix', got '%s'", cache.targetURL.Scheme)
}
if cache.targetURL.Path != socketPath {
t.Errorf("expected targetURL path '%s', got '%s'", socketPath, cache.targetURL.Path)
}
// Check if the client transport is configured for Unix sockets
transport, ok := cache.client.Transport.(*http.Transport)
if !ok {
t.Fatalf("expected client transport to be *http.Transport, got %T", cache.client.Transport)
}
if transport.DialContext == nil {
t.Fatal("expected client transport DialContext to be non-nil for unix socket")
}
// Attempt a dummy dial to see if it uses the correct path (optional, more involved check)
dummyConn, err := transport.DialContext(context.Background(), "", "")
if err == nil {
dummyConn.Close()
t.Log("DialContext seems functional, but couldn't verify path without a listener")
} else if !strings.Contains(err.Error(), "connect: connection refused") && !strings.Contains(err.Error(), "connect: no such file or directory") {
// We expect connection refused or not found if nothing is listening
t.Errorf("DialContext failed with unexpected error: %v", err)
}
}
func TestGetTarget(t *testing.T) {
tests := []struct {
name string
@@ -123,39 +66,24 @@ func TestGetTarget(t *testing.T) {
expected: "http://example.com",
},
{
name: "With complex path",
target: "http://example.com",
path: "/pag(#*((#@)ΓΓΓΓe/Γ",
query: "id=123",
// Expect URL encoding and query parameter
expected: "http://example.com/pag%28%23%2A%28%28%23@%29%CE%93%CE%93%CE%93%CE%93e/%CE%93?id=123",
name: "With complex path",
target: "http://example.com",
path: "/pag(#*((#@)ΓΓΓΓe/Γ",
query: "id=123",
expected: "http://example.com/pag(#*((#@)ΓΓΓΓe/Γ",
},
{
name: "With query and path",
target: "http://example.com",
path: "/page",
query: "id=123",
expected: "http://example.com/page?id=123",
},
{
name: "Unix socket target",
target: "unix:/tmp/anubis.sock",
path: "/some/path",
query: "key=value&flag=true",
expected: "http://unix/some/path?key=value&flag=true", // Scheme becomes http, host is 'unix'
},
{
name: "Unix socket target with ///",
target: "unix:///var/run/anubis.sock",
path: "/",
query: "",
expected: "http://unix/",
expected: "http://example.com/page",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cache := NewOGTagCache(tt.target, false, time.Minute, false)
cache := NewOGTagCache(tt.target, false, time.Minute)
u := &url.URL{
Path: tt.path,
@@ -170,86 +98,3 @@ func TestGetTarget(t *testing.T) {
})
}
}
// TestIntegrationGetOGTags_UnixSocket tests fetching OG tags via a Unix socket.
func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
tempDir := t.TempDir()
socketPath := filepath.Join(tempDir, "anubis-test.sock")
// Ensure the socket does not exist initially
_ = os.Remove(socketPath)
// Create a simple HTTP server listening on the Unix socket
listener, err := net.Listen("unix", socketPath)
if err != nil {
t.Fatalf("Failed to listen on unix socket %s: %v", socketPath, err)
}
defer func(listener net.Listener, socketPath string) {
if listener != nil {
if err := listener.Close(); err != nil && !errors.Is(err, net.ErrClosed) {
t.Logf("Error closing listener: %v", err)
}
}
if _, err := os.Stat(socketPath); err == nil {
if err := os.Remove(socketPath); err != nil {
t.Logf("Error removing socket file %s: %v", socketPath, err)
}
}
}(listener, socketPath)
server := &http.Server{
Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
fmt.Fprintln(w, `<!DOCTYPE html><html><head><meta property="og:title" content="Unix Socket Test" /></head><body>Test</body></html>`)
}),
}
go func() {
if err := server.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) {
t.Logf("Unix socket server error: %v", err)
}
}()
defer func(server *http.Server, ctx context.Context) {
err := server.Shutdown(ctx)
if err != nil {
t.Logf("Error shutting down server: %v", err)
}
}(server, context.Background()) // Ensure server is shut down
// Wait a moment for the server to start
time.Sleep(100 * time.Millisecond)
// Create cache instance pointing to the Unix socket
targetURL := "unix://" + socketPath
cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
// Create a dummy URL for the request (path and query matter)
testReqURL, _ := url.Parse("/some/page?query=1")
// Get OG tags
// Pass an empty string for host, as it's irrelevant for unix sockets
ogTags, err := cache.GetOGTags(testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags failed for unix socket: %v", err)
}
expectedTags := map[string]string{
"og:title": "Unix Socket Test",
}
if !reflect.DeepEqual(ogTags, expectedTags) {
t.Errorf("Expected OG tags %v, got %v", expectedTags, ogTags)
}
// Test cache retrieval (should hit cache)
// Pass an empty string for host
cachedTags, err := cache.GetOGTags(testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err)
}
if !reflect.DeepEqual(cachedTags, expectedTags) {
t.Errorf("Expected cached OG tags %v, got %v", expectedTags, cachedTags)
}
}

View File

@@ -12,7 +12,7 @@ import (
// TestExtractOGTags updated with correct expectations based on filtering logic
func TestExtractOGTags(t *testing.T) {
// Use a cache instance that reflects the default approved lists
testCache := NewOGTagCache("", false, time.Minute, false)
testCache := NewOGTagCache("", false, time.Minute)
// Manually set approved tags/prefixes based on the user request for clarity
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}
@@ -189,7 +189,7 @@ func TestIsOGMetaTag(t *testing.T) {
func TestExtractMetaTagInfo(t *testing.T) {
// Use a cache instance that reflects the default approved lists
testCache := NewOGTagCache("", false, time.Minute, false)
testCache := NewOGTagCache("", false, time.Minute)
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}

View File

@@ -170,7 +170,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
hash := rule.Hash()
lg.Debug("rule hash", "hash", hash)
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), s.policy.StatusCodes.Deny)
s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), http.StatusOK)
return true
case config.RuleChallenge:
lg.Debug("challenge requested")
@@ -202,7 +202,7 @@ func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string,
if resp != dnsbl.AllGood {
lg.Info("DNSBL hit", "status", resp.String())
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), s.policy.StatusCodes.Deny)
s.respondWithStatus(w, r, fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip), http.StatusOK)
return true
}
}
@@ -348,7 +348,7 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
"response": response,
"iat": time.Now().Unix(),
"nbf": time.Now().Add(-1 * time.Minute).Unix(),
"exp": time.Now().Add(s.opts.CookieExpiration).Unix(),
"exp": time.Now().Add(24 * 7 * time.Hour).Unix(),
})
tokenString, err := token.SignedString(s.priv)
if err != nil {
@@ -361,7 +361,7 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
http.SetCookie(w, &http.Cookie{
Name: anubis.CookieName,
Value: tokenString,
Expires: time.Now().Add(s.opts.CookieExpiration),
Expires: time.Now().Add(24 * 7 * time.Hour),
SameSite: http.SameSiteLaxMode,
Domain: s.opts.CookieDomain,
Partitioned: s.opts.CookiePartitioned,

View File

@@ -8,12 +8,12 @@ import (
"os"
"strings"
"testing"
"time"
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
@@ -84,7 +84,7 @@ func TestCVE2025_24369(t *testing.T) {
Next: http.NewServeMux(),
Policy: pol,
CookieDomain: "local.cetacean.club",
CookieDomain: ".local.cetacean.club",
CookiePartitioned: true,
CookieName: t.Name(),
})
@@ -127,96 +127,6 @@ func TestCVE2025_24369(t *testing.T) {
}
}
func TestCookieCustomExpiration(t *testing.T) {
pol := loadPolicies(t, "")
pol.DefaultDifficulty = 0
ckieExpiration := 10 * time.Minute
srv := spawnAnubis(t, Options{
Next: http.NewServeMux(),
Policy: pol,
CookieDomain: "local.cetacean.club",
CookieName: t.Name(),
CookieExpiration: ckieExpiration,
})
ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
defer ts.Close()
cli := &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
}
resp, err := cli.Post(ts.URL+"/.within.website/x/cmd/anubis/api/make-challenge", "", nil)
if err != nil {
t.Fatalf("can't request challenge: %v", err)
}
defer resp.Body.Close()
var chall = struct {
Challenge string `json:"challenge"`
}{}
if err := json.NewDecoder(resp.Body).Decode(&chall); err != nil {
t.Fatalf("can't read challenge response body: %v", err)
}
nonce := 0
elapsedTime := 420
redir := "/"
calculated := ""
calcString := fmt.Sprintf("%s%d", chall.Challenge, nonce)
calculated = internal.SHA256sum(calcString)
req, err := http.NewRequest(http.MethodGet, ts.URL+"/.within.website/x/cmd/anubis/api/pass-challenge", nil)
if err != nil {
t.Fatalf("can't make request: %v", err)
}
q := req.URL.Query()
q.Set("response", calculated)
q.Set("nonce", fmt.Sprint(nonce))
q.Set("redir", redir)
q.Set("elapsedTime", fmt.Sprint(elapsedTime))
req.URL.RawQuery = q.Encode()
requestRecieveLowerBound := time.Now()
resp, err = cli.Do(req)
requestRecieveUpperBound := time.Now()
if err != nil {
t.Fatalf("can't do challenge passing")
}
if resp.StatusCode != http.StatusFound {
resp.Write(os.Stderr)
t.Errorf("wanted %d, got: %d", http.StatusFound, resp.StatusCode)
}
var ckie *http.Cookie
for _, cookie := range resp.Cookies() {
t.Logf("%#v", cookie)
if cookie.Name == anubis.CookieName {
ckie = cookie
break
}
}
if ckie == nil {
t.Errorf("Cookie %q not found", anubis.CookieName)
return
}
expirationLowerBound := requestRecieveLowerBound.Add(ckieExpiration)
expirationUpperBound := requestRecieveUpperBound.Add(ckieExpiration)
// Since the cookie expiration precision is only to the second due to the Unix() call, we can
// lower the level of expected precision.
if ckie.Expires.Unix() < expirationLowerBound.Unix() || ckie.Expires.Unix() > expirationUpperBound.Unix() {
t.Errorf("cookie expiration is not within the expected range. expected between: %v and %v. got: %v", expirationLowerBound, expirationUpperBound, ckie.Expires)
return
}
}
func TestCookieSettings(t *testing.T) {
pol := loadPolicies(t, "")
pol.DefaultDifficulty = 0
@@ -228,7 +138,6 @@ func TestCookieSettings(t *testing.T) {
CookieDomain: "local.cetacean.club",
CookiePartitioned: true,
CookieName: t.Name(),
CookieExpiration: anubis.CookieDefaultExpirationTime,
})
ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
@@ -272,9 +181,7 @@ func TestCookieSettings(t *testing.T) {
q.Set("elapsedTime", fmt.Sprint(elapsedTime))
req.URL.RawQuery = q.Encode()
requestRecieveLowerBound := time.Now()
resp, err = cli.Do(req)
requestRecieveUpperBound := time.Now()
if err != nil {
t.Fatalf("can't do challenge passing")
}
@@ -301,15 +208,6 @@ func TestCookieSettings(t *testing.T) {
t.Errorf("cookie domain is wrong, wanted local.cetacean.club, got: %s", ckie.Domain)
}
expirationLowerBound := requestRecieveLowerBound.Add(anubis.CookieDefaultExpirationTime)
expirationUpperBound := requestRecieveUpperBound.Add(anubis.CookieDefaultExpirationTime)
// Since the cookie expiration precision is only to the second due to the Unix() call, we can
// lower the level of expected precision.
if ckie.Expires.Unix() < expirationLowerBound.Unix() || ckie.Expires.Unix() > expirationUpperBound.Unix() {
t.Errorf("cookie expiration is not within the expected range. expected between: %v and %v. got: %v", expirationLowerBound, expirationUpperBound, ckie.Expires)
return
}
if ckie.Partitioned != srv.opts.CookiePartitioned {
t.Errorf("wanted partitioned flag %v, got: %v", srv.opts.CookiePartitioned, ckie.Partitioned)
}
@@ -497,47 +395,41 @@ func TestBasePrefix(t *testing.T) {
}
}
func TestCustomStatusCodes(t *testing.T) {
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Log(r.UserAgent())
w.WriteHeader(http.StatusOK)
fmt.Fprintln(w, "OK")
})
func TestCloudflareWorkersRule(t *testing.T) {
for _, variant := range []string{"cel", "header"} {
t.Run(variant, func(t *testing.T) {
pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml")
statusMap := map[string]int{
"ALLOW": 200,
"CHALLENGE": 401,
"DENY": 403,
}
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "OK")
})
pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
pol.DefaultDifficulty = 4
srv := spawnAnubis(t, Options{
Next: h,
Policy: pol,
})
ts := httptest.NewServer(internal.RemoteXRealIP(true, "tcp", srv))
defer ts.Close()
for userAgent, statusCode := range statusMap {
t.Run(userAgent, func(t *testing.T) {
req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, ts.URL, nil)
s, err := New(Options{
Next: h,
Policy: pol,
ServeRobotsTXT: true,
})
if err != nil {
t.Fatal(err)
t.Fatalf("can't construct libanubis.Server: %v", err)
}
req.Header.Set("User-Agent", userAgent)
t.Run("no-cf-worker-header", func(t *testing.T) {
req, err := http.NewRequest(http.MethodGet, "/", nil)
if err != nil {
t.Fatal(err)
}
resp, err := ts.Client().Do(req)
if err != nil {
t.Fatal(err)
}
req.Header.Add("X-Real-Ip", "127.0.0.1")
if resp.StatusCode != statusCode {
t.Errorf("wanted status code %d but got: %d", statusCode, resp.StatusCode)
}
cr, _, err := s.check(req)
if err != nil {
t.Fatal(err)
}
if cr.Rule != config.RuleAllow {
t.Errorf("rule is wrong, wanted %s, got: %s", config.RuleAllow, cr.Rule)
}
})
})
}
}

View File

@@ -29,15 +29,13 @@ type Options struct {
ServeRobotsTXT bool
PrivateKey ed25519.PrivateKey
CookieExpiration time.Duration
CookieDomain string
CookieName string
CookiePartitioned bool
OGPassthrough bool
OGTimeToLive time.Duration
OGCacheConsidersHost bool
Target string
OGPassthrough bool
OGTimeToLive time.Duration
Target string
WebmasterEmail string
BasePrefix string
@@ -91,7 +89,7 @@ func New(opts Options) (*Server, error) {
policy: opts.Policy,
opts: opts,
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost),
OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive),
}
mux := http.NewServeMux()

View File

@@ -54,7 +54,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
var ogTags map[string]string = nil
if s.opts.OGPassthrough {
var err error
ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host)
ogTags, err = s.OGTags.GetOGTags(r.URL)
if err != nil {
lg.Error("failed to get OG tags", "err", err)
}
@@ -67,10 +67,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
return
}
handler := internal.NoStoreCache(templ.Handler(
component,
templ.WithStatus(s.opts.Policy.StatusCodes.Challenge),
))
handler := internal.NoStoreCache(templ.Handler(component))
handler.ServeHTTP(w, r)
}

114
lib/policy/celchecker.go Normal file
View File

@@ -0,0 +1,114 @@
package policy
import (
"fmt"
"net/http"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/policy/expressions"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
)
type CELChecker struct {
src string
program cel.Program
}
func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) {
env, err := expressions.NewEnvironment()
if err != nil {
return nil, err
}
var src string
var ast *cel.Ast
if cfg.Expression != "" {
src = cfg.Expression
var iss *cel.Issues
interm, iss := env.Compile(src)
if iss != nil {
return nil, iss.Err()
}
ast, iss = env.Check(interm)
if iss != nil {
return nil, iss.Err()
}
}
if len(cfg.All) != 0 {
ast, err = expressions.Join(env, expressions.JoinAnd, cfg.All...)
}
if len(cfg.Any) != 0 {
ast, err = expressions.Join(env, expressions.JoinOr, cfg.Any...)
}
if err != nil {
return nil, err
}
program, err := expressions.Compile(env, ast)
if err != nil {
return nil, fmt.Errorf("can't compile CEL program: %w", err)
}
return &CELChecker{
src: src,
program: program,
}, nil
}
func (cc *CELChecker) Hash() string {
return internal.SHA256sum(cc.src)
}
func (cc *CELChecker) Check(r *http.Request) (bool, error) {
result, _, err := cc.program.ContextEval(r.Context(), &CELRequest{r})
if err != nil {
return false, err
}
if val, ok := result.(types.Bool); ok {
return bool(val), nil
}
return false, nil
}
type CELRequest struct {
*http.Request
}
func (cr *CELRequest) Parent() cel.Activation { return nil }
func (cr *CELRequest) ResolveName(name string) (any, bool) {
switch name {
case "remoteAddress":
return cr.Header.Get("X-Real-Ip"), true
case "host":
return cr.Host, true
case "method":
return cr.Method, true
case "userAgent":
return cr.UserAgent(), true
case "path":
return cr.URL.Path, true
case "query":
return expressions.URLValues{Values: cr.URL.Query()}, true
case "headers":
return expressions.HTTPHeaders{Header: cr.Header}, true
case "load_1m":
return expressions.Load1(), true
case "load_5m":
return expressions.Load5(), true
case "load_15m":
return expressions.Load15(), true
default:
return nil, false
}
}

View File

@@ -6,7 +6,6 @@ import (
"io"
"io/fs"
"net"
"net/http"
"os"
"regexp"
"strings"
@@ -29,7 +28,6 @@ var (
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
ErrStatusCodeNotValid = errors.New("config.StatusCode: status code not valid, must be between 100 and 599")
)
type Rule string
@@ -55,9 +53,11 @@ type BotConfig struct {
UserAgentRegex *string `json:"user_agent_regex"`
PathRegex *string `json:"path_regex"`
HeadersRegex map[string]string `json:"headers_regex"`
Action Rule `json:"action"`
RemoteAddr []string `json:"remote_addresses"`
Challenge *ChallengeRules `json:"challenge,omitempty"`
Expression *ExpressionOrList `json:"expression"`
Action Rule `json:"action"`
Challenge *ChallengeRules `json:"challenge,omitempty"`
}
func (b BotConfig) Zero() bool {
@@ -85,7 +85,12 @@ func (b BotConfig) Valid() error {
errs = append(errs, ErrBotMustHaveName)
}
if b.UserAgentRegex == nil && b.PathRegex == nil && len(b.RemoteAddr) == 0 && len(b.HeadersRegex) == 0 {
allFieldsEmpty := b.UserAgentRegex == nil &&
b.PathRegex == nil &&
len(b.RemoteAddr) == 0 &&
len(b.HeadersRegex) == 0
if allFieldsEmpty && b.Expression == nil {
errs = append(errs, ErrBotMustHaveUserAgentOrPath)
}
@@ -137,6 +142,12 @@ func (b BotConfig) Valid() error {
}
}
if b.Expression != nil {
if err := b.Expression.Valid(); err != nil {
errs = append(errs, err)
}
}
switch b.Action {
case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny:
// okay
@@ -264,33 +275,9 @@ func (boi *BotOrImport) Valid() error {
return ErrMustSetBotOrImportRules
}
type StatusCodes struct {
Challenge int `json:"CHALLENGE"`
Deny int `json:"DENY"`
}
func (sc StatusCodes) Valid() error {
var errs []error
if sc.Challenge == 0 || (sc.Challenge < 100 && sc.Challenge >= 599) {
errs = append(errs, fmt.Errorf("%w: challenge is %d", ErrStatusCodeNotValid, sc.Challenge))
}
if sc.Deny == 0 || (sc.Deny < 100 && sc.Deny >= 599) {
errs = append(errs, fmt.Errorf("%w: deny is %d", ErrStatusCodeNotValid, sc.Deny))
}
if len(errs) != 0 {
return fmt.Errorf("status codes not valid:\n%w", errors.Join(errs...))
}
return nil
}
type fileConfig struct {
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
StatusCodes StatusCodes `json:"status_codes"`
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
}
func (c fileConfig) Valid() error {
@@ -306,10 +293,6 @@ func (c fileConfig) Valid() error {
}
}
if err := c.StatusCodes.Valid(); err != nil {
errs = append(errs, err)
}
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
@@ -319,10 +302,6 @@ func (c fileConfig) Valid() error {
func Load(fin io.Reader, fname string) (*Config, error) {
var c fileConfig
c.StatusCodes = StatusCodes{
Challenge: http.StatusOK,
Deny: http.StatusOK,
}
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
@@ -332,8 +311,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {
}
result := &Config{
DNSBL: c.DNSBL,
StatusCodes: c.StatusCodes,
DNSBL: c.DNSBL,
}
var validationErrs []error
@@ -366,9 +344,8 @@ func Load(fin io.Reader, fname string) (*Config, error) {
}
type Config struct {
Bots []BotConfig
DNSBL bool
StatusCodes StatusCodes
Bots []BotConfig
DNSBL bool
}
func (c Config) Valid() error {

View File

@@ -0,0 +1,62 @@
package config
import (
"encoding/json"
"errors"
"slices"
)
var (
ErrExpressionOrListMustBeStringOrObject = errors.New("config: this must be a string or an object")
ErrExpressionEmpty = errors.New("config: this expression is empty")
ErrExpressionCantHaveBoth = errors.New("config: expression block can't contain multiple expression types")
)
type ExpressionOrList struct {
Expression string `json:"-"`
All []string `json:"all"`
Any []string `json:"any"`
}
func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
if eol.Expression != rhs.Expression {
return false
}
if !slices.Equal(eol.All, rhs.All) {
return false
}
if !slices.Equal(eol.Any, rhs.Any) {
return false
}
return true
}
func (eol *ExpressionOrList) UnmarshalJSON(data []byte) error {
switch string(data[0]) {
case `"`: // string
return json.Unmarshal(data, &eol.Expression)
case "{": // object
type RawExpressionOrList ExpressionOrList
var val RawExpressionOrList
if err := json.Unmarshal(data, &val); err != nil {
return err
}
eol.All = val.All
eol.Any = val.Any
return nil
}
return ErrExpressionOrListMustBeStringOrObject
}
func (eol *ExpressionOrList) Valid() error {
if len(eol.All) != 0 && len(eol.Any) != 0 {
return ErrExpressionCantHaveBoth
}
return nil
}

View File

@@ -0,0 +1,73 @@
package config
import (
"encoding/json"
"errors"
"testing"
)
func TestExpressionOrListUnmarshal(t *testing.T) {
for _, tt := range []struct {
name string
inp string
err error
validErr error
result *ExpressionOrList
}{
{
name: "simple",
inp: `"\"User-Agent\" in headers"`,
result: &ExpressionOrList{
Expression: `"User-Agent" in headers`,
},
},
{
name: "object-and",
inp: `{
"all": ["\"User-Agent\" in headers"]
}`,
result: &ExpressionOrList{
All: []string{
`"User-Agent" in headers`,
},
},
},
{
name: "object-or",
inp: `{
"any": ["\"User-Agent\" in headers"]
}`,
result: &ExpressionOrList{
Any: []string{
`"User-Agent" in headers`,
},
},
},
{
name: "both-or-and",
inp: `{
"all": ["\"User-Agent\" in headers"],
"any": ["\"User-Agent\" in headers"]
}`,
validErr: ErrExpressionCantHaveBoth,
},
} {
t.Run(tt.name, func(t *testing.T) {
var eol ExpressionOrList
if err := json.Unmarshal([]byte(tt.inp), &eol); !errors.Is(err, tt.err) {
t.Errorf("wanted unmarshal error: %v but got: %v", tt.err, err)
}
if tt.result != nil && !eol.Equal(tt.result) {
t.Logf("wanted: %#v", tt.result)
t.Logf("got: %#v", &eol)
t.Fatal("parsed expression is not what was expected")
}
if err := eol.Valid(); !errors.Is(err, tt.validErr) {
t.Errorf("wanted validation error: %v but got: %v", tt.err, err)
}
})
}
}

View File

@@ -0,0 +1,17 @@
{
"bots": [
{
"name": "multiple-expression-types",
"action": "ALLOW",
"expression": {
"all": [
"userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")",
"\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\"\n"
],
"any": [
"userAgent.startsWith(\"evilbot/\")"
]
}
}
]
}

View File

@@ -0,0 +1,10 @@
bots:
- name: multiple-expression-types
action: ALLOW
expression:
all:
- userAgent.startsWith("git/") || userAgent.contains("libgit")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"
any:
- userAgent.startsWith("evilbot/")

View File

@@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 0,
"DENY": 0
}
}

View File

@@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 0
DENY: 0

View File

@@ -0,0 +1,14 @@
{
"bots": [
{
"name": "allow-git-clients",
"action": "ALLOW",
"expression": {
"all": [
"userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")",
"\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\""
]
}
}
]
}

View File

@@ -0,0 +1,8 @@
bots:
- name: allow-git-clients
action: ALLOW
expression:
all:
- userAgent.startsWith("git/") || userAgent.contains("libgit")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"

View File

@@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 200,
"DENY": 200
}
}

View File

@@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 200
DENY: 200

View File

@@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 403,
"DENY": 403
}
}

View File

@@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 403
DENY: 403

View File

@@ -0,0 +1,3 @@
# Expressions support
The expressions support is based on ideas from [go-away](https://git.gammaspectra.live/git/go-away) but with different opinions about how things should be done.

View File

@@ -0,0 +1,81 @@
package expressions
import (
"strings"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/ext"
)
// NewEnvironment creates a new CEL environment, this is the set of
// variables and functions that are passed into the CEL scope so that
// Anubis can fail loudly and early when something is invalid instead
// of blowing up at runtime.
func NewEnvironment() (*cel.Env, error) {
return cel.NewEnv(
ext.Strings(
ext.StringsLocale("en_US"),
ext.StringsValidateFormatCalls(true),
),
// default all timestamps to UTC
cel.DefaultUTCTimeZone(true),
// Variables exposed to CEL programs:
// Request metadata
cel.Variable("remoteAddress", cel.StringType),
cel.Variable("host", cel.StringType),
cel.Variable("method", cel.StringType),
cel.Variable("userAgent", cel.StringType),
cel.Variable("path", cel.StringType),
cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)),
cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)),
// System load metadata
cel.Variable("load_1m", cel.DoubleType),
cel.Variable("load_5m", cel.DoubleType),
cel.Variable("load_15m", cel.DoubleType),
// Functions exposed to CEL programs:
// userAgent.isBrowserLike() method, used to detect if a user agent is likely a browser
// based on shibboleth words in the User-Agent string.
cel.Function("isBrowserLike",
cel.MemberOverload("userAgent_isBrowserLike_string",
[]*cel.Type{cel.StringType},
cel.BoolType,
cel.UnaryBinding(func(userAgentVal ref.Val) ref.Val {
var userAgent string
switch v := userAgentVal.Value().(type) {
case string:
userAgent = v
default:
return types.NewErr("invalid type %T", userAgentVal)
}
switch {
case strings.Contains(userAgent, "Mozilla"), strings.Contains(userAgent, "Opera"), strings.Contains(userAgent, "Gecko"), strings.Contains(userAgent, "WebKit"), strings.Contains(userAgent, "Apple"), strings.Contains(userAgent, "Chrome"), strings.Contains(userAgent, "Windows"), strings.Contains(userAgent, "Linux"):
return types.Bool(true)
default:
return types.Bool(false)
}
}),
),
),
)
}
// Compile takes CEL environment and syntax tree then emits an optimized
// Program for execution.
func Compile(env *cel.Env, ast *cel.Ast) (cel.Program, error) {
return env.Program(
ast,
cel.EvalOptions(
// optimize regular expressions right now instead of on the fly
cel.OptOptimize,
),
)
}

View File

@@ -0,0 +1,75 @@
package expressions
import (
"net/http"
"reflect"
"strings"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/common/types/traits"
)
// HTTPHeaders is a type wrapper to expose HTTP headers into CEL programs.
// It implements just enough of CEL's map traits (Find/Contains/Get/Size)
// for expressions such as `"Cf-Worker" in headers` to evaluate.
type HTTPHeaders struct {
	http.Header
}

// ConvertToNative is required by ref.Val but intentionally unsupported:
// CEL programs never need the header map back as a native Go value.
func (h HTTPHeaders) ConvertToNative(typeDesc reflect.Type) (any, error) {
	return nil, ErrNotImplemented
}

// ConvertToType presents this value as a CEL map (or the map type when the
// type itself is requested); any other conversion is an error value.
func (h HTTPHeaders) ConvertToType(typeVal ref.Type) ref.Val {
	switch typeVal {
	case types.MapType:
		return h
	case types.TypeType:
		return types.MapType
	}
	return types.NewErr("can't convert from %q to %q", types.MapType, typeVal)
}

// Equal always reports false: comparing whole header maps inside
// expressions is deliberately unsupported.
func (h HTTPHeaders) Equal(other ref.Val) ref.Val {
	return types.Bool(false) // We don't want to compare header maps
}

// Type reports this value as a CEL map.
func (h HTTPHeaders) Type() ref.Type {
	return types.MapType
}

// Value returns the wrapper itself; see ConvertToNative.
func (h HTTPHeaders) Value() any { return h }
// Find looks up a header by name and returns all of its values joined with
// commas, implementing the lookup half of CEL's map traits. The second
// return value reports whether the header exists.
//
// Fixed: the existence check previously indexed the map directly, which is
// case-sensitive, while the value join used Header.Values, which
// canonicalizes the key (e.g. "cf-worker" -> "Cf-Worker"). Using
// Header.Values for both makes lookups case-insensitive, matching net/http
// semantics, and avoids the double map lookup.
func (h HTTPHeaders) Find(key ref.Val) (ref.Val, bool) {
	k, ok := key.(types.String)
	if !ok {
		return nil, false
	}

	vals := h.Header.Values(string(k))
	if len(vals) == 0 {
		return nil, false
	}

	return types.String(strings.Join(vals, ",")), true
}
// Contains reports whether a header key is present, backing CEL's `in`
// operator (`"Cf-Worker" in headers`).
func (h HTTPHeaders) Contains(key ref.Val) ref.Val {
	_, ok := h.Find(key)
	return types.Bool(ok)
}

// Get returns the joined values for a header key, or an error value when
// the key is absent, backing CEL's index operator (headers["..."]).
func (h HTTPHeaders) Get(key ref.Val) ref.Val {
	result, ok := h.Find(key)
	if !ok {
		return types.ValOrErr(result, "no such key: %v", key)
	}
	return result
}

// Iterator is not implemented: iterating over headers from a CEL program
// will panic at runtime.
func (h HTTPHeaders) Iterator() traits.Iterator { panic("TODO(Xe): implement me") }

// IsZeroValue reports whether there are no headers at all.
func (h HTTPHeaders) IsZeroValue() bool {
	return len(h.Header) == 0
}

// Size returns the number of distinct header keys.
func (h HTTPHeaders) Size() ref.Val { return types.Int(len(h.Header)) }

View File

@@ -0,0 +1,52 @@
package expressions
import (
"net/http"
"testing"
"github.com/google/cel-go/common/types"
)
// TestHTTPHeaders exercises the CEL map traits (Contains/Get) that
// HTTPHeaders implements over a net/http header map.
func TestHTTPHeaders(t *testing.T) {
	headers := HTTPHeaders{
		Header: http.Header{
			"Content-Type": {"application/json"},
			"Cf-Worker":    {"true"},
			"User-Agent":   {"Go-http-client/2"},
		},
	}

	t.Run("contains-existing-header", func(t *testing.T) {
		resp := headers.Contains(types.String("User-Agent"))
		if !bool(resp.(types.Bool)) {
			t.Fatal("headers does not contain User-Agent")
		}
	})

	t.Run("not-contains-missing-header", func(t *testing.T) {
		resp := headers.Contains(types.String("Xxx-Random-Header"))
		if bool(resp.(types.Bool)) {
			// Fixed: the message previously claimed the opposite
			// condition about an unrelated header.
			t.Fatal("headers contains Xxx-Random-Header but should not")
		}
	})

	t.Run("get-existing-header", func(t *testing.T) {
		val := headers.Get(types.String("User-Agent"))
		switch val.(type) {
		case types.String:
			// ok
		default:
			t.Fatalf("result was wrong type %T", val)
		}
	})

	t.Run("not-get-missing-header", func(t *testing.T) {
		val := headers.Get(types.String("Xxx-Random-Header"))
		switch val.(type) {
		case *types.Err:
			// ok
		default:
			t.Fatalf("result was wrong type %T", val)
		}
	})
}

View File

@@ -0,0 +1,104 @@
package expressions
import (
"errors"
"fmt"
"strings"
"github.com/google/cel-go/cel"
)
// JoinOperator is a type wrapper for and/or operators.
//
// This is a separate type so that validation can be done at the type level.
type JoinOperator string

// Possible values for JoinOperator
const (
	JoinAnd JoinOperator = "&&"
	JoinOr  JoinOperator = "||"
)

// Valid ensures that JoinOperator is semantically valid, returning
// ErrWrongJoinOperator for anything other than JoinAnd or JoinOr.
func (jo JoinOperator) Valid() error {
	switch jo {
	case JoinAnd, JoinOr:
		return nil
	default:
		return ErrWrongJoinOperator
	}
}

// Sentinel errors returned by the join helpers; callers can match them
// with errors.Is.
var (
	ErrWrongJoinOperator = errors.New("expressions: invalid join operator")
	ErrNoExpressions     = errors.New("expressions: cannot join zero expressions")
	ErrCantCompile       = errors.New("expressions: can't compile one expression")
)
// JoinClauses joins a list of compiled clauses into one big if statement.
//
// Imagine the following two clauses:
//
//	ball.color == "red"
//	ball.shape == "round"
//
// JoinClauses would emit one "joined" clause such as:
//
//	( ball.color == "red" ) && ( ball.shape == "round" )
func JoinClauses(env *cel.Env, operator JoinOperator, clauses ...*cel.Ast) (*cel.Ast, error) {
	if err := operator.Valid(); err != nil {
		return nil, fmt.Errorf("%w: wanted && or ||, got: %q", err, operator)
	}

	switch len(clauses) {
	case 0:
		return nil, ErrNoExpressions
	case 1:
		// Nothing to join; hand the single clause back unchanged.
		return clauses[0], nil
	}

	var exprs []string
	var errs []error
	for _, clause := range clauses {
		clauseStr, err := cel.AstToString(clause)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		// Parenthesize every clause so operator precedence inside one
		// clause can't leak into its neighbors after joining.
		exprs = append(exprs, "( "+clauseStr+" )")
	}
	if len(errs) != 0 {
		return nil, fmt.Errorf("errors while decompiling statements: %w", errors.Join(errs...))
	}

	statement := strings.Join(exprs, " "+string(operator)+" ")

	result, iss := env.Compile(statement)
	// Fixed: cel-go can return a non-nil *cel.Issues that only carries
	// warnings; only a non-nil Err() is fatal. This also matches how Join
	// checks compile results.
	if iss != nil && iss.Err() != nil {
		return nil, iss.Err()
	}
	return result, nil
}
// Join compiles each textual clause with the given environment and then
// merges the results into a single expression using JoinClauses. Clauses
// that fail to compile are collected and reported together as one error
// wrapping ErrCantCompile.
func Join(env *cel.Env, operator JoinOperator, clauses ...string) (*cel.Ast, error) {
	compiled := make([]*cel.Ast, 0, len(clauses))
	var problems []error

	for _, src := range clauses {
		ast, iss := env.Compile(src)
		if iss != nil && iss.Err() != nil {
			problems = append(problems, fmt.Errorf("%w: %q gave: %w", ErrCantCompile, src, iss.Err()))
			continue
		}
		compiled = append(compiled, ast)
	}

	if len(problems) != 0 {
		return nil, fmt.Errorf("errors while joining clauses: %w", errors.Join(problems...))
	}

	return JoinClauses(env, operator, compiled...)
}

View File

@@ -0,0 +1,90 @@
package expressions
import (
"errors"
"testing"
"github.com/google/cel-go/cel"
)
// TestJoin verifies that Join compiles and merges clauses with the
// requested operator by decompiling the joined AST and comparing against
// the expected expression text.
func TestJoin(t *testing.T) {
	env, err := NewEnvironment()
	if err != nil {
		t.Fatal(err)
	}
	for _, tt := range []struct {
		name      string
		clauses   []string
		op        JoinOperator
		err       error // expected error, matched with errors.Is
		resultStr string
	}{
		{
			name:    "no-clauses",
			clauses: []string{},
			op:      JoinAnd,
			err:     ErrNoExpressions,
		},
		{
			// A single clause must round-trip unchanged (identity).
			name:      "one-clause-identity",
			clauses:   []string{`remoteAddress == "8.8.8.8"`},
			op:        JoinAnd,
			err:       nil,
			resultStr: `remoteAddress == "8.8.8.8"`,
		},
		{
			name: "multi-clause-and",
			clauses: []string{
				`remoteAddress == "8.8.8.8"`,
				`host == "anubis.techaro.lol"`,
			},
			op:        JoinAnd,
			err:       nil,
			resultStr: `remoteAddress == "8.8.8.8" && host == "anubis.techaro.lol"`,
		},
		{
			name: "multi-clause-or",
			clauses: []string{
				`remoteAddress == "8.8.8.8"`,
				`host == "anubis.techaro.lol"`,
			},
			op:        JoinOr,
			err:       nil,
			resultStr: `remoteAddress == "8.8.8.8" || host == "anubis.techaro.lol"`,
		},
		{
			// NOTE: the expected string's line break/indentation comes from
			// cel.AstToString's formatting and must match exactly.
			name: "git-user-agent",
			clauses: []string{
				`userAgent.startsWith("git/") || userAgent.contains("libgit")`,
				`"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"`,
			},
			op:  JoinAnd,
			err: nil,
			resultStr: `(userAgent.startsWith("git/") || userAgent.contains("libgit")) && "Git-Protocol" in headers &&
headers["Git-Protocol"] == "version=2"`,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			result, err := Join(env, tt.op, tt.clauses...)
			if !errors.Is(err, tt.err) {
				t.Errorf("wanted error %v but got: %v", tt.err, err)
			}
			if tt.err != nil {
				// Expected-failure cases have no program to compare.
				return
			}
			program, err := cel.AstToString(result)
			if err != nil {
				t.Fatalf("can't decompile program: %v", err)
			}
			if tt.resultStr != program {
				t.Logf("wanted: %s", tt.resultStr)
				t.Logf("got: %s", program)
				t.Error("program did not compile as expected")
			}
		})
	}
}

View File

@@ -0,0 +1,67 @@
package expressions
import (
"context"
"log/slog"
"sync"
"time"
"github.com/shirou/gopsutil/v4/load"
)
// loadAvg caches the most recent system load average sample behind a
// read/write lock so request-path readers never block on sampling.
type loadAvg struct {
	lock sync.RWMutex
	// data is nil until the first successful sample is taken.
	data *load.AvgStat
}
// updateThread periodically refreshes the cached load average until ctx is
// canceled. It is meant to run as a background goroutine (see init).
//
// Fixed: sample once immediately before entering the loop; previously the
// cache stayed nil for the first 15 seconds after startup, during which
// the Load* accessors would dereference a nil *load.AvgStat.
func (l *loadAvg) updateThread(ctx context.Context) {
	l.update()

	ticker := time.NewTicker(15 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			l.update()
		case <-ctx.Done():
			return
		}
	}
}
// update takes one load average sample and stores it, keeping the previous
// sample when sampling fails.
//
// Fixed: the error path previously still assigned to l.data (via the
// combined `l.data, err = load.Avg()`), clobbering the last good sample
// with nil and leaving the Load* accessors to panic.
func (l *loadAvg) update() {
	avg, err := load.Avg()
	if err != nil {
		slog.Debug("can't get load average", "err", err)
		return
	}

	l.lock.Lock()
	defer l.lock.Unlock()
	l.data = avg
}
var (
	// globalLoadAvg is the process-wide load average cache read by the
	// Load1/Load5/Load15 accessors.
	globalLoadAvg *loadAvg
)

// init starts the background sampler for the lifetime of the process.
func init() {
	globalLoadAvg = &loadAvg{}
	go globalLoadAvg.updateThread(context.Background())
}
// Load1 returns the 1-minute system load average, or 0 if no sample has
// been taken successfully yet.
//
// Fixed: guard against a nil cache entry; previously this dereferenced a
// nil *load.AvgStat before the first successful sample.
func Load1() float64 {
	globalLoadAvg.lock.RLock()
	defer globalLoadAvg.lock.RUnlock()
	if globalLoadAvg.data == nil {
		return 0
	}
	return globalLoadAvg.data.Load1
}
// Load5 returns the 5-minute system load average, or 0 if no sample has
// been taken successfully yet.
//
// Fixed: guard against a nil cache entry; previously this dereferenced a
// nil *load.AvgStat before the first successful sample.
func Load5() float64 {
	globalLoadAvg.lock.RLock()
	defer globalLoadAvg.lock.RUnlock()
	if globalLoadAvg.data == nil {
		return 0
	}
	return globalLoadAvg.data.Load5
}
// Load15 returns the 15-minute system load average, or 0 if no sample has
// been taken successfully yet.
//
// Fixed: guard against a nil cache entry; previously this dereferenced a
// nil *load.AvgStat before the first successful sample.
func Load15() float64 {
	globalLoadAvg.lock.RLock()
	defer globalLoadAvg.lock.RUnlock()
	if globalLoadAvg.data == nil {
		return 0
	}
	return globalLoadAvg.data.Load15
}

View File

@@ -0,0 +1,78 @@
package expressions
import (
"errors"
"net/url"
"reflect"
"strings"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/common/types/traits"
)
// ErrNotImplemented is returned by the ref.Val methods that the CEL
// wrappers in this package deliberately do not support.
var ErrNotImplemented = errors.New("expressions: not implemented")

// URLValues is a type wrapper to expose url.Values into CEL programs.
// It implements just enough of CEL's map traits (Find/Contains/Get/Size)
// for expressions over query parameters to evaluate.
type URLValues struct {
	url.Values
}

// ConvertToNative is required by ref.Val but intentionally unsupported:
// CEL programs never need the values map back as a native Go value.
func (u URLValues) ConvertToNative(typeDesc reflect.Type) (any, error) {
	return nil, ErrNotImplemented
}

// ConvertToType presents this value as a CEL map (or the map type when the
// type itself is requested); any other conversion is an error value.
func (u URLValues) ConvertToType(typeVal ref.Type) ref.Val {
	switch typeVal {
	case types.MapType:
		return u
	case types.TypeType:
		return types.MapType
	}
	return types.NewErr("can't convert from %q to %q", types.MapType, typeVal)
}

// Equal always reports false: comparing whole query-value maps inside
// expressions is deliberately unsupported.
func (u URLValues) Equal(other ref.Val) ref.Val {
	return types.Bool(false) // We don't want to compare header maps
}

// Type reports this value as a CEL map.
func (u URLValues) Type() ref.Type {
	return types.MapType
}

// Value returns the wrapper itself; see ConvertToNative.
func (u URLValues) Value() any { return u }
// Find looks up a query parameter by name (case-sensitive, as url.Values
// keys are) and returns all of its values joined with commas. The second
// return value reports whether the key exists.
//
// Improvement: a single map lookup replaces the previous existence check
// followed by a second lookup for the values.
func (u URLValues) Find(key ref.Val) (ref.Val, bool) {
	k, ok := key.(types.String)
	if !ok {
		return nil, false
	}

	vals, ok := u.Values[string(k)]
	if !ok {
		return nil, false
	}

	return types.String(strings.Join(vals, ",")), true
}
// Contains reports whether a query key is present, backing CEL's `in`
// operator.
func (u URLValues) Contains(key ref.Val) ref.Val {
	_, ok := u.Find(key)
	return types.Bool(ok)
}

// Get returns the joined values for a query key, or an error value when
// the key is absent, backing CEL's index operator (query["..."]).
func (u URLValues) Get(key ref.Val) ref.Val {
	result, ok := u.Find(key)
	if !ok {
		return types.ValOrErr(result, "no such key: %v", key)
	}
	return result
}

// Iterator is not implemented: iterating over query values from a CEL
// program will panic at runtime.
func (u URLValues) Iterator() traits.Iterator { panic("TODO(Xe): implement me") }

// IsZeroValue reports whether there are no query values at all.
func (u URLValues) IsZeroValue() bool {
	return len(u.Values) == 0
}

// Size returns the number of distinct query keys.
func (u URLValues) Size() ref.Val { return types.Int(len(u.Values)) }

View File

@@ -0,0 +1,50 @@
package expressions
import (
"net/url"
"testing"
"github.com/google/cel-go/common/types"
)
// TestURLValues exercises the CEL map traits (Contains/Get) that URLValues
// implements over a url.Values map.
func TestURLValues(t *testing.T) {
	// Fixed: the fixture was misleadingly named `headers` and every failure
	// message was copy-pasted from the HTTP headers test, referring to
	// User-Agent instead of the keys actually under test.
	values := URLValues{
		Values: url.Values{
			"format": {"json"},
		},
	}

	t.Run("contains-existing-key", func(t *testing.T) {
		resp := values.Contains(types.String("format"))
		if !bool(resp.(types.Bool)) {
			t.Fatal("values does not contain format")
		}
	})

	t.Run("not-contains-missing-key", func(t *testing.T) {
		resp := values.Contains(types.String("not-there"))
		if bool(resp.(types.Bool)) {
			t.Fatal("values contains not-there but should not")
		}
	})

	t.Run("get-existing-key", func(t *testing.T) {
		val := values.Get(types.String("format"))
		switch val.(type) {
		case types.String:
			// ok
		default:
			t.Fatalf("result was wrong type %T", val)
		}
	})

	t.Run("not-get-missing-key", func(t *testing.T) {
		val := values.Get(types.String("not-there"))
		switch val.(type) {
		case *types.Err:
			// ok
		default:
			t.Fatalf("result was wrong type %T", val)
		}
	})
}

View File

@@ -24,13 +24,11 @@ type ParsedConfig struct {
Bots []Bot
DNSBL bool
DefaultDifficulty int
StatusCodes config.StatusCodes
}
func NewParsedConfig(orig *config.Config) *ParsedConfig {
return &ParsedConfig{
orig: orig,
StatusCodes: orig.StatusCodes,
orig: orig,
}
}
@@ -94,6 +92,15 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon
}
}
if b.Expression != nil {
c, err := NewCELChecker(b.Expression)
if err != nil {
validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s expressions: %w", b.Name, err))
} else {
cl = append(cl, c)
}
}
if b.Challenge == nil {
parsedBot.Challenge = &config.ChallengeRules{
Difficulty: defaultDifficulty,

View File

@@ -1,12 +0,0 @@
bots:
- name: deny
user_agent_regex: DENY
action: DENY
- name: challenge
user_agent_regex: CHALLENGE
action: CHALLENGE
status_codes:
CHALLENGE: 401
DENY: 403

View File

@@ -0,0 +1,4 @@
bots:
  # Deny requests originating from Cloudflare Workers, detected via the
  # Cf-Worker request header that Cloudflare adds to outbound fetches.
  - name: cloudflare-workers
    expression: '"Cf-Worker" in headers'
    action: DENY

View File

@@ -0,0 +1,5 @@
bots:
- name: cloudflare-workers
headers_regex:
CF-Worker: .*
action: DENY

View File

@@ -1,6 +1,6 @@
{
"name": "@techaro/anubis",
"version": "1.17.1",
"version": "1.17.0",
"description": "",
"main": "index.js",
"scripts": {

View File

@@ -1,12 +0,0 @@
bots:
- name: deny
user_agent_regex: DENY
action: DENY
- name: challenge
user_agent_regex: CHALLENGE
action: CHALLENGE
status_codes:
CHALLENGE: 401
DENY: 403

View File

@@ -0,0 +1,2 @@
bots:
- import: (data)/common/challenge-browser-like.yaml

15
test/apache/Dockerfile Normal file
View File

@@ -0,0 +1,15 @@
FROM httpd:2.4
RUN sed -i \
-e 's/^#\(LoadModule .*mod_ssl.so\)/\1/' \
-e 's/^#\(LoadModule .*mod_rewrite.so\)/\1/' \
-e 's/^#\(LoadModule .*mod_proxy.so\)/\1/' \
-e 's/^#\(LoadModule .*mod_proxy_http.so\)/\1/' \
-e 's/^#\(LoadModule .*mod_socache_shmcb.so\)/\1/' \
-e 's/^#\(LoadModule .*mod_http2.so\)/\1/' \
conf/httpd.conf
RUN echo '' >> conf/httpd.conf \
&& echo 'IncludeOptional conf.d/*.conf' >> conf/httpd.conf
COPY conf.d ./conf.d
COPY snippets /etc/httpd/snippets

View File

@@ -0,0 +1,15 @@
<VirtualHost *:80>
ServerAdmin your@email.here
ServerName httpd.local.cetacean.club
DocumentRoot /var/www/httpd.local.cetacean.club
Include /etc/httpd/snippets/proxy-headers.conf
ProxyPreserveHost On
ProxyRequests Off
ProxyVia Off
ProxyPass / http://httpdebug:3000/
ProxyPassReverse / http://httpdebug:3000/
</VirtualHost>

View File

@@ -0,0 +1,22 @@
<IfModule mod_ssl.c>
<VirtualHost *:443>
ServerAdmin me@xeiaso.net
ServerName httpd.local.cetacean.club
DocumentRoot /var/www/httpd.local.cetacean.club
Protocols h2 http/1.1
SSLCertificateFile /etc/techaro/pki/httpd.local.cetacean.club/cert.pem
SSLCertificateKeyFile /etc/techaro/pki/httpd.local.cetacean.club/key.pem
Include /etc/httpd/snippets/options-ssl-apache.conf
Include /etc/httpd/snippets/proxy-headers.conf
ProxyPreserveHost On
ProxyRequests Off
ProxyVia Off
ProxyPass / http://anubis:3000
ProxyPassReverse / http://anubis:3000
</VirtualHost>
</IfModule>

View File

@@ -0,0 +1 @@
Listen 443 https

View File

@@ -0,0 +1,23 @@
services:
httpd:
image: xxxtest/httpd
build: .
volumes:
- "../shared/www:/var/www/httpd.local.cetacean.club"
- "../pki/httpd.local.cetacean.club:/etc/techaro/pki/httpd.local.cetacean.club/"
ports:
- 8080:80
- 8443:443
anubis:
image: git.xeserv.us/techaro/anubis:cel
environment:
BIND: ":3000"
TARGET: http://httpdebug:3000
POLICY_FNAME: /etc/techaro/anubis/less_paranoid.yaml
volumes:
- ../anubis_configs:/etc/techaro/anubis
httpdebug:
image: ghcr.io/xe/x/httpdebug
pull_policy: always

View File

@@ -0,0 +1,13 @@
SSLEngine on
# Intermediate configuration, tweak to your needs
SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1
SSLCipherSuite ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
SSLHonorCipherOrder off
SSLSessionTickets off
SSLOptions +StrictRequire
# Add vhost name to log entries:
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhost_combined
LogFormat "%v %h %l %u %t \"%r\" %>s %b" vhost_common

View File

@@ -0,0 +1,3 @@
# Request metadata forwarded to Anubis by the Apache test environment.
RequestHeader set "X-Real-Ip" expr=%{REMOTE_ADDR}
# NOTE(review): this snippet is Include'd from both the :80 and :443
# vhosts but hard-codes X-Forwarded-Proto to "https" -- confirm the
# plain-HTTP vhost should really claim https.
RequestHeader set "X-Forwarded-Proto" "https"
# NOTE(review): %{...}s is mod_headers' mod_ssl variable lookup; confirm
# SERVER_PROTOCOL resolves as intended here (the X-Real-Ip line above uses
# the expr= syntax instead).
RequestHeader set "X-Http-Version" "%{SERVER_PROTOCOL}s"

22
test/apache/start.sh Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Spins up the Apache smoke-test environment, minting a throwaway local TLS
# certificate on first run.
#
# Fixed: fail fast on errors so `docker compose up` doesn't run against a
# half-minted certificate when both mkcert and minica fail.
set -euo pipefail

# If the transient local TLS certificate doesn't exist, mint a new one
if [ ! -f ../pki/httpd.local.cetacean.club/cert.pem ]; then
  # Subshell to contain the directory change
  (
    cd ../pki \
      && mkdir -p httpd.local.cetacean.club \
      && \
      # Try using https://github.com/FiloSottile/mkcert for better DevEx,
      # but fall back to using https://github.com/jsha/minica in case
      # you don't have that installed.
      (
        mkcert \
          --cert-file ./httpd.local.cetacean.club/cert.pem \
          --key-file ./httpd.local.cetacean.club/key.pem httpd.local.cetacean.club \
          || go tool minica -domains httpd.local.cetacean.club
      )
  )
fi

docker compose up --build

16
test/caddy/Caddyfile Normal file
View File

@@ -0,0 +1,16 @@
:80 {
reverse_proxy http://anubis:3000 {
header_up X-Real-Ip {remote_host}
header_up X-Http-Version {http.request.proto}
}
}
:443 {
tls /etc/techaro/pki/caddy.local.cetacean.club/cert.pem /etc/techaro/pki/caddy.local.cetacean.club/key.pem
reverse_proxy http://anubis:3000 {
header_up X-Real-Ip {remote_host}
header_up X-Http-Version {http.request.proto}
header_up X-Tls-Version {http.request.tls.version}
}
}

9
test/caddy/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# FROM caddy:2.10.0-builder AS builder
# RUN xcaddy build \
# --with github.com/lolPants/caddy-requestid
FROM caddy:2.10.0 AS run
# COPY --from=builder /usr/bin/caddy /usr/bin/caddy
COPY Caddyfile /etc/caddy/Caddyfile

View File

@@ -0,0 +1,22 @@
services:
caddy:
image: xxxtest/caddy
build: .
ports:
- 8080:80
- 8443:443
volumes:
- "../pki/caddy.local.cetacean.club:/etc/techaro/pki/caddy.local.cetacean.club/"
anubis:
image: git.xeserv.us/techaro/anubis:cel
environment:
BIND: ":3000"
TARGET: http://httpdebug:3000
POLICY_FNAME: /etc/techaro/anubis/less_paranoid.yaml
volumes:
- ../anubis_configs:/etc/techaro/anubis
httpdebug:
image: ghcr.io/xe/x/httpdebug
pull_policy: always

22
test/caddy/start.sh Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Spins up the Caddy smoke-test environment, minting a throwaway local TLS
# certificate on first run.
#
# Fixed: fail fast on errors so `docker compose up` doesn't run against a
# half-minted certificate when both mkcert and minica fail.
set -euo pipefail

# If the transient local TLS certificate doesn't exist, mint a new one
if [ ! -f ../pki/caddy.local.cetacean.club/cert.pem ]; then
  # Subshell to contain the directory change
  (
    cd ../pki \
      && mkdir -p caddy.local.cetacean.club \
      && \
      # Try using https://github.com/FiloSottile/mkcert for better DevEx,
      # but fall back to using https://github.com/jsha/minica in case
      # you don't have that installed.
      (
        mkcert \
          --cert-file ./caddy.local.cetacean.club/cert.pem \
          --key-file ./caddy.local.cetacean.club/key.pem caddy.local.cetacean.club \
          || go tool minica -domains caddy.local.cetacean.club
      )
  )
fi

docker compose up --build

4
test/nginx/Dockerfile Normal file
View File

@@ -0,0 +1,4 @@
FROM nginx
COPY conf.d/ /etc/nginx/conf.d/
COPY snippets /etc/nginx/snippets

View File

@@ -0,0 +1,10 @@
server {
listen 80;
listen [::]:80;
server_name nginx.local.cetacean.club;
location / {
proxy_pass http://anubis:3000;
include snippets/proxy_params;
}
}

View File

@@ -0,0 +1,14 @@
server {
listen 443 ssl http2;
listen [::]:443 ssl http2;
server_name nginx.local.cetacean.club;
ssl_certificate /etc/techaro/pki/nginx.local.cetacean.club/cert.pem;
ssl_certificate_key /etc/techaro/pki/nginx.local.cetacean.club/key.pem;
include snippets/ssl_params;
location / {
proxy_pass http://anubis:3000;
include snippets/proxy_params;
}
}

View File

@@ -0,0 +1,22 @@
services:
httpd:
image: xxxtest/nginx
build: .
volumes:
- "../pki/nginx.local.cetacean.club:/etc/techaro/pki/nginx.local.cetacean.club/"
ports:
- 8080:80
- 8443:443
anubis:
image: git.xeserv.us/techaro/anubis:cel
environment:
BIND: ":3000"
TARGET: http://httpdebug:3000
POLICY_FNAME: /etc/techaro/anubis/less_paranoid.yaml
volumes:
- ../anubis_configs:/etc/techaro/anubis
httpdebug:
image: ghcr.io/xe/x/httpdebug
pull_policy: always

View File

@@ -0,0 +1,7 @@
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Http-Version $server_protocol;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Request-Id $request_id;
proxy_set_header X-Forwarded-Host $host;

View File

@@ -0,0 +1,11 @@
ssl_protocols TLSv1.3;
ssl_prefer_server_ciphers on;
ssl_ciphers EECDH+AESGCM:EDH+AESGCM;
ssl_ecdh_curve secp384r1;
ssl_session_timeout 10m;
ssl_session_cache shared:SSL:10m;
ssl_session_tickets off;
ssl_stapling on;
ssl_stapling_verify on;
resolver 8.8.8.8 8.8.4.4 valid=300s;
resolver_timeout 5s;

22
test/nginx/start.sh Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Spins up the nginx smoke-test environment, minting a throwaway local TLS
# certificate on first run.
#
# Fixed: fail fast on errors so `docker compose up` doesn't run against a
# half-minted certificate when both mkcert and minica fail.
set -euo pipefail

# If the transient local TLS certificate doesn't exist, mint a new one
if [ ! -f ../pki/nginx.local.cetacean.club/cert.pem ]; then
  # Subshell to contain the directory change
  (
    cd ../pki \
      && mkdir -p nginx.local.cetacean.club \
      && \
      # Try using https://github.com/FiloSottile/mkcert for better DevEx,
      # but fall back to using https://github.com/jsha/minica in case
      # you don't have that installed.
      (
        mkcert \
          --cert-file ./nginx.local.cetacean.club/cert.pem \
          --key-file ./nginx.local.cetacean.club/key.pem nginx.local.cetacean.club \
          || go tool minica -domains nginx.local.cetacean.club
      )
  )
fi

docker compose up --build

View File

@@ -37,7 +37,6 @@ go run ../cmd/unixhttpd &
go tool anubis \
--bind=./anubis.sock \
--bind-network=unix \
--policy-fname=../anubis_configs/aggressive_403.yaml \
--target=unix://$(pwd)/unixhttpd.sock &
# A simple TLS terminator that forwards to Anubis, which will forward to

View File

@@ -1,30 +0,0 @@
/**
 * Issues a request to the local test relay with the given User-Agent and
 * returns the HTTP status code Anubis responded with.
 *
 * @param {string} userAgent - User-Agent header value to send.
 * @returns {Promise<number>} The HTTP response status code.
 */
async function testWithUserAgent(userAgent) {
  const resp = await fetch("https://relayd.local.cetacean.club:3004/reqmeta", {
    headers: {
      "User-Agent": userAgent,
    },
  });
  return resp.status;
}

// The three probes are independent, so run them in parallel instead of
// awaiting each one sequentially. (Also fixes a missing semicolon after
// the `codes` object literal that relied on ASI.)
const [allow, challenge, deny] = await Promise.all([
  testWithUserAgent("ALLOW"),
  testWithUserAgent("CHALLENGE"),
  testWithUserAgent("DENY"),
]);

const codes = { allow, challenge, deny };

// Expected status codes per rule action (see the bot policy under test).
const expected = {
  allow: 200,
  challenge: 401,
  deny: 403,
};

console.log("ALLOW: ", codes.allow);
console.log("CHALLENGE:", codes.challenge);
console.log("DENY: ", codes.deny);

if (JSON.stringify(codes) !== JSON.stringify(expected)) {
  throw new Error(`wanted ${JSON.stringify(expected)}, got: ${JSON.stringify(codes)}`);
}