From c638653172700f06f494bc7dd8d3ca8d11af5cc9 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Mon, 9 Jun 2025 15:25:04 -0400 Subject: [PATCH] feat(lib): implement request weight (#621) * feat(lib): implement request weight Replaces #608 This is a big one and will be what makes Anubis a generic web application firewall. This introduces the WEIGH option, allowing administrators to have facets of request metadata add or remove "weight", or the level of suspicion. This really makes Anubis weigh the soul of requests. Signed-off-by: Xe Iaso * fix(lib): maintain legacy challenge behavior Signed-off-by: Xe Iaso * fix(lib): make weight have dedicated checkers for the hashes Signed-off-by: Xe Iaso * feat(data): convert some rules over to weight points Signed-off-by: Xe Iaso * docs: document request weight Signed-off-by: Xe Iaso * fix(CHANGELOG): spelling error Signed-off-by: Xe Iaso * chore: spelling Signed-off-by: Xe Iaso * docs: fix links to challenge information Signed-off-by: Xe Iaso * docs(policies): fix formatting Signed-off-by: Xe Iaso * fix(config): make default weight adjustment 5 Signed-off-by: Xe Iaso --------- Signed-off-by: Xe Iaso --- .github/actions/spelling/expect.txt | 2 + data/botPolicies.yaml | 4 +- data/bots/aggressive-brazilian-scrapers.yaml | 48 ++++++++-------- data/bots/cloudflare-workers.yaml | 4 +- .../small-internet-browsers/_permissive.yaml | 2 + .../small-internet-browsers/netsurf.yaml | 5 ++ .../small-internet-browsers/palemoon.yaml | 5 ++ data/clients/x-firefox-ai.yaml | 4 +- data/common/allow-private-addresses.yaml | 12 ++-- docs/docs/CHANGELOG.md | 4 +- docs/docs/admin/policies.mdx | 36 ++++++++++++ lib/anubis.go | 55 +++++++++++++++++-- lib/policy/bot.go | 1 + lib/policy/checker.go | 14 +++++ lib/policy/checkresult.go | 9 ++- lib/policy/config/config.go | 20 ++++--- lib/policy/config/config_test.go | 19 +++++++ lib/policy/config/expressionorlist.go | 4 +- .../config/testdata/good/simple-weight.yaml | 6 ++ .../testdata/good/weight-no-weight.yaml 
| 4 ++ lib/policy/config/weight.go | 5 ++ lib/policy/policy.go | 4 ++ 22 files changed, 214 insertions(+), 53 deletions(-) create mode 100644 data/clients/small-internet-browsers/_permissive.yaml create mode 100644 data/clients/small-internet-browsers/netsurf.yaml create mode 100644 data/clients/small-internet-browsers/palemoon.yaml create mode 100644 lib/policy/config/testdata/good/simple-weight.yaml create mode 100644 lib/policy/config/testdata/good/weight-no-weight.yaml create mode 100644 lib/policy/config/weight.go diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 651d7886..843b4a75 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -158,6 +158,7 @@ Mojeek mojeekbot mozilla nbf +netsurf nginx nobots NONINFRINGEMENT @@ -170,6 +171,7 @@ onionservice openai openrc pag +palemoon Pangu parseable passthrough diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 5e444fe5..78fb0876 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -55,7 +55,9 @@ bots: - name: generic-browser user_agent_regex: >- Mozilla|Opera - action: CHALLENGE + action: WEIGH + weight: + adjust: 10 dnsbl: false diff --git a/data/bots/aggressive-brazilian-scrapers.yaml b/data/bots/aggressive-brazilian-scrapers.yaml index 140811a7..ce0d7c94 100644 --- a/data/bots/aggressive-brazilian-scrapers.yaml +++ b/data/bots/aggressive-brazilian-scrapers.yaml @@ -1,28 +1,26 @@ - name: deny-aggressive-brazilian-scrapers - action: DENY + action: WEIGH + weight: + adjust: 20 expression: any: - # Internet Explorer should be out of support - - userAgent.contains("MSIE") - # Trident is the Internet Explorer browser engine - - userAgent.contains("Trident") - # Opera is a fork of chrome now - - userAgent.contains("Presto") - # Windows CE is discontinued - - userAgent.contains("Windows CE") - # Windows 95 is discontinued - - userAgent.contains("Windows 95") - # Windows 98 is discontinued - - 
userAgent.contains("Windows 98") - # Windows 9.x is discontinued - - userAgent.contains("Win 9x") - # Amazon does not have an Alexa Toolbar. - - userAgent.contains("Alexa Toolbar") -- name: challenge-aggressive-brazilian-scrapers - action: CHALLENGE - expression: - any: - # This is not released, even Windows 11 calls itself Windows 10 - - userAgent.contains("Windows NT 11.0") - # iPods are not in common use - - userAgent.contains("iPod") \ No newline at end of file + # Internet Explorer should be out of support + - userAgent.contains("MSIE") + # Trident is the Internet Explorer browser engine + - userAgent.contains("Trident") + # Opera is a fork of chrome now + - userAgent.contains("Presto") + # Windows CE is discontinued + - userAgent.contains("Windows CE") + # Windows 95 is discontinued + - userAgent.contains("Windows 95") + # Windows 98 is discontinued + - userAgent.contains("Windows 98") + # Windows 9.x is discontinued + - userAgent.contains("Win 9x") + # Amazon does not have an Alexa Toolbar. 
+ - userAgent.contains("Alexa Toolbar") + # This is not released, even Windows 11 calls itself Windows 10 + - userAgent.contains("Windows NT 11.0") + # iPods are not in common use + - userAgent.contains("iPod") diff --git a/data/bots/cloudflare-workers.yaml b/data/bots/cloudflare-workers.yaml index 3fe051be..86683fe8 100644 --- a/data/bots/cloudflare-workers.yaml +++ b/data/bots/cloudflare-workers.yaml @@ -1,4 +1,6 @@ - name: cloudflare-workers headers_regex: CF-Worker: .* - action: DENY \ No newline at end of file + action: WEIGH + weight: + adjust: 15 diff --git a/data/clients/small-internet-browsers/_permissive.yaml b/data/clients/small-internet-browsers/_permissive.yaml new file mode 100644 index 00000000..395a28f0 --- /dev/null +++ b/data/clients/small-internet-browsers/_permissive.yaml @@ -0,0 +1,2 @@ +- import: (data)/clients/small-internet-browsers/netsurf.yaml +- import: (data)/clients/small-internet-browsers/palemoon.yaml diff --git a/data/clients/small-internet-browsers/netsurf.yaml b/data/clients/small-internet-browsers/netsurf.yaml new file mode 100644 index 00000000..413f965e --- /dev/null +++ b/data/clients/small-internet-browsers/netsurf.yaml @@ -0,0 +1,5 @@ +- name: "reduce-weight-netsurf" + user_agent_regex: "NetSurf" + action: WEIGH + weight: + adjust: -5 diff --git a/data/clients/small-internet-browsers/palemoon.yaml b/data/clients/small-internet-browsers/palemoon.yaml new file mode 100644 index 00000000..876aa026 --- /dev/null +++ b/data/clients/small-internet-browsers/palemoon.yaml @@ -0,0 +1,5 @@ +- name: "reduce-weight-palemoon" + user_agent_regex: "PaleMoon" + action: WEIGH + weight: + adjust: -5 diff --git a/data/clients/x-firefox-ai.yaml b/data/clients/x-firefox-ai.yaml index 3a1e3b7f..c87e14d9 100644 --- a/data/clients/x-firefox-ai.yaml +++ b/data/clients/x-firefox-ai.yaml @@ -1,4 +1,6 @@ # https://connect.mozilla.org/t5/firefox-labs/try-out-link-previews-in-firefox-labs-138-and-share-your/td-p/92012 - name: x-firefox-ai - action: 
CHALLENGE + action: WEIGH expression: '"X-Firefox-Ai" in headers' + weight: + adjust: 5 diff --git a/data/common/allow-private-addresses.yaml b/data/common/allow-private-addresses.yaml index 3a3c0dc1..694a806c 100644 --- a/data/common/allow-private-addresses.yaml +++ b/data/common/allow-private-addresses.yaml @@ -1,15 +1,15 @@ - name: ipv4-rfc-1918 action: ALLOW remote_addresses: - - 10.0.0.0/8 - - 172.16.0.0/12 - - 192.168.0.0/16 - - 100.64.0.0/10 + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 + - 100.64.0.0/10 - name: ipv6-ula action: ALLOW remote_addresses: - - fc00::/7 + - fc00::/7 - name: ipv6-link-local action: ALLOW remote_addresses: - - fe80::/10 \ No newline at end of file + - fe80::/10 diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 2aba60db..0011ba9b 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -10,11 +10,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] + - Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in development - - - Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not +- Requests can have their weight be adjusted, if a request weighs zero or less then it is allowed through - Refactor challenge presentation logic to use a challenge registry - Allow challenge implementations to register HTTP routes - Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95)) diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index 8722d9ec..d5acb712 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -244,3 +244,39 @@ In case your service needs it for risk calculation reasons, Anubis exposes infor | `X-Anubis-Status` | The status and how strict Anubis was in its checks | `PASS` | Policy rules are matched using [Go's standard library regular expressions package](https://pkg.go.dev/regexp). You can mess around with the syntax at [regex101.com](https://regex101.com), make sure to select the Golang option. + +## Request Weight + +Anubis rules can also add or remove "weight" from requests, allowing administrators to configure custom levels of suspicion. For example, if your application uses session tokens named `i_love_gitea`: + +```yaml +- name: gitea-session-token + action: WEIGH + expression: + all: + - '"Cookie" in headers' + - headers["Cookie"].contains("i_love_gitea=") + # Remove 5 weight points + weight: + adjust: -5 +``` + +This would remove five weight points from the request, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx). 
+ +### Weight Thresholds + +Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch; for now, here's how Anubis configures the weight thresholds: + +| Weight Expression | Action | +| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- | +| `weight <= 0` (weight is less than or equal to 0) | Allow the request through. | +| `weight < 10` (weight is less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. | +| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. | + +### Advice + +Weight is still very new and needs work. This is an experimental feature and should be treated as such. Here's some advice to help you better tune requests: + +- The default weight for browser-like clients is 10. This triggers an aggressive challenge. +- Remove and add weight in multiples of five. +- Be careful with how you configure weight. 
diff --git a/lib/anubis.go b/lib/anubis.go index 3e406806..499747ae 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -402,13 +402,20 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, redir, http.StatusFound) } -func cr(name string, rule config.Rule) policy.CheckResult { +func cr(name string, rule config.Rule, weight int) policy.CheckResult { return policy.CheckResult{ - Name: name, - Rule: rule, + Name: name, + Rule: rule, + Weight: weight, } } +var ( + weightOkayStatic = policy.NewStaticHashChecker("weight/okay") + weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion") + weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion") +) + // Check evaluates the list of rules, and returns the result func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) { host := r.Header.Get("X-Real-Ip") @@ -421,6 +428,8 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) return decaymap.Zilch[policy.CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host) } + weight := 0 + for _, b := range s.policy.Bots { match, err := b.Rules.Check(r) if err != nil { @@ -428,11 +437,47 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) } if match { - return cr("bot/"+b.Name, b.Action), &b, nil + switch b.Action { + case config.RuleDeny, config.RuleAllow, config.RuleBenchmark, config.RuleChallenge: + return cr("bot/"+b.Name, b.Action, weight), &b, nil + case config.RuleWeigh: + slog.Debug("adjusting weight", "name", b.Name, "delta", b.Weight.Adjust) + weight += b.Weight.Adjust + } } } - return cr("default/allow", config.RuleAllow), &policy.Bot{ + switch { + case weight <= 0: + return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: config.DefaultAlgorithm, + }, 
+ Rules: weightOkayStatic, + }, nil + case weight > 0 && weight < 10: + return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: "metarefresh", + }, + Rules: weightMildSusStatic, + }, nil + case weight >= 10: + return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{ + Challenge: &config.ChallengeRules{ + Difficulty: s.policy.DefaultDifficulty, + ReportAs: s.policy.DefaultDifficulty, + Algorithm: "fast", + }, + Rules: weightVerySusStatic, + }, nil + } + + return cr("default/allow", config.RuleAllow, weight), &policy.Bot{ Challenge: &config.ChallengeRules{ Difficulty: s.policy.DefaultDifficulty, ReportAs: s.policy.DefaultDifficulty, diff --git a/lib/policy/bot.go b/lib/policy/bot.go index 3e7a63ad..61075246 100644 --- a/lib/policy/bot.go +++ b/lib/policy/bot.go @@ -12,6 +12,7 @@ type Bot struct { Challenge *config.ChallengeRules Name string Action config.Rule + Weight *config.Weight } func (b Bot) Hash() string { diff --git a/lib/policy/checker.go b/lib/policy/checker.go index dd6deb17..447a7adb 100644 --- a/lib/policy/checker.go +++ b/lib/policy/checker.go @@ -47,6 +47,20 @@ func (cl CheckerList) Hash() string { return internal.SHA256sum(sb.String()) } +type staticHashChecker struct { + hash string +} + +func (staticHashChecker) Check(r *http.Request) (bool, error) { + return true, nil +} + +func (s staticHashChecker) Hash() string { return s.hash } + +func NewStaticHashChecker(hashable string) Checker { + return staticHashChecker{hash: internal.SHA256sum(hashable)} +} + type RemoteAddrChecker struct { ranger cidranger.Ranger hash string diff --git a/lib/policy/checkresult.go b/lib/policy/checkresult.go index c84f326f..31737dda 100644 --- a/lib/policy/checkresult.go +++ b/lib/policy/checkresult.go @@ -7,12 +7,15 @@ import ( ) type CheckResult struct { - Name string - Rule config.Rule + 
Name string + Rule config.Rule + Weight int } func (cr CheckResult) LogValue() slog.Value { return slog.GroupValue( slog.String("name", cr.Name), - slog.String("rule", string(cr.Rule))) + slog.String("rule", string(cr.Rule)), + slog.Int("weight", cr.Weight), + ) } diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 0bf46f7e..d140549f 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -39,20 +39,22 @@ const ( RuleAllow Rule = "ALLOW" RuleDeny Rule = "DENY" RuleChallenge Rule = "CHALLENGE" + RuleWeigh Rule = "WEIGH" RuleBenchmark Rule = "DEBUG_BENCHMARK" ) const DefaultAlgorithm = "fast" type BotConfig struct { - UserAgentRegex *string `json:"user_agent_regex"` - PathRegex *string `json:"path_regex"` - HeadersRegex map[string]string `json:"headers_regex"` - Expression *ExpressionOrList `json:"expression"` + UserAgentRegex *string `json:"user_agent_regex,omitempty"` + PathRegex *string `json:"path_regex,omitempty"` + HeadersRegex map[string]string `json:"headers_regex,omitempty"` + Expression *ExpressionOrList `json:"expression,omitempty"` Challenge *ChallengeRules `json:"challenge,omitempty"` + Weight *Weight `json:"weight,omitempty"` Name string `json:"name"` Action Rule `json:"action"` - RemoteAddr []string `json:"remote_addresses"` + RemoteAddr []string `json:"remote_addresses,omitempty"` } func (b BotConfig) Zero() bool { @@ -73,7 +75,7 @@ func (b BotConfig) Zero() bool { return true } -func (b BotConfig) Valid() error { +func (b *BotConfig) Valid() error { var errs []error if b.Name == "" { @@ -144,7 +146,7 @@ func (b BotConfig) Valid() error { } switch b.Action { - case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny: + case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny, RuleWeigh: // okay default: errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action)) @@ -156,6 +158,10 @@ func (b BotConfig) Valid() error { } } + if b.Action == RuleWeigh && b.Weight == nil { + b.Weight = &Weight{Adjust: 5} 
+ } + if len(errs) != 0 { return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(errs...)) } diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index be603b05..0a77e04a 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -168,6 +168,25 @@ func TestBotValid(t *testing.T) { }, err: nil, }, + { + name: "weight rule without weight", + bot: BotConfig{ + Name: "weight-adjust-if-mozilla", + Action: RuleWeigh, + UserAgentRegex: p("Mozilla"), + }, + }, + { + name: "weight rule with weight adjust", + bot: BotConfig{ + Name: "weight-adjust-if-mozilla", + Action: RuleWeigh, + UserAgentRegex: p("Mozilla"), + Weight: &Weight{ + Adjust: 5, + }, + }, + }, } for _, cs := range tests { diff --git a/lib/policy/config/expressionorlist.go b/lib/policy/config/expressionorlist.go index 7088c8dd..8851c5b1 100644 --- a/lib/policy/config/expressionorlist.go +++ b/lib/policy/config/expressionorlist.go @@ -14,8 +14,8 @@ var ( type ExpressionOrList struct { Expression string `json:"-"` - All []string `json:"all"` - Any []string `json:"any"` + All []string `json:"all,omitempty"` + Any []string `json:"any,omitempty"` } func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool { diff --git a/lib/policy/config/testdata/good/simple-weight.yaml b/lib/policy/config/testdata/good/simple-weight.yaml new file mode 100644 index 00000000..ec7a92e9 --- /dev/null +++ b/lib/policy/config/testdata/good/simple-weight.yaml @@ -0,0 +1,6 @@ +bots: + - name: simple-weight-adjust + action: WEIGH + user_agent_regex: Mozilla + weight: + adjust: 5 diff --git a/lib/policy/config/testdata/good/weight-no-weight.yaml b/lib/policy/config/testdata/good/weight-no-weight.yaml new file mode 100644 index 00000000..f137f8ac --- /dev/null +++ b/lib/policy/config/testdata/good/weight-no-weight.yaml @@ -0,0 +1,4 @@ +bots: + - name: weight + action: WEIGH + user_agent_regex: Mozilla diff --git a/lib/policy/config/weight.go 
b/lib/policy/config/weight.go new file mode 100644 index 00000000..f408111b --- /dev/null +++ b/lib/policy/config/weight.go @@ -0,0 +1,5 @@ +package config + +type Weight struct { + Adjust int `json:"adjust"` +} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 7183d639..d67ca1c0 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -117,6 +117,10 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon } } + if b.Weight != nil { + parsedBot.Weight = b.Weight + } + parsedBot.Rules = cl result.Bots = append(result.Bots, parsedBot)