Compare commits

..

2 Commits

Author SHA1 Message Date
Xe Iaso
50e48df993 fix(Dockerfile): add HEALTHCHECK
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-06-13 15:24:42 -04:00
Xe Iaso
5e38c7d730 feat: build with docker buildx bake
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-06-10 09:00:57 -04:00
93 changed files with 302 additions and 3634 deletions

View File

@@ -9,4 +9,4 @@ exclude_dir = ["var", "vendor", "docs", "node_modules"]
[logger]
time = true
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address

25
.dockerignore Normal file
View File

@@ -0,0 +1,25 @@
.env
*.deb
*.rpm
# Additional package locks
pnpm-lock.yaml
yarn.lock
# Go binaries and test artifacts
main
*.test
node_modules
# MacOS
.DS_store
# Intellij
.idea
# how does this get here
doc/VERSION
web/static/js/*
!web/static/js/.gitignore

2
.gitattributes vendored
View File

@@ -1 +1 @@
**/*_templ.go linguist-generated=true
web/index_templ.go linguist-generated

View File

@@ -2,4 +2,4 @@ github
https
ssh
ubuntu
workarounds
workarounds

View File

@@ -83,7 +83,6 @@
^\Q.github/FUNDING.yml\E$
^\Q.github/workflows/spelling.yml\E$
^data/crawlers/
^docs/manifest/.*$
^docs/static/\.nojekyll$
ignore$
robots.txt

View File

@@ -6,13 +6,8 @@ amazonbot
anthro
anubis
anubistest
apk
Applebot
archlinux
asnc
asnchecker
asns
aspirational
badregexes
bdba
berr
@@ -28,7 +23,6 @@ Brightbot
broked
Bytespider
cachebuster
cachediptoasn
Caddyfile
caninetools
Cardyb
@@ -36,7 +30,6 @@ celchecker
CELPHASE
cerr
certresolver
cespare
CGNAT
cgr
chainguard
@@ -55,7 +48,6 @@ coreutils
Cotoyogi
CRDs
crt
Cscript
daemonizing
DDOS
Debian
@@ -94,24 +86,17 @@ Fordola
forgejo
fsys
fullchain
gaissmai
Galvus
geoip
geoipchecker
gha
gipc
gitea
godotenv
goland
gomod
goodbot
googlebot
govulncheck
goyaml
GPG
GPT
gptbot
grpcprom
grw
Hashcash
hashrate
@@ -121,10 +106,8 @@ hebis
hec
hmc
hostable
htmlc
htmx
httpdebug
Huawei
hypertext
iaskspider
iat
@@ -132,14 +115,11 @@ ifm
Imagesift
imgproxy
inp
IPTo
iptoasn
iss
isset
ivh
Jenomis
JGit
joho
journalctl
jshelter
JWTs
@@ -187,6 +167,7 @@ OCOB
ogtags
omgili
omgilibot
onionservice
openai
openrc
pag
@@ -214,7 +195,6 @@ qualys
qwant
qwantbot
rac
rawler
rcvar
redir
redirectscheme
@@ -234,7 +214,6 @@ sebest
secretplans
selfsigned
Semrush
Seo
setsebool
shellcheck
Sidetrade
@@ -255,22 +234,18 @@ subrequest
SVCNAME
tagline
tarballs
tarrif
techaro
techarohq
templ
templruntime
testarea
thoth
thothmock
Tik
Timpibot
torproject
traefik
uberspace
unixhttpd
unmarshal
unparseable
uuidgen
uvx
UXP
Varis
@@ -286,7 +261,6 @@ websecure
websites
Webzio
wildbase
withthothmock
wordpress
Workaround
workdir

View File

@@ -131,4 +131,4 @@ go install(?:\s+[a-z]+\.[-@\w/.]+)+
# hit-count: 1 file-count: 1
# microsoft
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*

View File

@@ -3,8 +3,8 @@ name: Docker image builds
on:
workflow_dispatch:
push:
branches: ["main"]
tags: ["v*"]
branches: [ "main" ]
tags: [ "v*" ]
env:
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
@@ -55,7 +55,7 @@ jobs:
run: |
brew bundle
- name: Log into registry
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
@@ -77,8 +77,9 @@ jobs:
DOCKER_REPO: ${{ env.IMAGE }}
SLOG_LEVEL: debug
- name: Generate artifact attestation
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
uses: actions/attest-build-provenance@db473fddc028af60658334401dc6fa3ffd8669fd # v2.3.0
with:
subject-name: ${{ env.IMAGE }}
subject-digest: ${{ steps.build.outputs.digest }}

View File

@@ -1,37 +0,0 @@
name: Regenerate ssh ci runner image
on:
# pull_request:
# branches: ["main"]
schedule:
- cron: "0 0 1,8,15,22 * *"
workflow_dispatch:
permissions:
pull-requests: write
contents: write
packages: write
jobs:
ssh-ci-rebuild:
if: github.repository == 'TecharoHQ/anubis'
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push
run: |
cd ./test/ssh-ci
docker buildx bake --push

View File

@@ -1,37 +0,0 @@
name: SSH CI
on:
push:
branches: ["main"]
# pull_request:
# branches: ["main"]
permissions:
contents: read
jobs:
ssh:
if: github.repository == 'TecharoHQ/anubis'
runs-on: ubuntu-24.04
strategy:
matrix:
host:
- ubuntu@riscv64.techaro.lol
- ci@ppc64le.techaro.lol
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Install CI target SSH key
uses: shimataro/ssh-key-action@d4fffb50872869abe2d9a9098a6d9c5aa7d16be4 # v2.7.0
with:
key: ${{ secrets.CI_SSH_KEY }}
name: id_rsa
known_hosts: ${{ secrets.CI_SSH_KNOWN_HOSTS }}
- name: Run CI
run: bash test/ssh-ci/rigging.sh ${{ matrix.host }}
env:
GITHUB_RUN_ID: ${{ github.run_id }}

View File

@@ -29,7 +29,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0
uses: github/codeql-action/upload-sarif@fca7ace96b7d713c7035871441bd52efbe39e27e # v3.28.19
with:
sarif_file: results.sarif
category: zizmor

19
.vscode/settings.json vendored
View File

@@ -11,24 +11,5 @@
"zig": false,
"javascript": false,
"properties": false
},
"[markdown]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"[mdx]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"[nunjucks]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"cSpell.enabledFileTypes": {
"mdx": true,
"md": true
}
}

30
Dockerfile Normal file
View File

@@ -0,0 +1,30 @@
ARG ALPINE_VERSION=edge
FROM --platform=${BUILDPLATFORM} alpine:${ALPINE_VERSION} AS build
ARG TARGETOS
ARG TARGETARCH
ARG COMPONENT=anubis
ARG VERSION=devel-docker
RUN apk -U add go nodejs git build-base git npm bash zstd brotli gzip
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN --mount=type=cache,target=/root/.cache npm ci && npm run assets
RUN --mount=type=cache,target=/root/.cache GOOS=${TARGETOS} GOARCH=${TARGETARCH} CGO_ENABLED=0 GOARM=7 go build -gcflags "all=-N -l" -o /app/bin/${COMPONENT} -ldflags "-s -w -extldflags -static -X github.com/TecharoHQ/anubis.Version=${VERSION}" ./cmd/${COMPONENT}
FROM alpine:${ALPINE_VERSION} AS run
WORKDIR /app
RUN apk -U add ca-certificates mailcap
COPY --from=build /app/bin/anubis /app/bin/anubis
CMD ["/app/bin/anubis"]
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD [ "/app/bin/anubis", "--healthcheck" ]
LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis"

View File

@@ -30,13 +30,11 @@ import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/thoth"
libanubis "github.com/TecharoHQ/anubis/lib"
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/web"
"github.com/facebookgo/flagenv"
_ "github.com/joho/godotenv/autoload"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
@@ -57,7 +55,6 @@ var (
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
redirectDomains = flag.String("redirect-domains", "", "list of domains separated by commas which anubis is allowed to redirect to. Leaving this unset allows any domain.")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
stripBasePrefix = flag.Bool("strip-base-prefix", false, "if true, strips the base prefix from requests forwarded to the target server")
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to, set to an empty string to disable proxying when only using auth request")
targetSNI = flag.String("target-sni", "", "if set, the value of the TLS handshake hostname when forwarding requests to the target")
targetHost = flag.String("target-host", "", "if set, the value of the Host header when forwarding requests to the target")
@@ -72,10 +69,6 @@ var (
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
versionFlag = flag.Bool("version", false, "print Anubis version")
xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For")
thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to")
thothURL = flag.String("thoth-url", "", "if set, URL for Thoth, the IP reputation database for Anubis")
thothToken = flag.String("thoth-token", "", "if set, API token for Thoth, the IP reputation database for Anubis")
)
func keyFromHex(value string) (ed25519.PrivateKey, error) {
@@ -239,25 +232,7 @@ func main() {
}
}
ctx := context.Background()
// Thoth configuration
switch {
case *thothURL != "" && *thothToken == "":
slog.Warn("THOTH_URL is set but no THOTH_TOKEN is set")
case *thothURL == "" && *thothToken != "":
slog.Warn("THOTH_TOKEN is set but no THOTH_URL is set")
case *thothURL != "" && *thothToken != "":
slog.Debug("connecting to Thoth")
thothClient, err := thoth.New(ctx, *thothURL, *thothToken, *thothInsecure)
if err != nil {
log.Fatalf("can't dial thoth at %s: %v", *thothURL, err)
}
ctx = thoth.With(ctx, thothClient)
}
policy, err := libanubis.LoadPoliciesOrDefault(ctx, *policyFname, *challengeDifficulty)
policy, err := libanubis.LoadPoliciesOrDefault(*policyFname, *challengeDifficulty)
if err != nil {
log.Fatalf("can't parse policy file: %v", err)
}
@@ -285,10 +260,6 @@ func main() {
} else if strings.HasSuffix(*basePrefix, "/") {
log.Fatalf("[misconfiguration] base-prefix must not end with a slash")
}
if *stripBasePrefix && *basePrefix == "" {
log.Fatalf("[misconfiguration] strip-base-prefix is set to true, but base-prefix is not set, " +
"this may result in unexpected behavior")
}
var priv ed25519.PrivateKey
if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
@@ -333,7 +304,6 @@ func main() {
s, err := libanubis.New(libanubis.Options{
BasePrefix: *basePrefix,
StripBasePrefix: *stripBasePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,

View File

@@ -1,78 +0,0 @@
/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
*/
package main
import (
"fmt"
"io/fs"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
func main() {
if len(os.Args) < 2 {
fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
fmt.Println("Example: go run batch_process.go ./cleaned")
os.Exit(1)
}
cleanedDir := os.Args[1]
outputDir := "generated_policies"
// Create output directory
if err := os.MkdirAll(outputDir, 0755); err != nil {
log.Fatalf("Failed to create output directory: %v", err)
}
count := 0
err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
// Skip directories
if d.IsDir() {
return nil
}
// Generate policy name from file path
relPath, _ := filepath.Rel(cleanedDir, path)
policyName := strings.ReplaceAll(relPath, "/", "-")
policyName = strings.TrimSuffix(policyName, "-robots.txt")
policyName = strings.ReplaceAll(policyName, ".", "-")
outputFile := filepath.Join(outputDir, policyName+".yaml")
cmd := exec.Command("go", "run", "main.go",
"-input", path,
"-output", outputFile,
"-name", policyName,
"-format", "yaml")
if err := cmd.Run(); err != nil {
fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
return nil // Continue processing other files
}
count++
if count%100 == 0 {
fmt.Printf("Processed %d files...\n", count)
} else if count%10 == 0 {
fmt.Print(".")
}
return nil
})
if err != nil {
log.Fatalf("Error walking directory: %v", err)
}
fmt.Printf("Successfully processed %d robots.txt files\n", count)
fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}

View File

@@ -1,313 +0,0 @@
package main
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"regexp"
"strings"
"github.com/TecharoHQ/anubis/lib/policy/config"
"sigs.k8s.io/yaml"
)
var (
inputFile = flag.String("input", "", "path to robots.txt file (use - for stdin)")
outputFile = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
outputFormat = flag.String("format", "yaml", "output format: yaml or json")
baseAction = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
crawlDelay = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
policyName = flag.String("name", "robots-txt-policy", "name for the generated policy")
userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
helpFlag = flag.Bool("help", false, "show help")
)
type RobotsRule struct {
UserAgent string
Disallows []string
Allows []string
CrawlDelay int
IsBlacklist bool // true if this is a specifically denied user agent
}
type AnubisRule struct {
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
Name string `yaml:"name" json:"name"`
Action string `yaml:"action" json:"action"`
}
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, "%s [options] -input <robots.txt>\n\n", os.Args[0])
flag.PrintDefaults()
fmt.Fprintln(os.Stderr, "\nExamples:")
fmt.Fprintln(os.Stderr, " # Convert local robots.txt file")
fmt.Fprintln(os.Stderr, " robots2policy -input robots.txt -output policy.yaml")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " # Convert from URL")
fmt.Fprintln(os.Stderr, " robots2policy -input https://example.com/robots.txt -format json")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " # Read from stdin, write to stdout")
fmt.Fprintln(os.Stderr, " curl https://example.com/robots.txt | robots2policy -input -")
os.Exit(2)
}
}
func main() {
flag.Parse()
if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
flag.Usage()
}
// Read robots.txt
var input io.Reader
if *inputFile == "-" {
input = os.Stdin
} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
resp, err := http.Get(*inputFile)
if err != nil {
log.Fatalf("failed to fetch robots.txt from URL: %v", err)
}
defer resp.Body.Close()
input = resp.Body
} else {
file, err := os.Open(*inputFile)
if err != nil {
log.Fatalf("failed to open input file: %v", err)
}
defer file.Close()
input = file
}
// Parse robots.txt
rules, err := parseRobotsTxt(input)
if err != nil {
log.Fatalf("failed to parse robots.txt: %v", err)
}
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Check if any rules were generated
if len(anubisRules) == 0 {
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
}
// Generate output
var output []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
output, err = yaml.Marshal(anubisRules)
case "json":
output, err = json.MarshalIndent(anubisRules, "", " ")
default:
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
}
if err != nil {
log.Fatalf("failed to marshal output: %v", err)
}
// Write output
if *outputFile == "" || *outputFile == "-" {
fmt.Print(string(output))
} else {
err = os.WriteFile(*outputFile, output, 0644)
if err != nil {
log.Fatalf("failed to write output file: %v", err)
}
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
}
}
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
scanner := bufio.NewScanner(input)
var rules []RobotsRule
var currentRule *RobotsRule
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// Skip empty lines and comments
if line == "" || strings.HasPrefix(line, "#") {
continue
}
// Split on first colon
parts := strings.SplitN(line, ":", 2)
if len(parts) != 2 {
continue
}
directive := strings.TrimSpace(strings.ToLower(parts[0]))
value := strings.TrimSpace(parts[1])
switch directive {
case "user-agent":
// Start a new rule section
if currentRule != nil {
rules = append(rules, *currentRule)
}
currentRule = &RobotsRule{
UserAgent: value,
Disallows: make([]string, 0),
Allows: make([]string, 0),
}
case "disallow":
if currentRule != nil && value != "" {
currentRule.Disallows = append(currentRule.Disallows, value)
}
case "allow":
if currentRule != nil && value != "" {
currentRule.Allows = append(currentRule.Allows, value)
}
case "crawl-delay":
if currentRule != nil {
if delay, err := parseIntSafe(value); err == nil {
currentRule.CrawlDelay = delay
}
}
}
}
// Don't forget the last rule
if currentRule != nil {
rules = append(rules, *currentRule)
}
// Mark blacklisted user agents (those with "Disallow: /")
for i := range rules {
for _, disallow := range rules[i].Disallows {
if disallow == "/" {
rules[i].IsBlacklist = true
break
}
}
}
return rules, scanner.Err()
}
func parseIntSafe(s string) (int, error) {
var result int
_, err := fmt.Sscanf(s, "%d", &result)
return result, err
}
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
var anubisRules []AnubisRule
ruleCounter := 0
for _, robotsRule := range robotsRules {
userAgent := robotsRule.UserAgent
// Handle crawl delay as weight adjustment (do this first before any continues)
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
Action: "WEIGH",
Weight: &config.Weight{Adjust: *crawlDelay},
}
if userAgent == "*" {
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
}
anubisRules = append(anubisRules, rule)
}
// Handle blacklisted user agents (complete deny/challenge)
if robotsRule.IsBlacklist {
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
Action: *userAgentDeny,
}
if userAgent == "*" {
// This would block everything - convert to a weight adjustment instead
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
rule.Action = "WEIGH"
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
}
anubisRules = append(anubisRules, rule)
continue
}
// Handle specific disallow rules
for _, disallow := range robotsRule.Disallows {
if disallow == "/" {
continue // Already handled as blacklist above
}
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
}
// Build CEL expression
var conditions []string
// Add user agent condition if not wildcard
if userAgent != "*" {
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
}
// Add path condition
pathCondition := buildPathCondition(disallow)
conditions = append(conditions, pathCondition)
rule.Expression = &config.ExpressionOrList{
All: conditions,
}
anubisRules = append(anubisRules, rule)
}
}
return anubisRules
}
func buildPathCondition(robotsPath string) string {
// Handle wildcards in robots.txt paths
if strings.Contains(robotsPath, "*") || strings.Contains(robotsPath, "?") {
// Convert robots.txt wildcards to regex
regex := regexp.QuoteMeta(robotsPath)
regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
regex = strings.ReplaceAll(regex, `\?`, `.`) // ? becomes .
regex = "^" + regex
return fmt.Sprintf("path.matches(%q)", regex)
}
// Simple prefix match for most cases
return fmt.Sprintf("path.startsWith(%q)", robotsPath)
}

View File

@@ -1,418 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"gopkg.in/yaml.v3"
)
type TestCase struct {
name string
robotsFile string
expectedFile string
options TestOptions
}
type TestOptions struct {
format string
action string
crawlDelayWeight int
policyName string
deniedAction string
}
func TestDataFileConversion(t *testing.T) {
testCases := []TestCase{
{
name: "simple_default",
robotsFile: "simple.robots.txt",
expectedFile: "simple.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "simple_json",
robotsFile: "simple.robots.txt",
expectedFile: "simple.json",
options: TestOptions{format: "json"},
},
{
name: "simple_deny_action",
robotsFile: "simple.robots.txt",
expectedFile: "deny-action.yaml",
options: TestOptions{format: "yaml", action: "DENY"},
},
{
name: "simple_custom_name",
robotsFile: "simple.robots.txt",
expectedFile: "custom-name.yaml",
options: TestOptions{format: "yaml", policyName: "my-custom-policy"},
},
{
name: "blacklist_with_crawl_delay",
robotsFile: "blacklist.robots.txt",
expectedFile: "blacklist.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
},
{
name: "wildcards",
robotsFile: "wildcards.robots.txt",
expectedFile: "wildcards.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "empty_file",
robotsFile: "empty.robots.txt",
expectedFile: "empty.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "complex_scenario",
robotsFile: "complex.robots.txt",
expectedFile: "complex.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
robotsPath := filepath.Join("testdata", tc.robotsFile)
expectedPath := filepath.Join("testdata", tc.expectedFile)
// Read robots.txt input
robotsFile, err := os.Open(robotsPath)
if err != nil {
t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
}
defer robotsFile.Close()
// Parse robots.txt
rules, err := parseRobotsTxt(robotsFile)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
// Set test options
oldFormat := *outputFormat
oldAction := *baseAction
oldCrawlDelay := *crawlDelay
oldPolicyName := *policyName
oldDeniedAction := *userAgentDeny
if tc.options.format != "" {
*outputFormat = tc.options.format
}
if tc.options.action != "" {
*baseAction = tc.options.action
}
if tc.options.crawlDelayWeight > 0 {
*crawlDelay = tc.options.crawlDelayWeight
}
if tc.options.policyName != "" {
*policyName = tc.options.policyName
}
if tc.options.deniedAction != "" {
*userAgentDeny = tc.options.deniedAction
}
// Restore options after test
defer func() {
*outputFormat = oldFormat
*baseAction = oldAction
*crawlDelay = oldCrawlDelay
*policyName = oldPolicyName
*userAgentDeny = oldDeniedAction
}()
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Generate output
var actualOutput []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
actualOutput, err = yaml.Marshal(anubisRules)
case "json":
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
}
if err != nil {
t.Fatalf("Failed to marshal output: %v", err)
}
// Read expected output
expectedOutput, err := os.ReadFile(expectedPath)
if err != nil {
t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
}
if strings.ToLower(*outputFormat) == "yaml" {
var actualData []interface{}
var expectedData []interface{}
err = yaml.Unmarshal(actualOutput, &actualData)
if err != nil {
t.Fatalf("Failed to unmarshal actual output: %v", err)
}
err = yaml.Unmarshal(expectedOutput, &expectedData)
if err != nil {
t.Fatalf("Failed to unmarshal expected output: %v", err)
}
// Compare data structures
if !compareData(actualData, expectedData) {
actualStr := strings.TrimSpace(string(actualOutput))
expectedStr := strings.TrimSpace(string(expectedOutput))
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
}
} else {
var actualData []interface{}
var expectedData []interface{}
err = json.Unmarshal(actualOutput, &actualData)
if err != nil {
t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
}
err = json.Unmarshal(expectedOutput, &expectedData)
if err != nil {
t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
}
// Compare data structures
if !compareData(actualData, expectedData) {
actualStr := strings.TrimSpace(string(actualOutput))
expectedStr := strings.TrimSpace(string(expectedOutput))
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
}
}
})
}
}
func TestCaseInsensitiveParsing(t *testing.T) {
robotsTxt := `User-Agent: *
Disallow: /admin
Crawl-Delay: 10
User-agent: TestBot
disallow: /test
crawl-delay: 5
USER-AGENT: UpperBot
DISALLOW: /upper
CRAWL-DELAY: 20`
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
}
expectedRules := 3
if len(rules) != expectedRules {
t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
}
// Check that all crawl delays were parsed
for i, rule := range rules {
expectedDelays := []int{10, 5, 20}
if rule.CrawlDelay != expectedDelays[i] {
t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
}
}
}
func TestVariousOutputFormats(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin`
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldPolicyName := *policyName
*policyName = "test-policy"
defer func() { *policyName = oldPolicyName }()
anubisRules := convertToAnubisRules(rules)
// Test YAML output
yamlOutput, err := yaml.Marshal(anubisRules)
if err != nil {
t.Fatalf("Failed to marshal YAML: %v", err)
}
if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
t.Errorf("YAML output doesn't contain expected rule name")
}
// Test JSON output
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
if err != nil {
t.Fatalf("Failed to marshal JSON: %v", err)
}
if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
t.Errorf("JSON output doesn't contain expected rule name")
}
}
func TestDifferentActions(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin`
testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}
for _, action := range testActions {
t.Run("action_"+action, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldAction := *baseAction
*baseAction = action
defer func() { *baseAction = oldAction }()
anubisRules := convertToAnubisRules(rules)
if len(anubisRules) != 1 {
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
}
if anubisRules[0].Action != action {
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
}
})
}
}
func TestPolicyNaming(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin
Disallow: /private
User-agent: BadBot
Disallow: /`
testNames := []string{"custom-policy", "my-rules", "site-protection"}
for _, name := range testNames {
t.Run("name_"+name, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldName := *policyName
*policyName = name
defer func() { *policyName = oldName }()
anubisRules := convertToAnubisRules(rules)
// Check that all rule names use the custom prefix
for _, rule := range anubisRules {
if !strings.HasPrefix(rule.Name, name+"-") {
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
}
}
})
}
}
func TestCrawlDelayWeights(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin
Crawl-delay: 10
User-agent: SlowBot
Disallow: /slow
Crawl-delay: 60`
testWeights := []int{1, 5, 10, 25}
for _, weight := range testWeights {
t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldWeight := *crawlDelay
*crawlDelay = weight
defer func() { *crawlDelay = oldWeight }()
anubisRules := convertToAnubisRules(rules)
// Count weight rules and verify they have correct weight
weightRules := 0
for _, rule := range anubisRules {
if rule.Action == "WEIGH" && rule.Weight != nil {
weightRules++
if rule.Weight.Adjust != weight {
t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
}
}
}
expectedWeightRules := 2 // One for *, one for SlowBot
if weightRules != expectedWeightRules {
t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
}
})
}
}
func TestBlacklistActions(t *testing.T) {
robotsTxt := `User-agent: BadBot
Disallow: /
User-agent: SpamBot
Disallow: /`
testActions := []string{"DENY", "CHALLENGE"}
for _, action := range testActions {
t.Run("blacklist_"+action, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldAction := *userAgentDeny
*userAgentDeny = action
defer func() { *userAgentDeny = oldAction }()
anubisRules := convertToAnubisRules(rules)
// All rules should be blacklist rules with the specified action
for _, rule := range anubisRules {
if !strings.Contains(rule.Name, "blacklist") {
t.Errorf("Expected blacklist rule, got %s", rule.Name)
}
if rule.Action != action {
t.Errorf("Expected action %s, got %s", action, rule.Action)
}
}
})
}
}
// compareData performs a deep comparison of two data structures,
// ignoring differences that are semantically equivalent in YAML/JSON
func compareData(actual, expected interface{}) bool {
return reflect.DeepEqual(actual, expected)
}

View File

@@ -1,15 +0,0 @@
# Test with blacklisted user agents
User-agent: *
Disallow: /admin
Crawl-delay: 10
User-agent: BadBot
Disallow: /
User-agent: SpamBot
Disallow: /
Crawl-delay: 60
User-agent: Googlebot
Disallow: /search
Crawl-delay: 5

View File

@@ -1,30 +0,0 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 3
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-2
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-3
- action: WEIGH
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-crawl-delay-4
weight:
adjust: 3
- action: DENY
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-blacklist-5
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-6
weight:
adjust: 3
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search")
name: robots-txt-policy-disallow-7

View File

@@ -1,30 +0,0 @@
# Complex real-world example
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /api/internal/
Allow: /api/public/
Crawl-delay: 5
User-agent: Googlebot
Disallow: /search/
Allow: /api/
Crawl-delay: 2
User-agent: Bingbot
Disallow: /search/
Disallow: /admin/
Crawl-delay: 10
User-agent: BadBot
Disallow: /
User-agent: SeoBot
Disallow: /
Crawl-delay: 300
# Test with various patterns
User-agent: TestBot
Disallow: /*/admin
Disallow: /temp*.html
Disallow: /file?.log

View File

@@ -1,71 +0,0 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 5
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.startsWith("/private/")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.startsWith("/api/internal/")
name: robots-txt-policy-disallow-4
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-5
weight:
adjust: 5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-6
- action: WEIGH
expression: userAgent.contains("Bingbot")
name: robots-txt-policy-crawl-delay-7
weight:
adjust: 5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-8
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/admin/")
name: robots-txt-policy-disallow-9
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-10
- action: WEIGH
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-crawl-delay-11
weight:
adjust: 5
- action: DENY
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-blacklist-12
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/.*/admin")
name: robots-txt-policy-disallow-13
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html")
name: robots-txt-policy-disallow-14
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/file.\\.log")
name: robots-txt-policy-disallow-15

View File

@@ -1,6 +0,0 @@
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: my-custom-policy-disallow-1
- action: CHALLENGE
expression: path.startsWith("/private")
name: my-custom-policy-disallow-2

View File

@@ -1,6 +0,0 @@
- action: DENY
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-1
- action: DENY
expression: path.startsWith("/private")
name: robots-txt-policy-disallow-2

View File

@@ -1,2 +0,0 @@
# Empty robots.txt (comments only)
# No actual rules

View File

@@ -1 +0,0 @@
[]

View File

@@ -1,12 +0,0 @@
[
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/admin/\")",
"name": "robots-txt-policy-disallow-1"
},
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/private\")",
"name": "robots-txt-policy-disallow-2"
}
]

View File

@@ -1,5 +0,0 @@
# Simple robots.txt test
User-agent: *
Disallow: /admin/
Disallow: /private
Allow: /public

View File

@@ -1,6 +0,0 @@
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.startsWith("/private")
name: robots-txt-policy-disallow-2

View File

@@ -1,6 +0,0 @@
# Test wildcard patterns
User-agent: *
Disallow: /search*
Disallow: /*/private
Disallow: /file?.txt
Disallow: /admin/*?action=delete

View File

@@ -1,12 +0,0 @@
- action: CHALLENGE
expression: path.matches("^/search.*")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.matches("^/.*/private")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.matches("^/file.\\.txt")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.matches("^/admin/.*.action=delete")
name: robots-txt-policy-disallow-4

View File

@@ -51,29 +51,6 @@ bots:
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
- name: countries-with-aggressive-scrapers
action: WEIGH
geoip:
counties:
- BR
- CN
weight:
adjust: 10
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
- name: aggressive-asns-without-functional-abuse-contact
action: WEIGH
asns:
match:
- 13335 # Cloudflare
- 136907 # Huawei Cloud
- 45102 # Alibaba Cloud
weight:
adjust: 10
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-

View File

@@ -1,12 +1,13 @@
variable "ALPINE_VERSION" { default = "3.22" }
variable "GITHUB_SHA" { default = "devel" }
group "default" {
targets = [
"ci-runner",
"anubis",
]
}
target "ci-runner" {
target "anubis" {
args = {
ALPINE_VERSION = "3.22"
}
@@ -21,6 +22,6 @@ target "ci-runner" {
]
pull = true
tags = [
"ghcr.io/techarohq/anubis/ci-runner:latest"
"ghcr.io/techarohq/anubis:${GITHUB_SHA}"
]
}

View File

@@ -1,14 +0,0 @@
---
slug: welcome
title: Welcome to the Anubis blog!
authors: [xe]
tags: [intro]
---
Hello, world!
At Techaro, we've been working on making Anubis even better, and in the process we want to share what we've done, how it works, and signal boost cool things the community has done. As things happen, we'll blog about them so that you can learn from our struggles.
More details to come soon!
{/* truncate */}

View File

@@ -1,9 +0,0 @@
xe:
name: Xe Iaso
title: CEO @ Techaro
url: https://github.com/Xe
image_url: https://github.com/Xe.png
email: xe@techaro.lol
page: true
socials:
github: Xe

View File

@@ -1 +0,0 @@

View File

@@ -11,8 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
- Replace cidranger with bart for IP range checking, improving IP matching performance by 3-20x with zero heap
allocations
- Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in
development
- Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not
@@ -22,11 +20,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
- Bump AI-robots.txt to version 1.34
- Make progress bar styling more compatible (UXP, etc)
- Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
- Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))
- Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206))
- Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations
## v1.19.1: Jenomis cen Lexentale - Echo 1
@@ -162,6 +155,7 @@ Other changes:
- Moved all CSS inline to the Xess package, changed colors to be CSS variables
- Set or append to `X-Forwarded-For` header unless the remote connects over a loopback address [#328](https://github.com/TecharoHQ/anubis/issues/328)
- Fixed mojeekbot user agent regex
- Added support for running anubis behind a base path (e.g. `/myapp`)
- Reduce Anubis' paranoia with user cookies ([#365](https://github.com/TecharoHQ/anubis/pull/365))
- Added support for Open Graph passthrough while using unix sockets
- The Open Graph subsystem now passes the HTTP `HOST` header through to the origin

View File

@@ -4,6 +4,8 @@ title: Setting up Anubis
import RandomKey from "@site/src/components/RandomKey";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting.
@@ -30,7 +32,7 @@ TLS terminator)
Anubis is shipped in the Docker repo [`ghcr.io/techarohq/anubis`](https://github.com/TecharoHQ/anubis/pkgs/container/anubis). The following tags exist for your convenience:
| Tag | Meaning |
|:--------------------|:-----------------------------------------------------------------------------------------------------------------------------------|
| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- |
| `latest` | The latest [tagged release](https://github.com/TecharoHQ/anubis/releases), if you are in doubt, start here. |
| `v<version number>` | The Anubis image for [any given tagged release](https://github.com/TecharoHQ/anubis/tags) |
| `main` | The current build on the `main` branch. Only use this if you need the latest and greatest features as they are merged into `main`. |
@@ -48,7 +50,7 @@ For more detailed information on installing Anubis with native packages, please
Anubis uses these environment variables for configuration:
| Environment Variable | Default value | Explanation |
|:-------------------------------|:------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| :----------------------------- | :---------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `BASE_PREFIX` | unset | If set, adds a global prefix to all Anubis endpoints. For example, setting this to `/myapp` would make Anubis accessible at `/myapp/` instead of `/`. This is useful when running Anubis behind a reverse proxy that routes based on path prefixes. |
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
@@ -67,7 +69,6 @@ Anubis uses these environment variables for configuration:
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example:com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. |
| `STRIP_BASE_PREFIX` | `false` | If set to `true`, strips the base prefix from request paths when forwarding to the target server. This is useful when your target service expects to receive requests without the base prefix. For example, with `BASE_PREFIX=/foo` and `STRIP_BASE_PREFIX=true`, a request to `/foo/bar` would be forwarded to the target as `/bar`. |
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
| `USE_REMOTE_ADDRESS` | unset | If set to `true`, Anubis will take the client's IP from the network socket. For production deployments, it is expected that a reverse proxy is used in front of Anubis, which pass the IP using headers, instead. |
| `WEBMASTER_EMAIL` | unset | If set, shows a contact email address when rendering error pages. This email address will be how users can get in contact with administrators. |
@@ -128,21 +129,6 @@ With corresponding Anubis configuration:
BASE_PREFIX=/myapp
```
#### Stripping Base Prefix
If your target service doesn't expect to receive the base prefix in request paths, you can use the `STRIP_BASE_PREFIX` option:
```
BASE_PREFIX=/myapp
STRIP_BASE_PREFIX=true
```
With this configuration:
- A request to `/myapp/api/users` would be forwarded to your target service as `/api/users`
- A request to `/myapp/` would be forwarded as `/`
This is particularly useful when working with applications that weren't designed to handle path prefixes. However, note that if your target application generates absolute redirects or links (like `/login` instead of `./login`), these may break the subpath routing since they won't include the base prefix.
### Key generation
To generate an ed25519 private key, you can use this command:

View File

@@ -1,84 +0,0 @@
---
title: robots2policy CLI Tool
sidebar_position: 50
---
The `robots2policy` tool converts robots.txt files into Anubis challenge policies. It reads robots.txt rules and generates equivalent CEL expressions for path matching and user-agent filtering.
## Installation
Install directly with Go:
```bash
go install github.com/TecharoHQ/anubis/cmd/robots2policy@latest
```
## Usage
Basic conversion from URL:
```bash
robots2policy -input https://www.example.com/robots.txt
```
Convert local file to YAML:
```bash
robots2policy -input robots.txt -output policy.yaml
```
Convert with custom settings:
```bash
robots2policy -input robots.txt -action DENY -format json
```
## Options
| Flag | Description | Default |
|-----------------------|--------------------------------------------------------------------|---------------------|
| `-input` | robots.txt file path or URL (use `-` for stdin) | *required* |
| `-output` | Output file (use `-` for stdout) | stdout |
| `-format` | Output format: `yaml` or `json` | `yaml` |
| `-action` | Action for disallowed paths: `ALLOW`, `DENY`, `CHALLENGE`, `WEIGH` | `CHALLENGE` |
| `-name` | Policy name prefix | `robots-txt-policy` |
| `-crawl-delay-weight` | Weight adjustment for crawl-delay rules | `3` |
| `-deny-user-agents` | Action for blacklisted user agents | `DENY` |
## Example
Input robots.txt:
```txt
User-agent: *
Disallow: /admin/
Disallow: /private
User-agent: BadBot
Disallow: /
```
Generated policy:
```yaml
- name: robots-txt-policy-disallow-1
action: CHALLENGE
expression:
single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-2
action: CHALLENGE
expression:
single: path.startsWith("/private")
- name: robots-txt-policy-blacklist-3
action: DENY
expression:
single: userAgent.contains("BadBot")
```
## Using the Generated Policy
Save the output and import it in your main policy file:
```yaml
import:
- path: "./robots-policy.yaml"
```
The tool handles wildcard patterns, user-agent specific rules, and blacklisted bots automatically.

View File

@@ -1,81 +0,0 @@
# Thoth-based advanced checks
Status: Beta
Anubis instances are normally isolated. Each Anubis instance has its own configuration and exists in roughly its own world without any long term memory between requests. As threats, workarounds, and AI scraper toolchains evolve, administrators will need a way to get more up to date information faster than Anubis' release cycle.
Thus, Thoth is being created. Thoth is the reputation database for Anubis. Thoth feeds information to Anubis so that it can make better decisions about which traffic is innocuous and which traffic is suspicious.
:::note
Thoth is hosted by [Techaro](https://techaro.lol). Thoth is a paid service. Thoth is opt-in and requires manual intervention (including payment) to use. The code that powers Thoth is currently closed source.
To get access to Thoth, please subscribe [on GitHub Sponsors](https://github.com/sponsors/Xe) and [email Xe](mailto:xe@techaro.lol). This will be self-service soon.
:::
## Implementation
Thoth is a web service that listens over [gRPC](https://grpc.io/). Thoth's API is documented in protocol buffer definitions in the GitHub repo [TecharoHQ/thoth-proto](https://github.com/TecharoHQ/thoth-proto).
Thoth is designed to be _informative_, not _authoritative_. Thoth cannot and will not arbitrarily block requests, origins, or other traffic. Thoth is there to inform Anubis and influence the weight of requests so that upstream resources can be protected. Additionally, Anubis aggressively caches data from Thoth such that over time Anubis will not need to request data very often. This makes the fast path for repeat visitors even faster and reduces the amount of data that Thoth is exposed to.
## Thoth features
Thoth is currently in active development. Currently, Thoth provides the following features to Anubis:
- BGP Autonomous System (ASN) based filtering
- GeoIP location based filtering
### ASN-based filtering
When companies link their backbone infrastructure to the Internet, they do so via a [BGP Autonomous System](<https://en.wikipedia.org/wiki/Autonomous_system_(Internet)>), denoted by a number (the Autonomous System Number or ASN). Every IP address on the Internet is owned by an ASN with a 1:1 lookup that does not change very frequently.
Anubis uses Thoth to match IP addresses to BGP Autonomous Systems so that you can either issue arbitrary challenges to individual internet service providers (such as Cloudflare or Huawei Cloud) or, at the administrator's explicit instruction, block them altogether. For example, here's how you add 10 weight points to requests from Cloudflare, Huawei Cloud, and Alibaba Cloud:
```yaml
- name: aggressive-asns-without-functional-abuse-contact
action: WEIGH
asns:
match:
- 13335 # Cloudflare
- 136907 # Huawei Cloud
- 45102 # Alibaba Cloud
weight:
adjust: 10
```
You can look up details for [AS13335](https://bgp.tools/as/13335) or any of these other top offenders on [bgp.tools](https://bgp.tools).
### GeoIP-based filtering
In extreme cases, an administrator may have to take action against an entire country. This is not an ideal circumstance, but sometimes reality forces their hands and the administrators just want to sleep at night.
Anubis uses Thoth to look up the geographic location registered to an IP address. This lookup is not the best and will get better with time, but you ship what you can so you can make it better for next time.
For example, to add 10 weight points to requests from Brazil and China:
```yaml
- name: countries-with-aggressive-scrapers
action: WEIGH
geoip:
counties:
- BR
- CN
weight:
adjust: 10
```
Use this with care.
## Work-in-progress features
This section is a bit aspirational and is where Thoth will end up rather than things you can use today.
In general, a lot of Thoth features are focused on taking the same Anubis you know and love and making it better, smarter, and less paranoid. These include:
- Private rulesets for advanced patterns, current known exploits, and other recognition tactics that need to be kept cloak and dagger for operational security reasons
- Private challenge implementations via WebAssembly, including advanced browser detection logic
- Reputation querying so that Thoth can arbitrarily influence the weight of requests based on the net aggregate pass rate so that the most common browsers can get through with no challenge issued at all
- APIs for trusted administrators to report abusive request fingerprints so that Anubis can react to threats as they evolve
- A way for Anubis to periodically report the pass rate per ASN and other fingerprints so that methodology can be improved

View File

@@ -41,23 +41,13 @@ This page contains a non-exhaustive list with all websites using Anubis.
- https://minihoot.site
- https://catgirl.click/
- https://wiki.dolphin-emu.org/
- https://squirreljme.cc/
- https://gitlab.postmarketos.org/
- https://wiki.koha-community.org/
- <details>
<summary>FreeCAD</summary>
- https://forum.freecad.org/
- https://wiki.freecad.org/
</details>
- <details>
<summary>ReactOS</summary>
- https://reactos.org/forum
- https://reactos.org/wiki
- https://git.reactos.org
</details>
- <details>
<summary>ScummVM</summary>
- https://bugs.scummvm.org/
- https://forums.scummvm.org/
- https://wiki.scummvm.org/
</details>

View File

@@ -47,21 +47,21 @@ const config: Config = {
editUrl:
'https://github.com/TecharoHQ/anubis/tree/main/docs/',
},
blog: {
showReadingTime: true,
feedOptions: {
type: ['rss', 'atom', "json"],
xslt: true,
},
// Please change this to your repo.
// Remove this to remove the "edit this page" links.
editUrl:
'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
// Useful options to enforce blogging best practices
onInlineTags: 'warn',
onInlineAuthors: 'warn',
onUntruncatedBlogPosts: 'warn',
},
// blog: {
// showReadingTime: true,
// feedOptions: {
// type: ['rss', 'atom', "json"],
// xslt: true,
// },
// // Please change this to your repo.
// // Remove this to remove the "edit this page" links.
// editUrl:
// 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
// // Useful options to enforce blogging best practices
// onInlineTags: 'warn',
// onInlineAuthors: 'warn',
// onUntruncatedBlogPosts: 'warn',
// },
theme: {
customCss: './src/css/custom.css',
},
@@ -86,14 +86,9 @@ const config: Config = {
type: 'docSidebar',
sidebarId: 'tutorialSidebar',
position: 'left',
label: 'Docs',
},
{ to: '/blog', label: 'Blog', position: 'left' },
{
href: 'https://github.com/sponsors/Xe',
label: "Sponsorship",
position: 'left'
label: 'Tutorial',
},
// { to: '/blog', label: 'Blog', position: 'left' },
{
href: 'https://github.com/TecharoHQ/anubis',
label: 'GitHub',
@@ -133,10 +128,6 @@ const config: Config = {
{
title: 'More',
items: [
{
label: 'Blog',
to: '/blog',
},
{
label: 'GitHub',
href: 'https://github.com/TecharoHQ/anubis',

View File

@@ -1,6 +0,0 @@
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: anubis-docs-thoth
spec:
itemPath: "vaults/lc5zo4zjz3if3mkeuhufjmgmui/items/pwguumqcmtxvqbeb7y4gj7l36i"

View File

@@ -51,13 +51,6 @@ bots:
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
- name: rss-feed-blog
action: ALLOW
expression:
any:
- path.startsWith("/blog/atom.")
- path.startsWith("/blog/rss.")
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-

View File

@@ -38,8 +38,6 @@ spec:
value: "4"
- name: "METRICS_BIND"
value: ":9090"
- name: "OG_PASSTHROUGH"
value: "true"
- name: "POLICY_FNAME"
value: "/xe/cfg/anubis/botPolicies.yaml"
- name: "SERVE_ROBOTS_TXT"
@@ -68,6 +66,3 @@ spec:
- ALL
seccompProfile:
type: RuntimeDefault
envFrom:
- secretRef:
name: anubis-docs-thoth

View File

@@ -1,9 +1,7 @@
resources:
- 1password.yaml
- deployment.yaml
- ingress.yaml
- onionservice.yaml
- poddisruptionbudget.yaml
- service.yaml
configMapGenerator:

View File

@@ -1,9 +0,0 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: anubis-docs
spec:
minAvailable: 1
selector:
matchLabels:
app: anubis-docs

Binary file not shown.

Before

Width:  |  Height:  |  Size: 476 B

After

Width:  |  Height:  |  Size: 2.1 KiB

24
go.mod
View File

@@ -3,29 +3,20 @@ module github.com/TecharoHQ/anubis
go 1.24.2
require (
github.com/TecharoHQ/thoth-proto v0.4.0
github.com/a-h/templ v0.3.898
github.com/cespare/xxhash/v2 v2.3.0
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
github.com/gaissmai/bart v0.20.4
github.com/golang-jwt/jwt/v5 v5.2.2
github.com/google/cel-go v0.25.0
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0
github.com/joho/godotenv v1.5.1
github.com/playwright-community/playwright-go v0.5200.0
github.com/prometheus/client_golang v1.22.0
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
github.com/yl2chen/cidranger v1.0.2
golang.org/x/net v0.41.0
google.golang.org/grpc v1.72.2
gopkg.in/yaml.v3 v3.0.1
k8s.io/apimachinery v0.33.1
sigs.k8s.io/yaml v1.4.0
)
require (
al.essio.dev/pkg/shellescape v1.6.0 // indirect
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 // indirect
cel.dev/expr v0.23.1 // indirect
dario.cat/mergo v1.0.2 // indirect
github.com/AlekSi/pointer v1.2.0 // indirect
@@ -44,9 +35,10 @@ require (
github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb // indirect
github.com/cavaliergopher/cpio v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cli/browser v1.3.0 // indirect
github.com/cli/go-gh v0.1.0 // indirect
github.com/cloudflare/circl v1.6.1 // indirect
github.com/cloudflare/circl v1.6.0 // indirect
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
github.com/dlclark/regexp2 v1.11.4 // indirect
@@ -92,7 +84,7 @@ require (
github.com/shopspring/decimal v1.4.0 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/stoewer/go-strcase v1.3.0 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
gitlab.com/digitalxero/go-conventional-commit v1.0.7 // indirect
@@ -108,13 +100,15 @@ require (
golang.org/x/tools v0.33.0 // indirect
golang.org/x/vuln v1.1.4 // indirect
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a // indirect
google.golang.org/protobuf v1.36.6 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
honnef.co/go/tools v0.6.1 // indirect
mvdan.cc/sh/v3 v3.11.0 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
tool (

60
go.sum
View File

@@ -1,7 +1,5 @@
al.essio.dev/pkg/shellescape v1.6.0 h1:NxFcEqzFSEVCGN2yq7Huv/9hyCEGVa/TncnOOBBeXHA=
al.essio.dev/pkg/shellescape v1.6.0/go.mod h1:6sIqp7X2P6mThCQ7twERpZTuigpr6KbZWtls1U8I890=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 h1:YhMSc48s25kr7kv31Z8vf7sPUIq5YJva9z1mn/hAt0M=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1/go.mod h1:avRlCjnFzl98VPaeCtJ24RrV/wwHFzB8sWXhj26+n/U=
cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg=
cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
@@ -30,8 +28,6 @@ github.com/ProtonMail/gopenpgp/v2 v2.7.1 h1:Awsg7MPc2gD3I7IFac2qE3Gdls0lZW8SzrFZ
github.com/ProtonMail/gopenpgp/v2 v2.7.1/go.mod h1:/BU5gfAVwqyd8EfC3Eu7zmuhwYQpKs+cGD8M//iiaxs=
github.com/Songmu/gitconfig v0.2.0 h1:pX2++u4KUq+K2k/ZCzGXLtkD3ceCqIdi0tDyb+IbSyo=
github.com/Songmu/gitconfig v0.2.0/go.mod h1:cB5bYJer+pl7W8g6RHFwL/0X6aJROVrYuHlvc7PT+hE=
github.com/TecharoHQ/thoth-proto v0.4.0 h1:UbkvfgCku0Dm1R6O4ug3HOsJNnE6F3wB8x+Dpw2lzFI=
github.com/TecharoHQ/thoth-proto v0.4.0/go.mod h1:IcGnZt3iYUZQVEa0Lwk5l4ix0hCeXlWUV1TJMZvbWx0=
github.com/TecharoHQ/yeet v0.6.0 h1:RCBAjr7wIlllsgy0tpvWpLX7jsZgu2tiuBY3RrprcR0=
github.com/TecharoHQ/yeet v0.6.0/go.mod h1:bj2V4Fg8qKQXoiuPZa3HuawrE8g+LsOQv/9q2WyGSsA=
github.com/a-h/parse v0.0.0-20250122154542-74294addb73e h1:HjVbSQHy+dnlS6C3XajZ69NYAb5jbGNfHanvm1+iYlo=
@@ -67,8 +63,8 @@ github.com/cli/go-gh v0.1.0 h1:kMqFmC3ECBrV2UKzlOHjNOTTchExVc5tjNHtCqk/zYk=
github.com/cli/go-gh v0.1.0/go.mod h1:eTGWl99EMZ+3Iau5C6dHyGAJRRia65MtdBtuhWc+84o=
github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q=
github.com/cli/shurcooL-graphql v0.0.1/go.mod h1:U7gCSuMZP/Qy7kbqkk5PrqXEeDgtfG5K+W+u8weorps=
github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=
github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/cloudflare/circl v1.6.0 h1:cr5JKic4HI+LkINy2lg3W2jF8sHCVTBncJr5gIIq7qk=
github.com/cloudflare/circl v1.6.0/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
@@ -103,8 +99,6 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/gaissmai/bart v0.20.4 h1:Ik47r1fy3jRVU+1eYzKSW3ho2UgBVTVnUS8O993584U=
github.com/gaissmai/bart v0.20.4/go.mod h1:cEed+ge8dalcbpi8wtS9x9m2hn/fNJH5suhdGQOHnYk=
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
@@ -117,10 +111,6 @@ github.com/go-git/go-git/v5 v5.14.0 h1:/MD3lCrGjCen5WfEAzKg00MJJffKhC8gzS80ycmCi
github.com/go-git/go-git/v5 v5.14.0/go.mod h1:Z5Xhoia5PcWA3NF8vRLURn9E5FRhSl7dGj9ItW3Wk5k=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
@@ -144,8 +134,6 @@ github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeD
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/cel-go v0.25.0 h1:jsFw9Fhn+3y2kBbltZR4VEz5xKkcIFRPDnuEzAGv5GY=
github.com/google/cel-go v0.25.0/go.mod h1:hjEb6r5SuOSlhCHmFoLzu8HGCERvIsDAbxDAyNU/MmI=
github.com/google/go-cmdtest v0.4.1-0.20220921163831-55ab3332a786 h1:rcv+Ippz6RAtvaGgKxc+8FQIpxHgsF+HBzPyYL2cyVU=
@@ -171,18 +159,12 @@ github.com/goreleaser/fileglob v1.3.0 h1:/X6J7U8lbDpQtBvGcwwPS6OpzkNVlVEsFUVRx9+
github.com/goreleaser/fileglob v1.3.0/go.mod h1:Jx6BoXv3mbYkEzwm9THo7xbr5egkAraxkGorbJb4RxU=
github.com/goreleaser/nfpm/v2 v2.42.1 h1:xu2pLRgQuz2ab+YZFoeIzwU/M5jjjCKDGwv1lRbVGvk=
github.com/goreleaser/nfpm/v2 v2.42.1/go.mod h1:dY53KWYKebkOocxgkmpM7SRX0Nv5hU+jEu2kIaM4/LI=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI=
github.com/henvic/httpretty v0.0.6/go.mod h1:X38wLjWXHkXT7r2+uK8LjCMne9rsuNaBLJ+5cU2/Pmo=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
@@ -266,17 +248,13 @@ github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sS
github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60=
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e/go.mod h1:/Tnicc6m/lsJE0irFMA0LfIwTBo4QP7A8IfyIv4zZKI=
@@ -286,21 +264,11 @@ github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
gitlab.com/digitalxero/go-conventional-commit v1.0.7 h1:8/dO6WWG+98PMhlZowt/YjuiKhqhGlOCwlIV8SqqGh8=
gitlab.com/digitalxero/go-conventional-commit v1.0.7/go.mod h1:05Xc2BFsSyC5tKhK0y+P3bs0AwUtNuTp+mTpbCU/DZ0=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
@@ -385,14 +353,12 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a h1:nwKuGPlUAt+aR+pcrkfFRrTU1BVrSmYyYMxYbUIVHr0=
google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a/go.mod h1:3kWAYMk1I75K4vykHtKt2ycnOgpA6974V7bREqbsenU=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a h1:51aaUVRocpvUOSQKM6Q7VuoaktNIaMCLuhZB6DKksq4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a/go.mod h1:uRxBH1mhmO8PGhU89cMcHaXKZqO+OfakD8QQO0oYwlQ=
google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8=
google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@@ -3,23 +3,10 @@ package internal
import (
"crypto/sha256"
"encoding/hex"
"strconv"
"github.com/cespare/xxhash/v2"
)
// SHA256sum computes a cryptographic hash. Still used for proof-of-work challenges
// where we need the security properties of a cryptographic hash function.
func SHA256sum(text string) string {
hash := sha256.New()
hash.Write([]byte(text))
return hex.EncodeToString(hash.Sum(nil))
}
// FastHash is a high-performance non-cryptographic hash function suitable for
// internal caching, policy rule identification, and other performance-critical
// use cases where cryptographic security is not required.
func FastHash(text string) string {
h := xxhash.Sum64String(text)
return strconv.FormatUint(h, 16)
}

View File

@@ -1,261 +0,0 @@
package internal
import (
"fmt"
"strings"
"testing"
)
// XXHash64sum is a test alias for FastHash to benchmark against SHA256
func XXHash64sum(text string) string {
return FastHash(text)
}
// Test data that matches real usage patterns in the codebase
var (
// Typical policy checker inputs
policyInputs = []string{
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"User-Agent: bot/1.0",
"User-Agent: GoogleBot/2.1",
"/robots.txt",
"/api/.*",
"10.0.0.0/8",
"192.168.1.0/24",
"172.16.0.0/12",
}
// Challenge data from challengeFor function
challengeInputs = []string{
"Accept-Language=en-US,X-Real-IP=192.168.1.100,User-Agent=Mozilla/5.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=abc123,Difficulty=5",
"Accept-Language=fr-FR,X-Real-IP=10.0.0.50,User-Agent=Chrome/91.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=def456,Difficulty=3",
"Accept-Language=es-ES,X-Real-IP=172.16.1.1,User-Agent=Safari/14.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=ghi789,Difficulty=7",
}
// Bot rule patterns
botRuleInputs = []string{
"GoogleBot::path:/robots.txt",
"BingBot::useragent:Mozilla/5.0 (compatible; bingbot/2.0)",
"FacebookBot::headers:Accept-Language,User-Agent",
"TwitterBot::cidr:192.168.1.0/24",
}
// CEL expressions from policy rules
celInputs = []string{
`request.headers["User-Agent"].contains("bot")`,
`request.path.startsWith("/api/") && request.method == "POST"`,
`request.remoteAddress in ["192.168.1.0/24", "10.0.0.0/8"]`,
`request.userAgent.matches(".*[Bb]ot.*") || request.userAgent.matches(".*[Cc]rawler.*")`,
}
// Thoth ASN checker inputs
asnInputs = []string{
"ASNChecker\nAS 15169\nAS 8075\nAS 32934",
"ASNChecker\nAS 13335\nAS 16509\nAS 14061",
"ASNChecker\nAS 36351\nAS 20940\nAS 8100",
}
)
func BenchmarkSHA256_PolicyInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := policyInputs[i%len(policyInputs)]
_ = SHA256sum(input)
}
}
func BenchmarkXXHash_PolicyInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := policyInputs[i%len(policyInputs)]
_ = XXHash64sum(input)
}
}
func BenchmarkSHA256_ChallengeInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := challengeInputs[i%len(challengeInputs)]
_ = SHA256sum(input)
}
}
func BenchmarkXXHash_ChallengeInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := challengeInputs[i%len(challengeInputs)]
_ = XXHash64sum(input)
}
}
func BenchmarkSHA256_BotRuleInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := botRuleInputs[i%len(botRuleInputs)]
_ = SHA256sum(input)
}
}
func BenchmarkXXHash_BotRuleInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := botRuleInputs[i%len(botRuleInputs)]
_ = XXHash64sum(input)
}
}
func BenchmarkSHA256_CELInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := celInputs[i%len(celInputs)]
_ = SHA256sum(input)
}
}
func BenchmarkXXHash_CELInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := celInputs[i%len(celInputs)]
_ = XXHash64sum(input)
}
}
func BenchmarkSHA256_ASNInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := asnInputs[i%len(asnInputs)]
_ = SHA256sum(input)
}
}
func BenchmarkXXHash_ASNInputs(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
input := asnInputs[i%len(asnInputs)]
_ = XXHash64sum(input)
}
}
// Benchmark the policy list hashing used in checker.go
func BenchmarkSHA256_PolicyList(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
var sb strings.Builder
for _, input := range policyInputs {
fmt.Fprintln(&sb, SHA256sum(input))
}
_ = SHA256sum(sb.String())
}
}
func BenchmarkXXHash_PolicyList(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
var sb strings.Builder
for _, input := range policyInputs {
fmt.Fprintln(&sb, XXHash64sum(input))
}
_ = XXHash64sum(sb.String())
}
}
// Tests that xxhash doesn't have collisions in realistic scenarios
func TestHashCollisions(t *testing.T) {
allInputs := append(append(append(append(policyInputs, challengeInputs...), botRuleInputs...), celInputs...), asnInputs...)
// Start with realistic inputs from actual usage
xxhashHashes := make(map[string]string)
for _, input := range allInputs {
hash := XXHash64sum(input)
if existing, exists := xxhashHashes[hash]; exists {
t.Errorf("XXHash collision detected: %q and %q both hash to %s", input, existing, hash)
}
xxhashHashes[hash] = input
}
t.Logf("Basic test: %d realistic inputs, no collisions", len(allInputs))
// Test similar strings that might cause hash collisions
prefixes := []string{"User-Agent: ", "X-Real-IP: ", "Accept-Language: ", "Host: "}
suffixes := []string{"bot", "crawler", "spider", "scraper", "Mozilla", "Chrome", "Safari", "Firefox"}
variations := []string{"", "/1.0", "/2.0", " (compatible)", " (Windows)", " (Linux)", " (Mac)"}
stressCount := 0
for _, prefix := range prefixes {
for _, suffix := range suffixes {
for _, variation := range variations {
for i := 0; i < 100; i++ {
input := fmt.Sprintf("%s%s%s-%d", prefix, suffix, variation, i)
hash := XXHash64sum(input)
if existing, exists := xxhashHashes[hash]; exists {
t.Errorf("XXHash collision in stress test: %q and %q both hash to %s", input, existing, hash)
}
xxhashHashes[hash] = input
stressCount++
}
}
}
}
t.Logf("Stress test 1: %d similar string variations, no collisions", stressCount)
// Test sequential patterns that might be problematic
patterns := []string{
"192.168.1.%d",
"10.0.0.%d",
"172.16.%d.1",
"challenge-%d",
"bot-rule-%d",
"policy-%016x",
"session-%016x",
}
seqCount := 0
for _, pattern := range patterns {
for i := 0; i < 10000; i++ {
input := fmt.Sprintf(pattern, i)
hash := XXHash64sum(input)
if existing, exists := xxhashHashes[hash]; exists {
t.Errorf("XXHash collision in sequential test: %q and %q both hash to %s", input, existing, hash)
}
xxhashHashes[hash] = input
seqCount++
}
}
t.Logf("Stress test 2: %d sequential patterns, no collisions", seqCount)
totalInputs := len(allInputs) + stressCount + seqCount
t.Logf("TOTAL: Tested %d inputs across realistic scenarios - NO COLLISIONS", totalInputs)
}
// Verify xxhash output works as cache keys
func TestXXHashFormat(t *testing.T) {
testCases := []string{
"short",
"",
"very long string with lots of content that might be used in policy checking and other internal hashing scenarios",
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}
for _, input := range testCases {
hash := XXHash64sum(input)
// Check it's valid hex
if len(hash) == 0 {
t.Errorf("Empty hash for input %q", input)
}
// xxhash is 64-bit so max 16 hex chars
if len(hash) > 16 {
t.Errorf("Hash too long for input %q: %s (length %d)", input, hash, len(hash))
}
// Make sure it's all hex characters
for _, char := range hash {
if !((char >= '0' && char <= '9') || (char >= 'a' && char <= 'f')) {
t.Errorf("Non-hex character %c in hash %s for input %q", char, hash, input)
}
}
t.Logf("Input: %q -> Hash: %s", input, hash)
}
}

View File

@@ -1,149 +0,0 @@
package ogtags
import (
"net/url"
"runtime"
"strings"
"testing"
"golang.org/x/net/html"
)
func BenchmarkGetTarget(b *testing.B) {
tests := []struct {
name string
target string
paths []string
}{
{
name: "HTTP",
target: "http://example.com",
paths: []string{"/", "/path", "/path/to/resource", "/path?query=1&foo=bar"},
},
{
name: "Unix",
target: "unix:///var/run/app.sock",
paths: []string{"/", "/api/endpoint", "/api/endpoint?param=value"},
},
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
cache := NewOGTagCache(tt.target, false, 0, false)
urls := make([]*url.URL, len(tt.paths))
for i, path := range tt.paths {
u, _ := url.Parse(path)
urls[i] = u
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = cache.getTarget(urls[i%len(urls)])
}
})
}
}
func BenchmarkExtractOGTags(b *testing.B) {
htmlSamples := []string{
`<html><head>
<meta property="og:title" content="Test Title">
<meta property="og:description" content="Test Description">
<meta name="keywords" content="test,keywords">
</head><body></body></html>`,
`<html><head>
<meta property="og:title" content="Page Title">
<meta property="og:type" content="website">
<meta property="og:url" content="https://example.com">
<meta property="og:image" content="https://example.com/image.jpg">
<meta property="twitter:card" content="summary_large_image">
<meta property="twitter:title" content="Twitter Title">
<meta name="description" content="Page description">
<meta name="author" content="John Doe">
</head><body><div><p>Content</p></div></body></html>`,
}
cache := NewOGTagCache("http://example.com", false, 0, false)
docs := make([]*html.Node, len(htmlSamples))
for i, sample := range htmlSamples {
doc, _ := html.Parse(strings.NewReader(sample))
docs[i] = doc
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = cache.extractOGTags(docs[i%len(docs)])
}
}
// Memory usage test
func TestMemoryUsage(t *testing.T) {
cache := NewOGTagCache("http://example.com", false, 0, false)
// Force GC and wait for it to complete
runtime.GC()
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
// Run getTarget many times
u, _ := url.Parse("/path/to/resource?query=1&foo=bar&baz=qux")
for i := 0; i < 10000; i++ {
_ = cache.getTarget(u)
}
// Force GC after operations
runtime.GC()
var m2 runtime.MemStats
runtime.ReadMemStats(&m2)
allocatedBytes := int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
allocatedKB := float64(allocatedBytes) / 1024.0
allocatedPerOp := float64(allocatedBytes) / 10000.0
t.Logf("Memory allocated for 10k getTarget calls:")
t.Logf(" Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
t.Logf(" Per operation: %.2f bytes", allocatedPerOp)
// Test extractOGTags memory usage
htmlDoc := `<html><head>
<meta property="og:title" content="Test Title">
<meta property="og:description" content="Test Description">
<meta property="og:image" content="https://example.com/image.jpg">
<meta property="twitter:card" content="summary">
<meta name="keywords" content="test,keywords,example">
<meta name="author" content="Test Author">
<meta property="unknown:tag" content="Should be ignored">
</head><body></body></html>`
doc, _ := html.Parse(strings.NewReader(htmlDoc))
runtime.GC()
runtime.ReadMemStats(&m1)
for i := 0; i < 1000; i++ {
_ = cache.extractOGTags(doc)
}
runtime.GC()
runtime.ReadMemStats(&m2)
allocatedBytes = int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
allocatedKB = float64(allocatedBytes) / 1024.0
allocatedPerOp = float64(allocatedBytes) / 1000.0
t.Logf("Memory allocated for 1k extractOGTags calls:")
t.Logf(" Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
t.Logf(" Per operation: %.2f bytes", allocatedPerOp)
// Sanity checks
if allocatedPerOp > 10000 {
t.Errorf("extractOGTags allocating too much memory per operation: %.2f bytes", allocatedPerOp)
}
}

View File

@@ -13,20 +13,14 @@ import (
)
const (
maxContentLength = 8 << 20 // 8 MiB is enough for anyone
maxContentLength = 16 << 20 // 16 MiB in bytes, if there is a reasonable reason that you need more than this...Why?
httpTimeout = 5 * time.Second /*todo: make this configurable?*/
schemeSeparatorLength = 3 // Length of "://"
querySeparatorLength = 1 // Length of "?" for query strings
)
type OGTagCache struct {
cache *decaymap.Impl[string, map[string]string]
targetURL *url.URL
client *http.Client
// Pre-built strings for optimization
unixPrefix string // "http://unix"
cache *decaymap.Impl[string, map[string]string]
targetURL *url.URL
client *http.Client
approvedTags []string
approvedPrefixes []string
ogTimeToLive time.Duration
@@ -36,6 +30,7 @@ type OGTagCache struct {
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
// Predefined approved tags and prefixes
// In the future, these could come from configuration
defaultApprovedTags := []string{"description", "keywords", "author"}
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
@@ -76,50 +71,37 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
return &OGTagCache{
cache: decaymap.New[string, map[string]string](),
targetURL: parsedTargetURL,
targetURL: parsedTargetURL, // Store the parsed URL
ogPassthrough: ogPassthrough,
ogTimeToLive: ogTimeToLive,
ogCacheConsiderHost: ogTagsConsiderHost,
ogCacheConsiderHost: ogTagsConsiderHost, // todo: refactor to be a separate struct
approvedTags: defaultApprovedTags,
approvedPrefixes: defaultApprovedPrefixes,
client: client,
unixPrefix: "http://unix",
}
}
// getTarget constructs the target URL string for fetching OG tags.
// Optimized to minimize allocations by building strings directly.
// For Unix sockets, it creates a "fake" HTTP URL that the custom dialer understands.
func (c *OGTagCache) getTarget(u *url.URL) string {
var escapedPath = u.EscapedPath() // will cause an allocation if path contains special characters
if c.targetURL.Scheme == "unix" {
// Build URL string directly without creating intermediate URL object
var sb strings.Builder
sb.Grow(len(c.unixPrefix) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength) // Pre-allocate
sb.WriteString(c.unixPrefix)
sb.WriteString(escapedPath)
if u.RawQuery != "" {
sb.WriteByte('?')
sb.WriteString(u.RawQuery)
// The custom dialer ignores the host, but we need a valid http URL structure.
// Use "unix" as a placeholder host. Path and Query from original request are appended.
fakeURL := &url.URL{
Scheme: "http", // Scheme must be http/https for client.Get
Host: "unix", // Arbitrary host, ignored by custom dialer
Path: u.Path,
RawQuery: u.RawQuery,
}
return sb.String()
return fakeURL.String()
}
// For regular http/https targets, build URL string directly
var sb strings.Builder
// Pre-calculate size: scheme + "://" + host + path + "?" + query
estimatedSize := len(c.targetURL.Scheme) + schemeSeparatorLength + len(c.targetURL.Host) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength
sb.Grow(estimatedSize)
// For regular http/https targets
target := *c.targetURL // Make a copy
target.Path = u.Path
target.RawQuery = u.RawQuery
return target.String()
sb.WriteString(c.targetURL.Scheme)
sb.WriteString("://")
sb.WriteString(c.targetURL.Host)
sb.WriteString(escapedPath)
if u.RawQuery != "" {
sb.WriteByte('?')
sb.WriteString(u.RawQuery)
}
return sb.String()
}
func (c *OGTagCache) Cleanup() {

View File

@@ -1,309 +0,0 @@
package ogtags
import (
"net/url"
"strings"
"testing"
"unicode/utf8"
"golang.org/x/net/html"
)
// FuzzGetTarget tests getTarget with various inputs
func FuzzGetTarget(f *testing.F) {
// Seed corpus with interesting test cases
testCases := []struct {
target string
path string
query string
}{
{"http://example.com", "/", ""},
{"http://example.com", "/path", "q=1"},
{"unix:///tmp/socket", "/api", "key=value"},
{"https://example.com:8080", "/path/to/resource", "a=1&b=2"},
{"http://example.com", "/path with spaces", "q=hello world"},
{"http://example.com", "/path/❤️/emoji", "emoji=🎉"},
{"http://example.com", "/path/../../../etc/passwd", ""},
{"http://example.com", "/path%2F%2E%2E%2F", "q=%3Cscript%3E"},
{"unix:///var/run/app.sock", "/../../etc/passwd", ""},
{"http://[::1]:8080", "/ipv6", "test=1"},
{"http://example.com", strings.Repeat("/very/long/path", 100), strings.Repeat("param=value&", 100)},
{"http://example.com", "/path%20with%20encoded", "q=%20encoded%20"},
{"http://example.com", "/пример/кириллица", "q=тест"},
{"http://example.com", "/中文/路径", "查询=值"},
{"", "/path", "q=1"}, // Empty target
}
for _, tc := range testCases {
f.Add(tc.target, tc.path, tc.query)
}
f.Fuzz(func(t *testing.T, target, path, query string) {
// Skip invalid UTF-8 to focus on realistic inputs
if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
t.Skip()
}
// Create cache - should not panic
cache := NewOGTagCache(target, false, 0, false)
// Create URL
u := &url.URL{
Path: path,
RawQuery: query,
}
// Call getTarget - should not panic
result := cache.getTarget(u)
// Basic validation
if result == "" {
t.Errorf("getTarget returned empty string for target=%q, path=%q, query=%q", target, path, query)
}
// Verify result is a valid URL (for non-empty targets)
if target != "" {
parsedResult, err := url.Parse(result)
if err != nil {
t.Errorf("getTarget produced invalid URL %q: %v", result, err)
} else {
// For unix sockets, verify the scheme is http
if strings.HasPrefix(target, "unix:") && parsedResult.Scheme != "http" {
t.Errorf("Unix socket URL should have http scheme, got %q", parsedResult.Scheme)
}
}
}
// Ensure no memory corruption by calling multiple times
for i := 0; i < 3; i++ {
result2 := cache.getTarget(u)
if result != result2 {
t.Errorf("getTarget not deterministic: %q != %q", result, result2)
}
}
})
}
// FuzzExtractOGTags tests extractOGTags with various HTML inputs
func FuzzExtractOGTags(f *testing.F) {
// Seed corpus with interesting HTML cases
htmlCases := []string{
`<html><head><meta property="og:title" content="Test"></head></html>`,
`<meta property="og:title" content="No HTML tags">`,
`<html><head>` + strings.Repeat(`<meta property="og:title" content="Many tags">`, 1000) + `</head></html>`,
`<html><head><meta property="og:title" content="<script>alert('xss')</script>"></head></html>`,
`<html><head><meta property="og:title" content="Line1&#10;Line2"></head></html>`,
`<html><head><meta property="og:emoji" content="❤️🎉🎊"></head></html>`,
`<html><head><meta property="og:title" content="` + strings.Repeat("A", 10000) + `"></head></html>`,
`<html><head><meta property="og:title" content='Single quotes'></head></html>`,
`<html><head><meta property=og:title content=no-quotes></head></html>`,
`<html><head><meta name="keywords" content="test,keywords"></head></html>`,
`<html><head><meta property="unknown:tag" content="Should be ignored"></head></html>`,
`<html><head><meta property="` + strings.Repeat("og:", 100) + `title" content="Nested prefixes"></head></html>`,
`<html>` + strings.Repeat(`<div>`, 1000) + `<meta property="og:title" content="Deep nesting">` + strings.Repeat(`</div>`, 1000) + `</html>`,
`<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><meta property="og:title" content="With doctype"/></head></html>`,
`<html><head><meta property="" content="Empty property"></head></html>`,
`<html><head><meta content="Content only"></head></html>`,
`<html><head><meta property="og:title"></head></html>`, // No content
``, // Empty HTML
`<html><head><meta property="og:title" content="Кириллица"></head></html>`,
`<html><head><meta property="og:title" content="中文内容"></head></html>`,
`<html><head><!--<meta property="og:title" content="Commented out">--></head></html>`,
`<html><head><META PROPERTY="OG:TITLE" CONTENT="UPPERCASE"></head></html>`,
}
for _, htmlc := range htmlCases {
f.Add(htmlc)
}
f.Fuzz(func(t *testing.T, htmlContent string) {
// Skip invalid UTF-8
if !utf8.ValidString(htmlContent) {
t.Skip()
}
// Parse HTML - may fail on invalid input
doc, err := html.Parse(strings.NewReader(htmlContent))
if err != nil {
// This is expected for malformed HTML
return
}
cache := NewOGTagCache("http://example.com", false, 0, false)
// Should not panic
tags := cache.extractOGTags(doc)
// Validate results
for property, content := range tags {
// Ensure property is approved
approved := false
for _, prefix := range cache.approvedPrefixes {
if strings.HasPrefix(property, prefix) {
approved = true
break
}
}
if !approved {
for _, tag := range cache.approvedTags {
if property == tag {
approved = true
break
}
}
}
if !approved {
t.Errorf("Unapproved property %q was extracted", property)
}
// Ensure content is valid string
if !utf8.ValidString(content) {
t.Errorf("Invalid UTF-8 in content for property %q", property)
}
}
// Test determinism
tags2 := cache.extractOGTags(doc)
if len(tags) != len(tags2) {
t.Errorf("extractOGTags not deterministic: different lengths %d != %d", len(tags), len(tags2))
}
for k, v := range tags {
if tags2[k] != v {
t.Errorf("extractOGTags not deterministic: %q=%q != %q=%q", k, v, k, tags2[k])
}
}
})
}
// FuzzGetTargetRoundTrip tests that getTarget produces valid URLs that can be parsed back
func FuzzGetTargetRoundTrip(f *testing.F) {
f.Add("http://example.com", "/path/to/resource", "key=value&foo=bar")
f.Add("unix:///tmp/socket", "/api/endpoint", "param=test")
f.Fuzz(func(t *testing.T, target, path, query string) {
if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
t.Skip()
}
cache := NewOGTagCache(target, false, 0, false)
u := &url.URL{Path: path, RawQuery: query}
result := cache.getTarget(u)
if result == "" {
return
}
// Parse the result back
parsed, err := url.Parse(result)
if err != nil {
t.Errorf("getTarget produced unparseable URL: %v", err)
return
}
// For non-unix targets, verify path preservation (accounting for encoding)
if !strings.HasPrefix(target, "unix:") && target != "" {
// The paths should match after normalization
expectedPath := u.EscapedPath()
if parsed.EscapedPath() != expectedPath {
t.Errorf("Path not preserved: want %q, got %q", expectedPath, parsed.EscapedPath())
}
// Query should be preserved exactly
if parsed.RawQuery != query {
t.Errorf("Query not preserved: want %q, got %q", query, parsed.RawQuery)
}
}
})
}
// FuzzExtractMetaTagInfo tests the extractMetaTagInfo function directly
func FuzzExtractMetaTagInfo(f *testing.F) {
// Seed with various attribute combinations
f.Add("og:title", "Test Title", "property")
f.Add("keywords", "test,keywords", "name")
f.Add("og:description", "A description with \"quotes\"", "property")
f.Add("twitter:card", "summary", "property")
f.Add("unknown:tag", "Should be filtered", "property")
f.Add("", "Content without property", "property")
f.Add("og:title", "", "property") // Property without content
f.Fuzz(func(t *testing.T, propertyValue, contentValue, propertyKey string) {
if !utf8.ValidString(propertyValue) || !utf8.ValidString(contentValue) || !utf8.ValidString(propertyKey) {
t.Skip()
}
// Create a meta node
node := &html.Node{
Type: html.ElementNode,
Data: "meta",
Attr: []html.Attribute{
{Key: propertyKey, Val: propertyValue},
{Key: "content", Val: contentValue},
},
}
cache := NewOGTagCache("http://example.com", false, 0, false)
// Should not panic
property, content := cache.extractMetaTagInfo(node)
// If property is returned, it must be approved
if property != "" {
approved := false
for _, prefix := range cache.approvedPrefixes {
if strings.HasPrefix(property, prefix) {
approved = true
break
}
}
if !approved {
for _, tag := range cache.approvedTags {
if property == tag {
approved = true
break
}
}
}
if !approved {
t.Errorf("extractMetaTagInfo returned unapproved property: %q", property)
}
}
// Content should match input if property is approved
if property != "" && content != contentValue {
t.Errorf("Content mismatch: want %q, got %q", contentValue, content)
}
})
}
// Benchmark comparison for the fuzzed scenarios
func BenchmarkFuzzedGetTarget(b *testing.B) {
// Test with various challenging inputs found during fuzzing
inputs := []struct {
name string
target string
path string
query string
}{
{"Simple", "http://example.com", "/api", "k=v"},
{"LongPath", "http://example.com", strings.Repeat("/segment", 50), ""},
{"LongQuery", "http://example.com", "/", strings.Repeat("param=value&", 50)},
{"Unicode", "http://example.com", "/путь/路径/path", "q=значение"},
{"Encoded", "http://example.com", "/path%20with%20spaces", "q=%3Cscript%3E"},
{"Unix", "unix:///tmp/socket.sock", "/api/v1/resource", "id=123&format=json"},
}
for _, input := range inputs {
b.Run(input.name, func(b *testing.B) {
cache := NewOGTagCache(input.target, false, 0, false)
u := &url.URL{Path: input.path, RawQuery: input.query}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = cache.getTarget(u)
}
})
}
}

View File

@@ -12,12 +12,15 @@ func (c *OGTagCache) extractOGTags(doc *html.Node) map[string]string {
var traverseNodes func(*html.Node)
traverseNodes = func(n *html.Node) {
// isOGMetaTag only checks if it's a <meta> tag.
// The actual filtering happens in extractMetaTagInfo now.
if isOGMetaTag(n) {
property, content := c.extractMetaTagInfo(n)
if property != "" {
ogTags[property] = content
}
}
for child := n.FirstChild; child != nil; child = child.NextSibling {
traverseNodes(child)
}
@@ -36,40 +39,43 @@ func isOGMetaTag(n *html.Node) bool {
}
// extractMetaTagInfo extracts property and content from a meta tag
// *and* checks if the property is approved.
// Returns empty property string if the tag is not approved.
func (c *OGTagCache) extractMetaTagInfo(n *html.Node) (property, content string) {
var propertyKey string
var rawProperty string // Store the property found before approval check
// Single pass through attributes, using range to avoid bounds checking
for _, attr := range n.Attr {
switch attr.Key {
case "property", "name":
propertyKey = attr.Val
case "content":
if attr.Key == "property" || attr.Key == "name" {
rawProperty = attr.Val
}
if attr.Key == "content" {
content = attr.Val
}
// Early exit if we have both
if propertyKey != "" && content != "" {
}
// Check if the rawProperty is approved
isApproved := false
for _, prefix := range c.approvedPrefixes {
if strings.HasPrefix(rawProperty, prefix) {
isApproved = true
break
}
}
if propertyKey == "" {
return "", content
}
// Check prefixes first (more common case)
for _, prefix := range c.approvedPrefixes {
if strings.HasPrefix(propertyKey, prefix) {
return propertyKey, content
// Check exact approved tags if not already approved by prefix
if !isApproved {
for _, tag := range c.approvedTags {
if rawProperty == tag {
isApproved = true
break
}
}
}
// Check exact matches
for _, tag := range c.approvedTags {
if propertyKey == tag {
return propertyKey, content
}
// Only return the property if it's approved
if isApproved {
property = rawProperty
}
return "", content
// Content is returned regardless, but property will be "" if not approved
return property, content
}

View File

@@ -595,7 +595,7 @@ func spawnAnubisWithOptions(t *testing.T, basePrefix string) string {
fmt.Fprintf(w, "<html><body><span id=anubis-test>%d</span></body></html>", time.Now().Unix())
})
policy, err := libanubis.LoadPoliciesOrDefault(t.Context(), "", anubis.DefaultDifficulty)
policy, err := libanubis.LoadPoliciesOrDefault("", anubis.DefaultDifficulty)
if err != nil {
t.Fatal(err)
}

View File

@@ -1,69 +0,0 @@
package thoth
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"strings"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy/checker"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
)
func (c *Client) ASNCheckerFor(asns []uint32) checker.Impl {
asnMap := map[uint32]struct{}{}
var sb strings.Builder
fmt.Fprintln(&sb, "ASNChecker")
for _, asn := range asns {
asnMap[asn] = struct{}{}
fmt.Fprintln(&sb, "AS", asn)
}
return &ASNChecker{
iptoasn: c.IPToASN,
asns: asnMap,
hash: internal.FastHash(sb.String()),
}
}
type ASNChecker struct {
iptoasn iptoasnv1.IpToASNServiceClient
asns map[uint32]struct{}
hash string
}
func (asnc *ASNChecker) Check(r *http.Request) (bool, error) {
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
defer cancel()
ipInfo, err := asnc.iptoasn.Lookup(ctx, &iptoasnv1.LookupRequest{
IpAddress: r.Header.Get("X-Real-Ip"),
})
if err != nil {
switch {
case errors.Is(err, context.DeadlineExceeded):
slog.Debug("error contacting thoth", "err", err, "actionable", false)
return false, nil
default:
slog.Error("error contacting thoth, please contact support", "err", err, "actionable", true)
return false, nil
}
}
// If IP is not publicly announced, return false
if !ipInfo.GetAnnounced() {
return false, nil
}
_, ok := asnc.asns[uint32(ipInfo.GetAsNumber())]
return ok, nil
}
func (asnc *ASNChecker) Hash() string {
return asnc.hash
}

View File

@@ -1,81 +0,0 @@
package thoth_test
import (
"fmt"
"net/http/httptest"
"testing"
"github.com/TecharoHQ/anubis/internal/thoth"
"github.com/TecharoHQ/anubis/lib/policy/checker"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
)
var _ checker.Impl = &thoth.ASNChecker{}
func TestASNChecker(t *testing.T) {
cli := loadSecrets(t)
asnc := cli.ASNCheckerFor([]uint32{13335})
for _, cs := range []struct {
ipAddress string
wantMatch bool
wantError bool
}{
{
ipAddress: "1.1.1.1",
wantMatch: true,
wantError: false,
},
{
ipAddress: "2.2.2.2",
wantMatch: false,
wantError: false,
},
{
ipAddress: "taco",
wantMatch: false,
wantError: false,
},
{
ipAddress: "127.0.0.1",
wantMatch: false,
wantError: false,
},
} {
t.Run(fmt.Sprintf("%v", cs), func(t *testing.T) {
req := httptest.NewRequest("GET", "/", nil)
req.Header.Set("X-Real-Ip", cs.ipAddress)
match, err := asnc.Check(req)
if match != cs.wantMatch {
t.Errorf("Wanted match: %v, got: %v", cs.wantMatch, match)
}
switch {
case err != nil && !cs.wantError:
t.Errorf("Did not want error but got: %v", err)
case err == nil && cs.wantError:
t.Error("Wanted error but got none")
}
})
}
}
func BenchmarkWithCache(b *testing.B) {
cli := loadSecrets(b)
req := &iptoasnv1.LookupRequest{IpAddress: "1.1.1.1"}
_, err := cli.IPToASN.Lookup(b.Context(), req)
if err != nil {
b.Error(err)
}
for b.Loop() {
_, err := cli.IPToASN.Lookup(b.Context(), req)
if err != nil {
b.Error(err)
}
}
}

View File

@@ -1,39 +0,0 @@
package thoth
import (
"context"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
func authUnaryClientInterceptor(token string) grpc.UnaryClientInterceptor {
return func(
ctx context.Context,
method string,
req interface{},
reply interface{},
cc *grpc.ClientConn,
invoker grpc.UnaryInvoker,
opts ...grpc.CallOption,
) error {
md := metadata.Pairs("authorization", "Bearer "+token)
ctx = metadata.NewOutgoingContext(ctx, md)
return invoker(ctx, method, req, reply, cc, opts...)
}
}
func authStreamClientInterceptor(token string) grpc.StreamClientInterceptor {
return func(
ctx context.Context,
desc *grpc.StreamDesc,
cc *grpc.ClientConn,
method string,
streamer grpc.Streamer,
opts ...grpc.CallOption,
) (grpc.ClientStream, error) {
md := metadata.Pairs("authorization", "Bearer "+token)
ctx = metadata.NewOutgoingContext(ctx, md)
return streamer(ctx, desc, cc, method, opts...)
}
}

View File

@@ -1,84 +0,0 @@
package thoth
import (
"context"
"errors"
"fmt"
"log/slog"
"net/netip"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
"github.com/gaissmai/bart"
"google.golang.org/grpc"
)
type IPToASNWithCache struct {
next iptoasnv1.IpToASNServiceClient
table *bart.Table[*iptoasnv1.LookupResponse]
}
func NewIpToASNWithCache(next iptoasnv1.IpToASNServiceClient) *IPToASNWithCache {
result := &IPToASNWithCache{
next: next,
table: &bart.Table[*iptoasnv1.LookupResponse]{},
}
for _, pfx := range []netip.Prefix{
netip.MustParsePrefix("10.0.0.0/8"), // RFC 1918
netip.MustParsePrefix("172.16.0.0/12"), // RFC 1918
netip.MustParsePrefix("192.168.0.0/16"), // RFC 1918
netip.MustParsePrefix("127.0.0.0/8"), // Loopback
netip.MustParsePrefix("169.254.0.0/16"), // Link-local
netip.MustParsePrefix("100.64.0.0/10"), // CGNAT
netip.MustParsePrefix("192.0.0.0/24"), // Protocol assignments
netip.MustParsePrefix("192.0.2.0/24"), // TEST-NET-1
netip.MustParsePrefix("198.18.0.0/15"), // Benchmarking
netip.MustParsePrefix("198.51.100.0/24"), // TEST-NET-2
netip.MustParsePrefix("203.0.113.0/24"), // TEST-NET-3
netip.MustParsePrefix("240.0.0.0/4"), // Reserved
netip.MustParsePrefix("255.255.255.255/32"), // Broadcast
netip.MustParsePrefix("fc00::/7"), // Unique local address
netip.MustParsePrefix("fe80::/10"), // Link-local
netip.MustParsePrefix("::1/128"), // Loopback
netip.MustParsePrefix("::/128"), // Unspecified
netip.MustParsePrefix("100::/64"), // Discard-only
netip.MustParsePrefix("2001:db8::/32"), // Documentation
} {
result.table.Insert(pfx, &iptoasnv1.LookupResponse{Announced: false})
}
return result
}
func (ip2asn *IPToASNWithCache) Lookup(ctx context.Context, lr *iptoasnv1.LookupRequest, opts ...grpc.CallOption) (*iptoasnv1.LookupResponse, error) {
addr, err := netip.ParseAddr(lr.GetIpAddress())
if err != nil {
return nil, fmt.Errorf("input is not an IP address: %w", err)
}
cachedResponse, ok := ip2asn.table.Lookup(addr)
if ok {
return cachedResponse, nil
}
resp, err := ip2asn.next.Lookup(ctx, lr, opts...)
if err != nil {
return nil, err
}
var errs []error
for _, cidr := range resp.GetCidr() {
pfx, err := netip.ParsePrefix(cidr)
if err != nil {
errs = append(errs, err)
continue
}
ip2asn.table.Insert(pfx, resp)
}
if len(errs) != 0 {
slog.Error("errors parsing IP prefixes", "err", errors.Join(errs...))
}
return resp, nil
}

View File

@@ -1,14 +0,0 @@
package thoth
import "context"
type ctxKey struct{}
func With(ctx context.Context, cli *Client) context.Context {
return context.WithValue(ctx, ctxKey{}, cli)
}
func FromContext(ctx context.Context) (*Client, bool) {
cli, ok := ctx.Value(ctxKey{}).(*Client)
return cli, ok
}

View File

@@ -1,68 +0,0 @@
package thoth
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"strings"
"time"
"github.com/TecharoHQ/anubis/lib/policy/checker"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
)
func (c *Client) GeoIPCheckerFor(countries []string) checker.Impl {
countryMap := map[string]struct{}{}
var sb strings.Builder
fmt.Fprintln(&sb, "GeoIPChecker")
for _, cc := range countries {
countryMap[cc] = struct{}{}
fmt.Fprintln(&sb, cc)
}
return &GeoIPChecker{
IPToASN: c.IPToASN,
Countries: countryMap,
hash: sb.String(),
}
}
type GeoIPChecker struct {
IPToASN iptoasnv1.IpToASNServiceClient
Countries map[string]struct{}
hash string
}
func (gipc *GeoIPChecker) Check(r *http.Request) (bool, error) {
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
defer cancel()
ipInfo, err := gipc.IPToASN.Lookup(ctx, &iptoasnv1.LookupRequest{
IpAddress: r.Header.Get("X-Real-Ip"),
})
if err != nil {
switch {
case errors.Is(err, context.DeadlineExceeded):
slog.Debug("error contacting thoth", "err", err, "actionable", false)
return false, nil
default:
slog.Error("error contacting thoth, please contact support", "err", err, "actionable", true)
return false, nil
}
}
// If IP is not publicly announced, return false
if !ipInfo.GetAnnounced() {
return false, nil
}
_, ok := gipc.Countries[strings.ToLower(ipInfo.GetCountryCode())]
return ok, nil
}
func (gipc *GeoIPChecker) Hash() string {
return gipc.hash
}

View File

@@ -1,63 +0,0 @@
package thoth_test
import (
"fmt"
"net/http/httptest"
"testing"
"github.com/TecharoHQ/anubis/internal/thoth"
"github.com/TecharoHQ/anubis/lib/policy/checker"
)
var _ checker.Impl = &thoth.GeoIPChecker{}
func TestGeoIPChecker(t *testing.T) {
cli := loadSecrets(t)
asnc := cli.GeoIPCheckerFor([]string{"us"})
for _, cs := range []struct {
ipAddress string
wantMatch bool
wantError bool
}{
{
ipAddress: "1.1.1.1",
wantMatch: true,
wantError: false,
},
{
ipAddress: "2.2.2.2",
wantMatch: false,
wantError: false,
},
{
ipAddress: "taco",
wantMatch: false,
wantError: false,
},
{
ipAddress: "127.0.0.1",
wantMatch: false,
wantError: false,
},
} {
t.Run(fmt.Sprintf("%v", cs), func(t *testing.T) {
req := httptest.NewRequest("GET", "/", nil)
req.Header.Set("X-Real-Ip", cs.ipAddress)
match, err := asnc.Check(req)
if match != cs.wantMatch {
t.Errorf("Wanted match: %v, got: %v", cs.wantMatch, match)
}
switch {
case err != nil && !cs.wantError:
t.Errorf("Did not want error but got: %v", err)
case err == nil && cs.wantError:
t.Error("Wanted error but got none")
}
})
}
}

View File

@@ -1,88 +0,0 @@
package thoth
import (
"context"
"crypto/tls"
"fmt"
"time"
"github.com/TecharoHQ/anubis"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/timeout"
"github.com/prometheus/client_golang/prometheus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
type Client struct {
conn *grpc.ClientConn
health healthv1.HealthClient
IPToASN iptoasnv1.IpToASNServiceClient
}
func New(ctx context.Context, thothURL, apiToken string, plaintext bool) (*Client, error) {
clMetrics := grpcprom.NewClientMetrics(
grpcprom.WithClientHandlingTimeHistogram(
grpcprom.WithHistogramBuckets([]float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}),
),
)
prometheus.DefaultRegisterer.Register(clMetrics)
do := []grpc.DialOption{
grpc.WithChainUnaryInterceptor(
timeout.UnaryClientInterceptor(500*time.Millisecond),
clMetrics.UnaryClientInterceptor(),
authUnaryClientInterceptor(apiToken),
),
grpc.WithChainStreamInterceptor(
clMetrics.StreamClientInterceptor(),
authStreamClientInterceptor(apiToken),
),
grpc.WithUserAgent(fmt.Sprint("Techaro/anubis:", anubis.Version)),
}
if plaintext {
do = append(do, grpc.WithTransportCredentials(insecure.NewCredentials()))
} else {
do = append(do, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{})))
}
conn, err := grpc.NewClient(
thothURL,
do...,
)
if err != nil {
return nil, fmt.Errorf("can't dial thoth at %s: %w", thothURL, err)
}
hc := healthv1.NewHealthClient(conn)
resp, err := hc.Check(ctx, &healthv1.HealthCheckRequest{})
if err != nil {
return nil, fmt.Errorf("can't verify thoth health at %s: %w", thothURL, err)
}
if resp.Status != healthv1.HealthCheckResponse_SERVING {
return nil, fmt.Errorf("thoth is not healthy, wanted %s but got %s", healthv1.HealthCheckResponse_SERVING, resp.Status)
}
return &Client{
conn: conn,
health: hc,
IPToASN: NewIpToASNWithCache(iptoasnv1.NewIpToASNServiceClient(conn)),
}, nil
}
func (c *Client) Close() error {
if c.conn != nil {
return c.conn.Close()
}
return nil
}
func (c *Client) WithIPToASNService(impl iptoasnv1.IpToASNServiceClient) {
c.IPToASN = impl
}

View File

@@ -1,36 +0,0 @@
package thoth_test
import (
"os"
"testing"
"github.com/TecharoHQ/anubis/internal/thoth"
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
"github.com/joho/godotenv"
)
func loadSecrets(t testing.TB) *thoth.Client {
t.Helper()
if err := godotenv.Load(); err != nil {
t.Log("using mock thoth")
result := &thoth.Client{}
result.WithIPToASNService(thothmock.MockIpToASNService())
return result
}
cli, err := thoth.New(t.Context(), os.Getenv("THOTH_URL"), os.Getenv("THOTH_API_KEY"), false)
if err != nil {
t.Fatal(err)
}
return cli
}
func TestNew(t *testing.T) {
cli := loadSecrets(t)
if err := cli.Close(); err != nil {
t.Fatal(err)
}
}

View File

@@ -1,59 +0,0 @@
package thothmock
import (
"context"
"net/netip"
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
func MockIpToASNService() *IpToASNService {
responses := map[string]*iptoasnv1.LookupResponse{
"127.0.0.1": {Announced: false},
"::1": {Announced: false},
"10.10.10.10": {
Announced: true,
AsNumber: 13335,
Cidr: []string{"1.1.1.0/24"},
CountryCode: "US",
Description: "Cloudflare",
},
"2.2.2.2": {
Announced: true,
AsNumber: 420,
Cidr: []string{"2.2.2.0/24"},
CountryCode: "CA",
Description: "test canada",
},
"1.1.1.1": {
Announced: true,
AsNumber: 13335,
Cidr: []string{"1.1.1.0/24"},
CountryCode: "US",
Description: "Cloudflare",
},
}
return &IpToASNService{Responses: responses}
}
type IpToASNService struct {
iptoasnv1.UnimplementedIpToASNServiceServer
Responses map[string]*iptoasnv1.LookupResponse
}
func (ip2asn *IpToASNService) Lookup(ctx context.Context, lr *iptoasnv1.LookupRequest, opts ...grpc.CallOption) (*iptoasnv1.LookupResponse, error) {
if _, err := netip.ParseAddr(lr.GetIpAddress()); err != nil {
return nil, err
}
resp, ok := ip2asn.Responses[lr.GetIpAddress()]
if !ok {
return nil, status.Error(codes.NotFound, "IP address not found in mock")
}
return resp, nil
}

View File

@@ -1,17 +0,0 @@
package thothmock
import (
"context"
"testing"
"github.com/TecharoHQ/anubis/internal/thoth"
)
func WithMockThoth(t *testing.T) context.Context {
t.Helper()
thothCli := &thoth.Client{}
thothCli.WithIPToASNService(MockIpToASNService())
ctx := thoth.With(t.Context(), thothCli)
return ctx
}

View File

@@ -26,7 +26,6 @@ import (
"github.com/TecharoHQ/anubis/internal/ogtags"
"github.com/TecharoHQ/anubis/lib/challenge"
"github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
// challenge implementations
@@ -90,7 +89,7 @@ func (s *Server) challengeFor(r *http.Request, difficulty int) string {
fp,
difficulty,
)
return internal.FastHash(challengeData)
return internal.SHA256sum(challengeData)
}
func (s *Server) maybeReverseProxyHttpStatusOnly(w http.ResponseWriter, r *http.Request) {
@@ -289,15 +288,15 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) {
return
}
lg = lg.With("check_result", cr)
chal := s.challengeFor(r, rule.Challenge.Difficulty)
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
s.SetCookie(w, anubis.TestCookieName, chal, "/")
s.SetCookie(w, anubis.TestCookieName, challenge, "/")
err = encoder.Encode(struct {
Rules *config.ChallengeRules `json:"rules"`
Challenge string `json:"challenge"`
}{
Challenge: chal,
Challenge: challenge,
Rules: rule.Challenge,
})
if err != nil {
@@ -305,7 +304,7 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
return
}
lg.Debug("made challenge", "challenge", chal, "rules", rule.Challenge, "cr", cr)
lg.Debug("made challenge", "challenge", challenge, "rules", rule.Challenge, "cr", cr)
challengesIssued.WithLabelValues("api").Inc()
}
@@ -318,7 +317,7 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
cookiePath = strings.TrimSuffix(anubis.BasePrefix, "/") + "/"
}
if _, err := r.Cookie(anubis.TestCookieName); errors.Is(err, http.ErrNoCookie) {
if _, err := r.Cookie(anubis.TestCookieName); err == http.ErrNoCookie {
s.ClearCookie(w, s.cookieName, cookiePath)
s.ClearCookie(w, anubis.TestCookieName, "/")
lg.Warn("user has cookies disabled, this is not an anubis bug")
@@ -366,7 +365,7 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
challengeStr := s.challengeFor(r, rule.Challenge.Difficulty)
if err := impl.Validate(r, lg, rule, challengeStr); err != nil {
failedValidations.WithLabelValues(rule.Challenge.Algorithm).Inc()
failedValidations.WithLabelValues(string(rule.Challenge.Algorithm)).Inc()
var cerr *challenge.Error
s.ClearCookie(w, s.cookieName, cookiePath)
lg.Debug("challenge validate call failed", "err", err)
@@ -484,7 +483,7 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
ReportAs: s.policy.DefaultDifficulty,
Algorithm: config.DefaultAlgorithm,
},
Rules: &checker.List{},
Rules: &policy.CheckerList{},
}, nil
}

View File

@@ -15,7 +15,6 @@ import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
"github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
@@ -27,9 +26,7 @@ func init() {
func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
t.Helper()
ctx := thothmock.WithMockThoth(t)
anubisPolicy, err := LoadPoliciesOrDefault(ctx, fname, anubis.DefaultDifficulty)
anubisPolicy, err := LoadPoliciesOrDefault(fname, anubis.DefaultDifficulty)
if err != nil {
t.Fatal(err)
}
@@ -167,7 +164,7 @@ func TestLoadPolicies(t *testing.T) {
}
defer fin.Close()
if _, err := policy.ParseConfig(t.Context(), fin, fname, 4); err != nil {
if _, err := policy.ParseConfig(fin, fname, 4); err != nil {
t.Fatal(err)
}
})
@@ -316,7 +313,7 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) {
for i := 1; i < 10; i++ {
t.Run(fmt.Sprint(i), func(t *testing.T) {
anubisPolicy, err := LoadPoliciesOrDefault(t.Context(), "", i)
anubisPolicy, err := LoadPoliciesOrDefault("", i)
if err != nil {
t.Fatal(err)
}
@@ -635,102 +632,3 @@ func TestRuleChange(t *testing.T) {
t.Errorf("wanted %d, got: %d", http.StatusFound, resp.StatusCode)
}
}
func TestStripBasePrefixFromRequest(t *testing.T) {
testCases := []struct {
name string
basePrefix string
stripBasePrefix bool
requestPath string
expectedPath string
}{
{
name: "strip disabled - no change",
basePrefix: "/foo",
stripBasePrefix: false,
requestPath: "/foo/bar",
expectedPath: "/foo/bar",
},
{
name: "strip enabled - removes prefix",
basePrefix: "/foo",
stripBasePrefix: true,
requestPath: "/foo/bar",
expectedPath: "/bar",
},
{
name: "strip enabled - root becomes slash",
basePrefix: "/foo",
stripBasePrefix: true,
requestPath: "/foo",
expectedPath: "/",
},
{
name: "strip enabled - trailing slash on base prefix",
basePrefix: "/foo/",
stripBasePrefix: true,
requestPath: "/foo/bar",
expectedPath: "/bar",
},
{
name: "strip enabled - no prefix match",
basePrefix: "/foo",
stripBasePrefix: true,
requestPath: "/other/bar",
expectedPath: "/other/bar",
},
{
name: "strip enabled - empty base prefix",
basePrefix: "",
stripBasePrefix: true,
requestPath: "/foo/bar",
expectedPath: "/foo/bar",
},
{
name: "strip enabled - nested path",
basePrefix: "/app",
stripBasePrefix: true,
requestPath: "/app/api/v1/users",
expectedPath: "/api/v1/users",
},
{
name: "strip enabled - exact match becomes root",
basePrefix: "/myapp",
stripBasePrefix: true,
requestPath: "/myapp/",
expectedPath: "/",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
srv := &Server{
opts: Options{
BasePrefix: tc.basePrefix,
StripBasePrefix: tc.stripBasePrefix,
},
}
req := httptest.NewRequest(http.MethodGet, tc.requestPath, nil)
originalPath := req.URL.Path
result := srv.stripBasePrefixFromRequest(req)
if result.URL.Path != tc.expectedPath {
t.Errorf("expected path %q, got %q", tc.expectedPath, result.URL.Path)
}
// Ensure original request is not modified when no stripping should occur
if !tc.stripBasePrefix || tc.basePrefix == "" || !strings.HasPrefix(tc.requestPath, strings.TrimSuffix(tc.basePrefix, "/")) {
if result != req {
t.Error("expected same request object when no modification needed")
}
} else {
// Ensure original request is not modified when stripping occurs
if req.URL.Path != originalPath {
t.Error("original request was modified")
}
}
})
}
}

View File

@@ -22,9 +22,9 @@ func NewError(verb, publicReason string, privateReason error) *Error {
}
type Error struct {
PrivateReason error
Verb string
PublicReason string
PrivateReason error
StatusCode int
}

View File

@@ -1,7 +1,6 @@
package lib
import (
"context"
"crypto/ed25519"
"crypto/rand"
"errors"
@@ -37,14 +36,13 @@ type Options struct {
PrivateKey ed25519.PrivateKey
CookieExpiration time.Duration
OGTimeToLive time.Duration
StripBasePrefix bool
OGCacheConsidersHost bool
OGPassthrough bool
CookiePartitioned bool
ServeRobotsTXT bool
}
func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
func LoadPoliciesOrDefault(fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
var fin io.ReadCloser
var err error
@@ -68,10 +66,7 @@ func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty
}
}(fin)
anubisPolicy, err := policy.ParseConfig(ctx, fin, fname, defaultDifficulty)
if err != nil {
return nil, fmt.Errorf("can't parse policy file %s: %w", fname, err)
}
anubisPolicy, err := policy.ParseConfig(fin, fname, defaultDifficulty)
var validationErrs []error
for _, b := range anubisPolicy.Bots {
@@ -159,7 +154,7 @@ func New(opts Options) (*Server, error) {
// make-challenge is only used in tests. Only enable while version is devel
registerWithPrefix(anubis.APIPrefix+"make-challenge", http.HandlerFunc(result.MakeChallenge), "POST")
}
for _, implKind := range challenge.Methods() {
impl, _ := challenge.Get(implKind)
impl.Setup(mux)

View File

@@ -7,12 +7,11 @@ import (
"testing"
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
"github.com/TecharoHQ/anubis/lib/policy"
)
func TestInvalidChallengeMethod(t *testing.T) {
if _, err := LoadPoliciesOrDefault(t.Context(), "testdata/invalid-challenge-method.yaml", 4); !errors.Is(err, policy.ErrChallengeRuleHasWrongAlgorithm) {
if _, err := LoadPoliciesOrDefault("testdata/invalid-challenge-method.yaml", 4); !errors.Is(err, policy.ErrChallengeRuleHasWrongAlgorithm) {
t.Fatalf("wanted error %v but got %v", policy.ErrChallengeRuleHasWrongAlgorithm, err)
}
}
@@ -26,7 +25,7 @@ func TestBadConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err == nil {
if _, err := LoadPoliciesOrDefault(filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err == nil {
t.Fatal(err)
} else {
t.Log(err)
@@ -44,18 +43,9 @@ func TestGoodConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
t.Run("with-thoth", func(t *testing.T) {
ctx := thothmock.WithMockThoth(t)
if _, err := LoadPoliciesOrDefault(ctx, filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
t.Run("without-thoth", func(t *testing.T) {
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
if _, err := LoadPoliciesOrDefault(filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
}
}

View File

@@ -134,32 +134,6 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.mux.ServeHTTP(w, r)
}
func (s *Server) stripBasePrefixFromRequest(r *http.Request) *http.Request {
if !s.opts.StripBasePrefix || s.opts.BasePrefix == "" {
return r
}
basePrefix := strings.TrimSuffix(s.opts.BasePrefix, "/")
path := r.URL.Path
if !strings.HasPrefix(path, basePrefix) {
return r
}
trimmedPath := strings.TrimPrefix(path, basePrefix)
if trimmedPath == "" {
trimmedPath = "/"
}
// Clone the request and URL
reqCopy := r.Clone(r.Context())
urlCopy := *r.URL
urlCopy.Path = trimmedPath
reqCopy.URL = &urlCopy
return reqCopy
}
func (s *Server) ServeHTTPNext(w http.ResponseWriter, r *http.Request) {
if s.next == nil {
redir := r.FormValue("redir")
@@ -184,7 +158,6 @@ func (s *Server) ServeHTTPNext(w http.ResponseWriter, r *http.Request) {
).ServeHTTP(w, r)
} else {
requestsProxied.WithLabelValues(r.Host).Inc()
r = s.stripBasePrefixFromRequest(r)
s.next.ServeHTTP(w, r)
}
}

View File

@@ -4,18 +4,17 @@ import (
"fmt"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
type Bot struct {
Rules checker.Impl
Rules Checker
Challenge *config.ChallengeRules
Weight *config.Weight
Name string
Action config.Rule
Weight *config.Weight
}
func (b Bot) Hash() string {
return internal.FastHash(fmt.Sprintf("%s::%s", b.Name, b.Rules.Hash()))
return internal.SHA256sum(fmt.Sprintf("%s::%s", b.Name, b.Rules.Hash()))
}

View File

@@ -63,7 +63,7 @@ func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) {
}
func (cc *CELChecker) Hash() string {
return internal.FastHash(cc.src)
return internal.SHA256sum(cc.src)
}
func (cc *CELChecker) Check(r *http.Request) (bool, error) {

View File

@@ -3,20 +3,50 @@ package policy
import (
"errors"
"fmt"
"net"
"net/http"
"net/netip"
"regexp"
"strings"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/gaissmai/bart"
"github.com/yl2chen/cidranger"
)
var (
ErrMisconfiguration = errors.New("[unexpected] policy: administrator misconfiguration")
)
type Checker interface {
Check(*http.Request) (bool, error)
Hash() string
}
type CheckerList []Checker
func (cl CheckerList) Check(r *http.Request) (bool, error) {
for _, c := range cl {
ok, err := c.Check(r)
if err != nil {
return ok, err
}
if ok {
return ok, nil
}
}
return false, nil
}
func (cl CheckerList) Hash() string {
var sb strings.Builder
for _, c := range cl {
fmt.Fprintln(&sb, c.Hash())
}
return internal.SHA256sum(sb.String())
}
type staticHashChecker struct {
hash string
}
@@ -27,30 +57,35 @@ func (staticHashChecker) Check(r *http.Request) (bool, error) {
func (s staticHashChecker) Hash() string { return s.hash }
func NewStaticHashChecker(hashable string) checker.Impl {
return staticHashChecker{hash: internal.FastHash(hashable)}
func NewStaticHashChecker(hashable string) Checker {
return staticHashChecker{hash: internal.SHA256sum(hashable)}
}
type RemoteAddrChecker struct {
prefixTable *bart.Lite
hash string
ranger cidranger.Ranger
hash string
}
func NewRemoteAddrChecker(cidrs []string) (checker.Impl, error) {
table := new(bart.Lite)
func NewRemoteAddrChecker(cidrs []string) (Checker, error) {
ranger := cidranger.NewPCTrieRanger()
var sb strings.Builder
for _, cidr := range cidrs {
prefix, err := netip.ParsePrefix(cidr)
_, rng, err := net.ParseCIDR(cidr)
if err != nil {
return nil, fmt.Errorf("%w: range %s not parsing: %w", ErrMisconfiguration, cidr, err)
}
table.Insert(prefix)
err = ranger.Insert(cidranger.NewBasicRangerEntry(*rng))
if err != nil {
return nil, fmt.Errorf("%w: error inserting ip range: %w", ErrMisconfiguration, err)
}
fmt.Fprintln(&sb, cidr)
}
return &RemoteAddrChecker{
prefixTable: table,
hash: internal.FastHash(strings.Join(cidrs, ",")),
ranger: ranger,
hash: internal.SHA256sum(sb.String()),
}, nil
}
@@ -60,12 +95,21 @@ func (rac *RemoteAddrChecker) Check(r *http.Request) (bool, error) {
return false, fmt.Errorf("%w: header X-Real-Ip is not set", ErrMisconfiguration)
}
addr, err := netip.ParseAddr(host)
if err != nil {
return false, fmt.Errorf("%w: %s is not an IP address: %w", ErrMisconfiguration, host, err)
addr := net.ParseIP(host)
if addr == nil {
return false, fmt.Errorf("%w: %s is not an IP address", ErrMisconfiguration, host)
}
return rac.prefixTable.Contains(addr), nil
ok, err := rac.ranger.Contains(addr)
if err != nil {
return false, err
}
if ok {
return true, nil
}
return false, nil
}
func (rac *RemoteAddrChecker) Hash() string {
@@ -78,16 +122,16 @@ type HeaderMatchesChecker struct {
hash string
}
func NewUserAgentChecker(rexStr string) (checker.Impl, error) {
func NewUserAgentChecker(rexStr string) (Checker, error) {
return NewHeaderMatchesChecker("User-Agent", rexStr)
}
func NewHeaderMatchesChecker(header, rexStr string) (checker.Impl, error) {
func NewHeaderMatchesChecker(header, rexStr string) (Checker, error) {
rex, err := regexp.Compile(strings.TrimSpace(rexStr))
if err != nil {
return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err)
}
return &HeaderMatchesChecker{strings.TrimSpace(header), rex, internal.FastHash(header + ": " + rexStr)}, nil
return &HeaderMatchesChecker{strings.TrimSpace(header), rex, internal.SHA256sum(header + ": " + rexStr)}, nil
}
func (hmc *HeaderMatchesChecker) Check(r *http.Request) (bool, error) {
@@ -107,12 +151,12 @@ type PathChecker struct {
hash string
}
func NewPathChecker(rexStr string) (checker.Impl, error) {
func NewPathChecker(rexStr string) (Checker, error) {
rex, err := regexp.Compile(strings.TrimSpace(rexStr))
if err != nil {
return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err)
}
return &PathChecker{rex, internal.FastHash(rexStr)}, nil
return &PathChecker{rex, internal.SHA256sum(rexStr)}, nil
}
func (pc *PathChecker) Check(r *http.Request) (bool, error) {
@@ -127,7 +171,7 @@ func (pc *PathChecker) Hash() string {
return pc.hash
}
func NewHeaderExistsChecker(key string) checker.Impl {
func NewHeaderExistsChecker(key string) Checker {
return headerExistsChecker{strings.TrimSpace(key)}
}
@@ -144,11 +188,11 @@ func (hec headerExistsChecker) Check(r *http.Request) (bool, error) {
}
func (hec headerExistsChecker) Hash() string {
return internal.FastHash(hec.header)
return internal.SHA256sum(hec.header)
}
func NewHeadersChecker(headermap map[string]string) (checker.Impl, error) {
var result checker.List
func NewHeadersChecker(headermap map[string]string) (Checker, error) {
var result CheckerList
var errs []error
for key, rexStr := range headermap {
@@ -163,7 +207,7 @@ func NewHeadersChecker(headermap map[string]string) (checker.Impl, error) {
continue
}
result = append(result, &HeaderMatchesChecker{key, rex, internal.FastHash(key + ": " + rexStr)})
result = append(result, &HeaderMatchesChecker{key, rex, internal.SHA256sum(key + ": " + rexStr)})
}
if len(errs) != 0 {

View File

@@ -1,41 +0,0 @@
// Package checker defines the Checker interface and a helper utility to avoid import cycles.
package checker
import (
"fmt"
"net/http"
"strings"
"github.com/TecharoHQ/anubis/internal"
)
type Impl interface {
Check(*http.Request) (bool, error)
Hash() string
}
type List []Impl
func (l List) Check(r *http.Request) (bool, error) {
for _, c := range l {
ok, err := c.Check(r)
if err != nil {
return ok, err
}
if ok {
return ok, nil
}
}
return false, nil
}
func (l List) Hash() string {
var sb strings.Builder
for _, c := range l {
fmt.Fprintln(&sb, c.Hash())
}
return internal.FastHash(sb.String())
}

View File

@@ -1,44 +0,0 @@
package config
import (
"errors"
"fmt"
)
var (
ErrPrivateASN = errors.New("bot.ASNs: you have specified a private use ASN")
)
type ASNs struct {
Match []uint32 `json:"match"`
}
func (a *ASNs) Valid() error {
var errs []error
for _, asn := range a.Match {
if isPrivateASN(asn) {
errs = append(errs, fmt.Errorf("%w: %d is private (see RFC 6996)", ErrPrivateASN, asn))
}
}
if len(errs) != 0 {
return fmt.Errorf("bot.ASNs: invalid ASN settings: %w", errors.Join(errs...))
}
return nil
}
// isPrivateASN checks if an ASN is in the private use area.
//
// Based on RFC 6996 and IANA allocations.
func isPrivateASN(asn uint32) bool {
switch {
case asn >= 64512 && asn <= 65534:
return true
case asn >= 4200000000 && asn <= 4294967294:
return true
default:
return false
}
}

View File

@@ -46,19 +46,15 @@ const (
const DefaultAlgorithm = "fast"
type BotConfig struct {
UserAgentRegex *string `json:"user_agent_regex,omitempty" yaml:"user_agent_regex,omitempty"`
PathRegex *string `json:"path_regex,omitempty" yaml:"path_regex,omitempty"`
HeadersRegex map[string]string `json:"headers_regex,omitempty" yaml:"headers_regex,omitempty"`
Expression *ExpressionOrList `json:"expression,omitempty" yaml:"expression,omitempty"`
Challenge *ChallengeRules `json:"challenge,omitempty" yaml:"challenge,omitempty"`
Weight *Weight `json:"weight,omitempty" yaml:"weight,omitempty"`
Name string `json:"name" yaml:"name"`
Action Rule `json:"action" yaml:"action"`
RemoteAddr []string `json:"remote_addresses,omitempty" yaml:"remote_addresses,omitempty"`
// Thoth features
GeoIP *GeoIP `json:"geoip,omitempty"`
ASNs *ASNs `json:"asns,omitempty"`
UserAgentRegex *string `json:"user_agent_regex,omitempty"`
PathRegex *string `json:"path_regex,omitempty"`
HeadersRegex map[string]string `json:"headers_regex,omitempty"`
Expression *ExpressionOrList `json:"expression,omitempty"`
Challenge *ChallengeRules `json:"challenge,omitempty"`
Weight *Weight `json:"weight,omitempty"`
Name string `json:"name"`
Action Rule `json:"action"`
RemoteAddr []string `json:"remote_addresses,omitempty"`
}
func (b BotConfig) Zero() bool {
@@ -70,8 +66,6 @@ func (b BotConfig) Zero() bool {
b.Action != "",
len(b.RemoteAddr) != 0,
b.Challenge != nil,
b.GeoIP != nil,
b.ASNs != nil,
} {
if cond {
return false
@@ -91,9 +85,7 @@ func (b *BotConfig) Valid() error {
allFieldsEmpty := b.UserAgentRegex == nil &&
b.PathRegex == nil &&
len(b.RemoteAddr) == 0 &&
len(b.HeadersRegex) == 0 &&
b.ASNs == nil &&
b.GeoIP == nil
len(b.HeadersRegex) == 0
if allFieldsEmpty && b.Expression == nil {
errs = append(errs, ErrBotMustHaveUserAgentOrPath)
@@ -178,9 +170,9 @@ func (b *BotConfig) Valid() error {
}
type ChallengeRules struct {
Algorithm string `json:"algorithm,omitempty" yaml:"algorithm,omitempty"`
Difficulty int `json:"difficulty,omitempty" yaml:"difficulty,omitempty"`
ReportAs int `json:"report_as,omitempty" yaml:"report_as,omitempty"`
Algorithm string `json:"algorithm"`
Difficulty int `json:"difficulty"`
ReportAs int `json:"report_as"`
}
var (

View File

@@ -13,9 +13,9 @@ var (
)
type ExpressionOrList struct {
Expression string `json:"-" yaml:"-"`
All []string `json:"all,omitempty" yaml:"all,omitempty"`
Any []string `json:"any,omitempty" yaml:"any,omitempty"`
Expression string `json:"-"`
All []string `json:"all,omitempty"`
Any []string `json:"any,omitempty"`
}
func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
@@ -34,43 +34,6 @@ func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
return true
}
func (eol *ExpressionOrList) MarshalYAML() (any, error) {
switch {
case len(eol.All) == 1 && len(eol.Any) == 0:
eol.Expression = eol.All[0]
eol.All = nil
case len(eol.Any) == 1 && len(eol.All) == 0:
eol.Expression = eol.Any[0]
eol.Any = nil
}
if eol.Expression != "" {
return eol.Expression, nil
}
type RawExpressionOrList ExpressionOrList
return RawExpressionOrList(*eol), nil
}
func (eol *ExpressionOrList) MarshalJSON() ([]byte, error) {
switch {
case len(eol.All) == 1 && len(eol.Any) == 0:
eol.Expression = eol.All[0]
eol.All = nil
case len(eol.Any) == 1 && len(eol.All) == 0:
eol.Expression = eol.Any[0]
eol.Any = nil
}
if eol.Expression != "" {
return json.Marshal(string(eol.Expression))
}
type RawExpressionOrList ExpressionOrList
val := RawExpressionOrList(*eol)
return json.Marshal(val)
}
func (eol *ExpressionOrList) UnmarshalJSON(data []byte) error {
switch string(data[0]) {
case `"`: // string

View File

@@ -1,147 +1,12 @@
package config
import (
"bytes"
"encoding/json"
"errors"
"testing"
yaml "sigs.k8s.io/yaml/goyaml.v3"
)
func TestExpressionOrListMarshalJSON(t *testing.T) {
for _, tt := range []struct {
name string
input *ExpressionOrList
output []byte
err error
}{
{
name: "single expression",
input: &ExpressionOrList{
Expression: "true",
},
output: []byte(`"true"`),
err: nil,
},
{
name: "all",
input: &ExpressionOrList{
All: []string{"true", "true"},
},
output: []byte(`{"all":["true","true"]}`),
err: nil,
},
{
name: "all one",
input: &ExpressionOrList{
All: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
{
name: "any",
input: &ExpressionOrList{
Any: []string{"true", "false"},
},
output: []byte(`{"any":["true","false"]}`),
err: nil,
},
{
name: "any one",
input: &ExpressionOrList{
Any: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
} {
t.Run(tt.name, func(t *testing.T) {
result, err := json.Marshal(tt.input)
if !errors.Is(err, tt.err) {
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
}
if !bytes.Equal(result, tt.output) {
t.Logf("wanted: %s", string(tt.output))
t.Logf("got: %s", string(result))
t.Error("mismatched output")
}
})
}
}
func TestExpressionOrListMarshalYAML(t *testing.T) {
for _, tt := range []struct {
name string
input *ExpressionOrList
output []byte
err error
}{
{
name: "single expression",
input: &ExpressionOrList{
Expression: "true",
},
output: []byte(`"true"`),
err: nil,
},
{
name: "all",
input: &ExpressionOrList{
All: []string{"true", "true"},
},
output: []byte(`all:
- "true"
- "true"`),
err: nil,
},
{
name: "all one",
input: &ExpressionOrList{
All: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
{
name: "any",
input: &ExpressionOrList{
Any: []string{"true", "false"},
},
output: []byte(`any:
- "true"
- "false"`),
err: nil,
},
{
name: "any one",
input: &ExpressionOrList{
Any: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
} {
t.Run(tt.name, func(t *testing.T) {
result, err := yaml.Marshal(tt.input)
if !errors.Is(err, tt.err) {
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
}
result = bytes.TrimSpace(result)
if !bytes.Equal(result, tt.output) {
t.Logf("wanted: %q", string(tt.output))
t.Logf("got: %q", string(result))
t.Error("mismatched output")
}
})
}
}
func TestExpressionOrListUnmarshalJSON(t *testing.T) {
func TestExpressionOrListUnmarshal(t *testing.T) {
for _, tt := range []struct {
err error
validErr error

View File

@@ -1,36 +0,0 @@
package config
import (
"errors"
"fmt"
"regexp"
"strings"
)
var (
countryCodeRegexp = regexp.MustCompile(`^\w{2}$`)
ErrNotCountryCode = errors.New("config.Bot: invalid country code")
)
type GeoIP struct {
Countries []string `json:"countries"`
}
func (g *GeoIP) Valid() error {
var errs []error
for i, cc := range g.Countries {
if !countryCodeRegexp.MatchString(cc) {
errs = append(errs, fmt.Errorf("%w: %s", ErrNotCountryCode, cc))
}
g.Countries[i] = strings.ToLower(cc)
}
if len(errs) != 0 {
return fmt.Errorf("bot.GeoIP: invalid GeoIP settings: %w", errors.Join(errs...))
}
return nil
}

View File

@@ -1,6 +0,0 @@
bots:
- name: challenge-cloudflare
action: CHALLENGE
asns:
match:
- 13335 # Cloudflare

View File

@@ -1,6 +0,0 @@
bots:
- name: compute-tarrif-us
action: CHALLENGE
geoip:
countries:
- US

View File

@@ -1,5 +1,5 @@
package config
type Weight struct {
Adjust int `json:"adjust" yaml:"adjust"`
Adjust int `json:"adjust"`
}

View File

@@ -1,14 +1,10 @@
package policy
import (
"context"
"errors"
"fmt"
"io"
"log/slog"
"github.com/TecharoHQ/anubis/internal/thoth"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -39,7 +35,7 @@ func NewParsedConfig(orig *config.Config) *ParsedConfig {
}
}
func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
c, err := config.Load(fin, fname)
if err != nil {
return nil, err
@@ -47,8 +43,6 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
var validationErrs []error
tc, hasThothClient := thoth.FromContext(ctx)
result := NewParsedConfig(c)
result.DefaultDifficulty = defaultDifficulty
@@ -63,7 +57,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
Action: b.Action,
}
cl := checker.List{}
cl := CheckerList{}
if len(b.RemoteAddr) > 0 {
c, err := NewRemoteAddrChecker(b.RemoteAddr)
@@ -110,24 +104,6 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
}
}
if b.ASNs != nil {
if !hasThothClient {
slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "asn", "settings", b.ASNs)
continue
}
cl = append(cl, tc.ASNCheckerFor(b.ASNs.Match))
}
if b.GeoIP != nil {
if !hasThothClient {
slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "geoip", "settings", b.GeoIP)
continue
}
cl = append(cl, tc.GeoIPCheckerFor(b.GeoIP.Countries))
}
if b.Challenge == nil {
parsedBot.Challenge = &config.ChallengeRules{
Difficulty: defaultDifficulty,

View File

@@ -7,25 +7,21 @@ import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
)
func TestDefaultPolicyMustParse(t *testing.T) {
ctx := thothmock.WithMockThoth(t)
fin, err := data.BotPolicies.Open("botPolicies.json")
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(ctx, fin, "botPolicies.json", anubis.DefaultDifficulty); err != nil {
if _, err := ParseConfig(fin, "botPolicies.json", anubis.DefaultDifficulty); err != nil {
t.Fatalf("can't parse config: %v", err)
}
}
func TestGoodConfigs(t *testing.T) {
finfos, err := os.ReadDir("config/testdata/good")
if err != nil {
t.Fatal(err)
@@ -34,37 +30,20 @@ func TestGoodConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
t.Run("with-thoth", func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
ctx := thothmock.WithMockThoth(t)
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
t.Run("without-thoth", func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(t.Context(), fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
t.Fatal(err)
}
})
}
}
func TestBadConfigs(t *testing.T) {
ctx := thothmock.WithMockThoth(t)
finfos, err := os.ReadDir("config/testdata/bad")
if err != nil {
t.Fatal(err)
@@ -79,7 +58,7 @@ func TestBadConfigs(t *testing.T) {
}
defer fin.Close()
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err == nil {
if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err == nil {
t.Fatal(err)
} else {
t.Log(err)

View File

@@ -5,28 +5,22 @@ go 1.24.2
replace github.com/TecharoHQ/anubis => ..
require (
github.com/TecharoHQ/anubis v1.19.1
github.com/TecharoHQ/anubis v1.18.0
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
github.com/google/uuid v1.6.0
)
require (
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 // indirect
cel.dev/expr v0.24.0 // indirect
github.com/TecharoHQ/thoth-proto v0.4.0 // indirect
github.com/a-h/templ v0.3.898 // indirect
github.com/a-h/templ v0.3.865 // indirect
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect
github.com/gaissmai/bart v0.20.4 // indirect
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
github.com/google/cel-go v0.25.0 // indirect
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 // indirect
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect
github.com/joho/godotenv v1.5.1 // indirect
github.com/jsha/minica v1.1.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
@@ -38,12 +32,11 @@ require (
github.com/stoewer/go-strcase v1.3.0 // indirect
github.com/yl2chen/cidranger v1.0.2 // indirect
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect
golang.org/x/net v0.41.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/text v0.26.0 // indirect
golang.org/x/text v0.25.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/grpc v1.72.2 // indirect
google.golang.org/protobuf v1.36.6 // indirect
k8s.io/apimachinery v0.33.1 // indirect
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect

View File

@@ -1,11 +1,7 @@
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 h1:YhMSc48s25kr7kv31Z8vf7sPUIq5YJva9z1mn/hAt0M=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1/go.mod h1:avRlCjnFzl98VPaeCtJ24RrV/wwHFzB8sWXhj26+n/U=
cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY=
cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
github.com/TecharoHQ/thoth-proto v0.4.0 h1:UbkvfgCku0Dm1R6O4ug3HOsJNnE6F3wB8x+Dpw2lzFI=
github.com/TecharoHQ/thoth-proto v0.4.0/go.mod h1:IcGnZt3iYUZQVEa0Lwk5l4ix0hCeXlWUV1TJMZvbWx0=
github.com/a-h/templ v0.3.898 h1:g9oxL/dmM6tvwRe2egJS8hBDQTncokbMoOFk1oJMX7s=
github.com/a-h/templ v0.3.898/go.mod h1:oLBbZVQ6//Q6zpvSMPTuBK0F3qOtBdFBcGRspcT+VNQ=
github.com/a-h/templ v0.3.865 h1:nYn5EWm9EiXaDgWcMQaKiKvrydqgxDUtT1+4zU2C43A=
github.com/a-h/templ v0.3.865/go.mod h1:oLBbZVQ6//Q6zpvSMPTuBK0F3qOtBdFBcGRspcT+VNQ=
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -25,16 +21,8 @@ github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojt
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg=
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 h1:7HZCaLC5+BZpmbhCOZJ293Lz68O7PYrF2EzeiFMwCLk=
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0=
github.com/gaissmai/bart v0.20.4 h1:Ik47r1fy3jRVU+1eYzKSW3ho2UgBVTVnUS8O993584U=
github.com/gaissmai/bart v0.20.4/go.mod h1:cEed+ge8dalcbpi8wtS9x9m2hn/fNJH5suhdGQOHnYk=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/cel-go v0.25.0 h1:jsFw9Fhn+3y2kBbltZR4VEz5xKkcIFRPDnuEzAGv5GY=
github.com/google/cel-go v0.25.0/go.mod h1:hjEb6r5SuOSlhCHmFoLzu8HGCERvIsDAbxDAyNU/MmI=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -42,12 +30,6 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/jsha/minica v1.1.0 h1:O2ZbzAN75w4RTB+5+HfjIEvY5nxRqDlwj3ZlLVG5JD8=
github.com/jsha/minica v1.1.0/go.mod h1:dxC3wNmD+gU1ewXo/R8jB2ihB6wNpyXrG8aUk5Iuf/k=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
@@ -87,32 +69,18 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI=
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ=
golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=
golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA=
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=
golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 h1:Kog3KlB4xevJlAcbbbzPfRG0+X9fdoGM+UBRKVz6Wr0=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237/go.mod h1:ezi0AVyMKDWy5xAncvjLWH7UcLBB5n7y2fQ8MzjJcto=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8=
google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@@ -1,5 +0,0 @@
ARG ALPINE_VERSION=3.22
FROM alpine:${ALPINE_VERSION}
RUN apk add -U go nodejs git build-base git npm bash zstd brotli gzip
LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis"

View File

@@ -1,8 +0,0 @@
#!/usr/bin/env sh
set -euo pipefail
set -x
npm ci
npm run build
SKIP_INTEGRATION=1 go test ./...

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
[ ! -z "${DEBUG:-}" ] && set -x
if [ "$#" -ne 1 ]; then
echo "Usage: rigging.sh <user@host>"
fi
CIRunnerImage="ghcr.io/techarohq/anubis/ci-runner:latest"
RunID=${GITHUB_RUN_ID:-$(uuidgen)}
RunFolder="anubis/runs/${RunID}"
Target="${1}"
ssh "${Target}" uname -av
ssh "${Target}" mkdir -p "${RunFolder}"
git archive HEAD | ssh "${Target}" tar xC "${RunFolder}"
ssh "${Target}" << EOF
set -euo pipefail
set -x
mkdir -p "anubis/cache/{go,go-build,node}"
podman pull ${CIRunnerImage}
podman run --rm -it \
-v "\$HOME/${RunFolder}:/app/anubis" \
-v "\$HOME/anubis/cache/go:/root/go" \
-v "\$HOME/anubis/cache/go-build:/root/.cache/go-build" \
-v "\$HOME/anubis/cache/node:/root/.npm" \
-w /app/anubis \
${CIRunnerImage} \
sh /app/anubis/test/ssh-ci/in-container.sh
ssh "${Target}" rm -rf "${RunFolder}"
EOF

View File

@@ -22,7 +22,6 @@ $`npm run assets`;
build: ({ bin, etc, systemd, doc }) => {
$`go build -o ${bin}/anubis -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/anubis`;
$`go build -o ${bin}/anubis-robots2policy -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/robots2policy`;
file.install("./run/anubis@.service", `${systemd}/anubis@.service`);
file.install("./run/default.env", `${etc}/default.env`);