mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-07 01:08:19 +00:00
Compare commits
2 Commits
v1.20.0
...
Xe/docker-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50e48df993 | ||
|
|
5e38c7d730 |
@@ -9,4 +9,4 @@ exclude_dir = ["var", "vendor", "docs", "node_modules"]
|
||||
|
||||
[logger]
|
||||
time = true
|
||||
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
|
||||
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
|
||||
25
.dockerignore
Normal file
25
.dockerignore
Normal file
@@ -0,0 +1,25 @@
|
||||
.env
|
||||
*.deb
|
||||
*.rpm
|
||||
|
||||
# Additional package locks
|
||||
pnpm-lock.yaml
|
||||
yarn.lock
|
||||
|
||||
# Go binaries and test artifacts
|
||||
main
|
||||
*.test
|
||||
|
||||
node_modules
|
||||
|
||||
# MacOS
|
||||
.DS_store
|
||||
|
||||
# Intellij
|
||||
.idea
|
||||
|
||||
# how does this get here
|
||||
doc/VERSION
|
||||
|
||||
web/static/js/*
|
||||
!web/static/js/.gitignore
|
||||
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1 +1 @@
|
||||
**/*_templ.go linguist-generated=true
|
||||
web/index_templ.go linguist-generated
|
||||
|
||||
2
.github/actions/spelling/allow.txt
vendored
2
.github/actions/spelling/allow.txt
vendored
@@ -2,4 +2,4 @@ github
|
||||
https
|
||||
ssh
|
||||
ubuntu
|
||||
workarounds
|
||||
workarounds
|
||||
3
.github/actions/spelling/excludes.txt
vendored
3
.github/actions/spelling/excludes.txt
vendored
@@ -83,9 +83,6 @@
|
||||
^\Q.github/FUNDING.yml\E$
|
||||
^\Q.github/workflows/spelling.yml\E$
|
||||
^data/crawlers/
|
||||
^docs/blog/tags\.yml$
|
||||
^docs/manifest/.*$
|
||||
^docs/static/\.nojekyll$
|
||||
^lib/policy/config/testdata/bad/unparseable\.json$
|
||||
ignore$
|
||||
robots.txt
|
||||
|
||||
44
.github/actions/spelling/expect.txt
vendored
44
.github/actions/spelling/expect.txt
vendored
@@ -6,13 +6,8 @@ amazonbot
|
||||
anthro
|
||||
anubis
|
||||
anubistest
|
||||
apk
|
||||
Applebot
|
||||
archlinux
|
||||
asnc
|
||||
asnchecker
|
||||
asns
|
||||
aspirational
|
||||
badregexes
|
||||
bdba
|
||||
berr
|
||||
@@ -28,7 +23,6 @@ Brightbot
|
||||
broked
|
||||
Bytespider
|
||||
cachebuster
|
||||
cachediptoasn
|
||||
Caddyfile
|
||||
caninetools
|
||||
Cardyb
|
||||
@@ -36,7 +30,6 @@ celchecker
|
||||
CELPHASE
|
||||
cerr
|
||||
certresolver
|
||||
cespare
|
||||
CGNAT
|
||||
cgr
|
||||
chainguard
|
||||
@@ -44,6 +37,7 @@ chall
|
||||
challengemozilla
|
||||
checkpath
|
||||
checkresult
|
||||
chen
|
||||
chibi
|
||||
cidranger
|
||||
ckie
|
||||
@@ -54,12 +48,12 @@ coreutils
|
||||
Cotoyogi
|
||||
CRDs
|
||||
crt
|
||||
Cscript
|
||||
daemonizing
|
||||
DDOS
|
||||
Debian
|
||||
debrpm
|
||||
decaymap
|
||||
decompiling
|
||||
Diffbot
|
||||
discordapp
|
||||
discordbot
|
||||
@@ -67,7 +61,6 @@ distros
|
||||
dnf
|
||||
dnsbl
|
||||
dnserr
|
||||
domainhere
|
||||
dracula
|
||||
dronebl
|
||||
droneblresponse
|
||||
@@ -75,7 +68,6 @@ duckduckbot
|
||||
eerror
|
||||
ellenjoe
|
||||
enbyware
|
||||
etld
|
||||
everyones
|
||||
evilbot
|
||||
evilsite
|
||||
@@ -94,24 +86,17 @@ Fordola
|
||||
forgejo
|
||||
fsys
|
||||
fullchain
|
||||
gaissmai
|
||||
Galvus
|
||||
geoip
|
||||
geoipchecker
|
||||
gha
|
||||
gipc
|
||||
gitea
|
||||
godotenv
|
||||
goland
|
||||
gomod
|
||||
goodbot
|
||||
googlebot
|
||||
govulncheck
|
||||
goyaml
|
||||
GPG
|
||||
GPT
|
||||
gptbot
|
||||
grpcprom
|
||||
grw
|
||||
Hashcash
|
||||
hashrate
|
||||
@@ -121,33 +106,26 @@ hebis
|
||||
hec
|
||||
hmc
|
||||
hostable
|
||||
htmlc
|
||||
htmx
|
||||
httpdebug
|
||||
Huawei
|
||||
hypertext
|
||||
iaskspider
|
||||
iat
|
||||
ifm
|
||||
Imagesift
|
||||
imgproxy
|
||||
impressum
|
||||
inp
|
||||
IPTo
|
||||
iptoasn
|
||||
iss
|
||||
isset
|
||||
ivh
|
||||
Jenomis
|
||||
JGit
|
||||
joho
|
||||
journalctl
|
||||
jshelter
|
||||
JWTs
|
||||
kagi
|
||||
kagibot
|
||||
keikaku
|
||||
Keyfunc
|
||||
keypair
|
||||
KHTML
|
||||
kinda
|
||||
@@ -165,7 +143,6 @@ Linting
|
||||
linuxbrew
|
||||
LLU
|
||||
loadbalancer
|
||||
locahost
|
||||
lol
|
||||
LOMINSA
|
||||
maintainership
|
||||
@@ -190,8 +167,8 @@ OCOB
|
||||
ogtags
|
||||
omgili
|
||||
omgilibot
|
||||
onionservice
|
||||
openai
|
||||
opengraph
|
||||
openrc
|
||||
pag
|
||||
palemoon
|
||||
@@ -212,18 +189,15 @@ privkey
|
||||
promauto
|
||||
promhttp
|
||||
proofofwork
|
||||
publicsuffix
|
||||
pwcmd
|
||||
pwuser
|
||||
qualys
|
||||
qwant
|
||||
qwantbot
|
||||
rac
|
||||
rawler
|
||||
rcvar
|
||||
redir
|
||||
redirectscheme
|
||||
refactors
|
||||
relayd
|
||||
reputational
|
||||
reqmeta
|
||||
@@ -240,11 +214,9 @@ sebest
|
||||
secretplans
|
||||
selfsigned
|
||||
Semrush
|
||||
Seo
|
||||
setsebool
|
||||
shellcheck
|
||||
Sidetrade
|
||||
simprint
|
||||
sitemap
|
||||
sls
|
||||
sni
|
||||
@@ -262,23 +234,18 @@ subrequest
|
||||
SVCNAME
|
||||
tagline
|
||||
tarballs
|
||||
tarrif
|
||||
techaro
|
||||
techarohq
|
||||
templ
|
||||
templruntime
|
||||
testarea
|
||||
Thancred
|
||||
thoth
|
||||
thothmock
|
||||
Tik
|
||||
Timpibot
|
||||
torproject
|
||||
traefik
|
||||
uberspace
|
||||
unixhttpd
|
||||
unmarshal
|
||||
unparseable
|
||||
uuidgen
|
||||
uvx
|
||||
UXP
|
||||
Varis
|
||||
@@ -294,7 +261,6 @@ websecure
|
||||
websites
|
||||
Webzio
|
||||
wildbase
|
||||
withthothmock
|
||||
wordpress
|
||||
Workaround
|
||||
workdir
|
||||
@@ -308,7 +274,6 @@ xess
|
||||
xff
|
||||
XForwarded
|
||||
XNG
|
||||
XOB
|
||||
XReal
|
||||
yae
|
||||
YAMLTo
|
||||
@@ -318,5 +283,4 @@ yourdomain
|
||||
yoursite
|
||||
Zenos
|
||||
zizmor
|
||||
Zonbocom
|
||||
zos
|
||||
|
||||
@@ -273,6 +273,14 @@
|
||||
# Most people only have two hands. Reword.
|
||||
\b(?i)on the third hand\b
|
||||
|
||||
# Should be `Open Graph`
|
||||
# unless talking about a specific Open Graph implementation:
|
||||
# - Java
|
||||
# - Node
|
||||
# - Py
|
||||
# - Ruby
|
||||
\bOpenGraph\b
|
||||
|
||||
# Should be `OpenShift`
|
||||
\bOpenshift\b
|
||||
|
||||
|
||||
2
.github/actions/spelling/patterns.txt
vendored
2
.github/actions/spelling/patterns.txt
vendored
@@ -131,4 +131,4 @@ go install(?:\s+[a-z]+\.[-@\w/.]+)+
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# microsoft
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
2
.github/workflows/docker-pr.yml
vendored
2
.github/workflows/docker-pr.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
11
.github/workflows/docker.yml
vendored
11
.github/workflows/docker.yml
vendored
@@ -3,8 +3,8 @@ name: Docker image builds
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: ["main"]
|
||||
tags: ["v*"]
|
||||
branches: [ "main" ]
|
||||
tags: [ "v*" ]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
echo "IMAGE=ghcr.io/${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
run: |
|
||||
brew bundle
|
||||
|
||||
- name: Log into registry
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
@@ -77,8 +77,9 @@ jobs:
|
||||
DOCKER_REPO: ${{ env.IMAGE }}
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
|
||||
uses: actions/attest-build-provenance@db473fddc028af60658334401dc6fa3ffd8669fd # v2.3.0
|
||||
with:
|
||||
subject-name: ${{ env.IMAGE }}
|
||||
subject-digest: ${{ steps.build.outputs.digest }}
|
||||
|
||||
6
.github/workflows/docs-deploy.yml
vendored
6
.github/workflows/docs-deploy.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
@@ -50,14 +50,14 @@ jobs:
|
||||
push: true
|
||||
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@d50394b7d704525f93faefce1e65a6329ff67271 # v1.33.2
|
||||
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
args: apply -k docs/manifest
|
||||
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@d50394b7d704525f93faefce1e65a6329ff67271 # v1.33.2
|
||||
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
|
||||
2
.github/workflows/docs-test.yml
vendored
2
.github/workflows/docs-test.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
|
||||
2
.github/workflows/go.yml
vendored
2
.github/workflows/go.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
2
.github/workflows/package-builds-stable.yml
vendored
2
.github/workflows/package-builds-stable.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
@@ -1,37 +0,0 @@
|
||||
name: Regenerate ssh ci runner image
|
||||
|
||||
on:
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
schedule:
|
||||
- cron: "0 0 1,8,15,22 * *"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: write
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
ssh-ci-rebuild:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
- name: Build and push
|
||||
run: |
|
||||
cd ./test/ssh-ci
|
||||
docker buildx bake --push
|
||||
37
.github/workflows/ssh-ci.yml
vendored
37
.github/workflows/ssh-ci.yml
vendored
@@ -1,37 +0,0 @@
|
||||
name: SSH CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
ssh:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
host:
|
||||
- ubuntu@riscv64.techaro.lol
|
||||
- ci@ppc64le.techaro.lol
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Install CI target SSH key
|
||||
uses: shimataro/ssh-key-action@d4fffb50872869abe2d9a9098a6d9c5aa7d16be4 # v2.7.0
|
||||
with:
|
||||
key: ${{ secrets.CI_SSH_KEY }}
|
||||
name: id_rsa
|
||||
known_hosts: ${{ secrets.CI_SSH_KNOWN_HOSTS }}
|
||||
- name: Run CI
|
||||
run: bash test/ssh-ci/rigging.sh ${{ matrix.host }}
|
||||
env:
|
||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||
4
.github/workflows/zizmor.yml
vendored
4
.github/workflows/zizmor.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@445689ea25e0de0a23313031f5fe577c74ae45a1 # v6.3.0
|
||||
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
|
||||
|
||||
- name: Run zizmor 🌈
|
||||
run: uvx zizmor --format sarif . > results.sarif
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0
|
||||
uses: github/codeql-action/upload-sarif@fca7ace96b7d713c7035871441bd52efbe39e27e # v3.28.19
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
|
||||
19
.vscode/settings.json
vendored
19
.vscode/settings.json
vendored
@@ -11,24 +11,5 @@
|
||||
"zig": false,
|
||||
"javascript": false,
|
||||
"properties": false
|
||||
},
|
||||
"[markdown]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[mdx]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[nunjucks]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"cSpell.enabledFileTypes": {
|
||||
"mdx": true,
|
||||
"md": true
|
||||
}
|
||||
}
|
||||
|
||||
30
Dockerfile
Normal file
30
Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
# ALPINE_VERSION is declared before the first FROM so that both stages can
# reference it in their FROM lines.
ARG ALPINE_VERSION=edge

# Build stage: runs on the build host's platform and cross-compiles for the
# target via GOOS/GOARCH.
FROM --platform=${BUILDPLATFORM} alpine:${ALPINE_VERSION} AS build

ARG TARGETOS
ARG TARGETARCH
ARG COMPONENT=anubis
ARG VERSION=devel-docker

RUN apk -U add go nodejs git build-base npm bash zstd brotli gzip

WORKDIR /app

# Copy the module files first so the dependency download layer is cached
# independently of source changes.
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN --mount=type=cache,target=/root/.cache npm ci && npm run assets
# NOTE(review): -gcflags "all=-N -l" disables optimizations and inlining while
# -ldflags "-s -w" strips debug information; confirm this combination is intended.
RUN --mount=type=cache,target=/root/.cache GOOS=${TARGETOS} GOARCH=${TARGETARCH} CGO_ENABLED=0 GOARM=7 go build -gcflags "all=-N -l" -o /app/bin/${COMPONENT} -ldflags "-s -w -extldflags -static -X github.com/TecharoHQ/anubis.Version=${VERSION}" ./cmd/${COMPONENT}

# Runtime stage: only the compiled binary plus CA certificates and MIME types.
FROM alpine:${ALPINE_VERSION} AS run
WORKDIR /app

RUN apk add --no-cache ca-certificates mailcap

# NOTE(review): the binary path is hard-coded to "anubis"; building with a
# different COMPONENT would make this COPY fail.
COPY --from=build /app/bin/anubis /app/bin/anubis

CMD ["/app/bin/anubis"]
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD [ "/app/bin/anubis", "--healthcheck" ]

LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis"
|
||||
2
Makefile
2
Makefile
@@ -18,7 +18,6 @@ assets: deps
|
||||
|
||||
build: assets
|
||||
$(GO) build -o ./var/anubis ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy ./cmd/robots2policy
|
||||
@echo "Anubis is now built to ./var/anubis"
|
||||
|
||||
lint: assets
|
||||
@@ -28,7 +27,6 @@ lint: assets
|
||||
|
||||
prebaked-build:
|
||||
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/robots2policy
|
||||
|
||||
test: assets
|
||||
$(GO) test ./...
|
||||
|
||||
@@ -30,13 +30,11 @@ import (
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
libanubis "github.com/TecharoHQ/anubis/lib"
|
||||
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/web"
|
||||
"github.com/facebookgo/flagenv"
|
||||
_ "github.com/joho/godotenv/autoload"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
@@ -46,10 +44,8 @@ var (
|
||||
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
|
||||
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
|
||||
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
|
||||
cookieDynamicDomain = flag.Bool("cookie-dynamic-domain", false, "if set, automatically set the cookie Domain value based on the request domain")
|
||||
cookieExpiration = flag.Duration("cookie-expiration-time", anubis.CookieDefaultExpirationTime, "The amount of time the authorization cookie is valid for")
|
||||
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
|
||||
hs512Secret = flag.String("hs512-secret", "", "secret used to sign JWTs, uses ed25519 if not set")
|
||||
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
|
||||
ed25519PrivateKeyHexFile = flag.String("ed25519-private-key-hex-file", "", "file name containing value for ed25519-private-key-hex")
|
||||
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
|
||||
@@ -59,7 +55,6 @@ var (
|
||||
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
|
||||
redirectDomains = flag.String("redirect-domains", "", "list of domains separated by commas which anubis is allowed to redirect to. Leaving this unset allows any domain.")
|
||||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
stripBasePrefix = flag.Bool("strip-base-prefix", false, "if true, strips the base prefix from requests forwarded to the target server")
|
||||
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to, set to an empty string to disable proxying when only using auth request")
|
||||
targetSNI = flag.String("target-sni", "", "if set, the value of the TLS handshake hostname when forwarding requests to the target")
|
||||
targetHost = flag.String("target-host", "", "if set, the value of the Host header when forwarding requests to the target")
|
||||
@@ -74,10 +69,6 @@ var (
|
||||
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
|
||||
versionFlag = flag.Bool("version", false, "print Anubis version")
|
||||
xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For")
|
||||
|
||||
thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to")
|
||||
thothURL = flag.String("thoth-url", "", "if set, URL for Thoth, the IP reputation database for Anubis")
|
||||
thothToken = flag.String("thoth-token", "", "if set, API token for Thoth, the IP reputation database for Anubis")
|
||||
)
|
||||
|
||||
func keyFromHex(value string) (ed25519.PrivateKey, error) {
|
||||
@@ -241,29 +232,7 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
if *cookieDomain != "" && *cookieDynamicDomain {
|
||||
log.Fatalf("you can't set COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN at the same time")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Thoth configuration
|
||||
switch {
|
||||
case *thothURL != "" && *thothToken == "":
|
||||
slog.Warn("THOTH_URL is set but no THOTH_TOKEN is set")
|
||||
case *thothURL == "" && *thothToken != "":
|
||||
slog.Warn("THOTH_TOKEN is set but no THOTH_URL is set")
|
||||
case *thothURL != "" && *thothToken != "":
|
||||
slog.Debug("connecting to Thoth")
|
||||
thothClient, err := thoth.New(ctx, *thothURL, *thothToken, *thothInsecure)
|
||||
if err != nil {
|
||||
log.Fatalf("can't dial thoth at %s: %v", *thothURL, err)
|
||||
}
|
||||
|
||||
ctx = thoth.With(ctx, thothClient)
|
||||
}
|
||||
|
||||
policy, err := libanubis.LoadPoliciesOrDefault(ctx, *policyFname, *challengeDifficulty)
|
||||
policy, err := libanubis.LoadPoliciesOrDefault(*policyFname, *challengeDifficulty)
|
||||
if err != nil {
|
||||
log.Fatalf("can't parse policy file: %v", err)
|
||||
}
|
||||
@@ -291,20 +260,12 @@ func main() {
|
||||
} else if strings.HasSuffix(*basePrefix, "/") {
|
||||
log.Fatalf("[misconfiguration] base-prefix must not end with a slash")
|
||||
}
|
||||
if *stripBasePrefix && *basePrefix == "" {
|
||||
log.Fatalf("[misconfiguration] strip-base-prefix is set to true, but base-prefix is not set, " +
|
||||
"this may result in unexpected behavior")
|
||||
}
|
||||
|
||||
var ed25519Priv ed25519.PrivateKey
|
||||
if *hs512Secret != "" && (*ed25519PrivateKeyHex != "" || *ed25519PrivateKeyHexFile != "") {
|
||||
log.Fatal("do not specify both HS512 and ED25519 secrets")
|
||||
} else if *hs512Secret != "" {
|
||||
ed25519Priv = ed25519.PrivateKey(*hs512Secret)
|
||||
} else if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
|
||||
var priv ed25519.PrivateKey
|
||||
if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
|
||||
log.Fatal("do not specify both ED25519_PRIVATE_KEY_HEX and ED25519_PRIVATE_KEY_HEX_FILE")
|
||||
} else if *ed25519PrivateKeyHex != "" {
|
||||
ed25519Priv, err = keyFromHex(*ed25519PrivateKeyHex)
|
||||
priv, err = keyFromHex(*ed25519PrivateKeyHex)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse and validate ED25519_PRIVATE_KEY_HEX: %v", err)
|
||||
}
|
||||
@@ -314,12 +275,12 @@ func main() {
|
||||
log.Fatalf("failed to read ED25519_PRIVATE_KEY_HEX_FILE %s: %v", *ed25519PrivateKeyHexFile, err)
|
||||
}
|
||||
|
||||
ed25519Priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
|
||||
priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse and validate content of ED25519_PRIVATE_KEY_HEX_FILE: %v", err)
|
||||
}
|
||||
} else {
|
||||
_, ed25519Priv, err = ed25519.GenerateKey(rand.Reader)
|
||||
_, priv, err = ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to generate ed25519 key: %v", err)
|
||||
}
|
||||
@@ -341,30 +302,21 @@ func main() {
|
||||
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
|
||||
}
|
||||
|
||||
// If OpenGraph configuration values are not set in the config file, use the
|
||||
// values from flags / envvars.
|
||||
if !policy.OpenGraph.Enabled {
|
||||
policy.OpenGraph.Enabled = *ogPassthrough
|
||||
policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
|
||||
policy.OpenGraph.TimeToLive = *ogTimeToLive
|
||||
policy.OpenGraph.Override = map[string]string{}
|
||||
}
|
||||
|
||||
s, err := libanubis.New(libanubis.Options{
|
||||
BasePrefix: *basePrefix,
|
||||
StripBasePrefix: *stripBasePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
ED25519PrivateKey: ed25519Priv,
|
||||
HS512Secret: []byte(*hs512Secret),
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
OpenGraph: policy.OpenGraph,
|
||||
BasePrefix: *basePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
OGPassthrough: *ogPassthrough,
|
||||
OGTimeToLive: *ogTimeToLive,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
OGCacheConsidersHost: *ogCacheConsiderHost,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
|
||||
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
|
||||
*/
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// main walks the directory named by the first command-line argument and runs
// the converter (`go run main.go`) once for every non-directory entry it
// finds, writing each generated YAML policy into ./generated_policies.
//
// A file that cannot be converted is reported as a warning and skipped so the
// rest of the batch still runs; an error while walking the tree is fatal.
func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
		fmt.Println("Example: go run batch_process.go ./cleaned")
		os.Exit(1)
	}

	cleanedDir := os.Args[1]
	outputDir := "generated_policies"

	// Create output directory
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}

	count := 0
	err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Generate policy name from file path. The relative path is converted
		// to forward slashes first so the name is the same on every OS.
		relPath, err := filepath.Rel(cleanedDir, path)
		if err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			return nil // Continue processing other files
		}
		policyName := strings.ReplaceAll(filepath.ToSlash(relPath), "/", "-")
		policyName = strings.TrimSuffix(policyName, "-robots.txt")
		policyName = strings.ReplaceAll(policyName, ".", "-")

		outputFile := filepath.Join(outputDir, policyName+".yaml")

		cmd := exec.Command("go", "run", "main.go",
			"-input", path,
			"-output", outputFile,
			"-name", policyName,
			"-format", "yaml")

		if err := cmd.Run(); err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			return nil // Continue processing other files
		}

		// Progress output: a dot every 10 files, a running total every 100.
		count++
		if count%100 == 0 {
			fmt.Printf("Processed %d files...\n", count)
		} else if count%10 == 0 {
			fmt.Print(".")
		}

		return nil
	})

	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	fmt.Printf("Successfully processed %d robots.txt files\n", count)
	fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}
|
||||
@@ -1,313 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
// Command-line flags. Each variable points at the parsed value once
// flag.Parse has run; before that it holds the default given here.
var (
	inputFile     = flag.String("input", "", "path to robots.txt file (use - for stdin)")
	outputFile    = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
	outputFormat  = flag.String("format", "yaml", "output format: yaml or json")
	baseAction    = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
	crawlDelay    = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
	policyName    = flag.String("name", "robots-txt-policy", "name for the generated policy")
	userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
	helpFlag      = flag.Bool("help", false, "show help")
)
|
||||
|
||||
// RobotsRule holds the directives collected for one User-agent section of a
// robots.txt file.
type RobotsRule struct {
	// UserAgent is the value of the User-agent line that opened the section.
	UserAgent string
	// Disallows lists the non-empty Disallow values in the order they appear.
	Disallows []string
	// Allows lists the non-empty Allow values in the order they appear.
	Allows []string
	// CrawlDelay is presumably the section's Crawl-delay value; the code that
	// sets it is not visible here, so confirm against the parser.
	CrawlDelay int
	// IsBlacklist is true if this is a specifically denied user agent.
	IsBlacklist bool
}
|
||||
|
||||
type AnubisRule struct {
|
||||
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
|
||||
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
|
||||
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Action string `yaml:"action" json:"action"`
|
||||
}
|
||||
|
||||
// init replaces flag.Usage with a version that prints a synopsis, the flag
// defaults and a few example invocations, then exits with status 2.
//
// Everything is written to flag.CommandLine.Output(), the same destination
// flag.PrintDefaults uses, so the whole message goes to one writer. That
// writer is standard error unless it has been changed with SetOutput.
func init() {
	flag.Usage = func() {
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage of %s:\n", os.Args[0])
		fmt.Fprintf(out, "%s [options] -input <robots.txt>\n\n", os.Args[0])
		flag.PrintDefaults()
		fmt.Fprintln(out, "\nExamples:")
		fmt.Fprintln(out, " # Convert local robots.txt file")
		fmt.Fprintln(out, " robots2policy -input robots.txt -output policy.yaml")
		fmt.Fprintln(out, "")
		fmt.Fprintln(out, " # Convert from URL")
		fmt.Fprintln(out, " robots2policy -input https://example.com/robots.txt -format json")
		fmt.Fprintln(out, "")
		fmt.Fprintln(out, " # Read from stdin, write to stdout")
		fmt.Fprintln(out, " curl https://example.com/robots.txt | robots2policy -input -")
		os.Exit(2)
	}
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
|
||||
flag.Usage()
|
||||
}
|
||||
|
||||
// Read robots.txt
|
||||
var input io.Reader
|
||||
if *inputFile == "-" {
|
||||
input = os.Stdin
|
||||
} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
|
||||
resp, err := http.Get(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to fetch robots.txt from URL: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
input = resp.Body
|
||||
} else {
|
||||
file, err := os.Open(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open input file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
input = file
|
||||
}
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(input)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check if any rules were generated
|
||||
if len(anubisRules) == 0 {
|
||||
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
|
||||
}
|
||||
|
||||
// Generate output
|
||||
var output []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
output, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
output, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
default:
|
||||
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Write output
|
||||
if *outputFile == "" || *outputFile == "-" {
|
||||
fmt.Print(string(output))
|
||||
} else {
|
||||
err = os.WriteFile(*outputFile, output, 0644)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to write output file: %v", err)
|
||||
}
|
||||
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
scanner := bufio.NewScanner(input)
|
||||
var rules []RobotsRule
|
||||
var currentRule *RobotsRule
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Skip empty lines and comments
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Split on first colon
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
directive := strings.TrimSpace(strings.ToLower(parts[0]))
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
switch directive {
|
||||
case "user-agent":
|
||||
// Start a new rule section
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
currentRule = &RobotsRule{
|
||||
UserAgent: value,
|
||||
Disallows: make([]string, 0),
|
||||
Allows: make([]string, 0),
|
||||
}
|
||||
|
||||
case "disallow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Disallows = append(currentRule.Disallows, value)
|
||||
}
|
||||
|
||||
case "allow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Allows = append(currentRule.Allows, value)
|
||||
}
|
||||
|
||||
case "crawl-delay":
|
||||
if currentRule != nil {
|
||||
if delay, err := parseIntSafe(value); err == nil {
|
||||
currentRule.CrawlDelay = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last rule
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
|
||||
// Mark blacklisted user agents (those with "Disallow: /")
|
||||
for i := range rules {
|
||||
for _, disallow := range rules[i].Disallows {
|
||||
if disallow == "/" {
|
||||
rules[i].IsBlacklist = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rules, scanner.Err()
|
||||
}
|
||||
|
||||
// parseIntSafe converts s to an int by scanning it with fmt.Sscanf and the
// "%d" verb. It returns the scanned value together with the error reported
// by Sscanf, which is nil when an integer was read.
func parseIntSafe(s string) (int, error) {
	var n int
	if _, scanErr := fmt.Sscanf(s, "%d", &n); scanErr != nil {
		return n, scanErr
	}
	return n, nil
}
|
||||
|
||||
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
ruleCounter := 0
|
||||
|
||||
for _, robotsRule := range robotsRules {
|
||||
userAgent := robotsRule.UserAgent
|
||||
|
||||
// Handle crawl delay as weight adjustment (do this first before any continues)
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
|
||||
Action: "WEIGH",
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents (complete deny/challenge)
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
for _, disallow := range robotsRule.Disallows {
|
||||
if disallow == "/" {
|
||||
continue // Already handled as blacklist above
|
||||
}
|
||||
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
}
|
||||
|
||||
// Build CEL expression
|
||||
var conditions []string
|
||||
|
||||
// Add user agent condition if not wildcard
|
||||
if userAgent != "*" {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
|
||||
}
|
||||
|
||||
// Add path condition
|
||||
pathCondition := buildPathCondition(disallow)
|
||||
conditions = append(conditions, pathCondition)
|
||||
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: conditions,
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
}
|
||||
|
||||
// buildPathCondition translates a robots.txt path pattern into a CEL
// condition on the request path.
//
//   - A plain path becomes a prefix test: path.startsWith("<path>").
//   - A pattern containing '*' or '?', or ending in the '$' end-of-match
//     anchor, becomes path.matches("<regex>"). The pattern is regex-quoted,
//     '*' is rewritten to ".*", '?' to ".", and the result is anchored with
//     '^'. A trailing '$' anchors the end of the regex instead of being
//     matched as a literal dollar sign.
//
// NOTE(review): RFC 9309 only gives '*' and '$' special meaning; treating
// '?' as a single-character wildcard is kept because the existing golden
// files expect it.
func buildPathCondition(robotsPath string) string {
	pattern := robotsPath

	// A trailing '$' means "the path must end here".
	anchored := strings.HasSuffix(pattern, "$")
	if anchored {
		pattern = strings.TrimSuffix(pattern, "$")
	}

	// Simple prefix match for most cases.
	if !anchored && !strings.ContainsAny(pattern, "*?") {
		return fmt.Sprintf("path.startsWith(%q)", pattern)
	}

	// Convert robots.txt wildcards to regex.
	regex := regexp.QuoteMeta(pattern)
	regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
	regex = strings.ReplaceAll(regex, `\?`, `.`)  // ? becomes .
	regex = "^" + regex
	if anchored {
		regex += "$"
	}
	return fmt.Sprintf("path.matches(%q)", regex)
}
|
||||
@@ -1,418 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type TestCase struct {
|
||||
name string
|
||||
robotsFile string
|
||||
expectedFile string
|
||||
options TestOptions
|
||||
}
|
||||
|
||||
// TestOptions carries per-scenario overrides for the converter's flags.
// Zero values mean "leave the flag as it is".
type TestOptions struct {
	// format selects the output marshaller ("yaml" or "json").
	format string
	// action overrides the base action used for disallow rules.
	action string
	// policyName overrides the prefix of generated rule names.
	policyName string
	// deniedAction overrides the action used for blacklisted user agents.
	deniedAction string
	// crawlDelayWeight overrides the crawl-delay weight when positive.
	crawlDelayWeight int
}
|
||||
|
||||
func TestDataFileConversion(t *testing.T) {
|
||||
|
||||
testCases := []TestCase{
|
||||
{
|
||||
name: "simple_default",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "simple_json",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.json",
|
||||
options: TestOptions{format: "json"},
|
||||
},
|
||||
{
|
||||
name: "simple_deny_action",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "deny-action.yaml",
|
||||
options: TestOptions{format: "yaml", action: "DENY"},
|
||||
},
|
||||
{
|
||||
name: "simple_custom_name",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "custom-name.yaml",
|
||||
options: TestOptions{format: "yaml", policyName: "my-custom-policy"},
|
||||
},
|
||||
{
|
||||
name: "blacklist_with_crawl_delay",
|
||||
robotsFile: "blacklist.robots.txt",
|
||||
expectedFile: "blacklist.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
{
|
||||
name: "wildcards",
|
||||
robotsFile: "wildcards.robots.txt",
|
||||
expectedFile: "wildcards.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "empty_file",
|
||||
robotsFile: "empty.robots.txt",
|
||||
expectedFile: "empty.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "complex_scenario",
|
||||
robotsFile: "complex.robots.txt",
|
||||
expectedFile: "complex.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
robotsPath := filepath.Join("testdata", tc.robotsFile)
|
||||
expectedPath := filepath.Join("testdata", tc.expectedFile)
|
||||
|
||||
// Read robots.txt input
|
||||
robotsFile, err := os.Open(robotsPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
|
||||
}
|
||||
defer robotsFile.Close()
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(robotsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Set test options
|
||||
oldFormat := *outputFormat
|
||||
oldAction := *baseAction
|
||||
oldCrawlDelay := *crawlDelay
|
||||
oldPolicyName := *policyName
|
||||
oldDeniedAction := *userAgentDeny
|
||||
|
||||
if tc.options.format != "" {
|
||||
*outputFormat = tc.options.format
|
||||
}
|
||||
if tc.options.action != "" {
|
||||
*baseAction = tc.options.action
|
||||
}
|
||||
if tc.options.crawlDelayWeight > 0 {
|
||||
*crawlDelay = tc.options.crawlDelayWeight
|
||||
}
|
||||
if tc.options.policyName != "" {
|
||||
*policyName = tc.options.policyName
|
||||
}
|
||||
if tc.options.deniedAction != "" {
|
||||
*userAgentDeny = tc.options.deniedAction
|
||||
}
|
||||
|
||||
// Restore options after test
|
||||
defer func() {
|
||||
*outputFormat = oldFormat
|
||||
*baseAction = oldAction
|
||||
*crawlDelay = oldCrawlDelay
|
||||
*policyName = oldPolicyName
|
||||
*userAgentDeny = oldDeniedAction
|
||||
}()
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Generate output
|
||||
var actualOutput []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
actualOutput, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Read expected output
|
||||
expectedOutput, err := os.ReadFile(expectedPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
|
||||
}
|
||||
|
||||
if strings.ToLower(*outputFormat) == "yaml" {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = yaml.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual output: %v", err)
|
||||
}
|
||||
|
||||
err = yaml.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
} else {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = json.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCaseInsensitiveParsing(t *testing.T) {
|
||||
robotsTxt := `User-Agent: *
|
||||
Disallow: /admin
|
||||
Crawl-Delay: 10
|
||||
|
||||
User-agent: TestBot
|
||||
disallow: /test
|
||||
crawl-delay: 5
|
||||
|
||||
USER-AGENT: UpperBot
|
||||
DISALLOW: /upper
|
||||
CRAWL-DELAY: 20`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
|
||||
}
|
||||
|
||||
expectedRules := 3
|
||||
if len(rules) != expectedRules {
|
||||
t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
|
||||
}
|
||||
|
||||
// Check that all crawl delays were parsed
|
||||
for i, rule := range rules {
|
||||
expectedDelays := []int{10, 5, 20}
|
||||
if rule.CrawlDelay != expectedDelays[i] {
|
||||
t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVariousOutputFormats(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldPolicyName := *policyName
|
||||
*policyName = "test-policy"
|
||||
defer func() { *policyName = oldPolicyName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Test YAML output
|
||||
yamlOutput, err := yaml.Marshal(anubisRules)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal YAML: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
|
||||
t.Errorf("YAML output doesn't contain expected rule name")
|
||||
}
|
||||
|
||||
// Test JSON output
|
||||
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal JSON: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
|
||||
t.Errorf("JSON output doesn't contain expected rule name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDifferentActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("action_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *baseAction
|
||||
*baseAction = action
|
||||
defer func() { *baseAction = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
if len(anubisRules) != 1 {
|
||||
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
|
||||
}
|
||||
|
||||
if anubisRules[0].Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPolicyNaming(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Disallow: /private
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /`
|
||||
|
||||
testNames := []string{"custom-policy", "my-rules", "site-protection"}
|
||||
|
||||
for _, name := range testNames {
|
||||
t.Run("name_"+name, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldName := *policyName
|
||||
*policyName = name
|
||||
defer func() { *policyName = oldName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check that all rule names use the custom prefix
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.HasPrefix(rule.Name, name+"-") {
|
||||
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlDelayWeights(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: SlowBot
|
||||
Disallow: /slow
|
||||
Crawl-delay: 60`
|
||||
|
||||
testWeights := []int{1, 5, 10, 25}
|
||||
|
||||
for _, weight := range testWeights {
|
||||
t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldWeight := *crawlDelay
|
||||
*crawlDelay = weight
|
||||
defer func() { *crawlDelay = oldWeight }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Count weight rules and verify they have correct weight
|
||||
weightRules := 0
|
||||
for _, rule := range anubisRules {
|
||||
if rule.Action == "WEIGH" && rule.Weight != nil {
|
||||
weightRules++
|
||||
if rule.Weight.Adjust != weight {
|
||||
t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expectedWeightRules := 2 // One for *, one for SlowBot
|
||||
if weightRules != expectedWeightRules {
|
||||
t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacklistActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /`
|
||||
|
||||
testActions := []string{"DENY", "CHALLENGE"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("blacklist_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *userAgentDeny
|
||||
*userAgentDeny = action
|
||||
defer func() { *userAgentDeny = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// All rules should be blacklist rules with the specified action
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.Contains(rule.Name, "blacklist") {
|
||||
t.Errorf("Expected blacklist rule, got %s", rule.Name)
|
||||
}
|
||||
if rule.Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, rule.Action)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// compareData reports whether actual and expected are deeply equal according
// to reflect.DeepEqual. No normalisation is applied, so values that differ
// in dynamic type compare as unequal.
func compareData(actual, expected interface{}) bool {
	same := reflect.DeepEqual(actual, expected)
	return same
}
|
||||
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
@@ -1,15 +0,0 @@
|
||||
# Test with blacklisted user agents
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /
|
||||
Crawl-delay: 60
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search
|
||||
Crawl-delay: 5
|
||||
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
@@ -1,30 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-crawl-delay-4
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-blacklist-5
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-6
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
@@ -1,30 +0,0 @@
|
||||
# Complex real-world example
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private/
|
||||
Disallow: /api/internal/
|
||||
Allow: /api/public/
|
||||
Crawl-delay: 5
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search/
|
||||
Allow: /api/
|
||||
Crawl-delay: 2
|
||||
|
||||
User-agent: Bingbot
|
||||
Disallow: /search/
|
||||
Disallow: /admin/
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SeoBot
|
||||
Disallow: /
|
||||
Crawl-delay: 300
|
||||
|
||||
# Test with various patterns
|
||||
User-agent: TestBot
|
||||
Disallow: /*/admin
|
||||
Disallow: /temp*.html
|
||||
Disallow: /file?.log
|
||||
71
cmd/robots2policy/testdata/complex.yaml
vendored
71
cmd/robots2policy/testdata/complex.yaml
vendored
@@ -1,71 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private/")
|
||||
name: robots-txt-policy-disallow-3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/api/internal/")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-6
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Bingbot")
|
||||
name: robots-txt-policy-crawl-delay-7
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-9
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-10
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-crawl-delay-11
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-blacklist-12
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
name: robots-txt-policy-disallow-13
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
name: robots-txt-policy-disallow-14
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
name: robots-txt-policy-disallow-15
|
||||
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: my-custom-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: my-custom-policy-disallow-2
|
||||
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: DENY
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: DENY
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
@@ -1,2 +0,0 @@
|
||||
# Empty robots.txt (comments only)
|
||||
# No actual rules
|
||||
1
cmd/robots2policy/testdata/empty.yaml
vendored
1
cmd/robots2policy/testdata/empty.yaml
vendored
@@ -1 +0,0 @@
|
||||
[]
|
||||
12
cmd/robots2policy/testdata/simple.json
vendored
12
cmd/robots2policy/testdata/simple.json
vendored
@@ -1,12 +0,0 @@
|
||||
[
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/admin/\")",
|
||||
"name": "robots-txt-policy-disallow-1"
|
||||
},
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/private\")",
|
||||
"name": "robots-txt-policy-disallow-2"
|
||||
}
|
||||
]
|
||||
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
@@ -1,5 +0,0 @@
|
||||
# Simple robots.txt test
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private
|
||||
Allow: /public
|
||||
6
cmd/robots2policy/testdata/simple.yaml
vendored
6
cmd/robots2policy/testdata/simple.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
@@ -1,6 +0,0 @@
|
||||
# Test wildcard patterns
|
||||
User-agent: *
|
||||
Disallow: /search*
|
||||
Disallow: /*/private
|
||||
Disallow: /file?.txt
|
||||
Disallow: /admin/*?action=delete
|
||||
12
cmd/robots2policy/testdata/wildcards.yaml
vendored
12
cmd/robots2policy/testdata/wildcards.yaml
vendored
@@ -1,12 +0,0 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/search.*")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/.*/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/file.\\.txt")
|
||||
name: robots-txt-policy-disallow-3
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/admin/.*.action=delete")
|
||||
name: robots-txt-policy-disallow-4
|
||||
@@ -51,29 +51,6 @@ bots:
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
|
||||
# Requires a subscription to Thoth to use, see
|
||||
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
# Requires a subscription to Thoth to use, see
|
||||
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
|
||||
- name: aggressive-asns-without-functional-abuse-contact
|
||||
action: WEIGH
|
||||
asns:
|
||||
match:
|
||||
- 13335 # Cloudflare
|
||||
- 136907 # Huawei Cloud
|
||||
- 45102 # Alibaba Cloud
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
@@ -84,59 +61,6 @@ bots:
|
||||
|
||||
dnsbl: false
|
||||
|
||||
# #
|
||||
# impressum:
|
||||
# # Displayed at the bottom of every page rendered by Anubis.
|
||||
# footer: >-
|
||||
# This website is hosted by Zonbocom. If you have any complaints or notes
|
||||
# about the service, please contact
|
||||
# <a href="mailto:contact@domainhere.example">contact@domainhere.example</a>
|
||||
# and we will assist you as soon as possible.
|
||||
|
||||
# # The imprint page that will be linked to at the footer of every Anubis page.
|
||||
# page:
|
||||
# # The HTML <title> of the page
|
||||
# title: Imprint and Privacy Policy
|
||||
# # The HTML contents of the page. The exact contents of this page can
|
||||
# # and will vary by locale. Please consult with a lawyer if you are not
|
||||
# # sure what to put here
|
||||
# body: >-
|
||||
# <p>Last updated: June 2025</p>
|
||||
|
||||
# <h2>Information that is gathered from visitors</h2>
|
||||
|
||||
# <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
|
||||
|
||||
# <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
|
||||
|
||||
# <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
|
||||
|
||||
# <!-- ... -->
|
||||
|
||||
# Open Graph passthrough configuration, see here for more information:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
|
||||
openGraph:
|
||||
# Enables Open Graph passthrough
|
||||
enabled: false
|
||||
# Enables the use of the HTTP host in the cache key, this enables
|
||||
# caching metadata for multiple http hosts at once.
|
||||
considerHost: false
|
||||
# How long cached OpenGraph metadata should last in memory
|
||||
ttl: 24h
|
||||
# # If set, return these opengraph values instead of looking them up with
|
||||
# # the target service.
|
||||
# #
|
||||
# # Correlates to properties in https://ogp.me/
|
||||
# override:
|
||||
# # og:title is required, it is the title of the website
|
||||
# "og:title": "Techaro Anubis"
|
||||
# "og:description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
# "description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
||||
@@ -144,57 +68,3 @@ openGraph:
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
|
||||
# The weight thresholds for when to trigger individual challenges. Any
|
||||
# CHALLENGE will take precedence over this.
|
||||
#
|
||||
# A threshold has four configuration options:
|
||||
#
|
||||
# - name: the name that is reported down the stack and used for metrics
|
||||
# - expression: A CEL expression with the request weight in the variable
|
||||
# weight
|
||||
# - action: the Anubis action to apply, similar to in a bot policy
|
||||
# - challenge: which challenge to send to the user, similar to in a bot policy
|
||||
#
|
||||
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
|
||||
# information.
|
||||
thresholds:
|
||||
# By default Anubis ships with the following thresholds:
|
||||
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
|
||||
expression: weight <= 0 # a feather weighs zero units
|
||||
action: ALLOW # Allow the traffic through
|
||||
# For clients that had some weight reduced through custom rules, give them a
|
||||
# lightweight challenge.
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight > 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
# For clients that are browser-like but have either gained points from custom rules or
|
||||
# report as a standard browser.
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 2 # two leading zeros, very fast for most clients
|
||||
report_as: 2
|
||||
# For clients that are browser-like and have gained many points from custom rules
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
# Note: Blocks human-directed/non-training user agents
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
action: DENY
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
variable "ALPINE_VERSION" { default = "3.22" }
|
||||
variable "GITHUB_SHA" { default = "devel" }
|
||||
|
||||
group "default" {
|
||||
targets = [
|
||||
"ci-runner",
|
||||
"anubis",
|
||||
]
|
||||
}
|
||||
|
||||
target "ci-runner" {
|
||||
target "anubis" {
|
||||
args = {
|
||||
ALPINE_VERSION = "3.22"
|
||||
}
|
||||
@@ -21,6 +22,6 @@ target "ci-runner" {
|
||||
]
|
||||
pull = true
|
||||
tags = [
|
||||
"ghcr.io/techarohq/anubis/ci-runner:latest"
|
||||
"ghcr.io/techarohq/anubis:${GITHUB_SHA}"
|
||||
]
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
---
|
||||
slug: welcome
|
||||
title: Welcome to the Anubis blog!
|
||||
authors: [xe]
|
||||
tags: [intro]
|
||||
---
|
||||
|
||||
Hello, world!
|
||||
|
||||
At Techaro, we've been working on making Anubis even better, and in the process we want to share what we've done, how it works, and signal boost cool things the community has done. As things happen, we'll blog about them so that you can learn from our struggles.
|
||||
|
||||
More details to come soon!
|
||||
|
||||
{/* truncate */}
|
||||
@@ -1,9 +0,0 @@
|
||||
xe:
|
||||
name: Xe Iaso
|
||||
title: CEO @ Techaro
|
||||
url: https://github.com/Xe
|
||||
image_url: https://github.com/Xe.png
|
||||
email: xe@techaro.lol
|
||||
page: true
|
||||
socials:
|
||||
github: Xe
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -11,142 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## v1.20.0: Thancred Waters
|
||||
|
||||
The big ticket items are as follows:
|
||||
|
||||
- Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
|
||||
- Implement request "weight", allowing administrators to customize the behaviour of Anubis based on specific criteria
|
||||
- Implement GeoIP and ASN based checks via [Thoth](https://anubis.techaro.lol/docs/admin/thoth) ([#206](https://github.com/TecharoHQ/anubis/issues/206))
|
||||
- Add [custom weight thresholds](./admin/configuration/thresholds.mdx) via CEL ([#688](https://github.com/TecharoHQ/anubis/pull/688))
|
||||
- Move Open Graph configuration [to the policy file](./admin/configuration/open-graph.mdx)
|
||||
- Enable support for Open Graph metadata to be returned by default instead of doing lookups against the target
|
||||
- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))
|
||||
- Refactor challenge presentation logic to use a challenge registry
|
||||
- Allow challenge implementations to register HTTP routes
|
||||
- [Imprint/Impressum support](./admin/configuration/impressum.mdx) ([#362](https://github.com/TecharoHQ/anubis/issues/362))
|
||||
- Fix "invalid response" after "Success!" in Chromium ([#564](https://github.com/TecharoHQ/anubis/issues/564))
|
||||
|
||||
A lot of performance improvements have been made:
|
||||
|
||||
- Replace internal SHA256 hashing with xxhash for 4-6x performance improvement in policy evaluation and cache operations
|
||||
- Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
|
||||
- Replace cidranger with bart for IP range checking, improving IP matching performance by 3-20x with zero heap
|
||||
allocations
|
||||
|
||||
And some cleanups/refactors were added:
|
||||
|
||||
- Fix OpenGraph passthrough ([#717](https://github.com/TecharoHQ/anubis/issues/717))
|
||||
- Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in
|
||||
development
|
||||
- Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not
|
||||
- Bump AI-robots.txt to version 1.37
|
||||
- Requests can have their weight adjusted; if a request weighs zero or less, then it is allowed through
|
||||
- Refactor challenge presentation logic to use a challenge registry
|
||||
- Allow challenge implementations to register HTTP routes
|
||||
- Implement a no-JS challenge method: [`metarefresh`](./admin/configuration/challenges/metarefresh.mdx) ([#95](https://github.com/TecharoHQ/anubis/issues/95))
|
||||
- Bump AI-robots.txt to version 1.34
|
||||
- Make progress bar styling more compatible (UXP, etc)
|
||||
- Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
|
||||
- Fix an off-by-one in the default threshold config
|
||||
- Add functionality for HS512 JWT algorithm
|
||||
- Add support for dynamic cookie domains with the `--cookie-dynamic-domain`/`COOKIE_DYNAMIC_DOMAIN` flag/envvar
|
||||
|
||||
Request weight is one of the biggest ticket features in Anubis. This enables Anubis to be much closer to a Web Application Firewall and when combined with custom thresholds allows administrators to have Anubis take advanced reactions. For more information about request weight, see [the request weight section](./admin/policies.mdx#request-weight) of the policy file documentation.
|
||||
|
||||
TL;DR: when you have one or more `WEIGH` rules like this:
|
||||
|
||||
```yaml
|
||||
bots:
|
||||
- name: gitea-session-token
|
||||
action: WEIGH
|
||||
expression:
|
||||
all:
|
||||
- '"Cookie" in headers'
|
||||
- headers["Cookie"].contains("i_love_gitea=")
|
||||
# Remove 5 weight points
|
||||
weight:
|
||||
adjust: -5
|
||||
```
|
||||
|
||||
You can configure custom thresholds like this:
|
||||
|
||||
```yaml
|
||||
thresholds:
|
||||
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
|
||||
expression: weight < 0 # a feather weighs zero units
|
||||
action: ALLOW # Allow the traffic through
|
||||
|
||||
# For clients that had some weight reduced through custom rules, give them a
|
||||
# lightweight challenge.
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
|
||||
# For clients that are browser-like but have either gained points from custom
|
||||
# rules or report as a standard browser.
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 2 # two leading zeros, very fast for most clients
|
||||
report_as: 2
|
||||
|
||||
# For clients that are browser like and have gained many points from custom
|
||||
# rules
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
```
|
||||
|
||||
These thresholds apply when no other `ALLOW`, `DENY`, or `CHALLENGE` rule matches the request. `WEIGH` rules add and remove request weight as needed:
|
||||
|
||||
```yaml
|
||||
bots:
|
||||
- name: gitea-session-token
|
||||
action: WEIGH
|
||||
expression:
|
||||
all:
|
||||
- '"Cookie" in headers'
|
||||
- headers["Cookie"].contains("i_love_gitea=")
|
||||
# Remove 5 weight points
|
||||
weight:
|
||||
adjust: -5
|
||||
|
||||
- name: bot-like-user-agent
|
||||
action: WEIGH
|
||||
expression: '"Bot" in userAgent'
|
||||
# Add 5 weight points
|
||||
weight:
|
||||
adjust: 5
|
||||
```
|
||||
|
||||
Of note: the default "generic browser" rule assigns 10 weight points:
|
||||
|
||||
```yaml
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
Adjust this as you see fit.
|
||||
|
||||
## v1.19.1: Jenomis cen Lexentale - Echo 1
|
||||
|
||||
@@ -282,6 +155,7 @@ Other changes:
|
||||
- Moved all CSS inline to the Xess package, changed colors to be CSS variables
|
||||
- Set or append to `X-Forwarded-For` header unless the remote connects over a loopback address [#328](https://github.com/TecharoHQ/anubis/issues/328)
|
||||
- Fixed mojeekbot user agent regex
|
||||
- Added support for running anubis behind a base path (e.g. `/myapp`)
|
||||
- Reduce Anubis' paranoia with user cookies ([#365](https://github.com/TecharoHQ/anubis/pull/365))
|
||||
- Added support for Open Graph passthrough while using unix sockets
|
||||
- The Open Graph subsystem now passes the HTTP `HOST` header through to the origin
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
# Imprint / Impressum configuration
|
||||
|
||||
Some jurisdictions (such as the European Union and specifically Germany) require that websites [have contact information freely available](https://www.privacycompany.eu/blog/the-imprint-requirement-a-must-have-for-companies-from-outside-germany) on an imprint/impressum page. Anubis supports creating an Anubis-specific imprint page for your organization with the `impressum` block in your bot policy file. For example:
|
||||
|
||||
```yaml
|
||||
impressum:
|
||||
# Displayed at the bottom of every page rendered by Anubis.
|
||||
footer: >-
|
||||
This website is hosted by Techaro. If you have any complaints or notes
|
||||
about the service, please contact
|
||||
<a href="mailto:contact@techaro.lol">contact@techaro.lol</a> and we
|
||||
will assist you as soon as possible.
|
||||
|
||||
# The imprint page that will be linked to at the footer of every Anubis page.
|
||||
page:
|
||||
# The HTML <title> of the page
|
||||
title: Imprint and Privacy Policy
|
||||
# The HTML contents of the page. The exact contents of this page can
|
||||
# and will vary by locale. Please consult with a lawyer if you are not
|
||||
# sure what to put here
|
||||
body: >-
|
||||
<p>Last updated: June 2025</p>
|
||||
|
||||
<h2>Information that is gathered from visitors</h2>
|
||||
|
||||
<p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
|
||||
|
||||
<p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
|
||||
|
||||
<p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
|
||||
|
||||
<!-- ... -->
|
||||
```
|
||||
|
||||
If you are subscribed to and using [advanced classification features](../thoth.mdx), be sure to disclose the following:
|
||||
|
||||
```html
|
||||
<h2>Techaro Anubis</h2>
|
||||
|
||||
<p>
|
||||
This website uses a service called
|
||||
<a href="https://anubis.techaro.lol">Anubis</a> by
|
||||
<a href="https://techaro.lol">Techaro</a> to filter malicious traffic. Anubis
|
||||
requires the use of browser cookies to ensure that web clients are running
|
||||
conformant software. Anubis also may report the following data to Techaro to
|
||||
improve service quality:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
IP address (for purposes of matching against geo-location and BGP autonomous
|
||||
systems numbers), which is stored in-memory and not persisted to disk.
|
||||
</li>
|
||||
<li>
|
||||
Unique browser fingerprints (such as HTTP request fingerprints and
|
||||
encryption system fingerprints), which may be stored on Techaro's side for a
|
||||
period of up to one month.
|
||||
</li>
|
||||
<li>
|
||||
HTTP request metadata that may include things such as the User-Agent header
|
||||
and other identifiers.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
This data is processed and stored for the legitimate interest of combatting
|
||||
abusive web clients. This data is encrypted at rest as much as possible and is
|
||||
only decrypted in memory for the purposes of fulfilling requests.
|
||||
</p>
|
||||
```
|
||||
@@ -9,45 +9,12 @@ This page provides detailed information on how to configure [Open Graph tag](htt
|
||||
|
||||
## Configuration Options
|
||||
|
||||
Open Graph settings are configured in the `openGraph` section of the [Policy File](../policies.mdx).
|
||||
|
||||
```yaml
|
||||
openGraph:
|
||||
# Enables Open Graph passthrough
|
||||
enabled: true
|
||||
# Enables the use of the HTTP host in the cache key, this enables
|
||||
# caching metadata for multiple http hosts at once.
|
||||
considerHost: true
|
||||
# How long cached OpenGraph metadata should last in memory
|
||||
ttl: 24h
|
||||
# If set, return these opengraph values instead of looking them up with
|
||||
# the target service.
|
||||
#
|
||||
# Correlates to properties in https://ogp.me/
|
||||
override:
|
||||
# og:title is required, it is the title of the website
|
||||
"og:title": "Techaro Anubis"
|
||||
"og:description": >-
|
||||
Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
away so that you can maintain uptime at work!
|
||||
"description": >-
|
||||
Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
away so that you can maintain uptime at work!
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Configuration flags / envvars (old)</summary>
|
||||
|
||||
Open Graph passthrough used to be configured with configuration flags / environment variables. References to these settings are maintained for backwards compatibility's sake.
|
||||
|
||||
| Name | Description | Type | Default | Example |
|
||||
| ------------------------ | --------------------------------------------------------- | -------- | ------- | ----------------------------- |
|
||||
| `OG_PASSTHROUGH` | Enables or disables the Open Graph tag passthrough system | Boolean | `true` | `OG_PASSTHROUGH=true` |
|
||||
| `OG_EXPIRY_TIME` | Configurable cache expiration time for Open Graph tags | Duration | `24h` | `OG_EXPIRY_TIME=1h` |
|
||||
| `OG_CACHE_CONSIDER_HOST` | Enables or disables the use of the host in the cache key | Boolean | `false` | `OG_CACHE_CONSIDER_HOST=true` |
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
To configure Open Graph tags, you can set the following options as environment variables, in an environment file, or as flags in your Anubis configuration:
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
# Weight Threshold Configuration
|
||||
|
||||
Anubis offers the ability to assign "weight" to requests. This is a custom level of suspicion that rules can add to or remove from. For example, here's how you assign 10 weight points to anything that might be a browser:
|
||||
|
||||
```yaml
|
||||
# botPolicies.yaml
|
||||
|
||||
bots:
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
Thresholds let you take this per-request weight value and take actions in response to it. Thresholds are defined alongside your bot configuration in `botPolicies.yaml`.
|
||||
|
||||
:::note
|
||||
|
||||
Thresholds DO NOT apply when a request matches a bot rule with the CHALLENGE action. Thresholds only apply when requests don't match any terminal bot rules.
|
||||
|
||||
:::
|
||||
|
||||
```yaml
|
||||
# botPolicies.yaml
|
||||
|
||||
bots: ...
|
||||
|
||||
thresholds:
|
||||
- name: minimal-suspicion
|
||||
expression: weight < 0
|
||||
action: ALLOW
|
||||
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 2
|
||||
report_as: 2
|
||||
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
```
|
||||
|
||||
This defines a suite of 4 thresholds:
|
||||
|
||||
1. If the request weight is less than zero, allow it through.
|
||||
2. If the request weight is greater than or equal to zero, but less than ten: give it [a very lightweight challenge](./challenges/metarefresh.mdx).
|
||||
3. If the request weight is greater than or equal to ten, but less than twenty: give it [a slightly heavier challenge](./challenges/proof-of-work.mdx).
|
||||
4. Otherwise, give it [the heaviest challenge](./challenges/proof-of-work.mdx).
|
||||
|
||||
Thresholds can be configured with the following options:
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Description</th>
|
||||
<th>Example</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>`name`</td>
|
||||
<td>The human-readable name for this threshold.</td>
|
||||
<td>
|
||||
|
||||
```yaml
|
||||
name: extreme-suspicion
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>`expression`</td>
|
||||
<td>A [CEL](https://cel.dev/) expression taking the request weight and returning true or false</td>
|
||||
<td>
|
||||
|
||||
To check if the request weight is less than zero:
|
||||
|
||||
```yaml
|
||||
expression: weight < 0
|
||||
```
|
||||
|
||||
To check if it's at least 0 and less than 10:
|
||||
|
||||
```yaml
|
||||
expression:
|
||||
all:
|
||||
- weight >= 0
|
||||
- weight < 10
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>`action`</td>
|
||||
<td>The Anubis action to apply: `ALLOW`, `CHALLENGE`, or `DENY`</td>
|
||||
<td>
|
||||
|
||||
```yaml
|
||||
action: ALLOW
|
||||
```
|
||||
|
||||
If you set the CHALLENGE action, you must set challenge details:
|
||||
|
||||
```yaml
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
</table>
|
||||
@@ -34,6 +34,27 @@ These examples assume that you are using a setup where your nginx configuration
|
||||
|
||||
:::
|
||||
|
||||
## Dependencies
|
||||
|
||||
Install the following dependencies for proxying HTTP:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="rpm" label="Red Hat / RPM" default>
|
||||
|
||||
```text
|
||||
dnf -y install mod_proxy_html
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="deb" label="Debian / Ubuntu / apt">
|
||||
|
||||
```text
|
||||
apt-get install -y libapache2-mod-proxy-html libxml2-dev
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Configuration
|
||||
|
||||
Assuming you are protecting `anubistest.techaro.lol`, you need the following server configuration blocks:
|
||||
|
||||
@@ -4,6 +4,9 @@ title: Setting up Anubis
|
||||
|
||||
import RandomKey from "@site/src/components/RandomKey";
|
||||
|
||||
import Tabs from "@theme/Tabs";
|
||||
import TabItem from "@theme/TabItem";
|
||||
|
||||
Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting.
|
||||
|
||||
<center>
|
||||
@@ -42,19 +45,7 @@ Anubis has very minimal system requirements. I suspect that 128Mi of ram may be
|
||||
|
||||
For more detailed information on installing Anubis with native packages, please read [the native install directions](./native-install.mdx).
|
||||
|
||||
## Configuration
|
||||
|
||||
Anubis is configurable via environment variables and [the policy file](./policies.mdx). Most settings are currently exposed with environment variables but they are being slowly moved over to the policy file.
|
||||
|
||||
### Configuration via the policy file
|
||||
|
||||
Currently the following settings are configurable via the policy file:
|
||||
|
||||
- [Bot policies](./policies.mdx)
|
||||
- [Open Graph passthrough](./configuration/open-graph.mdx)
|
||||
- [Weight thresholds](./configuration/thresholds.mdx)
|
||||
|
||||
### Environment variables
|
||||
## Environment variables
|
||||
|
||||
Anubis uses these environment variables for configuration:
|
||||
|
||||
@@ -64,7 +55,6 @@ Anubis uses these environment variables for configuration:
|
||||
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
|
||||
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
|
||||
| `COOKIE_DOMAIN` | unset | The domain the Anubis challenge pass cookie should be set to. This should be set to the domain you bought from your registrar (EG: `techaro.lol` if your webapp is running on `anubis.techaro.lol`). See this [stackoverflow explanation of cookies](https://stackoverflow.com/a/1063760) for more information.<br/><br/>Note that unlike `REDIRECT_DOMAINS`, you should never include a port number in this variable. |
|
||||
| `COOKIE_DYNAMIC_DOMAIN` | false | If set to true, automatically set cookie domain fields based on the hostname of the request. EG: if you are making a request to `anubis.techaro.lol`, the Anubis cookie will be valid for any subdomain of `techaro.lol`. |
|
||||
| `COOKIE_EXPIRATION_TIME` | `168h` | The amount of time the authorization cookie is valid for. |
|
||||
| `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. |
|
||||
| `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
|
||||
@@ -72,14 +62,13 @@ Anubis uses these environment variables for configuration:
|
||||
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. |
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
|
||||
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. |
|
||||
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. |
|
||||
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. |
|
||||
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
|
||||
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example.com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
|
||||
| `SERVE_ROBOTS_TXT` | `false` | If set to `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
|
||||
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables is set to `unix`._ The socket mode (permissions) for Unix domain sockets. |
|
||||
| `STRIP_BASE_PREFIX` | `false` | If set to `true`, strips the base prefix from request paths when forwarding to the target server. This is useful when your target service expects to receive requests without the base prefix. For example, with `BASE_PREFIX=/foo` and `STRIP_BASE_PREFIX=true`, a request to `/foo/bar` would be forwarded to the target as `/bar`. |
|
||||
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
|
||||
| `USE_REMOTE_ADDRESS` | unset | If set to `true`, Anubis will take the client's IP from the network socket. For production deployments, it is expected that a reverse proxy is used in front of Anubis, which passes the client IP using headers instead. |
|
||||
| `WEBMASTER_EMAIL` | unset | If set, shows a contact email address when rendering error pages. This email address will be how users can get in contact with administrators. |
|
||||
@@ -94,12 +83,11 @@ If you don't know or understand what these settings mean, ignore them. These are
|
||||
|
||||
:::
|
||||
|
||||
| Environment Variable | Default value | Explanation |
|
||||
| :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `TARGET_SNI` | unset | If set, overrides the TLS handshake hostname in requests forwarded to `TARGET`. |
|
||||
| `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. |
|
||||
| `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. |
|
||||
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. |
|
||||
| Environment Variable | Default value | Explanation |
|
||||
| :---------------------------- | :------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `TARGET_SNI` | unset | If set, overrides the TLS handshake hostname in requests forwarded to `TARGET`. |
|
||||
| `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. |
|
||||
| `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. |
|
||||
|
||||
</details>
|
||||
|
||||
@@ -141,22 +129,6 @@ With corresponding Anubis configuration:
|
||||
BASE_PREFIX=/myapp
|
||||
```
|
||||
|
||||
#### Stripping Base Prefix
|
||||
|
||||
If your target service doesn't expect to receive the base prefix in request paths, you can use the `STRIP_BASE_PREFIX` option:
|
||||
|
||||
```
|
||||
BASE_PREFIX=/myapp
|
||||
STRIP_BASE_PREFIX=true
|
||||
```
|
||||
|
||||
With this configuration:
|
||||
|
||||
- A request to `/myapp/api/users` would be forwarded to your target service as `/api/users`
|
||||
- A request to `/myapp/` would be forwarded as `/`
|
||||
|
||||
This is particularly useful when working with applications that weren't designed to handle path prefixes. However, note that if your target application generates absolute redirects or links (like `/login` instead of `./login`), these may break the subpath routing since they won't include the base prefix.
|
||||
|
||||
### Key generation
|
||||
|
||||
To generate an ed25519 private key, you can use this command:
|
||||
|
||||
@@ -233,10 +233,6 @@ remote_addresses:
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Imprint / Impressum support
|
||||
|
||||
Anubis has support for showing imprint / impressum information. This is defined in the `impressum` block of your configuration. See [Imprint / Impressum configuration](./configuration/impressum.mdx) for more information.
|
||||
|
||||
## Risk calculation for downstream services
|
||||
|
||||
In case your service needs it for risk calculation reasons, Anubis exposes information about the rules that any requests match using a few headers:
|
||||
@@ -265,11 +261,17 @@ Anubis rules can also add or remove "weight" from requests, allowing administrat
|
||||
adjust: -5
|
||||
```
|
||||
|
||||
This would remove five weight points from the request, which would make Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) in the default configuration.
|
||||
This would remove five weight points from the request, making Anubis present the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx).
|
||||
|
||||
### Weight Thresholds
|
||||
|
||||
For more information on configuring weight thresholds, see [Weight Threshold Configuration](./configuration/thresholds.mdx)
|
||||
Weight thresholds and challenge associations will be configurable with CEL expressions in the configuration file in an upcoming patch. For now, here's how Anubis configures the weight thresholds:
|
||||
|
||||
| Weight Expression | Action |
|
||||
| -----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `weight < 0` (weight is less than 0) | Allow the request through. |
|
||||
| `weight < 10` (weight is less than 10) | Challenge the client with the [Meta Refresh challenge](./configuration/challenges/metarefresh.mdx) at the default difficulty level. |
|
||||
| `weight >= 10` (weight is greater than or equal to 10) | Challenge the client with the [Proof of Work challenge](./configuration/challenges/proof-of-work.mdx) at the default difficulty level. |
|
||||
|
||||
### Advice
|
||||
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
---
|
||||
title: robots2policy CLI Tool
|
||||
sidebar_position: 50
|
||||
---
|
||||
|
||||
The `robots2policy` tool converts robots.txt files into Anubis challenge policies. It reads robots.txt rules and generates equivalent CEL expressions for path matching and user-agent filtering.
|
||||
|
||||
## Installation
|
||||
|
||||
Install directly with Go:
|
||||
|
||||
```bash
|
||||
go install github.com/TecharoHQ/anubis/cmd/robots2policy@latest
|
||||
```
|
||||
## Usage
|
||||
|
||||
Basic conversion from URL:
|
||||
|
||||
```bash
|
||||
robots2policy -input https://www.example.com/robots.txt
|
||||
```
|
||||
|
||||
Convert local file to YAML:
|
||||
|
||||
```bash
|
||||
robots2policy -input robots.txt -output policy.yaml
|
||||
```
|
||||
|
||||
Convert with custom settings:
|
||||
|
||||
```bash
|
||||
robots2policy -input robots.txt -action DENY -format json
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
| Flag | Description | Default |
|
||||
|-----------------------|--------------------------------------------------------------------|---------------------|
|
||||
| `-input` | robots.txt file path or URL (use `-` for stdin) | *required* |
|
||||
| `-output` | Output file (use `-` for stdout) | stdout |
|
||||
| `-format` | Output format: `yaml` or `json` | `yaml` |
|
||||
| `-action` | Action for disallowed paths: `ALLOW`, `DENY`, `CHALLENGE`, `WEIGH` | `CHALLENGE` |
|
||||
| `-name` | Policy name prefix | `robots-txt-policy` |
|
||||
| `-crawl-delay-weight` | Weight adjustment for crawl-delay rules | `3` |
|
||||
| `-deny-user-agents` | Action for blacklisted user agents | `DENY` |
|
||||
|
||||
## Example
|
||||
|
||||
Input robots.txt:
|
||||
```txt
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
```
|
||||
|
||||
Generated policy:
|
||||
```yaml
|
||||
- name: robots-txt-policy-disallow-1
|
||||
action: CHALLENGE
|
||||
expression:
|
||||
single: path.startsWith("/admin/")
|
||||
- name: robots-txt-policy-disallow-2
|
||||
action: CHALLENGE
|
||||
expression:
|
||||
single: path.startsWith("/private")
|
||||
- name: robots-txt-policy-blacklist-3
|
||||
action: DENY
|
||||
expression:
|
||||
single: userAgent.contains("BadBot")
|
||||
```
|
||||
|
||||
## Using the Generated Policy
|
||||
|
||||
Save the output and import it in your main policy file:
|
||||
|
||||
```yaml
|
||||
import:
|
||||
- path: "./robots-policy.yaml"
|
||||
```
|
||||
|
||||
The tool handles wildcard patterns, user-agent specific rules, and blacklisted bots automatically.
|
||||
@@ -1,81 +0,0 @@
|
||||
# Thoth-based advanced checks
|
||||
|
||||
Status: Beta
|
||||
|
||||
Anubis instances are normally isolated. Each Anubis instance has its own configuration and exists in roughly its own world without any long-term memory between requests. As threats, workarounds, and AI scraper toolchains evolve, administrators will need a way to get more up-to-date information faster than Anubis' release cycle.
|
||||
|
||||
Thus, Thoth is being created. Thoth is the reputation database for Anubis. Thoth feeds information to Anubis so that it can make better decisions about which traffic is innocuous and which traffic is suspicious.
|
||||
|
||||
:::note
|
||||
|
||||
Thoth is hosted by [Techaro](https://techaro.lol). Thoth is a paid service. Thoth is opt-in and requires manual intervention (including payment) to use. The code that powers Thoth is currently closed source.
|
||||
|
||||
To get access to Thoth, please subscribe [on GitHub Sponsors](https://github.com/sponsors/Xe) and [email Xe](mailto:xe@techaro.lol). This will be self-service soon.
|
||||
|
||||
:::
|
||||
|
||||
## Implementation
|
||||
|
||||
Thoth is a web service that listens over [gRPC](https://grpc.io/). Thoth's API is documented in protocol buffer definitions in the GitHub repo [TecharoHQ/thoth-proto](https://github.com/TecharoHQ/thoth-proto).
|
||||
|
||||
Thoth is designed to be _informative_, not _authoritative_. Thoth cannot and will not arbitrarily block requests, origins, or other traffic. Thoth is there to inform Anubis and influence the weight of requests so that upstream resources can be protected. Additionally, Anubis aggressively caches data from Thoth such that over time Anubis will not need to request data very often. This makes the fast path for repeat visitors even faster and reduces the amount of data that Thoth is exposed to.
|
||||
|
||||
## Thoth features
|
||||
|
||||
Thoth is currently in active development. Currently, Thoth provides the following features to Anubis:
|
||||
|
||||
- BGP Autonomous System (ASN) based filtering
|
||||
- GeoIP location based filtering
|
||||
|
||||
### ASN-based filtering
|
||||
|
||||
When companies link their backbone infrastructure to the Internet, they do so via a [BGP Autonomous System](<https://en.wikipedia.org/wiki/Autonomous_system_(Internet)>), denoted by a number (the Autonomous System Number or ASN). Every IP address on the Internet is owned by an ASN with a 1:1 lookup that does not change very frequently.
|
||||
|
||||
Anubis uses Thoth to match IP addresses to BGP Autonomous Systems so that you can either issue arbitrary challenges to individual internet service providers (such as Cloudflare or Huawei Cloud) or, at the administrator's explicit instruction, block them altogether. For example, here's how you add 10 weight points to requests from Cloudflare, Huawei Cloud, and Alibaba Cloud:
|
||||
|
||||
```yaml
|
||||
- name: aggressive-asns-without-functional-abuse-contact
|
||||
action: WEIGH
|
||||
asns:
|
||||
match:
|
||||
- 13335 # Cloudflare
|
||||
- 136907 # Huawei Cloud
|
||||
- 45102 # Alibaba Cloud
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
You can look up details for [AS13335](https://bgp.tools/as/13335) or any of these other top offenders on [bgp.tools](https://bgp.tools).
|
||||
|
||||
### GeoIP-based filtering
|
||||
|
||||
In extreme cases, an administrator may have to take action against an entire country. This is not an ideal circumstance, but sometimes reality forces their hands and the administrators just want to sleep at night.
|
||||
|
||||
Anubis uses Thoth to look up the geographic location registered to an IP address. This lookup is not the best and will get better with time, but you ship what you can so you can make it better for next time.
|
||||
|
||||
For example, to add 10 weight points to requests from Brazil and China:
|
||||
|
||||
```yaml
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
adjust: 10
|
||||
```
|
||||
|
||||
Use this with care.
|
||||
|
||||
## Work-in-progress features
|
||||
|
||||
This section is a bit aspirational and is where Thoth will end up rather than things you can use today.
|
||||
|
||||
In general, a lot of Thoth features are focused on taking the same Anubis you know and love and making it better, smarter, and less paranoid. These include:
|
||||
|
||||
- Private rulesets for advanced patterns, current known exploits, and other recognition tactics that need to be kept cloak and dagger for operational security reasons
|
||||
- Private challenge implementations via WebAssembly, including advanced browser detection logic
|
||||
- Reputation querying so that Thoth can arbitrarily influence the weight of requests based on the net aggregate pass rate so that the most common browsers can get through with no challenge issued at all
|
||||
- APIs for trusted administrators to report abusive request fingerprints so that Anubis can react to threats as they evolve
|
||||
- A way for Anubis to periodically report the pass rate per ASN and other fingerprints so that methodology can be improved
|
||||
@@ -58,9 +58,7 @@ This will build all static assets (CSS, JavaScript) for distribution.
|
||||
make build
|
||||
```
|
||||
|
||||
From this point it is up to you to make sure that `./var/anubis` and `./var/robots2policy` end up in
|
||||
the right place. You may want to consult the `./run` folder for useful files such as a systemd unit
|
||||
and `anubis.env.default` file.
|
||||
From this point it is up to you to make sure that `./var/anubis` ends up in the right place. You may want to consult the `./run` folder for useful files such as a systemd unit and `anubis.env.default` file.
|
||||
|
||||
## "Pre-baked" tarball
|
||||
|
||||
@@ -77,7 +75,7 @@ When using this tarball, all you need to do is build `./cmd/anubis`:
|
||||
make prebaked-build
|
||||
```
|
||||
|
||||
Anubis will be built to `./var/anubis` and the robots2policy tool to `./var/robots2policy`.
|
||||
Anubis will be built to `./var/anubis`.
|
||||
|
||||
## Development dependencies
|
||||
|
||||
|
||||
@@ -41,24 +41,13 @@ This page contains a non-exhaustive list with all websites using Anubis.
|
||||
- https://minihoot.site
|
||||
- https://catgirl.click/
|
||||
- https://wiki.dolphin-emu.org/
|
||||
- https://squirreljme.cc/
|
||||
- https://gitlab.postmarketos.org/
|
||||
- https://wiki.koha-community.org/
|
||||
- https://extensions.typo3.org/
|
||||
- <details>
|
||||
<summary>FreeCAD</summary>
|
||||
- https://forum.freecad.org/
|
||||
- https://wiki.freecad.org/
|
||||
</details>
|
||||
- <details>
|
||||
<summary>ReactOS</summary>
|
||||
- https://reactos.org/forum
|
||||
- https://reactos.org/wiki
|
||||
- https://git.reactos.org
|
||||
</details>
|
||||
- <details>
|
||||
<summary>ScummVM</summary>
|
||||
- https://bugs.scummvm.org/
|
||||
- https://forums.scummvm.org/
|
||||
- https://wiki.scummvm.org/
|
||||
</details>
|
||||
|
||||
@@ -47,21 +47,21 @@ const config: Config = {
|
||||
editUrl:
|
||||
'https://github.com/TecharoHQ/anubis/tree/main/docs/',
|
||||
},
|
||||
blog: {
|
||||
showReadingTime: true,
|
||||
feedOptions: {
|
||||
type: ['rss', 'atom', "json"],
|
||||
xslt: true,
|
||||
},
|
||||
// Please change this to your repo.
|
||||
// Remove this to remove the "edit this page" links.
|
||||
editUrl:
|
||||
'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
|
||||
// Useful options to enforce blogging best practices
|
||||
onInlineTags: 'warn',
|
||||
onInlineAuthors: 'warn',
|
||||
onUntruncatedBlogPosts: 'warn',
|
||||
},
|
||||
// blog: {
|
||||
// showReadingTime: true,
|
||||
// feedOptions: {
|
||||
// type: ['rss', 'atom', "json"],
|
||||
// xslt: true,
|
||||
// },
|
||||
// // Please change this to your repo.
|
||||
// // Remove this to remove the "edit this page" links.
|
||||
// editUrl:
|
||||
// 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
|
||||
// // Useful options to enforce blogging best practices
|
||||
// onInlineTags: 'warn',
|
||||
// onInlineAuthors: 'warn',
|
||||
// onUntruncatedBlogPosts: 'warn',
|
||||
// },
|
||||
theme: {
|
||||
customCss: './src/css/custom.css',
|
||||
},
|
||||
@@ -86,14 +86,9 @@ const config: Config = {
|
||||
type: 'docSidebar',
|
||||
sidebarId: 'tutorialSidebar',
|
||||
position: 'left',
|
||||
label: 'Docs',
|
||||
},
|
||||
{ to: '/blog', label: 'Blog', position: 'left' },
|
||||
{
|
||||
href: 'https://github.com/sponsors/Xe',
|
||||
label: "Sponsorship",
|
||||
position: 'left'
|
||||
label: 'Tutorial',
|
||||
},
|
||||
// { to: '/blog', label: 'Blog', position: 'left' },
|
||||
{
|
||||
href: 'https://github.com/TecharoHQ/anubis',
|
||||
label: 'GitHub',
|
||||
@@ -133,10 +128,6 @@ const config: Config = {
|
||||
{
|
||||
title: 'More',
|
||||
items: [
|
||||
{
|
||||
label: 'Blog',
|
||||
to: '/blog',
|
||||
},
|
||||
{
|
||||
label: 'GitHub',
|
||||
href: 'https://github.com/TecharoHQ/anubis',
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: anubis-docs-thoth
|
||||
spec:
|
||||
itemPath: "vaults/lc5zo4zjz3if3mkeuhufjmgmui/items/pwguumqcmtxvqbeb7y4gj7l36i"
|
||||
@@ -51,13 +51,6 @@ bots:
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
|
||||
- name: rss-feed-blog
|
||||
action: ALLOW
|
||||
expression:
|
||||
any:
|
||||
- path.startsWith("/blog/atom.")
|
||||
- path.startsWith("/blog/rss.")
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
@@ -70,55 +63,6 @@ bots:
|
||||
|
||||
dnsbl: false
|
||||
|
||||
impressum:
|
||||
footer: |
|
||||
This website is hosted by Techaro. If you have any complaints or notes about the service, please contact <a href="mailto:contact@techaro.lol">contact@techaro.lol</a> and we will assist you as soon as possible.
|
||||
|
||||
page:
|
||||
title: Privacy Policy
|
||||
body: |
|
||||
<p>Last updated: June 2025</p>
|
||||
|
||||
<h2>Information that is gathered from visitors</h2>
|
||||
|
||||
<p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
|
||||
|
||||
<p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
|
||||
|
||||
<p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
|
||||
|
||||
<h2>How the Information is used</h2>
|
||||
|
||||
<p>The information is used to enhance the visitor's experience when using the website to display personalised content and possibly advertising.</p>
|
||||
|
||||
<p>E-mail addresses will not be sold, rented or leased to 3rd parties.</p>
|
||||
|
||||
<p>E-mail may be sent to inform you of news of our services or offers by us or our affiliates.</p>
|
||||
|
||||
<h2>Visitor Options</h2>
|
||||
|
||||
<p>If you have subscribed to one of our services, you may unsubscribe by following the instructions which are included in e-mail that you receive.</p>
|
||||
|
||||
<p>You may be able to block cookies via your browser settings but this may prevent you from accessing certain features of the website.</p>
|
||||
|
||||
<h2>Cookies</h2>
|
||||
|
||||
<p>Cookies are small digital signature files that are stored by your web browser that allow your preferences to be recorded when visiting the website. Also they may be used to track your return visits to the website.</p>
|
||||
|
||||
<p>3rd party advertising companies may also use cookies for tracking purposes.</p>
|
||||
|
||||
<h2>Techaro Anubis</h2>
|
||||
|
||||
<p>This website uses a service called <a href="https://anubis.techaro.lol">Anubis</a> to filter malicious traffic. Anubis requires the use of browser cookies to ensure that web clients are running conformant software. Anubis also may report the following data to Techaro to improve service quality:</p>
|
||||
|
||||
<ul>
|
||||
<li>IP address (for purposes of matching against geo-location and BGP autonomous systems numbers), which is stored in-memory and not persisted to disk.</li>
|
||||
<li>Unique browser fingerprints (such as HTTP request fingerprints and encryption system fingerprints), which may be stored on Techaro's side for a period of up to one month.</li>
|
||||
<li>HTTP request metadata that may include things such as the User-Agent header and other identifiers.</li>
|
||||
</ul>
|
||||
|
||||
<p>This data is processed and stored for the legitimate interest of combatting abusive web clients. This data is encrypted at rest as much as possible and is only decrypted in memory for the purposes of fulfilling requests.</p>
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
||||
|
||||
@@ -38,8 +38,6 @@ spec:
|
||||
value: "4"
|
||||
- name: "METRICS_BIND"
|
||||
value: ":9090"
|
||||
- name: "OG_PASSTHROUGH"
|
||||
value: "true"
|
||||
- name: "POLICY_FNAME"
|
||||
value: "/xe/cfg/anubis/botPolicies.yaml"
|
||||
- name: "SERVE_ROBOTS_TXT"
|
||||
@@ -68,6 +66,3 @@ spec:
|
||||
- ALL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: anubis-docs-thoth
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
resources:
|
||||
- 1password.yaml
|
||||
- deployment.yaml
|
||||
- ingress.yaml
|
||||
- onionservice.yaml
|
||||
- poddisruptionbudget.yaml
|
||||
- service.yaml
|
||||
|
||||
configMapGenerator:
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
name: anubis-docs
|
||||
spec:
|
||||
minAvailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: anubis-docs
|
||||
3083
docs/package-lock.json
generated
3083
docs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -15,9 +15,9 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"dependencies": {
|
||||
"@docusaurus/core": "^3.8.1",
|
||||
"@docusaurus/preset-classic": "^3.8.1",
|
||||
"@docusaurus/theme-mermaid": "^3.8.1",
|
||||
"@docusaurus/core": "3.7.0",
|
||||
"@docusaurus/preset-classic": "3.7.0",
|
||||
"@docusaurus/theme-mermaid": "^3.7.0",
|
||||
"@mdx-js/react": "^3.0.0",
|
||||
"clsx": "^2.0.0",
|
||||
"prism-react-renderer": "^2.3.0",
|
||||
@@ -25,9 +25,9 @@
|
||||
"react-dom": "^19.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@docusaurus/module-type-aliases": "^3.8.1",
|
||||
"@docusaurus/tsconfig": "^3.8.1",
|
||||
"@docusaurus/types": "^3.8.1",
|
||||
"@docusaurus/module-type-aliases": "3.7.0",
|
||||
"@docusaurus/tsconfig": "3.7.0",
|
||||
"@docusaurus/types": "3.7.0",
|
||||
"typescript": "~5.6.2"
|
||||
},
|
||||
"browserslist": {
|
||||
@@ -45,4 +45,4 @@
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
docs/static/img/sponsors/caninetools-logo.webp
vendored
BIN
docs/static/img/sponsors/caninetools-logo.webp
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 476 B After Width: | Height: | Size: 2.1 KiB |
45
go.mod
45
go.mod
@@ -3,29 +3,20 @@ module github.com/TecharoHQ/anubis
|
||||
go 1.24.2
|
||||
|
||||
require (
|
||||
github.com/TecharoHQ/thoth-proto v0.4.0
|
||||
github.com/a-h/templ v0.3.898
|
||||
github.com/cespare/xxhash/v2 v2.3.0
|
||||
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
|
||||
github.com/gaissmai/bart v0.20.4
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2
|
||||
github.com/google/cel-go v0.25.0
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/playwright-community/playwright-go v0.5200.0
|
||||
github.com/prometheus/client_golang v1.22.0
|
||||
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
|
||||
github.com/yl2chen/cidranger v1.0.2
|
||||
golang.org/x/net v0.41.0
|
||||
google.golang.org/grpc v1.73.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
k8s.io/apimachinery v0.33.2
|
||||
sigs.k8s.io/yaml v1.4.0
|
||||
k8s.io/apimachinery v0.33.1
|
||||
)
|
||||
|
||||
require (
|
||||
al.essio.dev/pkg/shellescape v1.6.0 // indirect
|
||||
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 // indirect
|
||||
cel.dev/expr v0.23.1 // indirect
|
||||
dario.cat/mergo v1.0.2 // indirect
|
||||
github.com/AlekSi/pointer v1.2.0 // indirect
|
||||
@@ -44,10 +35,10 @@ require (
|
||||
github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb // indirect
|
||||
github.com/cavaliergopher/cpio v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cli/browser v1.3.0 // indirect
|
||||
github.com/cli/go-gh v0.1.0 // indirect
|
||||
github.com/cloudflare/circl v1.6.1 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
||||
github.com/cloudflare/circl v1.6.0 // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
|
||||
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
|
||||
github.com/dlclark/regexp2 v1.11.4 // indirect
|
||||
@@ -67,21 +58,18 @@ require (
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/goccy/go-yaml v1.12.0 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
|
||||
github.com/google/go-github/v70 v70.0.0 // indirect
|
||||
github.com/google/go-querystring v1.1.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect
|
||||
github.com/google/rpmpack v0.6.1-0.20250405124433-758cc6896cbc // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/goreleaser/chglog v0.7.0 // indirect
|
||||
github.com/goreleaser/fileglob v1.3.0 // indirect
|
||||
github.com/goreleaser/nfpm/v2 v2.42.1 // indirect
|
||||
github.com/hashicorp/go-version v1.7.0 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
|
||||
github.com/kevinburke/ssh_config v1.2.0 // indirect
|
||||
github.com/klauspost/compress v1.18.0 // indirect
|
||||
github.com/klauspost/pgzip v1.2.6 // indirect
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||
@@ -92,49 +80,40 @@ require (
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.62.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/skeema/knownhosts v1.3.1 // indirect
|
||||
github.com/spf13/afero v1.14.0 // indirect
|
||||
github.com/spf13/cast v1.7.1 // indirect
|
||||
github.com/stoewer/go-strcase v1.3.0 // indirect
|
||||
github.com/suzuki-shunsuke/logrus-error v0.1.4 // indirect
|
||||
github.com/suzuki-shunsuke/pinact v1.6.0 // indirect
|
||||
github.com/suzuki-shunsuke/urfave-cli-help-all v0.0.4 // indirect
|
||||
github.com/stoewer/go-strcase v1.2.0 // indirect
|
||||
github.com/ulikunitz/xz v0.5.12 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.6 // indirect
|
||||
github.com/xanzy/ssh-agent v0.3.3 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
gitlab.com/digitalxero/go-conventional-commit v1.0.7 // indirect
|
||||
golang.org/x/crypto v0.39.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
|
||||
golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 // indirect
|
||||
golang.org/x/mod v0.25.0 // indirect
|
||||
golang.org/x/oauth2 v0.28.0 // indirect
|
||||
golang.org/x/sync v0.15.0 // indirect
|
||||
golang.org/x/sys v0.33.0 // indirect
|
||||
golang.org/x/telemetry v0.0.0-20240522233618-39ace7a40ae7 // indirect
|
||||
golang.org/x/term v0.32.0 // indirect
|
||||
golang.org/x/text v0.26.0 // indirect
|
||||
golang.org/x/tools v0.34.0 // indirect
|
||||
golang.org/x/tools v0.33.0 // indirect
|
||||
golang.org/x/vuln v1.1.4 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
|
||||
google.golang.org/protobuf v1.36.5 // indirect
|
||||
gopkg.in/warnings.v0 v0.1.2 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
honnef.co/go/tools v0.6.1 // indirect
|
||||
mvdan.cc/sh/v3 v3.11.0 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
|
||||
sigs.k8s.io/yaml v1.4.0 // indirect
|
||||
)
|
||||
|
||||
tool (
|
||||
github.com/TecharoHQ/yeet/cmd/yeet
|
||||
github.com/a-h/templ/cmd/templ
|
||||
github.com/suzuki-shunsuke/pinact/cmd/pinact
|
||||
golang.org/x/tools/cmd/deadcode
|
||||
golang.org/x/tools/cmd/goimports
|
||||
golang.org/x/tools/cmd/stringer
|
||||
golang.org/x/vuln/cmd/govulncheck
|
||||
|
||||
99
go.sum
99
go.sum
@@ -1,7 +1,5 @@
|
||||
al.essio.dev/pkg/shellescape v1.6.0 h1:NxFcEqzFSEVCGN2yq7Huv/9hyCEGVa/TncnOOBBeXHA=
|
||||
al.essio.dev/pkg/shellescape v1.6.0/go.mod h1:6sIqp7X2P6mThCQ7twERpZTuigpr6KbZWtls1U8I890=
|
||||
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1 h1:YhMSc48s25kr7kv31Z8vf7sPUIq5YJva9z1mn/hAt0M=
|
||||
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.6-20250425153114-8976f5be98c1.1/go.mod h1:avRlCjnFzl98VPaeCtJ24RrV/wwHFzB8sWXhj26+n/U=
|
||||
cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg=
|
||||
cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
|
||||
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
|
||||
@@ -30,8 +28,6 @@ github.com/ProtonMail/gopenpgp/v2 v2.7.1 h1:Awsg7MPc2gD3I7IFac2qE3Gdls0lZW8SzrFZ
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.7.1/go.mod h1:/BU5gfAVwqyd8EfC3Eu7zmuhwYQpKs+cGD8M//iiaxs=
|
||||
github.com/Songmu/gitconfig v0.2.0 h1:pX2++u4KUq+K2k/ZCzGXLtkD3ceCqIdi0tDyb+IbSyo=
|
||||
github.com/Songmu/gitconfig v0.2.0/go.mod h1:cB5bYJer+pl7W8g6RHFwL/0X6aJROVrYuHlvc7PT+hE=
|
||||
github.com/TecharoHQ/thoth-proto v0.4.0 h1:UbkvfgCku0Dm1R6O4ug3HOsJNnE6F3wB8x+Dpw2lzFI=
|
||||
github.com/TecharoHQ/thoth-proto v0.4.0/go.mod h1:IcGnZt3iYUZQVEa0Lwk5l4ix0hCeXlWUV1TJMZvbWx0=
|
||||
github.com/TecharoHQ/yeet v0.6.0 h1:RCBAjr7wIlllsgy0tpvWpLX7jsZgu2tiuBY3RrprcR0=
|
||||
github.com/TecharoHQ/yeet v0.6.0/go.mod h1:bj2V4Fg8qKQXoiuPZa3HuawrE8g+LsOQv/9q2WyGSsA=
|
||||
github.com/a-h/parse v0.0.0-20250122154542-74294addb73e h1:HjVbSQHy+dnlS6C3XajZ69NYAb5jbGNfHanvm1+iYlo=
|
||||
@@ -67,10 +63,8 @@ github.com/cli/go-gh v0.1.0 h1:kMqFmC3ECBrV2UKzlOHjNOTTchExVc5tjNHtCqk/zYk=
|
||||
github.com/cli/go-gh v0.1.0/go.mod h1:eTGWl99EMZ+3Iau5C6dHyGAJRRia65MtdBtuhWc+84o=
|
||||
github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q=
|
||||
github.com/cli/shurcooL-graphql v0.0.1/go.mod h1:U7gCSuMZP/Qy7kbqkk5PrqXEeDgtfG5K+W+u8weorps=
|
||||
github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=
|
||||
github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/cloudflare/circl v1.6.0 h1:cr5JKic4HI+LkINy2lg3W2jF8sHCVTBncJr5gIIq7qk=
|
||||
github.com/cloudflare/circl v1.6.0/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
|
||||
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
|
||||
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
|
||||
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
|
||||
@@ -105,8 +99,6 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
|
||||
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||
github.com/gaissmai/bart v0.20.4 h1:Ik47r1fy3jRVU+1eYzKSW3ho2UgBVTVnUS8O993584U=
|
||||
github.com/gaissmai/bart v0.20.4/go.mod h1:cEed+ge8dalcbpi8wtS9x9m2hn/fNJH5suhdGQOHnYk=
|
||||
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
|
||||
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
|
||||
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
|
||||
@@ -119,10 +111,6 @@ github.com/go-git/go-git/v5 v5.14.0 h1:/MD3lCrGjCen5WfEAzKg00MJJffKhC8gzS80ycmCi
|
||||
github.com/go-git/go-git/v5 v5.14.0/go.mod h1:Z5Xhoia5PcWA3NF8vRLURn9E5FRhSl7dGj9ItW3Wk5k=
|
||||
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
|
||||
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
|
||||
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
|
||||
@@ -146,20 +134,13 @@ github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeD
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/cel-go v0.25.0 h1:jsFw9Fhn+3y2kBbltZR4VEz5xKkcIFRPDnuEzAGv5GY=
|
||||
github.com/google/cel-go v0.25.0/go.mod h1:hjEb6r5SuOSlhCHmFoLzu8HGCERvIsDAbxDAyNU/MmI=
|
||||
github.com/google/go-cmdtest v0.4.1-0.20220921163831-55ab3332a786 h1:rcv+Ippz6RAtvaGgKxc+8FQIpxHgsF+HBzPyYL2cyVU=
|
||||
github.com/google/go-cmdtest v0.4.1-0.20220921163831-55ab3332a786/go.mod h1:apVn/GCasLZUVpAJ6oWAuyP7Ne7CEsQbTnc0plM3m+o=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/go-github/v70 v70.0.0 h1:/tqCp5KPrcvqCc7vIvYyFYTiCGrYvaWoYMGHSQbo55o=
|
||||
github.com/google/go-github/v70 v70.0.0/go.mod h1:xBUZgo8MI3lUL/hwxl3hlceJW1U8MVnXP3zUyI+rhQY=
|
||||
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
|
||||
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
|
||||
github.com/google/renameio v0.1.0 h1:GOZbcHa3HfsPKPlmyPyN2KEohoMXOhdMbHrvbpl2QaA=
|
||||
@@ -178,20 +159,12 @@ github.com/goreleaser/fileglob v1.3.0 h1:/X6J7U8lbDpQtBvGcwwPS6OpzkNVlVEsFUVRx9+
|
||||
github.com/goreleaser/fileglob v1.3.0/go.mod h1:Jx6BoXv3mbYkEzwm9THo7xbr5egkAraxkGorbJb4RxU=
|
||||
github.com/goreleaser/nfpm/v2 v2.42.1 h1:xu2pLRgQuz2ab+YZFoeIzwU/M5jjjCKDGwv1lRbVGvk=
|
||||
github.com/goreleaser/nfpm/v2 v2.42.1/go.mod h1:dY53KWYKebkOocxgkmpM7SRX0Nv5hU+jEu2kIaM4/LI=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0 h1:QGLs/O40yoNK9vmy4rhUGBVyMf1lISBGtXRpsu/Qu/o=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0/go.mod h1:hM2alZsMUni80N33RBe6J0e423LB+odMj7d3EMP9l20=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 h1:sGm2vDRFUrQJO/Veii4h4zG2vvqG6uWNkBHSTqXOZk0=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2/go.mod h1:wd1YpapPLivG6nQgbf7ZkG1hhSOXDhhn4MLTknx2aAc=
|
||||
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI=
|
||||
github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY=
|
||||
github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
|
||||
github.com/henvic/httpretty v0.0.6/go.mod h1:X38wLjWXHkXT7r2+uK8LjCMne9rsuNaBLJ+5cU2/Pmo=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
|
||||
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
|
||||
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
|
||||
@@ -217,9 +190,8 @@ github.com/matryer/is v1.4.0 h1:sosSmIWwkYITGrxZ25ULNDeKiMNzFSr4V/eqBQP0PeE=
|
||||
github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU=
|
||||
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
|
||||
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
|
||||
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
@@ -259,8 +231,6 @@ github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoG
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sassoftware/go-rpmutils v0.4.0 h1:ojND82NYBxgwrV+mX1CWsd5QJvvEZTKddtCdFLPWhpg=
|
||||
github.com/sassoftware/go-rpmutils v0.4.0/go.mod h1:3goNWi7PGAT3/dlql2lv3+MSN5jNYPjT5mVcQcIsYzI=
|
||||
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a h1:iLcLb5Fwwz7g/DLK89F+uQBDeAhHhwdzB5fSlVdhGcM=
|
||||
@@ -270,63 +240,35 @@ github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||
github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY=
|
||||
github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+E8C6HtKdec=
|
||||
github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY=
|
||||
github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60=
|
||||
github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA=
|
||||
github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo=
|
||||
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
|
||||
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
|
||||
github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
|
||||
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
|
||||
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/suzuki-shunsuke/logrus-error v0.1.4 h1:nWo98uba1fANHdZ9Y5pJ2RKs/PpVjrLzRp5m+mRb9KE=
|
||||
github.com/suzuki-shunsuke/logrus-error v0.1.4/go.mod h1:WsVvvw6SKSt08/fB2qbnsKIMJA4K1MYCUprqsBJbMiM=
|
||||
github.com/suzuki-shunsuke/pinact v1.6.0 h1:2QvSzREOquwLwKXhF9Hj0AInE/Rl63SZz9dKkHFC6so=
|
||||
github.com/suzuki-shunsuke/pinact v1.6.0/go.mod h1:FDUMck0mmL0mcnNZ23Vjh/aOR5cIdZhF1IIpGksT4dQ=
|
||||
github.com/suzuki-shunsuke/urfave-cli-help-all v0.0.4 h1:YGHgrVjGTYHY98II6zijXUHP+OyvrzSCvd8m9iUcaK8=
|
||||
github.com/suzuki-shunsuke/urfave-cli-help-all v0.0.4/go.mod h1:sSi6xaUaHfaqu32ECLeyE7NTMv+ZM5dW0JikhllaalY=
|
||||
github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e/go.mod h1:/Tnicc6m/lsJE0irFMA0LfIwTBo4QP7A8IfyIv4zZKI=
|
||||
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
|
||||
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/urfave/cli/v2 v2.27.6 h1:VdRdS98FNhKZ8/Az8B7MTyGQmpIr36O1EHybx/LaZ4g=
|
||||
github.com/urfave/cli/v2 v2.27.6/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
|
||||
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
|
||||
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
|
||||
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
gitlab.com/digitalxero/go-conventional-commit v1.0.7 h1:8/dO6WWG+98PMhlZowt/YjuiKhqhGlOCwlIV8SqqGh8=
|
||||
gitlab.com/digitalxero/go-conventional-commit v1.0.7/go.mod h1:05Xc2BFsSyC5tKhK0y+P3bs0AwUtNuTp+mTpbCU/DZ0=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
|
||||
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
|
||||
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
|
||||
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
|
||||
go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
|
||||
go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
|
||||
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
|
||||
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
@@ -351,8 +293,6 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=
|
||||
golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA=
|
||||
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
|
||||
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@@ -404,24 +344,21 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
|
||||
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
|
||||
golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc=
|
||||
golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI=
|
||||
golang.org/x/vuln v1.1.4 h1:Ju8QsuyhX3Hk8ma3CesTbO8vfJD9EvUBgHvkxHBzj0I=
|
||||
golang.org/x/vuln v1.1.4/go.mod h1:F+45wmU18ym/ca5PLTPLsSzr2KppzswxPP603ldA67s=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
||||
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk=
|
||||
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 h1:hE3bRWtU6uceqlh4fhrSnUyjKHMKB9KrTLLG+bc0ddM=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463/go.mod h1:U90ffi8eUL9MwPcrJylN5+Mk2v3vuPDptd5yyNUiRR8=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 h1:e0AIkUUhxyBKh6ssZNrAMeqhA7RKUj42346d1y02i2g=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
|
||||
google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok=
|
||||
google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
@@ -438,8 +375,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
honnef.co/go/tools v0.6.1 h1:R094WgE8K4JirYjBaOpz/AvTyUu/3wbmAoskKN/pxTI=
|
||||
honnef.co/go/tools v0.6.1/go.mod h1:3puzxxljPCe8RGJX7BIy1plGbxEOZni5mR2aXe3/uk4=
|
||||
k8s.io/apimachinery v0.33.2 h1:IHFVhqg59mb8PJWTLi8m1mAoepkUNYmptHsV+Z1m5jY=
|
||||
k8s.io/apimachinery v0.33.2/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
|
||||
k8s.io/apimachinery v0.33.1 h1:mzqXWV8tW9Rw4VeW9rEkqvnxj59k1ezDUl20tFK/oM4=
|
||||
k8s.io/apimachinery v0.33.1/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
|
||||
mvdan.cc/sh/v3 v3.11.0 h1:q5h+XMDRfUGUedCqFFsjoFjrhwf2Mvtt1rkMvVz0blw=
|
||||
mvdan.cc/sh/v3 v3.11.0/go.mod h1:LRM+1NjoYCzuq/WZ6y44x14YNAI0NK7FLPeQSaFagGg=
|
||||
pault.ag/go/debian v0.18.0 h1:nr0iiyOU5QlG1VPnhZLNhnCcHx58kukvBJp+dvaM6CQ=
|
||||
|
||||
@@ -3,23 +3,10 @@ package internal
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"strconv"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
// SHA256sum returns the lowercase hex encoding of the SHA-256 digest of text.
// It remains in use for proof-of-work challenges, which need the security
// properties of a cryptographic hash function.
func SHA256sum(text string) string {
	digest := sha256.Sum256([]byte(text))
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// FastHash is a high-performance non-cryptographic hash function suitable for
|
||||
// internal caching, policy rule identification, and other performance-critical
|
||||
// use cases where cryptographic security is not required.
|
||||
func FastHash(text string) string {
|
||||
h := xxhash.Sum64String(text)
|
||||
return strconv.FormatUint(h, 16)
|
||||
}
|
||||
|
||||
@@ -1,261 +0,0 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// XXHash64sum is a test alias for FastHash to benchmark against SHA256
|
||||
func XXHash64sum(text string) string {
|
||||
return FastHash(text)
|
||||
}
|
||||
|
||||
// Fixture strings shared by the benchmarks and tests in this file. They are
// modelled on the kinds of values the codebase hashes in real use.
var (
	// policyInputs: header, path, and CIDR strings typical of policy checkers.
	policyInputs = []string{
		"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
		"User-Agent: bot/1.0",
		"User-Agent: GoogleBot/2.1",
		"/robots.txt",
		"/api/.*",
		"10.0.0.0/8",
		"192.168.1.0/24",
		"172.16.0.0/12",
	}

	// challengeInputs: challenge data in the form built by the challengeFor function.
	challengeInputs = []string{
		"Accept-Language=en-US,X-Real-IP=192.168.1.100,User-Agent=Mozilla/5.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=abc123,Difficulty=5",
		"Accept-Language=fr-FR,X-Real-IP=10.0.0.50,User-Agent=Chrome/91.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=def456,Difficulty=3",
		"Accept-Language=es-ES,X-Real-IP=172.16.1.1,User-Agent=Safari/14.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=ghi789,Difficulty=7",
	}

	// botRuleInputs: bot rule patterns.
	botRuleInputs = []string{
		"GoogleBot::path:/robots.txt",
		"BingBot::useragent:Mozilla/5.0 (compatible; bingbot/2.0)",
		"FacebookBot::headers:Accept-Language,User-Agent",
		"TwitterBot::cidr:192.168.1.0/24",
	}

	// celInputs: CEL expressions as they appear in policy rules.
	celInputs = []string{
		`request.headers["User-Agent"].contains("bot")`,
		`request.path.startsWith("/api/") && request.method == "POST"`,
		`request.remoteAddress in ["192.168.1.0/24", "10.0.0.0/8"]`,
		`request.userAgent.matches(".*[Bb]ot.*") || request.userAgent.matches(".*[Cc]rawler.*")`,
	}

	// asnInputs: Thoth ASN checker inputs, one newline-separated list each.
	asnInputs = []string{
		"ASNChecker\nAS 15169\nAS 8075\nAS 32934",
		"ASNChecker\nAS 13335\nAS 16509\nAS 14061",
		"ASNChecker\nAS 36351\nAS 20940\nAS 8100",
	}
)
|
||||
|
||||
func BenchmarkSHA256_PolicyInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := policyInputs[i%len(policyInputs)]
|
||||
_ = SHA256sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_PolicyInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := policyInputs[i%len(policyInputs)]
|
||||
_ = XXHash64sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSHA256_ChallengeInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := challengeInputs[i%len(challengeInputs)]
|
||||
_ = SHA256sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_ChallengeInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := challengeInputs[i%len(challengeInputs)]
|
||||
_ = XXHash64sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSHA256_BotRuleInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := botRuleInputs[i%len(botRuleInputs)]
|
||||
_ = SHA256sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_BotRuleInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := botRuleInputs[i%len(botRuleInputs)]
|
||||
_ = XXHash64sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSHA256_CELInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := celInputs[i%len(celInputs)]
|
||||
_ = SHA256sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_CELInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := celInputs[i%len(celInputs)]
|
||||
_ = XXHash64sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSHA256_ASNInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := asnInputs[i%len(asnInputs)]
|
||||
_ = SHA256sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_ASNInputs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
input := asnInputs[i%len(asnInputs)]
|
||||
_ = XXHash64sum(input)
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark the policy list hashing used in checker.go
|
||||
func BenchmarkSHA256_PolicyList(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var sb strings.Builder
|
||||
for _, input := range policyInputs {
|
||||
fmt.Fprintln(&sb, SHA256sum(input))
|
||||
}
|
||||
_ = SHA256sum(sb.String())
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHash_PolicyList(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var sb strings.Builder
|
||||
for _, input := range policyInputs {
|
||||
fmt.Fprintln(&sb, XXHash64sum(input))
|
||||
}
|
||||
_ = XXHash64sum(sb.String())
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that xxhash doesn't have collisions in realistic scenarios
|
||||
func TestHashCollisions(t *testing.T) {
|
||||
allInputs := append(append(append(append(policyInputs, challengeInputs...), botRuleInputs...), celInputs...), asnInputs...)
|
||||
|
||||
// Start with realistic inputs from actual usage
|
||||
xxhashHashes := make(map[string]string)
|
||||
for _, input := range allInputs {
|
||||
hash := XXHash64sum(input)
|
||||
if existing, exists := xxhashHashes[hash]; exists {
|
||||
t.Errorf("XXHash collision detected: %q and %q both hash to %s", input, existing, hash)
|
||||
}
|
||||
xxhashHashes[hash] = input
|
||||
}
|
||||
|
||||
t.Logf("Basic test: %d realistic inputs, no collisions", len(allInputs))
|
||||
|
||||
// Test similar strings that might cause hash collisions
|
||||
prefixes := []string{"User-Agent: ", "X-Real-IP: ", "Accept-Language: ", "Host: "}
|
||||
suffixes := []string{"bot", "crawler", "spider", "scraper", "Mozilla", "Chrome", "Safari", "Firefox"}
|
||||
variations := []string{"", "/1.0", "/2.0", " (compatible)", " (Windows)", " (Linux)", " (Mac)"}
|
||||
|
||||
stressCount := 0
|
||||
for _, prefix := range prefixes {
|
||||
for _, suffix := range suffixes {
|
||||
for _, variation := range variations {
|
||||
for i := 0; i < 100; i++ {
|
||||
input := fmt.Sprintf("%s%s%s-%d", prefix, suffix, variation, i)
|
||||
hash := XXHash64sum(input)
|
||||
if existing, exists := xxhashHashes[hash]; exists {
|
||||
t.Errorf("XXHash collision in stress test: %q and %q both hash to %s", input, existing, hash)
|
||||
}
|
||||
xxhashHashes[hash] = input
|
||||
stressCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
t.Logf("Stress test 1: %d similar string variations, no collisions", stressCount)
|
||||
|
||||
// Test sequential patterns that might be problematic
|
||||
patterns := []string{
|
||||
"192.168.1.%d",
|
||||
"10.0.0.%d",
|
||||
"172.16.%d.1",
|
||||
"challenge-%d",
|
||||
"bot-rule-%d",
|
||||
"policy-%016x",
|
||||
"session-%016x",
|
||||
}
|
||||
|
||||
seqCount := 0
|
||||
for _, pattern := range patterns {
|
||||
for i := 0; i < 10000; i++ {
|
||||
input := fmt.Sprintf(pattern, i)
|
||||
hash := XXHash64sum(input)
|
||||
if existing, exists := xxhashHashes[hash]; exists {
|
||||
t.Errorf("XXHash collision in sequential test: %q and %q both hash to %s", input, existing, hash)
|
||||
}
|
||||
xxhashHashes[hash] = input
|
||||
seqCount++
|
||||
}
|
||||
}
|
||||
t.Logf("Stress test 2: %d sequential patterns, no collisions", seqCount)
|
||||
|
||||
totalInputs := len(allInputs) + stressCount + seqCount
|
||||
t.Logf("TOTAL: Tested %d inputs across realistic scenarios - NO COLLISIONS", totalInputs)
|
||||
}
|
||||
|
||||
// Verify xxhash output works as cache keys
|
||||
func TestXXHashFormat(t *testing.T) {
|
||||
testCases := []string{
|
||||
"short",
|
||||
"",
|
||||
"very long string with lots of content that might be used in policy checking and other internal hashing scenarios",
|
||||
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
}
|
||||
|
||||
for _, input := range testCases {
|
||||
hash := XXHash64sum(input)
|
||||
|
||||
// Check it's valid hex
|
||||
if len(hash) == 0 {
|
||||
t.Errorf("Empty hash for input %q", input)
|
||||
}
|
||||
|
||||
// xxhash is 64-bit so max 16 hex chars
|
||||
if len(hash) > 16 {
|
||||
t.Errorf("Hash too long for input %q: %s (length %d)", input, hash, len(hash))
|
||||
}
|
||||
|
||||
// Make sure it's all hex characters
|
||||
for _, char := range hash {
|
||||
if !((char >= '0' && char <= '9') || (char >= 'a' && char <= 'f')) {
|
||||
t.Errorf("Non-hex character %c in hash %s for input %q", char, hash, input)
|
||||
}
|
||||
}
|
||||
|
||||
t.Logf("Input: %q -> Hash: %s", input, hash)
|
||||
}
|
||||
}
|
||||
@@ -13,10 +13,6 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
|
||||
return nil, errors.New("nil URL provided, cannot fetch OG tags")
|
||||
}
|
||||
|
||||
if len(c.ogOverride) != 0 {
|
||||
return c.ogOverride, nil
|
||||
}
|
||||
|
||||
target := c.getTarget(url)
|
||||
cacheKey := c.generateCacheKey(target, originalHost)
|
||||
|
||||
|
||||
@@ -7,49 +7,10 @@ import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestCacheReturnsDefault(t *testing.T) {
|
||||
want := map[string]string{
|
||||
"og:title": "Foo bar",
|
||||
"og:description": "The best website ever made!!!1!",
|
||||
}
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
Override: want,
|
||||
})
|
||||
|
||||
u, err := url.Parse("https://anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for k, v := range want {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
if got := result[k]; got != v {
|
||||
t.Logf("want: tags[%q] = %q", k, v)
|
||||
t.Logf("got: tags[%q] = %q", k, got)
|
||||
t.Error("invalid result from function")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckCache(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache("http://example.com", true, time.Minute, false)
|
||||
|
||||
// Set up test data
|
||||
urlStr := "http://example.com/page"
|
||||
@@ -108,11 +69,7 @@ func TestGetOGTags(t *testing.T) {
|
||||
defer ts.Close()
|
||||
|
||||
// Create an instance of OGTagCache with a short TTL for testing
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
|
||||
// Parse the test server URL
|
||||
parsedURL, err := url.Parse(ts.URL)
|
||||
@@ -259,11 +216,7 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
loadCount = 0 // Reset load count for each test case
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: tc.ogCacheConsiderHost,
|
||||
})
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
|
||||
|
||||
for i, req := range tc.requests {
|
||||
ogTags, err := cache.GetOGTags(parsedURL, req.host)
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -81,11 +80,7 @@ func TestFetchHTMLDocument(t *testing.T) {
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
|
||||
|
||||
if tt.expectError {
|
||||
@@ -112,11 +107,7 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
|
||||
t.Skip("test requires theoretical network egress")
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
|
||||
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
|
||||
|
||||
|
||||
@@ -6,8 +6,6 @@ import (
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestIntegrationGetOGTags(t *testing.T) {
|
||||
@@ -106,11 +104,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create cache instance
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
|
||||
// Create URL for test
|
||||
testURL, _ := url.Parse(ts.URL)
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
package ogtags
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
func BenchmarkGetTarget(b *testing.B) {
|
||||
tests := []struct {
|
||||
name string
|
||||
target string
|
||||
paths []string
|
||||
}{
|
||||
{
|
||||
name: "HTTP",
|
||||
target: "http://example.com",
|
||||
paths: []string{"/", "/path", "/path/to/resource", "/path?query=1&foo=bar"},
|
||||
},
|
||||
{
|
||||
name: "Unix",
|
||||
target: "unix:///var/run/app.sock",
|
||||
paths: []string{"/", "/api/endpoint", "/api/endpoint?param=value"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
b.Run(tt.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{})
|
||||
urls := make([]*url.URL, len(tt.paths))
|
||||
for i, path := range tt.paths {
|
||||
u, _ := url.Parse(path)
|
||||
urls[i] = u
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = cache.getTarget(urls[i%len(urls)])
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkExtractOGTags(b *testing.B) {
|
||||
htmlSamples := []string{
|
||||
`<html><head>
|
||||
<meta property="og:title" content="Test Title">
|
||||
<meta property="og:description" content="Test Description">
|
||||
<meta name="keywords" content="test,keywords">
|
||||
</head><body></body></html>`,
|
||||
`<html><head>
|
||||
<meta property="og:title" content="Page Title">
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:url" content="https://example.com">
|
||||
<meta property="og:image" content="https://example.com/image.jpg">
|
||||
<meta property="twitter:card" content="summary_large_image">
|
||||
<meta property="twitter:title" content="Twitter Title">
|
||||
<meta name="description" content="Page description">
|
||||
<meta name="author" content="John Doe">
|
||||
</head><body><div><p>Content</p></div></body></html>`,
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
docs := make([]*html.Node, len(htmlSamples))
|
||||
|
||||
for i, sample := range htmlSamples {
|
||||
doc, _ := html.Parse(strings.NewReader(sample))
|
||||
docs[i] = doc
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = cache.extractOGTags(docs[i%len(docs)])
|
||||
}
|
||||
}
|
||||
|
||||
// Memory usage test
|
||||
func TestMemoryUsage(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Force GC and wait for it to complete
|
||||
runtime.GC()
|
||||
|
||||
var m1 runtime.MemStats
|
||||
runtime.ReadMemStats(&m1)
|
||||
|
||||
// Run getTarget many times
|
||||
u, _ := url.Parse("/path/to/resource?query=1&foo=bar&baz=qux")
|
||||
for i := 0; i < 10000; i++ {
|
||||
_ = cache.getTarget(u)
|
||||
}
|
||||
|
||||
// Force GC after operations
|
||||
runtime.GC()
|
||||
|
||||
var m2 runtime.MemStats
|
||||
runtime.ReadMemStats(&m2)
|
||||
|
||||
allocatedBytes := int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
|
||||
allocatedKB := float64(allocatedBytes) / 1024.0
|
||||
allocatedPerOp := float64(allocatedBytes) / 10000.0
|
||||
|
||||
t.Logf("Memory allocated for 10k getTarget calls:")
|
||||
t.Logf(" Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
|
||||
t.Logf(" Per operation: %.2f bytes", allocatedPerOp)
|
||||
|
||||
// Test extractOGTags memory usage
|
||||
htmlDoc := `<html><head>
|
||||
<meta property="og:title" content="Test Title">
|
||||
<meta property="og:description" content="Test Description">
|
||||
<meta property="og:image" content="https://example.com/image.jpg">
|
||||
<meta property="twitter:card" content="summary">
|
||||
<meta name="keywords" content="test,keywords,example">
|
||||
<meta name="author" content="Test Author">
|
||||
<meta property="unknown:tag" content="Should be ignored">
|
||||
</head><body></body></html>`
|
||||
|
||||
doc, _ := html.Parse(strings.NewReader(htmlDoc))
|
||||
|
||||
runtime.GC()
|
||||
runtime.ReadMemStats(&m1)
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
_ = cache.extractOGTags(doc)
|
||||
}
|
||||
|
||||
runtime.GC()
|
||||
runtime.ReadMemStats(&m2)
|
||||
|
||||
allocatedBytes = int64(m2.TotalAlloc) - int64(m1.TotalAlloc)
|
||||
allocatedKB = float64(allocatedBytes) / 1024.0
|
||||
allocatedPerOp = float64(allocatedBytes) / 1000.0
|
||||
|
||||
t.Logf("Memory allocated for 1k extractOGTags calls:")
|
||||
t.Logf(" Total: %.2f KB (%.2f MB)", allocatedKB, allocatedKB/1024.0)
|
||||
t.Logf(" Per operation: %.2f bytes", allocatedPerOp)
|
||||
|
||||
// Sanity checks
|
||||
if allocatedPerOp > 10000 {
|
||||
t.Errorf("extractOGTags allocating too much memory per operation: %.2f bytes", allocatedPerOp)
|
||||
}
|
||||
}
|
||||
@@ -10,34 +10,27 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/decaymap"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
const (
|
||||
maxContentLength = 8 << 20 // 8 MiB is enough for anyone
|
||||
maxContentLength = 16 << 20 // 16 MiB in bytes, if there is a reasonable reason that you need more than this...Why?
|
||||
httpTimeout = 5 * time.Second /*todo: make this configurable?*/
|
||||
|
||||
schemeSeparatorLength = 3 // Length of "://"
|
||||
querySeparatorLength = 1 // Length of "?" for query strings
|
||||
)
|
||||
|
||||
type OGTagCache struct {
|
||||
cache *decaymap.Impl[string, map[string]string]
|
||||
targetURL *url.URL
|
||||
client *http.Client
|
||||
|
||||
// Pre-built strings for optimization
|
||||
unixPrefix string // "http://unix"
|
||||
cache *decaymap.Impl[string, map[string]string]
|
||||
targetURL *url.URL
|
||||
client *http.Client
|
||||
approvedTags []string
|
||||
approvedPrefixes []string
|
||||
ogTimeToLive time.Duration
|
||||
ogCacheConsiderHost bool
|
||||
ogPassthrough bool
|
||||
ogOverride map[string]string
|
||||
}
|
||||
|
||||
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
|
||||
// Predefined approved tags and prefixes
|
||||
// In the future, these could come from configuration
|
||||
defaultApprovedTags := []string{"description", "keywords", "author"}
|
||||
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
|
||||
|
||||
@@ -78,51 +71,37 @@ func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
|
||||
return &OGTagCache{
|
||||
cache: decaymap.New[string, map[string]string](),
|
||||
targetURL: parsedTargetURL,
|
||||
ogPassthrough: conf.Enabled,
|
||||
ogTimeToLive: conf.TimeToLive,
|
||||
ogCacheConsiderHost: conf.ConsiderHost,
|
||||
ogOverride: conf.Override,
|
||||
targetURL: parsedTargetURL, // Store the parsed URL
|
||||
ogPassthrough: ogPassthrough,
|
||||
ogTimeToLive: ogTimeToLive,
|
||||
ogCacheConsiderHost: ogTagsConsiderHost, // todo: refactor to be a separate struct
|
||||
approvedTags: defaultApprovedTags,
|
||||
approvedPrefixes: defaultApprovedPrefixes,
|
||||
client: client,
|
||||
unixPrefix: "http://unix",
|
||||
}
|
||||
}
|
||||
|
||||
// getTarget constructs the target URL string for fetching OG tags.
|
||||
// Optimized to minimize allocations by building strings directly.
|
||||
// For Unix sockets, it creates a "fake" HTTP URL that the custom dialer understands.
|
||||
func (c *OGTagCache) getTarget(u *url.URL) string {
|
||||
var escapedPath = u.EscapedPath() // will cause an allocation if path contains special characters
|
||||
if c.targetURL.Scheme == "unix" {
|
||||
// Build URL string directly without creating intermediate URL object
|
||||
var sb strings.Builder
|
||||
sb.Grow(len(c.unixPrefix) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength) // Pre-allocate
|
||||
sb.WriteString(c.unixPrefix)
|
||||
sb.WriteString(escapedPath)
|
||||
if u.RawQuery != "" {
|
||||
sb.WriteByte('?')
|
||||
sb.WriteString(u.RawQuery)
|
||||
// The custom dialer ignores the host, but we need a valid http URL structure.
|
||||
// Use "unix" as a placeholder host. Path and Query from original request are appended.
|
||||
fakeURL := &url.URL{
|
||||
Scheme: "http", // Scheme must be http/https for client.Get
|
||||
Host: "unix", // Arbitrary host, ignored by custom dialer
|
||||
Path: u.Path,
|
||||
RawQuery: u.RawQuery,
|
||||
}
|
||||
return sb.String()
|
||||
return fakeURL.String()
|
||||
}
|
||||
|
||||
// For regular http/https targets, build URL string directly
|
||||
var sb strings.Builder
|
||||
// Pre-calculate size: scheme + "://" + host + path + "?" + query
|
||||
estimatedSize := len(c.targetURL.Scheme) + schemeSeparatorLength + len(c.targetURL.Host) + len(escapedPath) + len(u.RawQuery) + querySeparatorLength
|
||||
sb.Grow(estimatedSize)
|
||||
// For regular http/https targets
|
||||
target := *c.targetURL // Make a copy
|
||||
target.Path = u.Path
|
||||
target.RawQuery = u.RawQuery
|
||||
return target.String()
|
||||
|
||||
sb.WriteString(c.targetURL.Scheme)
|
||||
sb.WriteString("://")
|
||||
sb.WriteString(c.targetURL.Host)
|
||||
sb.WriteString(escapedPath)
|
||||
if u.RawQuery != "" {
|
||||
sb.WriteByte('?')
|
||||
sb.WriteString(u.RawQuery)
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func (c *OGTagCache) Cleanup() {
|
||||
|
||||
@@ -1,310 +0,0 @@
|
||||
package ogtags
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// FuzzGetTarget tests getTarget with various inputs
|
||||
func FuzzGetTarget(f *testing.F) {
|
||||
// Seed corpus with interesting test cases
|
||||
testCases := []struct {
|
||||
target string
|
||||
path string
|
||||
query string
|
||||
}{
|
||||
{"http://example.com", "/", ""},
|
||||
{"http://example.com", "/path", "q=1"},
|
||||
{"unix:///tmp/socket", "/api", "key=value"},
|
||||
{"https://example.com:8080", "/path/to/resource", "a=1&b=2"},
|
||||
{"http://example.com", "/path with spaces", "q=hello world"},
|
||||
{"http://example.com", "/path/❤️/emoji", "emoji=🎉"},
|
||||
{"http://example.com", "/path/../../../etc/passwd", ""},
|
||||
{"http://example.com", "/path%2F%2E%2E%2F", "q=%3Cscript%3E"},
|
||||
{"unix:///var/run/app.sock", "/../../etc/passwd", ""},
|
||||
{"http://[::1]:8080", "/ipv6", "test=1"},
|
||||
{"http://example.com", strings.Repeat("/very/long/path", 100), strings.Repeat("param=value&", 100)},
|
||||
{"http://example.com", "/path%20with%20encoded", "q=%20encoded%20"},
|
||||
{"http://example.com", "/пример/кириллица", "q=тест"},
|
||||
{"http://example.com", "/中文/路径", "查询=值"},
|
||||
{"", "/path", "q=1"}, // Empty target
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
f.Add(tc.target, tc.path, tc.query)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, target, path, query string) {
|
||||
// Skip invalid UTF-8 to focus on realistic inputs
|
||||
if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
// Create cache - should not panic
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
|
||||
// Create URL
|
||||
u := &url.URL{
|
||||
Path: path,
|
||||
RawQuery: query,
|
||||
}
|
||||
|
||||
// Call getTarget - should not panic
|
||||
result := cache.getTarget(u)
|
||||
|
||||
// Basic validation
|
||||
if result == "" {
|
||||
t.Errorf("getTarget returned empty string for target=%q, path=%q, query=%q", target, path, query)
|
||||
}
|
||||
|
||||
// Verify result is a valid URL (for non-empty targets)
|
||||
if target != "" {
|
||||
parsedResult, err := url.Parse(result)
|
||||
if err != nil {
|
||||
t.Errorf("getTarget produced invalid URL %q: %v", result, err)
|
||||
} else {
|
||||
// For unix sockets, verify the scheme is http
|
||||
if strings.HasPrefix(target, "unix:") && parsedResult.Scheme != "http" {
|
||||
t.Errorf("Unix socket URL should have http scheme, got %q", parsedResult.Scheme)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure no memory corruption by calling multiple times
|
||||
for i := 0; i < 3; i++ {
|
||||
result2 := cache.getTarget(u)
|
||||
if result != result2 {
|
||||
t.Errorf("getTarget not deterministic: %q != %q", result, result2)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// FuzzExtractOGTags tests extractOGTags with various HTML inputs
|
||||
func FuzzExtractOGTags(f *testing.F) {
|
||||
// Seed corpus with interesting HTML cases
|
||||
htmlCases := []string{
|
||||
`<html><head><meta property="og:title" content="Test"></head></html>`,
|
||||
`<meta property="og:title" content="No HTML tags">`,
|
||||
`<html><head>` + strings.Repeat(`<meta property="og:title" content="Many tags">`, 1000) + `</head></html>`,
|
||||
`<html><head><meta property="og:title" content="<script>alert('xss')</script>"></head></html>`,
|
||||
`<html><head><meta property="og:title" content="Line1 Line2"></head></html>`,
|
||||
`<html><head><meta property="og:emoji" content="❤️🎉🎊"></head></html>`,
|
||||
`<html><head><meta property="og:title" content="` + strings.Repeat("A", 10000) + `"></head></html>`,
|
||||
`<html><head><meta property="og:title" content='Single quotes'></head></html>`,
|
||||
`<html><head><meta property=og:title content=no-quotes></head></html>`,
|
||||
`<html><head><meta name="keywords" content="test,keywords"></head></html>`,
|
||||
`<html><head><meta property="unknown:tag" content="Should be ignored"></head></html>`,
|
||||
`<html><head><meta property="` + strings.Repeat("og:", 100) + `title" content="Nested prefixes"></head></html>`,
|
||||
`<html>` + strings.Repeat(`<div>`, 1000) + `<meta property="og:title" content="Deep nesting">` + strings.Repeat(`</div>`, 1000) + `</html>`,
|
||||
`<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><meta property="og:title" content="With doctype"/></head></html>`,
|
||||
`<html><head><meta property="" content="Empty property"></head></html>`,
|
||||
`<html><head><meta content="Content only"></head></html>`,
|
||||
`<html><head><meta property="og:title"></head></html>`, // No content
|
||||
``, // Empty HTML
|
||||
`<html><head><meta property="og:title" content="Кириллица"></head></html>`,
|
||||
`<html><head><meta property="og:title" content="中文内容"></head></html>`,
|
||||
`<html><head><!--<meta property="og:title" content="Commented out">--></head></html>`,
|
||||
`<html><head><META PROPERTY="OG:TITLE" CONTENT="UPPERCASE"></head></html>`,
|
||||
}
|
||||
|
||||
for _, htmlc := range htmlCases {
|
||||
f.Add(htmlc)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, htmlContent string) {
|
||||
// Skip invalid UTF-8
|
||||
if !utf8.ValidString(htmlContent) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
// Parse HTML - may fail on invalid input
|
||||
doc, err := html.Parse(strings.NewReader(htmlContent))
|
||||
if err != nil {
|
||||
// This is expected for malformed HTML
|
||||
return
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
tags := cache.extractOGTags(doc)
|
||||
|
||||
// Validate results
|
||||
for property, content := range tags {
|
||||
// Ensure property is approved
|
||||
approved := false
|
||||
for _, prefix := range cache.approvedPrefixes {
|
||||
if strings.HasPrefix(property, prefix) {
|
||||
approved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !approved {
|
||||
for _, tag := range cache.approvedTags {
|
||||
if property == tag {
|
||||
approved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if !approved {
|
||||
t.Errorf("Unapproved property %q was extracted", property)
|
||||
}
|
||||
|
||||
// Ensure content is valid string
|
||||
if !utf8.ValidString(content) {
|
||||
t.Errorf("Invalid UTF-8 in content for property %q", property)
|
||||
}
|
||||
}
|
||||
|
||||
// Test determinism
|
||||
tags2 := cache.extractOGTags(doc)
|
||||
if len(tags) != len(tags2) {
|
||||
t.Errorf("extractOGTags not deterministic: different lengths %d != %d", len(tags), len(tags2))
|
||||
}
|
||||
for k, v := range tags {
|
||||
if tags2[k] != v {
|
||||
t.Errorf("extractOGTags not deterministic: %q=%q != %q=%q", k, v, k, tags2[k])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// FuzzGetTargetRoundTrip tests that getTarget produces valid URLs that can be parsed back
|
||||
func FuzzGetTargetRoundTrip(f *testing.F) {
|
||||
f.Add("http://example.com", "/path/to/resource", "key=value&foo=bar")
|
||||
f.Add("unix:///tmp/socket", "/api/endpoint", "param=test")
|
||||
|
||||
f.Fuzz(func(t *testing.T, target, path, query string) {
|
||||
if !utf8.ValidString(target) || !utf8.ValidString(path) || !utf8.ValidString(query) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
u := &url.URL{Path: path, RawQuery: query}
|
||||
|
||||
result := cache.getTarget(u)
|
||||
if result == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// Parse the result back
|
||||
parsed, err := url.Parse(result)
|
||||
if err != nil {
|
||||
t.Errorf("getTarget produced unparseable URL: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// For non-unix targets, verify path preservation (accounting for encoding)
|
||||
if !strings.HasPrefix(target, "unix:") && target != "" {
|
||||
// The paths should match after normalization
|
||||
expectedPath := u.EscapedPath()
|
||||
if parsed.EscapedPath() != expectedPath {
|
||||
t.Errorf("Path not preserved: want %q, got %q", expectedPath, parsed.EscapedPath())
|
||||
}
|
||||
|
||||
// Query should be preserved exactly
|
||||
if parsed.RawQuery != query {
|
||||
t.Errorf("Query not preserved: want %q, got %q", query, parsed.RawQuery)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// FuzzExtractMetaTagInfo tests the extractMetaTagInfo function directly
|
||||
func FuzzExtractMetaTagInfo(f *testing.F) {
|
||||
// Seed with various attribute combinations
|
||||
f.Add("og:title", "Test Title", "property")
|
||||
f.Add("keywords", "test,keywords", "name")
|
||||
f.Add("og:description", "A description with \"quotes\"", "property")
|
||||
f.Add("twitter:card", "summary", "property")
|
||||
f.Add("unknown:tag", "Should be filtered", "property")
|
||||
f.Add("", "Content without property", "property")
|
||||
f.Add("og:title", "", "property") // Property without content
|
||||
|
||||
f.Fuzz(func(t *testing.T, propertyValue, contentValue, propertyKey string) {
|
||||
if !utf8.ValidString(propertyValue) || !utf8.ValidString(contentValue) || !utf8.ValidString(propertyKey) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
// Create a meta node
|
||||
node := &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: "meta",
|
||||
Attr: []html.Attribute{
|
||||
{Key: propertyKey, Val: propertyValue},
|
||||
{Key: "content", Val: contentValue},
|
||||
},
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
property, content := cache.extractMetaTagInfo(node)
|
||||
|
||||
// If property is returned, it must be approved
|
||||
if property != "" {
|
||||
approved := false
|
||||
for _, prefix := range cache.approvedPrefixes {
|
||||
if strings.HasPrefix(property, prefix) {
|
||||
approved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !approved {
|
||||
for _, tag := range cache.approvedTags {
|
||||
if property == tag {
|
||||
approved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if !approved {
|
||||
t.Errorf("extractMetaTagInfo returned unapproved property: %q", property)
|
||||
}
|
||||
}
|
||||
|
||||
// Content should match input if property is approved
|
||||
if property != "" && content != contentValue {
|
||||
t.Errorf("Content mismatch: want %q, got %q", contentValue, content)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Benchmark comparison for the fuzzed scenarios
|
||||
func BenchmarkFuzzedGetTarget(b *testing.B) {
|
||||
// Test with various challenging inputs found during fuzzing
|
||||
inputs := []struct {
|
||||
name string
|
||||
target string
|
||||
path string
|
||||
query string
|
||||
}{
|
||||
{"Simple", "http://example.com", "/api", "k=v"},
|
||||
{"LongPath", "http://example.com", strings.Repeat("/segment", 50), ""},
|
||||
{"LongQuery", "http://example.com", "/", strings.Repeat("param=value&", 50)},
|
||||
{"Unicode", "http://example.com", "/путь/路径/path", "q=значение"},
|
||||
{"Encoded", "http://example.com", "/path%20with%20spaces", "q=%3Cscript%3E"},
|
||||
{"Unix", "unix:///tmp/socket.sock", "/api/v1/resource", "id=123&format=json"},
|
||||
}
|
||||
|
||||
for _, input := range inputs {
|
||||
b.Run(input.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(input.target, config.OpenGraph{})
|
||||
u := &url.URL{Path: input.path, RawQuery: input.query}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = cache.getTarget(u)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -13,8 +13,6 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestNewOGTagCache(t *testing.T) {
|
||||
@@ -40,11 +38,7 @@ func TestNewOGTagCache(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: tt.ogPassthrough,
|
||||
TimeToLive: tt.ogTimeToLive,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -80,11 +74,7 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
|
||||
socketPath := filepath.Join(tempDir, "test.sock")
|
||||
target := "unix://" + socketPath
|
||||
|
||||
cache := NewOGTagCache(target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: 5 * time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(target, true, 5*time.Minute, false)
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -165,11 +155,7 @@ func TestGetTarget(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(tt.target, false, time.Minute, false)
|
||||
|
||||
u := &url.URL{
|
||||
Path: tt.path,
|
||||
@@ -189,9 +175,7 @@ func TestGetTarget(t *testing.T) {
|
||||
func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// XXX(Xe): if this is named longer, macOS fails with `bind: invalid argument`
|
||||
// because the unix socket path is too long. I love computers.
|
||||
socketPath := filepath.Join(tempDir, "t")
|
||||
socketPath := filepath.Join(tempDir, "anubis-test.sock")
|
||||
|
||||
// Ensure the socket does not exist initially
|
||||
_ = os.Remove(socketPath)
|
||||
@@ -238,11 +222,7 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
|
||||
// Create cache instance pointing to the Unix socket
|
||||
targetURL := "unix://" + socketPath
|
||||
cache := NewOGTagCache(targetURL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
|
||||
|
||||
// Create a dummy URL for the request (path and query matter)
|
||||
testReqURL, _ := url.Parse("/some/page?query=1")
|
||||
|
||||
@@ -12,12 +12,15 @@ func (c *OGTagCache) extractOGTags(doc *html.Node) map[string]string {
|
||||
|
||||
var traverseNodes func(*html.Node)
|
||||
traverseNodes = func(n *html.Node) {
|
||||
// isOGMetaTag only checks if it's a <meta> tag.
|
||||
// The actual filtering happens in extractMetaTagInfo now.
|
||||
if isOGMetaTag(n) {
|
||||
property, content := c.extractMetaTagInfo(n)
|
||||
if property != "" {
|
||||
ogTags[property] = content
|
||||
}
|
||||
}
|
||||
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
traverseNodes(child)
|
||||
}
|
||||
@@ -36,40 +39,43 @@ func isOGMetaTag(n *html.Node) bool {
|
||||
}
|
||||
|
||||
// extractMetaTagInfo extracts property and content from a meta tag
|
||||
// *and* checks if the property is approved.
|
||||
// Returns empty property string if the tag is not approved.
|
||||
func (c *OGTagCache) extractMetaTagInfo(n *html.Node) (property, content string) {
|
||||
var propertyKey string
|
||||
var rawProperty string // Store the property found before approval check
|
||||
|
||||
// Single pass through attributes, using range to avoid bounds checking
|
||||
for _, attr := range n.Attr {
|
||||
switch attr.Key {
|
||||
case "property", "name":
|
||||
propertyKey = attr.Val
|
||||
case "content":
|
||||
if attr.Key == "property" || attr.Key == "name" {
|
||||
rawProperty = attr.Val
|
||||
}
|
||||
if attr.Key == "content" {
|
||||
content = attr.Val
|
||||
}
|
||||
// Early exit if we have both
|
||||
if propertyKey != "" && content != "" {
|
||||
}
|
||||
|
||||
// Check if the rawProperty is approved
|
||||
isApproved := false
|
||||
for _, prefix := range c.approvedPrefixes {
|
||||
if strings.HasPrefix(rawProperty, prefix) {
|
||||
isApproved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if propertyKey == "" {
|
||||
return "", content
|
||||
}
|
||||
|
||||
// Check prefixes first (more common case)
|
||||
for _, prefix := range c.approvedPrefixes {
|
||||
if strings.HasPrefix(propertyKey, prefix) {
|
||||
return propertyKey, content
|
||||
// Check exact approved tags if not already approved by prefix
|
||||
if !isApproved {
|
||||
for _, tag := range c.approvedTags {
|
||||
if rawProperty == tag {
|
||||
isApproved = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check exact matches
|
||||
for _, tag := range c.approvedTags {
|
||||
if propertyKey == tag {
|
||||
return propertyKey, content
|
||||
}
|
||||
// Only return the property if it's approved
|
||||
if isApproved {
|
||||
property = rawProperty
|
||||
}
|
||||
|
||||
return "", content
|
||||
// Content is returned regardless, but property will be "" if not approved
|
||||
return property, content
|
||||
}
|
||||
|
||||
@@ -6,18 +6,13 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// TestExtractOGTags updated with correct expectations based on filtering logic
|
||||
func TestExtractOGTags(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
// Manually set approved tags/prefixes based on the user request for clarity
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
@@ -194,11 +189,7 @@ func TestIsOGMetaTag(t *testing.T) {
|
||||
|
||||
func TestExtractMetaTagInfo(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
|
||||
|
||||
@@ -595,7 +595,7 @@ func spawnAnubisWithOptions(t *testing.T, basePrefix string) string {
|
||||
fmt.Fprintf(w, "<html><body><span id=anubis-test>%d</span></body></html>", time.Now().Unix())
|
||||
})
|
||||
|
||||
policy, err := libanubis.LoadPoliciesOrDefault(t.Context(), "", anubis.DefaultDifficulty)
|
||||
policy, err := libanubis.LoadPoliciesOrDefault("", anubis.DefaultDifficulty)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
|
||||
)
|
||||
|
||||
func (c *Client) ASNCheckerFor(asns []uint32) checker.Impl {
|
||||
asnMap := map[uint32]struct{}{}
|
||||
var sb strings.Builder
|
||||
fmt.Fprintln(&sb, "ASNChecker")
|
||||
for _, asn := range asns {
|
||||
asnMap[asn] = struct{}{}
|
||||
fmt.Fprintln(&sb, "AS", asn)
|
||||
}
|
||||
|
||||
return &ASNChecker{
|
||||
iptoasn: c.IPToASN,
|
||||
asns: asnMap,
|
||||
hash: internal.FastHash(sb.String()),
|
||||
}
|
||||
}
|
||||
|
||||
type ASNChecker struct {
|
||||
iptoasn iptoasnv1.IpToASNServiceClient
|
||||
asns map[uint32]struct{}
|
||||
hash string
|
||||
}
|
||||
|
||||
func (asnc *ASNChecker) Check(r *http.Request) (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
ipInfo, err := asnc.iptoasn.Lookup(ctx, &iptoasnv1.LookupRequest{
|
||||
IpAddress: r.Header.Get("X-Real-Ip"),
|
||||
})
|
||||
if err != nil {
|
||||
switch {
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
slog.Debug("error contacting thoth", "err", err, "actionable", false)
|
||||
return false, nil
|
||||
default:
|
||||
slog.Error("error contacting thoth, please contact support", "err", err, "actionable", true)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If IP is not publicly announced, return false
|
||||
if !ipInfo.GetAnnounced() {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_, ok := asnc.asns[uint32(ipInfo.GetAsNumber())]
|
||||
|
||||
return ok, nil
|
||||
}
|
||||
|
||||
func (asnc *ASNChecker) Hash() string {
|
||||
return asnc.hash
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
package thoth_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
|
||||
)
|
||||
|
||||
var _ checker.Impl = &thoth.ASNChecker{}
|
||||
|
||||
func TestASNChecker(t *testing.T) {
|
||||
cli := loadSecrets(t)
|
||||
|
||||
asnc := cli.ASNCheckerFor([]uint32{13335})
|
||||
|
||||
for _, cs := range []struct {
|
||||
ipAddress string
|
||||
wantMatch bool
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
ipAddress: "1.1.1.1",
|
||||
wantMatch: true,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "2.2.2.2",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "taco",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "127.0.0.1",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
} {
|
||||
t.Run(fmt.Sprintf("%v", cs), func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/", nil)
|
||||
req.Header.Set("X-Real-Ip", cs.ipAddress)
|
||||
|
||||
match, err := asnc.Check(req)
|
||||
|
||||
if match != cs.wantMatch {
|
||||
t.Errorf("Wanted match: %v, got: %v", cs.wantMatch, match)
|
||||
}
|
||||
|
||||
switch {
|
||||
case err != nil && !cs.wantError:
|
||||
t.Errorf("Did not want error but got: %v", err)
|
||||
case err == nil && cs.wantError:
|
||||
t.Error("Wanted error but got none")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkWithCache(b *testing.B) {
|
||||
cli := loadSecrets(b)
|
||||
req := &iptoasnv1.LookupRequest{IpAddress: "1.1.1.1"}
|
||||
|
||||
_, err := cli.IPToASN.Lookup(b.Context(), req)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
|
||||
for b.Loop() {
|
||||
_, err := cli.IPToASN.Lookup(b.Context(), req)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/metadata"
|
||||
)
|
||||
|
||||
func authUnaryClientInterceptor(token string) grpc.UnaryClientInterceptor {
|
||||
return func(
|
||||
ctx context.Context,
|
||||
method string,
|
||||
req interface{},
|
||||
reply interface{},
|
||||
cc *grpc.ClientConn,
|
||||
invoker grpc.UnaryInvoker,
|
||||
opts ...grpc.CallOption,
|
||||
) error {
|
||||
md := metadata.Pairs("authorization", "Bearer "+token)
|
||||
ctx = metadata.NewOutgoingContext(ctx, md)
|
||||
return invoker(ctx, method, req, reply, cc, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
func authStreamClientInterceptor(token string) grpc.StreamClientInterceptor {
|
||||
return func(
|
||||
ctx context.Context,
|
||||
desc *grpc.StreamDesc,
|
||||
cc *grpc.ClientConn,
|
||||
method string,
|
||||
streamer grpc.Streamer,
|
||||
opts ...grpc.CallOption,
|
||||
) (grpc.ClientStream, error) {
|
||||
md := metadata.Pairs("authorization", "Bearer "+token)
|
||||
ctx = metadata.NewOutgoingContext(ctx, md)
|
||||
return streamer(ctx, desc, cc, method, opts...)
|
||||
}
|
||||
}
|
||||
@@ -1,84 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"net/netip"
	"sync"

	iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
	"github.com/gaissmai/bart"
	"google.golang.org/grpc"
)
|
||||
|
||||
type IPToASNWithCache struct {
|
||||
next iptoasnv1.IpToASNServiceClient
|
||||
table *bart.Table[*iptoasnv1.LookupResponse]
|
||||
}
|
||||
|
||||
func NewIpToASNWithCache(next iptoasnv1.IpToASNServiceClient) *IPToASNWithCache {
|
||||
result := &IPToASNWithCache{
|
||||
next: next,
|
||||
table: &bart.Table[*iptoasnv1.LookupResponse]{},
|
||||
}
|
||||
|
||||
for _, pfx := range []netip.Prefix{
|
||||
netip.MustParsePrefix("10.0.0.0/8"), // RFC 1918
|
||||
netip.MustParsePrefix("172.16.0.0/12"), // RFC 1918
|
||||
netip.MustParsePrefix("192.168.0.0/16"), // RFC 1918
|
||||
netip.MustParsePrefix("127.0.0.0/8"), // Loopback
|
||||
netip.MustParsePrefix("169.254.0.0/16"), // Link-local
|
||||
netip.MustParsePrefix("100.64.0.0/10"), // CGNAT
|
||||
netip.MustParsePrefix("192.0.0.0/24"), // Protocol assignments
|
||||
netip.MustParsePrefix("192.0.2.0/24"), // TEST-NET-1
|
||||
netip.MustParsePrefix("198.18.0.0/15"), // Benchmarking
|
||||
netip.MustParsePrefix("198.51.100.0/24"), // TEST-NET-2
|
||||
netip.MustParsePrefix("203.0.113.0/24"), // TEST-NET-3
|
||||
netip.MustParsePrefix("240.0.0.0/4"), // Reserved
|
||||
netip.MustParsePrefix("255.255.255.255/32"), // Broadcast
|
||||
netip.MustParsePrefix("fc00::/7"), // Unique local address
|
||||
netip.MustParsePrefix("fe80::/10"), // Link-local
|
||||
netip.MustParsePrefix("::1/128"), // Loopback
|
||||
netip.MustParsePrefix("::/128"), // Unspecified
|
||||
netip.MustParsePrefix("100::/64"), // Discard-only
|
||||
netip.MustParsePrefix("2001:db8::/32"), // Documentation
|
||||
} {
|
||||
result.table.Insert(pfx, &iptoasnv1.LookupResponse{Announced: false})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (ip2asn *IPToASNWithCache) Lookup(ctx context.Context, lr *iptoasnv1.LookupRequest, opts ...grpc.CallOption) (*iptoasnv1.LookupResponse, error) {
|
||||
addr, err := netip.ParseAddr(lr.GetIpAddress())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("input is not an IP address: %w", err)
|
||||
}
|
||||
|
||||
cachedResponse, ok := ip2asn.table.Lookup(addr)
|
||||
if ok {
|
||||
return cachedResponse, nil
|
||||
}
|
||||
|
||||
resp, err := ip2asn.next.Lookup(ctx, lr, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for _, cidr := range resp.GetCidr() {
|
||||
pfx, err := netip.ParsePrefix(cidr)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
continue
|
||||
}
|
||||
ip2asn.table.Insert(pfx, resp)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
slog.Error("errors parsing IP prefixes", "err", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import "context"
|
||||
|
||||
// ctxKey is the unexported context key type under which With stores the
// thoth Client and FromContext looks it up.
type ctxKey struct{}
|
||||
|
||||
func With(ctx context.Context, cli *Client) context.Context {
|
||||
return context.WithValue(ctx, ctxKey{}, cli)
|
||||
}
|
||||
|
||||
func FromContext(ctx context.Context) (*Client, bool) {
|
||||
cli, ok := ctx.Value(ctxKey{}).(*Client)
|
||||
return cli, ok
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
|
||||
)
|
||||
|
||||
func (c *Client) GeoIPCheckerFor(countries []string) checker.Impl {
|
||||
countryMap := map[string]struct{}{}
|
||||
var sb strings.Builder
|
||||
fmt.Fprintln(&sb, "GeoIPChecker")
|
||||
for _, cc := range countries {
|
||||
countryMap[cc] = struct{}{}
|
||||
fmt.Fprintln(&sb, cc)
|
||||
}
|
||||
|
||||
return &GeoIPChecker{
|
||||
IPToASN: c.IPToASN,
|
||||
Countries: countryMap,
|
||||
hash: sb.String(),
|
||||
}
|
||||
}
|
||||
|
||||
type GeoIPChecker struct {
|
||||
IPToASN iptoasnv1.IpToASNServiceClient
|
||||
Countries map[string]struct{}
|
||||
hash string
|
||||
}
|
||||
|
||||
func (gipc *GeoIPChecker) Check(r *http.Request) (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
ipInfo, err := gipc.IPToASN.Lookup(ctx, &iptoasnv1.LookupRequest{
|
||||
IpAddress: r.Header.Get("X-Real-Ip"),
|
||||
})
|
||||
if err != nil {
|
||||
switch {
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
slog.Debug("error contacting thoth", "err", err, "actionable", false)
|
||||
return false, nil
|
||||
default:
|
||||
slog.Error("error contacting thoth, please contact support", "err", err, "actionable", true)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If IP is not publicly announced, return false
|
||||
if !ipInfo.GetAnnounced() {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_, ok := gipc.Countries[strings.ToLower(ipInfo.GetCountryCode())]
|
||||
|
||||
return ok, nil
|
||||
}
|
||||
|
||||
func (gipc *GeoIPChecker) Hash() string {
|
||||
return gipc.hash
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
package thoth_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
)
|
||||
|
||||
// Compile-time assertion that *thoth.GeoIPChecker implements checker.Impl.
var _ checker.Impl = (*thoth.GeoIPChecker)(nil)
|
||||
|
||||
func TestGeoIPChecker(t *testing.T) {
|
||||
cli := loadSecrets(t)
|
||||
|
||||
asnc := cli.GeoIPCheckerFor([]string{"us"})
|
||||
|
||||
for _, cs := range []struct {
|
||||
ipAddress string
|
||||
wantMatch bool
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
ipAddress: "1.1.1.1",
|
||||
wantMatch: true,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "2.2.2.2",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "taco",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
ipAddress: "127.0.0.1",
|
||||
wantMatch: false,
|
||||
wantError: false,
|
||||
},
|
||||
} {
|
||||
t.Run(fmt.Sprintf("%v", cs), func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/", nil)
|
||||
req.Header.Set("X-Real-Ip", cs.ipAddress)
|
||||
|
||||
match, err := asnc.Check(req)
|
||||
|
||||
if match != cs.wantMatch {
|
||||
t.Errorf("Wanted match: %v, got: %v", cs.wantMatch, match)
|
||||
}
|
||||
|
||||
switch {
|
||||
case err != nil && !cs.wantError:
|
||||
t.Errorf("Did not want error but got: %v", err)
|
||||
case err == nil && cs.wantError:
|
||||
t.Error("Wanted error but got none")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,88 +0,0 @@
|
||||
package thoth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
|
||||
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
|
||||
"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/timeout"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
conn *grpc.ClientConn
|
||||
health healthv1.HealthClient
|
||||
IPToASN iptoasnv1.IpToASNServiceClient
|
||||
}
|
||||
|
||||
func New(ctx context.Context, thothURL, apiToken string, plaintext bool) (*Client, error) {
|
||||
clMetrics := grpcprom.NewClientMetrics(
|
||||
grpcprom.WithClientHandlingTimeHistogram(
|
||||
grpcprom.WithHistogramBuckets([]float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}),
|
||||
),
|
||||
)
|
||||
prometheus.DefaultRegisterer.Register(clMetrics)
|
||||
|
||||
do := []grpc.DialOption{
|
||||
grpc.WithChainUnaryInterceptor(
|
||||
timeout.UnaryClientInterceptor(500*time.Millisecond),
|
||||
clMetrics.UnaryClientInterceptor(),
|
||||
authUnaryClientInterceptor(apiToken),
|
||||
),
|
||||
grpc.WithChainStreamInterceptor(
|
||||
clMetrics.StreamClientInterceptor(),
|
||||
authStreamClientInterceptor(apiToken),
|
||||
),
|
||||
grpc.WithUserAgent(fmt.Sprint("Techaro/anubis:", anubis.Version)),
|
||||
}
|
||||
|
||||
if plaintext {
|
||||
do = append(do, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
} else {
|
||||
do = append(do, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{})))
|
||||
}
|
||||
|
||||
conn, err := grpc.NewClient(
|
||||
thothURL,
|
||||
do...,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't dial thoth at %s: %w", thothURL, err)
|
||||
}
|
||||
|
||||
hc := healthv1.NewHealthClient(conn)
|
||||
|
||||
resp, err := hc.Check(ctx, &healthv1.HealthCheckRequest{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't verify thoth health at %s: %w", thothURL, err)
|
||||
}
|
||||
|
||||
if resp.Status != healthv1.HealthCheckResponse_SERVING {
|
||||
return nil, fmt.Errorf("thoth is not healthy, wanted %s but got %s", healthv1.HealthCheckResponse_SERVING, resp.Status)
|
||||
}
|
||||
|
||||
return &Client{
|
||||
conn: conn,
|
||||
health: hc,
|
||||
IPToASN: NewIpToASNWithCache(iptoasnv1.NewIpToASNServiceClient(conn)),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Client) Close() error {
|
||||
if c.conn != nil {
|
||||
return c.conn.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) WithIPToASNService(impl iptoasnv1.IpToASNServiceClient) {
|
||||
c.IPToASN = impl
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
package thoth_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
|
||||
"github.com/joho/godotenv"
|
||||
)
|
||||
|
||||
func loadSecrets(t testing.TB) *thoth.Client {
|
||||
t.Helper()
|
||||
|
||||
if err := godotenv.Load(); err != nil {
|
||||
t.Log("using mock thoth")
|
||||
result := &thoth.Client{}
|
||||
result.WithIPToASNService(thothmock.MockIpToASNService())
|
||||
return result
|
||||
}
|
||||
|
||||
cli, err := thoth.New(t.Context(), os.Getenv("THOTH_URL"), os.Getenv("THOTH_API_KEY"), false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return cli
|
||||
}
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
cli := loadSecrets(t)
|
||||
|
||||
if err := cli.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package thothmock
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/netip"
|
||||
|
||||
iptoasnv1 "github.com/TecharoHQ/thoth-proto/gen/techaro/thoth/iptoasn/v1"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
func MockIpToASNService() *IpToASNService {
|
||||
responses := map[string]*iptoasnv1.LookupResponse{
|
||||
"127.0.0.1": {Announced: false},
|
||||
"::1": {Announced: false},
|
||||
"10.10.10.10": {
|
||||
Announced: true,
|
||||
AsNumber: 13335,
|
||||
Cidr: []string{"1.1.1.0/24"},
|
||||
CountryCode: "US",
|
||||
Description: "Cloudflare",
|
||||
},
|
||||
"2.2.2.2": {
|
||||
Announced: true,
|
||||
AsNumber: 420,
|
||||
Cidr: []string{"2.2.2.0/24"},
|
||||
CountryCode: "CA",
|
||||
Description: "test canada",
|
||||
},
|
||||
"1.1.1.1": {
|
||||
Announced: true,
|
||||
AsNumber: 13335,
|
||||
Cidr: []string{"1.1.1.0/24"},
|
||||
CountryCode: "US",
|
||||
Description: "Cloudflare",
|
||||
},
|
||||
}
|
||||
|
||||
return &IpToASNService{Responses: responses}
|
||||
}
|
||||
|
||||
type IpToASNService struct {
|
||||
iptoasnv1.UnimplementedIpToASNServiceServer
|
||||
Responses map[string]*iptoasnv1.LookupResponse
|
||||
}
|
||||
|
||||
func (ip2asn *IpToASNService) Lookup(ctx context.Context, lr *iptoasnv1.LookupRequest, opts ...grpc.CallOption) (*iptoasnv1.LookupResponse, error) {
|
||||
if _, err := netip.ParseAddr(lr.GetIpAddress()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, ok := ip2asn.Responses[lr.GetIpAddress()]
|
||||
if !ok {
|
||||
return nil, status.Error(codes.NotFound, "IP address not found in mock")
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package thothmock
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
)
|
||||
|
||||
func WithMockThoth(t *testing.T) context.Context {
|
||||
t.Helper()
|
||||
|
||||
thothCli := &thoth.Client{}
|
||||
thothCli.WithIPToASNService(MockIpToASNService())
|
||||
ctx := thoth.With(t.Context(), thothCli)
|
||||
return ctx
|
||||
}
|
||||
149
lib/anubis.go
149
lib/anubis.go
@@ -15,7 +15,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
"github.com/google/cel-go/common/types"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
@@ -27,7 +26,6 @@ import (
|
||||
"github.com/TecharoHQ/anubis/internal/ogtags"
|
||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
|
||||
// challenge implementations
|
||||
@@ -63,47 +61,35 @@ var (
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
next http.Handler
|
||||
mux *http.ServeMux
|
||||
policy *policy.ParsedConfig
|
||||
DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse]
|
||||
OGTags *ogtags.OGTagCache
|
||||
cookieName string
|
||||
ed25519Priv ed25519.PrivateKey
|
||||
hs512Secret []byte
|
||||
opts Options
|
||||
}
|
||||
|
||||
func (s *Server) getTokenKeyfunc() jwt.Keyfunc {
|
||||
// return ED25519 key if HS512 is not set
|
||||
if len(s.hs512Secret) == 0 {
|
||||
return func(token *jwt.Token) (interface{}, error) {
|
||||
return s.ed25519Priv.Public().(ed25519.PublicKey), nil
|
||||
}
|
||||
} else {
|
||||
return func(token *jwt.Token) (interface{}, error) {
|
||||
return s.hs512Secret, nil
|
||||
}
|
||||
}
|
||||
next http.Handler
|
||||
mux *http.ServeMux
|
||||
policy *policy.ParsedConfig
|
||||
DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse]
|
||||
OGTags *ogtags.OGTagCache
|
||||
cookieName string
|
||||
priv ed25519.PrivateKey
|
||||
pub ed25519.PublicKey
|
||||
opts Options
|
||||
}
|
||||
|
||||
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
|
||||
var fp [32]byte
|
||||
if len(s.hs512Secret) == 0 {
|
||||
fp = sha256.Sum256(s.ed25519Priv.Public().(ed25519.PublicKey)[:])
|
||||
} else {
|
||||
fp = sha256.Sum256(s.hs512Secret)
|
||||
fp := sha256.Sum256(s.pub[:])
|
||||
|
||||
acceptLanguage := r.Header.Get("Accept-Language")
|
||||
if len(acceptLanguage) > 5 {
|
||||
acceptLanguage = acceptLanguage[:5]
|
||||
}
|
||||
|
||||
challengeData := fmt.Sprintf(
|
||||
"X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
|
||||
"Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
|
||||
acceptLanguage,
|
||||
r.Header.Get("X-Real-Ip"),
|
||||
r.UserAgent(),
|
||||
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
|
||||
fp,
|
||||
difficulty,
|
||||
)
|
||||
return internal.FastHash(challengeData)
|
||||
return internal.SHA256sum(challengeData)
|
||||
}
|
||||
|
||||
func (s *Server) maybeReverseProxyHttpStatusOnly(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -148,30 +134,32 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
|
||||
ckie, err := r.Cookie(s.cookieName)
|
||||
if err != nil {
|
||||
lg.Debug("cookie not found", "path", r.URL.Path)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
|
||||
if err := ckie.Valid(); err != nil {
|
||||
lg.Debug("cookie is invalid", "err", err)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
|
||||
if time.Now().After(ckie.Expires) && !ckie.Expires.IsZero() {
|
||||
lg.Debug("cookie expired", "path", r.URL.Path)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
|
||||
token, err := jwt.ParseWithClaims(ckie.Value, jwt.MapClaims{}, s.getTokenKeyfunc(), jwt.WithExpirationRequired(), jwt.WithStrictDecoding())
|
||||
token, err := jwt.ParseWithClaims(ckie.Value, jwt.MapClaims{}, func(token *jwt.Token) (interface{}, error) {
|
||||
return s.pub, nil
|
||||
}, jwt.WithExpirationRequired(), jwt.WithStrictDecoding())
|
||||
|
||||
if err != nil || !token.Valid {
|
||||
lg.Debug("invalid token", "path", r.URL.Path, "err", err)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
@@ -179,7 +167,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
|
||||
claims, ok := token.Claims.(jwt.MapClaims)
|
||||
if !ok {
|
||||
lg.Debug("invalid token claims type", "path", r.URL.Path)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
@@ -187,14 +175,14 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS
|
||||
policyRule, ok := claims["policyRule"].(string)
|
||||
if !ok {
|
||||
lg.Debug("policyRule claim is not a string")
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
|
||||
if policyRule != rule.Hash() {
|
||||
lg.Debug("user originally passed with a different rule, issuing new challenge", "old", policyRule, "new", rule.Name)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.RenderIndex(w, r, rule, httpStatusOnly)
|
||||
return
|
||||
}
|
||||
@@ -216,7 +204,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
|
||||
s.ServeHTTPNext(w, r)
|
||||
return true
|
||||
case config.RuleDeny:
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
lg.Info("explicit deny")
|
||||
if rule == nil {
|
||||
lg.Error("rule is nil, cannot calculate checksum")
|
||||
@@ -235,7 +223,7 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
|
||||
s.RenderBench(w, r)
|
||||
return true
|
||||
default:
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
slog.Error("CONFIG ERROR: unknown rule", "rule", cr.Rule)
|
||||
s.respondWithError(w, r, "Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"maybeReverseProxy.Rules\"")
|
||||
return true
|
||||
@@ -300,15 +288,15 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
lg = lg.With("check_result", cr)
|
||||
chal := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
|
||||
s.SetCookie(w, anubis.TestCookieName, chal, "/", r.Host)
|
||||
s.SetCookie(w, anubis.TestCookieName, challenge, "/")
|
||||
|
||||
err = encoder.Encode(struct {
|
||||
Rules *config.ChallengeRules `json:"rules"`
|
||||
Challenge string `json:"challenge"`
|
||||
}{
|
||||
Challenge: chal,
|
||||
Challenge: challenge,
|
||||
Rules: rule.Challenge,
|
||||
})
|
||||
if err != nil {
|
||||
@@ -316,7 +304,7 @@ func (s *Server) MakeChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
lg.Debug("made challenge", "challenge", chal, "rules", rule.Challenge, "cr", cr)
|
||||
lg.Debug("made challenge", "challenge", challenge, "rules", rule.Challenge, "cr", cr)
|
||||
challengesIssued.WithLabelValues("api").Inc()
|
||||
}
|
||||
|
||||
@@ -329,15 +317,15 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
cookiePath = strings.TrimSuffix(anubis.BasePrefix, "/") + "/"
|
||||
}
|
||||
|
||||
if _, err := r.Cookie(anubis.TestCookieName); errors.Is(err, http.ErrNoCookie) {
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, anubis.TestCookieName, "/", r.Host)
|
||||
if _, err := r.Cookie(anubis.TestCookieName); err == http.ErrNoCookie {
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.ClearCookie(w, anubis.TestCookieName, "/")
|
||||
lg.Warn("user has cookies disabled, this is not an anubis bug")
|
||||
s.respondWithError(w, r, "Your browser is configured to disable cookies. Anubis requires cookies for the legitimate interest of making sure you are a valid client. Please enable cookies for this domain")
|
||||
return
|
||||
}
|
||||
|
||||
s.ClearCookie(w, anubis.TestCookieName, "/", r.Host)
|
||||
s.ClearCookie(w, anubis.TestCookieName, "/")
|
||||
|
||||
redir := r.FormValue("redir")
|
||||
redirURL, err := url.ParseRequestURI(redir)
|
||||
@@ -377,9 +365,9 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
challengeStr := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
|
||||
if err := impl.Validate(r, lg, rule, challengeStr); err != nil {
|
||||
failedValidations.WithLabelValues(rule.Challenge.Algorithm).Inc()
|
||||
failedValidations.WithLabelValues(string(rule.Challenge.Algorithm)).Inc()
|
||||
var cerr *challenge.Error
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
lg.Debug("challenge validate call failed", "err", err)
|
||||
|
||||
switch {
|
||||
@@ -402,12 +390,12 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
if err != nil {
|
||||
lg.Error("failed to sign JWT", "err", err)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath, r.Host)
|
||||
s.ClearCookie(w, s.cookieName, cookiePath)
|
||||
s.respondWithError(w, r, "failed to sign JWT")
|
||||
return
|
||||
}
|
||||
|
||||
s.SetCookie(w, s.cookieName, tokenString, cookiePath, r.Host)
|
||||
s.SetCookie(w, s.cookieName, tokenString, cookiePath)
|
||||
|
||||
challengesValidated.WithLabelValues(rule.Challenge.Algorithm).Inc()
|
||||
lg.Debug("challenge passed, redirecting to app")
|
||||
@@ -422,6 +410,12 @@ func cr(name string, rule config.Rule, weight int) policy.CheckResult {
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
weightOkayStatic = policy.NewStaticHashChecker("weight/okay")
|
||||
weightMildSusStatic = policy.NewStaticHashChecker("weight/mild-suspicion")
|
||||
weightVerySusStatic = policy.NewStaticHashChecker("weight/extreme-suspicion")
|
||||
)
|
||||
|
||||
// Check evaluates the list of rules, and returns the result
|
||||
func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) {
|
||||
host := r.Header.Get("X-Real-Ip")
|
||||
@@ -453,25 +447,34 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range s.policy.Thresholds {
|
||||
result, _, err := t.Program.ContextEval(r.Context(), &policy.ThresholdRequest{Weight: weight})
|
||||
if err != nil {
|
||||
slog.Error("error when evaluating threshold expression", "expression", t.Expression.String(), "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var matches bool
|
||||
|
||||
if val, ok := result.(types.Bool); ok {
|
||||
matches = bool(val)
|
||||
}
|
||||
|
||||
if matches {
|
||||
return cr("threshold/"+t.Name, t.Action, weight), &policy.Bot{
|
||||
Challenge: t.Challenge,
|
||||
Rules: &checker.List{},
|
||||
}, nil
|
||||
}
|
||||
switch {
|
||||
case weight <= 0:
|
||||
return cr("weight/okay", config.RuleAllow, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: config.DefaultAlgorithm,
|
||||
},
|
||||
Rules: weightOkayStatic,
|
||||
}, nil
|
||||
case weight > 0 && weight < 10:
|
||||
return cr("weight/mild-suspicion", config.RuleChallenge, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: "metarefresh",
|
||||
},
|
||||
Rules: weightMildSusStatic,
|
||||
}, nil
|
||||
case weight >= 10:
|
||||
return cr("weight/extreme-suspicion", config.RuleChallenge, weight), &policy.Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: s.policy.DefaultDifficulty,
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: "fast",
|
||||
},
|
||||
Rules: weightVerySusStatic,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return cr("default/allow", config.RuleAllow, weight), &policy.Bot{
|
||||
@@ -480,7 +483,7 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
|
||||
ReportAs: s.policy.DefaultDifficulty,
|
||||
Algorithm: config.DefaultAlgorithm,
|
||||
},
|
||||
Rules: &checker.List{},
|
||||
Rules: &policy.CheckerList{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
@@ -24,16 +23,10 @@ func init() {
|
||||
internal.InitSlog("debug")
|
||||
}
|
||||
|
||||
func loadPolicies(t *testing.T, fname string, difficulty int) *policy.ParsedConfig {
|
||||
func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
|
||||
t.Helper()
|
||||
|
||||
ctx := thothmock.WithMockThoth(t)
|
||||
|
||||
if fname == "" {
|
||||
fname = "./testdata/test_config.yaml"
|
||||
}
|
||||
|
||||
anubisPolicy, err := LoadPoliciesOrDefault(ctx, fname, difficulty)
|
||||
anubisPolicy, err := LoadPoliciesOrDefault(fname, anubis.DefaultDifficulty)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -44,10 +37,6 @@ func loadPolicies(t *testing.T, fname string, difficulty int) *policy.ParsedConf
|
||||
func spawnAnubis(t *testing.T, opts Options) *Server {
|
||||
t.Helper()
|
||||
|
||||
if opts.Policy == nil {
|
||||
opts.Policy = loadPolicies(t, "", 4)
|
||||
}
|
||||
|
||||
s, err := New(opts)
|
||||
if err != nil {
|
||||
t.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
@@ -175,7 +164,7 @@ func TestLoadPolicies(t *testing.T) {
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
if _, err := policy.ParseConfig(t.Context(), fin, fname, 4); err != nil {
|
||||
if _, err := policy.ParseConfig(fin, fname, 4); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
@@ -184,7 +173,8 @@ func TestLoadPolicies(t *testing.T) {
|
||||
|
||||
// Regression test for CVE-2025-24369
|
||||
func TestCVE2025_24369(t *testing.T) {
|
||||
pol := loadPolicies(t, "", anubis.DefaultDifficulty)
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 4
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: http.NewServeMux(),
|
||||
@@ -207,7 +197,8 @@ func TestCVE2025_24369(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCookieCustomExpiration(t *testing.T) {
|
||||
pol := loadPolicies(t, "", 0)
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 0
|
||||
ckieExpiration := 10 * time.Minute
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
@@ -256,7 +247,8 @@ func TestCookieCustomExpiration(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCookieSettings(t *testing.T) {
|
||||
pol := loadPolicies(t, "", 0)
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 0
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: http.NewServeMux(),
|
||||
@@ -321,7 +313,10 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) {
|
||||
|
||||
for i := 1; i < 10; i++ {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
anubisPolicy := loadPolicies(t, "", i)
|
||||
anubisPolicy, err := LoadPoliciesOrDefault("", i)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s, err := New(Options{
|
||||
Next: h,
|
||||
@@ -339,13 +334,11 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) {
|
||||
|
||||
req.Header.Add("X-Real-Ip", "127.0.0.1")
|
||||
|
||||
cr, bot, err := s.check(req)
|
||||
_, bot, err := s.check(req)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
t.Log(cr.Name)
|
||||
|
||||
if bot.Challenge.Difficulty != i {
|
||||
t.Errorf("Challenge.Difficulty is wrong, wanted %d, got: %d", i, bot.Challenge.Difficulty)
|
||||
}
|
||||
@@ -393,7 +386,8 @@ func TestBasePrefix(t *testing.T) {
|
||||
// Reset the global BasePrefix before each test
|
||||
anubis.BasePrefix = ""
|
||||
|
||||
pol := loadPolicies(t, "", 4)
|
||||
pol := loadPolicies(t, "")
|
||||
pol.DefaultDifficulty = 4
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: h,
|
||||
@@ -521,7 +515,8 @@ func TestCustomStatusCodes(t *testing.T) {
|
||||
"DENY": 403,
|
||||
}
|
||||
|
||||
pol := loadPolicies(t, "./testdata/aggressive_403.yaml", 4)
|
||||
pol := loadPolicies(t, "./testdata/aggressive_403.yaml")
|
||||
pol.DefaultDifficulty = 4
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
Next: h,
|
||||
@@ -555,7 +550,7 @@ func TestCustomStatusCodes(t *testing.T) {
|
||||
func TestCloudflareWorkersRule(t *testing.T) {
|
||||
for _, variant := range []string{"cel", "header"} {
|
||||
t.Run(variant, func(t *testing.T) {
|
||||
pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml", 0)
|
||||
pol := loadPolicies(t, "./testdata/cloudflare-workers-"+variant+".yaml")
|
||||
|
||||
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, "OK")
|
||||
@@ -611,7 +606,8 @@ func TestCloudflareWorkersRule(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRuleChange(t *testing.T) {
|
||||
pol := loadPolicies(t, "testdata/rule_change.yaml", 0)
|
||||
pol := loadPolicies(t, "testdata/rule_change.yaml")
|
||||
pol.DefaultDifficulty = 0
|
||||
ckieExpiration := 10 * time.Minute
|
||||
|
||||
srv := spawnAnubis(t, Options{
|
||||
@@ -636,102 +632,3 @@ func TestRuleChange(t *testing.T) {
|
||||
t.Errorf("wanted %d, got: %d", http.StatusFound, resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStripBasePrefixFromRequest(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
basePrefix string
|
||||
stripBasePrefix bool
|
||||
requestPath string
|
||||
expectedPath string
|
||||
}{
|
||||
{
|
||||
name: "strip disabled - no change",
|
||||
basePrefix: "/foo",
|
||||
stripBasePrefix: false,
|
||||
requestPath: "/foo/bar",
|
||||
expectedPath: "/foo/bar",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - removes prefix",
|
||||
basePrefix: "/foo",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/foo/bar",
|
||||
expectedPath: "/bar",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - root becomes slash",
|
||||
basePrefix: "/foo",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/foo",
|
||||
expectedPath: "/",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - trailing slash on base prefix",
|
||||
basePrefix: "/foo/",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/foo/bar",
|
||||
expectedPath: "/bar",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - no prefix match",
|
||||
basePrefix: "/foo",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/other/bar",
|
||||
expectedPath: "/other/bar",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - empty base prefix",
|
||||
basePrefix: "",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/foo/bar",
|
||||
expectedPath: "/foo/bar",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - nested path",
|
||||
basePrefix: "/app",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/app/api/v1/users",
|
||||
expectedPath: "/api/v1/users",
|
||||
},
|
||||
{
|
||||
name: "strip enabled - exact match becomes root",
|
||||
basePrefix: "/myapp",
|
||||
stripBasePrefix: true,
|
||||
requestPath: "/myapp/",
|
||||
expectedPath: "/",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
srv := &Server{
|
||||
opts: Options{
|
||||
BasePrefix: tc.basePrefix,
|
||||
StripBasePrefix: tc.stripBasePrefix,
|
||||
},
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, tc.requestPath, nil)
|
||||
originalPath := req.URL.Path
|
||||
|
||||
result := srv.stripBasePrefixFromRequest(req)
|
||||
|
||||
if result.URL.Path != tc.expectedPath {
|
||||
t.Errorf("expected path %q, got %q", tc.expectedPath, result.URL.Path)
|
||||
}
|
||||
|
||||
// Ensure original request is not modified when no stripping should occur
|
||||
if !tc.stripBasePrefix || tc.basePrefix == "" || !strings.HasPrefix(tc.requestPath, strings.TrimSuffix(tc.basePrefix, "/")) {
|
||||
if result != req {
|
||||
t.Error("expected same request object when no modification needed")
|
||||
}
|
||||
} else {
|
||||
// Ensure original request is not modified when stripping occurs
|
||||
if req.URL.Path != originalPath {
|
||||
t.Error("original request was modified")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/a-h/templ"
|
||||
)
|
||||
|
||||
@@ -41,19 +40,12 @@ func Methods() []string {
|
||||
return result
|
||||
}
|
||||
|
||||
type IssueInput struct {
|
||||
Impressum *config.Impressum
|
||||
Rule *policy.Bot
|
||||
Challenge string
|
||||
OGTags map[string]string
|
||||
}
|
||||
|
||||
type Impl interface {
|
||||
// Setup registers any additional routes with the Impl for assets or API routes.
|
||||
Setup(mux *http.ServeMux)
|
||||
|
||||
// Issue a new challenge to the user, called by the Anubis.
|
||||
Issue(r *http.Request, lg *slog.Logger, in *IssueInput) (templ.Component, error)
|
||||
Issue(r *http.Request, lg *slog.Logger, rule *policy.Bot, challenge string, ogTags map[string]string) (templ.Component, error)
|
||||
|
||||
// Validate a challenge, making sure that it passes muster.
|
||||
Validate(r *http.Request, lg *slog.Logger, rule *policy.Bot, challenge string) error
|
||||
|
||||
@@ -22,9 +22,9 @@ func NewError(verb, publicReason string, privateReason error) *Error {
|
||||
}
|
||||
|
||||
type Error struct {
|
||||
PrivateReason error
|
||||
Verb string
|
||||
PublicReason string
|
||||
PrivateReason error
|
||||
StatusCode int
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ type Impl struct{}
|
||||
|
||||
func (i *Impl) Setup(mux *http.ServeMux) {}
|
||||
|
||||
func (i *Impl) Issue(r *http.Request, lg *slog.Logger, in *challenge.IssueInput) (templ.Component, error) {
|
||||
func (i *Impl) Issue(r *http.Request, lg *slog.Logger, rule *policy.Bot, challenge string, ogTags map[string]string) (templ.Component, error) {
|
||||
u, err := r.URL.Parse(anubis.BasePrefix + "/.within.website/x/cmd/anubis/api/pass-challenge")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't render page: %w", err)
|
||||
@@ -31,10 +31,10 @@ func (i *Impl) Issue(r *http.Request, lg *slog.Logger, in *challenge.IssueInput)
|
||||
|
||||
q := u.Query()
|
||||
q.Set("redir", r.URL.String())
|
||||
q.Set("challenge", in.Challenge)
|
||||
q.Set("challenge", challenge)
|
||||
u.RawQuery = q.Encode()
|
||||
|
||||
component, err := web.BaseWithChallengeAndOGTags("Making sure you're not a bot!", page(in.Challenge, u.String(), in.Rule.Challenge.Difficulty), in.Impressum, in.Challenge, in.Rule.Challenge, in.OGTags)
|
||||
component, err := web.BaseWithChallengeAndOGTags("Making sure you're not a bot!", page(challenge, u.String(), rule.Challenge.Difficulty), challenge, rule.Challenge, ogTags)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't render page: %w", err)
|
||||
}
|
||||
|
||||
@@ -28,8 +28,8 @@ func (i *Impl) Setup(mux *http.ServeMux) {
|
||||
/* no implementation required */
|
||||
}
|
||||
|
||||
func (i *Impl) Issue(r *http.Request, lg *slog.Logger, in *chall.IssueInput) (templ.Component, error) {
|
||||
component, err := web.BaseWithChallengeAndOGTags("Making sure you're not a bot!", web.Index(), in.Impressum, in.Challenge, in.Rule.Challenge, in.OGTags)
|
||||
func (i *Impl) Issue(r *http.Request, lg *slog.Logger, rule *policy.Bot, challenge string, ogTags map[string]string) (templ.Component, error) {
|
||||
component, err := web.BaseWithChallengeAndOGTags("Making sure you're not a bot!", web.Index(), challenge, rule.Challenge, ogTags)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't render page: %w", err)
|
||||
}
|
||||
|
||||
@@ -124,12 +124,7 @@ func TestBasic(t *testing.T) {
|
||||
t.Run(cs.name, func(t *testing.T) {
|
||||
lg := slog.With()
|
||||
|
||||
inp := &challenge.IssueInput{
|
||||
Rule: bot,
|
||||
Challenge: cs.challengeStr,
|
||||
}
|
||||
|
||||
if _, err := i.Issue(cs.req, lg, inp); err != nil {
|
||||
if _, err := i.Issue(cs.req, lg, bot, cs.challengeStr, nil); err != nil {
|
||||
t.Errorf("can't issue challenge: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package lib
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
@@ -21,32 +20,29 @@ import (
|
||||
"github.com/TecharoHQ/anubis/internal/ogtags"
|
||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/web"
|
||||
"github.com/TecharoHQ/anubis/xess"
|
||||
"github.com/a-h/templ"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
Next http.Handler
|
||||
Policy *policy.ParsedConfig
|
||||
Target string
|
||||
CookieDynamicDomain bool
|
||||
CookieDomain string
|
||||
CookieExpiration time.Duration
|
||||
CookieName string
|
||||
CookiePartitioned bool
|
||||
BasePrefix string
|
||||
WebmasterEmail string
|
||||
RedirectDomains []string
|
||||
ED25519PrivateKey ed25519.PrivateKey
|
||||
HS512Secret []byte
|
||||
StripBasePrefix bool
|
||||
OpenGraph config.OpenGraph
|
||||
ServeRobotsTXT bool
|
||||
Next http.Handler
|
||||
Policy *policy.ParsedConfig
|
||||
Target string
|
||||
CookieDomain string
|
||||
CookieName string
|
||||
BasePrefix string
|
||||
WebmasterEmail string
|
||||
RedirectDomains []string
|
||||
PrivateKey ed25519.PrivateKey
|
||||
CookieExpiration time.Duration
|
||||
OGTimeToLive time.Duration
|
||||
OGCacheConsidersHost bool
|
||||
OGPassthrough bool
|
||||
CookiePartitioned bool
|
||||
ServeRobotsTXT bool
|
||||
}
|
||||
|
||||
func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
|
||||
func LoadPoliciesOrDefault(fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {
|
||||
var fin io.ReadCloser
|
||||
var err error
|
||||
|
||||
@@ -70,10 +66,7 @@ func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty
|
||||
}
|
||||
}(fin)
|
||||
|
||||
anubisPolicy, err := policy.ParseConfig(ctx, fin, fname, defaultDifficulty)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't parse policy file %s: %w", fname, err)
|
||||
}
|
||||
anubisPolicy, err := policy.ParseConfig(fin, fname, defaultDifficulty)
|
||||
var validationErrs []error
|
||||
|
||||
for _, b := range anubisPolicy.Bots {
|
||||
@@ -90,13 +83,13 @@ func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty
|
||||
}
|
||||
|
||||
func New(opts Options) (*Server, error) {
|
||||
if opts.ED25519PrivateKey == nil && opts.HS512Secret == nil {
|
||||
if opts.PrivateKey == nil {
|
||||
slog.Debug("opts.PrivateKey not set, generating a new one")
|
||||
_, priv, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("lib: can't generate private key: %v", err)
|
||||
}
|
||||
opts.ED25519PrivateKey = priv
|
||||
opts.PrivateKey = priv
|
||||
}
|
||||
|
||||
anubis.BasePrefix = opts.BasePrefix
|
||||
@@ -108,14 +101,14 @@ func New(opts Options) (*Server, error) {
|
||||
}
|
||||
|
||||
result := &Server{
|
||||
next: opts.Next,
|
||||
ed25519Priv: opts.ED25519PrivateKey,
|
||||
hs512Secret: opts.HS512Secret,
|
||||
policy: opts.Policy,
|
||||
opts: opts,
|
||||
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph),
|
||||
cookieName: cookieName,
|
||||
next: opts.Next,
|
||||
priv: opts.PrivateKey,
|
||||
pub: opts.PrivateKey.Public().(ed25519.PublicKey),
|
||||
policy: opts.Policy,
|
||||
opts: opts,
|
||||
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost),
|
||||
cookieName: cookieName,
|
||||
}
|
||||
|
||||
mux := http.NewServeMux()
|
||||
@@ -152,14 +145,6 @@ func New(opts Options) (*Server, error) {
|
||||
}), "GET")
|
||||
}
|
||||
|
||||
if opts.Policy.Impressum != nil {
|
||||
registerWithPrefix(anubis.APIPrefix+"imprint", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
templ.Handler(
|
||||
web.Base(opts.Policy.Impressum.Page.Title, opts.Policy.Impressum.Page, opts.Policy.Impressum),
|
||||
).ServeHTTP(w, r)
|
||||
}), "GET")
|
||||
}
|
||||
|
||||
registerWithPrefix(anubis.APIPrefix+"pass-challenge", http.HandlerFunc(result.PassChallenge), "GET")
|
||||
registerWithPrefix(anubis.APIPrefix+"check", http.HandlerFunc(result.maybeReverseProxyHttpStatusOnly), "")
|
||||
registerWithPrefix("/", http.HandlerFunc(result.maybeReverseProxyOrPage), "")
|
||||
@@ -169,7 +154,7 @@ func New(opts Options) (*Server, error) {
|
||||
// make-challenge is only used in tests. Only enable while version is devel
|
||||
registerWithPrefix(anubis.APIPrefix+"make-challenge", http.HandlerFunc(result.MakeChallenge), "POST")
|
||||
}
|
||||
|
||||
|
||||
for _, implKind := range challenge.Methods() {
|
||||
impl, _ := challenge.Get(implKind)
|
||||
impl.Setup(mux)
|
||||
|
||||
@@ -7,12 +7,11 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/internal/thoth/thothmock"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
)
|
||||
|
||||
func TestInvalidChallengeMethod(t *testing.T) {
|
||||
if _, err := LoadPoliciesOrDefault(t.Context(), "testdata/invalid-challenge-method.yaml", 4); !errors.Is(err, policy.ErrChallengeRuleHasWrongAlgorithm) {
|
||||
if _, err := LoadPoliciesOrDefault("testdata/invalid-challenge-method.yaml", 4); !errors.Is(err, policy.ErrChallengeRuleHasWrongAlgorithm) {
|
||||
t.Fatalf("wanted error %v but got %v", policy.ErrChallengeRuleHasWrongAlgorithm, err)
|
||||
}
|
||||
}
|
||||
@@ -26,7 +25,7 @@ func TestBadConfigs(t *testing.T) {
|
||||
for _, st := range finfos {
|
||||
st := st
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "bad", st.Name()), anubis.DefaultDifficulty); err == nil {
|
||||
if _, err := LoadPoliciesOrDefault(filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err == nil {
|
||||
t.Fatal(err)
|
||||
} else {
|
||||
t.Log(err)
|
||||
@@ -44,18 +43,9 @@ func TestGoodConfigs(t *testing.T) {
|
||||
for _, st := range finfos {
|
||||
st := st
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
t.Run("with-thoth", func(t *testing.T) {
|
||||
ctx := thothmock.WithMockThoth(t)
|
||||
if _, err := LoadPoliciesOrDefault(ctx, filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("without-thoth", func(t *testing.T) {
|
||||
if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
if _, err := LoadPoliciesOrDefault(filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user