Compare commits

..

9 Commits

Author SHA1 Message Date
Xe Iaso
04ecf0a6de chore: spelling 2025-05-22 12:17:26 -04:00
Xe Iaso
502640bb2f feat: wire up asn and geoip checkers
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-22 12:06:17 -04:00
Xe Iaso
86ee5697f3 chore(lib/policy): move Checker to its own package to avoid import cycles
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-22 11:19:01 -04:00
Xe Iaso
9bb38d6ad0 feat(thoth): store a thoth client in a context
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-22 11:07:38 -04:00
Xe Iaso
49ab76c9dd feat(thoth): add GeoIP checker
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-22 10:48:15 -04:00
Xe Iaso
4aea22fac5 feat(thoth): make ASNChecker instances
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-21 15:51:11 -04:00
Xe Iaso
86ad85909c fix(thoth): minor testing fixups, ensure ASNChecker is Checker
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-21 12:13:08 -04:00
Xe Iaso
315253dce7 feat(thoth): cached ip to asn checker
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-21 11:27:53 -04:00
Xe Iaso
946557b378 feat(internal): add Thoth client and simple ASN checker
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-05-21 10:57:11 -04:00
346 changed files with 4137 additions and 17145 deletions

View File

@@ -9,4 +9,4 @@ exclude_dir = ["var", "vendor", "docs", "node_modules"]
[logger]
time = true
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address

View File

@@ -1,12 +0,0 @@
FROM ghcr.io/xe/devcontainer-base/pre/go
WORKDIR /app
COPY go.mod go.sum package.json package-lock.json ./
RUN apt-get update \
&& apt-get -y install zstd brotli redis \
&& mkdir -p /home/vscode/.local/share/fish \
&& chown -R vscode:vscode /home/vscode/.local/share/fish \
&& chown -R vscode:vscode /go
CMD ["/usr/bin/sleep", "infinity"]

View File

@@ -1,13 +0,0 @@
# Anubis Dev Container
Anubis offers a [development container](https://containers.dev/) image in order to make it easier to contribute to the project. This image is based on [Xe/devcontainer-base/go](https://github.com/Xe/devcontainer-base/tree/main/src/go), which is based on Debian Bookworm with the following customizations:
- [Fish](https://fishshell.com/) as the shell complete with a custom theme
- [Go](https://go.dev) at the most recent stable version
- [Node.js](https://nodejs.org/en) at the most recent stable version
- [Atuin](https://atuin.sh/) to sync shell history between your host OS and the development container
- [Docker](https://docker.com) to manage and build Anubis container images from inside the development container
- [Ko](https://ko.build/) to build production-ready Anubis container images
- [Neovim](https://neovim.io/) for use with Git
This development container is tested and known to work with [Visual Studio Code](https://code.visualstudio.com/). If you run into problems with it outside of VS Code, please file an issue and let us know what editor you are using.

View File

@@ -1,30 +0,0 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/debian
{
"name": "Dev",
"dockerComposeFile": [
"./docker-compose.yaml"
],
"service": "workspace",
"workspaceFolder": "/workspace/anubis",
"postStartCommand": "bash ./.devcontainer/poststart.sh",
"features": {
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {},
"ghcr.io/devcontainers/features/github-cli:1": {}
},
"initializeCommand": "mkdir -p ${localEnv:HOME}${localEnv:USERPROFILE}/.local/share/atuin",
"customizations": {
"vscode": {
"extensions": [
"esbenp.prettier-vscode",
"ms-azuretools.vscode-containers",
"golang.go",
"unifiedjs.vscode-mdx",
"a-h.templ",
"redhat.vscode-yaml",
"hashicorp.hcl",
"fredwangwang.vscode-hcl-format"
]
}
}
}

View File

@@ -1,26 +0,0 @@
services:
playwright:
image: mcr.microsoft.com/playwright:v1.52.0-noble
init: true
network_mode: service:workspace
command:
- /bin/sh
- -c
- npx -y playwright@1.52.0 run-server --port 9001 --host 0.0.0.0
valkey:
image: valkey/valkey:8
pull_policy: always
# VS Code workspace service
workspace:
image: ghcr.io/techarohq/anubis/devcontainer
build:
context: ..
dockerfile: .devcontainer/Dockerfile
volumes:
- ../:/workspace/anubis:cached
environment:
VALKEY_URL: redis://valkey:6379/0
#entrypoint: ["/usr/bin/sleep", "infinity"]
user: vscode

View File

@@ -1,9 +0,0 @@
#!/usr/bin/env bash
pwd
npm ci &
go mod download &
go install ./utils/cmd/... &
wait

View File

@@ -1,25 +0,0 @@
.env
*.deb
*.rpm
# Additional package locks
pnpm-lock.yaml
yarn.lock
# Go binaries and test artifacts
main
*.test
node_modules
# MacOS
.DS_store
# Intellij
.idea
# how does this get here
doc/VERSION
web/static/js/*
!web/static/js/.gitignore

2
.gitattributes vendored
View File

@@ -1 +1 @@
**/*_templ.go linguist-generated=true
web/index_templ.go linguist-generated

View File

@@ -2,5 +2,4 @@ github
https
ssh
ubuntu
workarounds
rjack
workarounds

View File

@@ -83,13 +83,6 @@
^\Q.github/FUNDING.yml\E$
^\Q.github/workflows/spelling.yml\E$
^data/crawlers/
^docs/blog/tags\.yml$
^docs/docs/user/known-instances.md$
^docs/manifest/.*$
^docs/static/\.nojekyll$
^lib/policy/config/testdata/bad/unparseable\.json$
ignore$
robots.txt
^lib/localization/locales/.*\.json$
^lib/localization/.*_test.go$
^test/.*$

View File

@@ -1,105 +1,76 @@
acs
aeacus
Aibrew
alrest
amazonbot
anthro
anubis
anubistest
Applebot
archlinux
asnc
asnchecker
asns
aspirational
atuin
azuretools
badregexes
bbolt
bdba
berr
bingbot
Bitcoin
bitrate
blogging
Bluesky
blueskybot
boi
botnet
botstopper
BPort
Brightbot
broked
byteslice
Bytespider
cachebuster
cachediptoasn
Caddyfile
caninetools
Cardyb
celchecker
celphase
cerr
CELPHASE
certresolver
cespare
CGNAT
cgr
chainguard
chall
challengemozilla
challengetest
checkpath
checkresult
chen
chibi
cidranger
ckie
ckies
cloudflare
Codespaces
confd
connnection
containerbuild
coreutils
Cotoyogi
Cromite
CRDs
crt
Cscript
daemonizing
DDOS
Debian
debrpm
decaymap
devcontainers
Diffbot
decompiling
discordapp
discordbot
distros
dnf
dnsbl
dnserr
domainhere
dracula
dronebl
droneblresponse
dropin
duckduckbot
eerror
ellenjoe
emacs
enbyware
etld
everyones
evilbot
evilsite
expressionorlist
externalagent
externalfetcher
extldflags
facebookgo
Factset
fastcgi
fediverse
ffprobe
finfos
Firecrawl
flagenv
Fordola
forgejo
@@ -117,41 +88,27 @@ goland
gomod
goodbot
googlebot
gopsutil
govulncheck
goyaml
GPG
GPT
gptbot
grpcprom
grw
Hashcash
hashrate
headermap
healthcheck
healthz
hec
hmc
hostable
htmlc
htmx
httpdebug
Huawei
hypertext
iaskspider
iat
ifm
Imagesift
imgproxy
impressum
inp
internets
IPTo
iptoasn
iss
isset
ivh
Jenomis
JGit
joho
journalctl
@@ -159,57 +116,41 @@ jshelter
JWTs
kagi
kagibot
Keyfunc
keikaku
keypair
KHTML
kinda
KUBECONFIG
lcj
ldflags
letsencrypt
Lexentale
lgbt
licend
licstart
lightpanda
limsa
LIMSA
Linting
linuxbrew
LLU
loadbalancer
lol
lominsa
LOMINSA
maintainership
malware
mcr
memes
metarefresh
metrix
mimi
Minfilia
mistralai
minica
Mojeek
mojeekbot
mozilla
nbf
nepeat
netsurf
nginx
nicksnyder
nobots
NONINFRINGEMENT
nosleep
OCOB
ogtags
omgili
omgilibot
openai
opengraph
openrc
oswald
onionservice
pag
palemoon
Pangu
parseable
passthrough
Patreon
@@ -221,105 +162,73 @@ pipefail
pki
podkova
podman
poststart
prebaked
privkey
promauto
promhttp
proofofwork
publicsuffix
pwcmd
pwuser
qualys
qwant
qwantbot
rac
rawler
rcvar
rdb
redhat
redir
redirectscheme
refactors
relayd
reputational
reqmeta
risc
ruleset
runlevels
RUnlock
runtimedir
sas
sasl
searchbot
Scumm
searx
sebest
secretplans
Semrush
Seo
selfsigned
setsebool
shellcheck
shirou
Sidetrade
simprint
sitemap
sls
sni
Sourceware
Spambot
sparkline
spyderbot
srv
stackoverflow
startprecmd
stoppostcmd
storetest
subgrid
subr
subrequest
SVCNAME
tagline
tarballs
tarrif
tbn
tbr
techaro
techarohq
templ
templruntime
testarea
Thancred
thoth
thothmock
Tik
Timpibot
torproject
traefik
uberspace
Unbreak
unbreakdocker
unifiedjs
unixhttpd
unmarshal
unparseable
uvx
UXP
valkey
Varis
Velen
vendored
vhosts
videotest
VKE
Vultr
waitloop
weblate
webmaster
webpage
websecure
websites
Webzio
wildbase
withthothmock
wordpress
Workaround
workdir
wpbot
xcaddy
Xeact
xeiaso
xeserv
@@ -328,7 +237,6 @@ xess
xff
XForwarded
XNG
XOB
XReal
yae
YAMLTo
@@ -336,8 +244,6 @@ yeet
yeetfile
yourdomain
yoursite
yyz
Zenos
zizmor
zombocom
zos

View File

@@ -20,6 +20,9 @@
# https://twitter.com/nyttypos/status/1898844061873639490
#\([A-Z][a-z]{2,}(?: [a-z]+){3,}\)\.\s
# Complete sentences shouldn't be in the middle of another sentence as a parenthetical.
(?<!\.)\.\),
# Complete sentences in parentheticals should not have a space before the period.
\s\.\)(?!.*\}\})
@@ -273,6 +276,14 @@
# Most people only have two hands. Reword.
\b(?i)on the third hand\b
# Should be `Open Graph`
# unless talking about a specific Open Graph implementation:
# - Java
# - Node
# - Py
# - Ruby
\bOpenGraph\b
# Should be `OpenShift`
\bOpenshift\b

View File

@@ -128,7 +128,3 @@ go install(?:\s+[a-z]+\.[-@\w/.]+)+
# ignore long runs of a single character:
\b([A-Za-z])\g{-1}{3,}\b
# hit-count: 1 file-count: 1
# microsoft
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*

View File

@@ -2,7 +2,7 @@ name: Docker image builds (pull requests)
on:
pull_request:
branches: ["main"]
branches: [ "main" ]
env:
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
@@ -11,32 +11,7 @@ permissions:
contents: read
jobs:
buildx-bake:
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build and push
id: build
uses: docker/bake-action@76f9fa3a758507623da19f6092dc4089a7e61592 # v6.6.0
with:
source: .
push: true
sbom: true
cache-from: type=gha
cache-to: type=gha,mode=max
set: |
osiris.tags=ttl.sh/techaro/pr-${{ github.event.number }}/osiris:24h
containerbuild:
build:
runs-on: ubuntu-24.04
steps:
- name: Checkout code
@@ -47,7 +22,7 @@ jobs:
persist-credentials: false
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@main
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3

View File

@@ -3,8 +3,8 @@ name: Docker image builds
on:
workflow_dispatch:
push:
branches: ["main"]
tags: ["v*"]
branches: [ "main" ]
tags: [ "v*" ]
env:
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
@@ -17,38 +17,7 @@ permissions:
pull-requests: write
jobs:
buildx-bake:
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
id: build
uses: docker/bake-action@76f9fa3a758507623da19f6092dc4089a7e61592 # v6.6.0
with:
source: .
push: true
sbom: true
cache-from: type=gha
cache-to: type=gha,mode=max
set: ""
containerbuild:
build:
runs-on: ubuntu-24.04
steps:
- name: Checkout code
@@ -63,7 +32,7 @@ jobs:
echo "IMAGE=ghcr.io/${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@main
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -86,7 +55,7 @@ jobs:
run: |
brew bundle
- name: Log into registry
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
@@ -108,8 +77,9 @@ jobs:
DOCKER_REPO: ${{ env.IMAGE }}
SLOG_LEVEL: debug
- name: Generate artifact attestation
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
uses: actions/attest-build-provenance@db473fddc028af60658334401dc6fa3ffd8669fd # v2.3.0
with:
subject-name: ${{ env.IMAGE }}
subject-digest: ${{ steps.build.outputs.digest }}

View File

@@ -22,7 +22,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
@@ -36,13 +36,10 @@ jobs:
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ghcr.io/techarohq/anubis/docs
tags: |
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
main
- name: Build and push
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0
with:
context: ./docs
cache-to: type=gha
@@ -52,15 +49,15 @@ jobs:
platforms: linux/amd64
push: true
- name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
- name: Apply k8s manifests to aeacus
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with:
args: apply -k docs/manifest
- name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
- name: Apply k8s manifests to aeacus
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with:

View File

@@ -2,7 +2,7 @@ name: Docs test build
on:
pull_request:
branches: ["main"]
branches: [ "main" ]
permissions:
contents: read
@@ -18,20 +18,17 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Docker meta
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ghcr.io/techarohq/anubis/docs
tags: |
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
main
images: ghcr.io/${{ github.repository }}/docs
- name: Build and push
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0
with:
context: ./docs
cache-to: type=gha

View File

@@ -25,7 +25,7 @@ jobs:
sudo apt-get install -y build-essential
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@main
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -82,7 +82,7 @@ jobs:
run: npm run test
- name: Lint with staticcheck
uses: dominikh/staticcheck-action@024238d2898c874f26d723e7d0ff4308c35589a2 # v1.4.0
uses: dominikh/staticcheck-action@fe1dd0c3658873b46f8c9bb3291096a617310ca6 # v1.3.1
with:
version: "latest"

View File

@@ -1,9 +1,8 @@
name: Package builds (stable)
on:
workflow_dispatch:
# release:
# types: [published]
release:
types: [published]
permissions:
contents: write
@@ -14,67 +13,67 @@ jobs:
#runs-on: alrest-techarohq
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
fetch-tags: true
fetch-depth: 0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
fetch-tags: true
fetch-depth: 0
- name: build essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: build essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@main
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
- name: Install Brew dependencies
run: |
brew bundle
- name: Install Brew dependencies
run: |
brew bundle
- name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-golang-
- name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-golang-
- name: install node deps
run: |
npm ci
- name: install node deps
run: |
npm ci
- name: Build Packages
run: |
go tool yeet
- name: Build Packages
run: |
go tool yeet
- name: Upload released artifacts
env:
GITHUB_TOKEN: ${{ github.TOKEN }}
RELEASE_VERSION: ${{github.event.release.tag_name}}
shell: bash
run: |
RELEASE="${RELEASE_VERSION}"
cd var
for file in *; do
gh release upload $RELEASE $file
done
- name: Upload released artifacts
env:
GITHUB_TOKEN: ${{ github.TOKEN }}
RELEASE_VERSION: ${{github.event.release.tag_name}}
shell: bash
run: |
RELEASE="${RELEASE_VERSION}"
cd var
for file in *; do
gh release upload $RELEASE $file
done

View File

@@ -27,7 +27,7 @@ jobs:
sudo apt-get install -y build-essential
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@main
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3

View File

@@ -1,45 +0,0 @@
name: Smoke tests
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
permissions:
contents: read
jobs:
smoke-test:
strategy:
matrix:
test:
- git-clone
- git-push
- healthcheck
- i18n
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: latest
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: stable
- uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
- name: Install utils
run: |
go install ./utils/cmd/...
- name: Run test
run: |
cd test/${{ matrix.test }}
backoff-retry --try-count 10 ./test.sh

View File

@@ -1,37 +0,0 @@
name: Regenerate ssh ci runner image
on:
# pull_request:
# branches: ["main"]
schedule:
- cron: "0 0 1,8,15,22 * *"
workflow_dispatch:
permissions:
pull-requests: write
contents: write
packages: write
jobs:
ssh-ci-rebuild:
if: github.repository == 'TecharoHQ/anubis'
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
- name: Build and push
run: |
cd ./test/ssh-ci
docker buildx bake --push

View File

@@ -1,43 +0,0 @@
name: SSH CI
on:
push:
branches: ["main"]
# pull_request:
# branches: ["main"]
permissions:
contents: read
jobs:
ssh:
if: github.repository == 'TecharoHQ/anubis'
runs-on: ubuntu-24.04
strategy:
matrix:
host:
- ubuntu@riscv64.techaro.lol
- ci@ppc64le.techaro.lol
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Install CI target SSH key
uses: shimataro/ssh-key-action@d4fffb50872869abe2d9a9098a6d9c5aa7d16be4 # v2.7.0
with:
key: ${{ secrets.CI_SSH_KEY }}
name: id_rsa
known_hosts: ${{ secrets.CI_SSH_KNOWN_HOSTS }}
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: stable
- name: Run CI
run: go run ./utils/cmd/backoff-retry bash test/ssh-ci/rigging.sh ${{ matrix.host }}
env:
GITHUB_RUN_ID: ${{ github.run_id }}

View File

@@ -21,7 +21,7 @@ jobs:
persist-credentials: false
- name: Install the latest version of uv
uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
- name: Run zizmor 🌈
run: uvx zizmor --format sarif . > results.sarif
@@ -29,7 +29,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
uses: github/codeql-action/upload-sarif@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
with:
sarif_file: results.sarif
category: zizmor

2
.gitignore vendored
View File

@@ -20,5 +20,3 @@ node_modules
# how does this get here
doc/VERSION
web/static/locales/*.json

View File

@@ -1,12 +0,0 @@
{
"recommendations": [
"esbenp.prettier-vscode",
"ms-azuretools.vscode-containers",
"golang.go",
"unifiedjs.vscode-mdx",
"a-h.templ",
"redhat.vscode-yaml",
"hashicorp.hcl",
"fredwangwang.vscode-hcl-format"
]
}

27
.vscode/launch.json vendored
View File

@@ -1,27 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Package",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${fileDirname}"
},
{
"name": "Anubis [dev]",
"command": "npm run dev",
"request": "launch",
"type": "node-terminal"
},
{
"name": "Start Docs",
"command": "cd docs && npm ci && npm run start",
"request": "launch",
"type": "node-terminal"
}
]
}

19
.vscode/settings.json vendored
View File

@@ -11,24 +11,5 @@
"zig": false,
"javascript": false,
"properties": false
},
"[markdown]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"[mdx]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"[nunjucks]": {
"editor.wordWrap": "wordWrapColumn",
"editor.wordWrapColumn": 80,
"editor.wordBasedSuggestions": "off"
},
"cSpell.enabledFileTypes": {
"mdx": true,
"md": true
}
}

View File

@@ -18,7 +18,6 @@ assets: deps
build: assets
$(GO) build -o ./var/anubis ./cmd/anubis
$(GO) build -o ./var/robots2policy ./cmd/robots2policy
@echo "Anubis is now built to ./var/anubis"
lint: assets
@@ -28,7 +27,6 @@ lint: assets
prebaked-build:
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
$(GO) build -o ./var/robots2policy -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/robots2policy
test: assets
$(GO) test ./...

View File

@@ -9,56 +9,19 @@
![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/TecharoHQ/anubis)
![language count](https://img.shields.io/github/languages/count/TecharoHQ/anubis)
![repo size](https://img.shields.io/github/repo-size/TecharoHQ/anubis)
[![GitHub Sponsors](https://img.shields.io/github/sponsors/Xe)](https://github.com/sponsors/Xe)
## Sponsors
Anubis is brought to you by sponsors and donors like:
### Diamond Tier
<a href="https://www.raptorcs.com/content/base/products.html">
<img src="./docs/static/img/sponsors/raptor-computing-logo.webp" alt="Raptor Computing Systems" height=64 />
</a>
### Gold Tier
<a href="https://distrust.co?utm_campaign=github&utm_medium=referral&utm_content=anubis">
<img src="./docs/static/img/sponsors/distrust-logo.webp" alt="Distrust" height="64">
</a>
<a href="https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh">
<img src="./docs/static/img/sponsors/terminal-trove.webp" alt="Terminal Trove" height="64">
</a>
<a href="https://canine.tools?utm_campaign=github&utm_medium=referral&utm_content=anubis">
<img src="./docs/static/img/sponsors/caninetools-logo.webp" alt="canine.tools" height="64">
</a>
<a href="https://weblate.org/">
<img src="./docs/static/img/sponsors/weblate-logo.webp" alt="Weblate" height="64">
</a>
<a href="https://uberspace.de/">
<img src="./docs/static/img/sponsors/uberspace-logo.webp" alt="Uberspace" height="64">
</a>
<a href="https://wildbase.xyz/">
<img src="./docs/static/img/sponsors/wildbase-logo.webp" alt="Wildbase" height="64">
</a>
<a href="https://emma.pet">
<img
src="./docs/static/img/sponsors/nepeat-logo.webp"
alt="Cat eyes over the word Emma in a serif font"
height="64"
/>
</a>
<a href="https://fabulous.systems/">
<img
src="./docs/static/img/sponsors/fabulous-systems.webp"
alt="Cat eyes over the word Emma in a serif font"
height="64"
/>
</a>
[![Distrust](./docs/static/img/sponsors/distrust-logo.webp)](https://distrust.co?utm_campaign=github&utm_medium=referral&utm_content=anubis)
[![Terminal Trove](./docs/static/img/sponsors/terminal-trove.webp)](https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh)
[![canine.tools](./docs/static/img/sponsors/caninetools-logo.webp)](https://canine.tools?utm_campaign=github&utm_medium=referral&utm_content=anubis)
[![Weblate](./docs/static/img/sponsors/weblate-logo.webp)](https://weblate.org/?utm_campaign=github&utm_medium=referral&utm_content=anubis)
## Overview
Anubis is a Web AI Firewall Utility that [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using one or more challenges in order to protect upstream resources from scraper bots.
Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a proof-of-work challenge in order to protect upstream resources from scraper bots.
This program is designed to help protect the small internet from the endless storm of requests that flood in from AI companies. Anubis is as lightweight as possible to ensure that everyone can afford to protect the communities closest to them.

View File

@@ -1 +1 @@
1.21.0
1.18.0

View File

@@ -11,11 +11,12 @@ var Version = "devel"
// CookieName is the name of the cookie that Anubis uses in order to validate
// access.
var CookieName = "techaro.lol-anubis-auth"
const CookieName = "techaro.lol-anubis-auth"
// TestCookieName is the name of the cookie that Anubis uses in order to check
// if cookies are enabled on the client's browser.
var TestCookieName = "techaro.lol-anubis-cookie-verification"
// WithDomainCookieName is the name that is prepended to the per-domain cookie used when COOKIE_DOMAIN is set.
const WithDomainCookieName = "techaro.lol-anubis-auth-for-"
const TestCookieName = "techaro.lol-anubis-cookie-test-if-you-block-this-anubis-wont-work"
// CookieDefaultExpirationTime is the amount of time before the cookie/JWT expires.
const CookieDefaultExpirationTime = 7 * 24 * time.Hour
@@ -32,7 +33,3 @@ const APIPrefix = "/.within.website/x/cmd/anubis/api/"
// DefaultDifficulty is the default "difficulty" (number of leading zeroes)
// that must be met by the client in order to pass the challenge.
const DefaultDifficulty = 4
// ForcedLanguage is the language being used instead of the one of the request's Accept-Language header
// if being set.
var ForcedLanguage = ""

View File

@@ -38,7 +38,6 @@ import (
"github.com/facebookgo/flagenv"
_ "github.com/joho/godotenv/autoload"
"github.com/prometheus/client_golang/prometheus/promhttp"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
var (
@@ -47,13 +46,8 @@ var (
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
cookieDynamicDomain = flag.Bool("cookie-dynamic-domain", false, "if set, automatically set the cookie Domain value based on the request domain")
cookieExpiration = flag.Duration("cookie-expiration-time", anubis.CookieDefaultExpirationTime, "The amount of time the authorization cookie is valid for")
cookiePrefix = flag.String("cookie-prefix", "techaro.lol-anubis", "prefix for browser cookies created by Anubis")
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
forcedLanguage = flag.String("forced-language", "", "if set, this language is being used instead of the one from the request's Accept-Language header")
hs512Secret = flag.String("hs512-secret", "", "secret used to sign JWTs, uses ed25519 if not set")
cookieSecure = flag.Bool("cookie-secure", true, "if true, sets the secure flag on Anubis cookies")
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
ed25519PrivateKeyHexFile = flag.String("ed25519-private-key-hex-file", "", "file name containing value for ed25519-private-key-hex")
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
@@ -63,9 +57,7 @@ var (
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
redirectDomains = flag.String("redirect-domains", "", "list of domains separated by commas which anubis is allowed to redirect to. Leaving this unset allows any domain.")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
stripBasePrefix = flag.Bool("strip-base-prefix", false, "if true, strips the base prefix from requests forwarded to the target server")
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to, set to an empty string to disable proxying when only using auth request")
targetSNI = flag.String("target-sni", "", "if set, the value of the TLS handshake hostname when forwarding requests to the target")
targetHost = flag.String("target-host", "", "if set, the value of the Host header when forwarding requests to the target")
targetInsecureSkipVerify = flag.Bool("target-insecure-skip-verify", false, "if true, skips TLS validation for the backend")
healthcheck = flag.Bool("healthcheck", false, "run a health check against Anubis")
@@ -76,12 +68,9 @@ var (
ogCacheConsiderHost = flag.Bool("og-cache-consider-host", false, "enable or disable the use of the host in the Open Graph tag cache")
extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder")
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
versionFlag = flag.Bool("version", false, "print Anubis version")
xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For")
thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to")
thothURL = flag.String("thoth-url", "", "if set, URL for Thoth, the IP reputation database for Anubis")
thothToken = flag.String("thoth-token", "", "if set, API token for Thoth, the IP reputation database for Anubis")
thothURL = flag.String("thoth-url", "", "if set, URL for Thoth, the IP reputation database for Anubis")
thothToken = flag.String("thoth-token", "", "if set, API token for Thoth, the IP reputation database for Anubis")
)
func keyFromHex(value string) (ed25519.PrivateKey, error) {
@@ -98,7 +87,7 @@ func keyFromHex(value string) (ed25519.PrivateKey, error) {
}
func doHealthCheck() error {
resp, err := http.Get("http://localhost" + *metricsBind + "/healthz")
resp, err := http.Get("http://localhost" + *metricsBind + anubis.BasePrefix + "/metrics")
if err != nil {
return fmt.Errorf("failed to fetch metrics: %w", err)
}
@@ -111,41 +100,8 @@ func doHealthCheck() error {
return nil
}
// parseBindNetFromAddr determine bind network and address based on the given network and address.
func parseBindNetFromAddr(address string) (string, string) {
defaultScheme := "http://"
if !strings.Contains(address, "://") {
if strings.HasPrefix(address, ":") {
address = defaultScheme + "localhost" + address
} else {
address = defaultScheme + address
}
}
bindUri, err := url.Parse(address)
if err != nil {
log.Fatal(fmt.Errorf("failed to parse bind URL: %w", err))
}
switch bindUri.Scheme {
case "unix":
return "unix", bindUri.Path
case "tcp", "http", "https":
return "tcp", bindUri.Host
default:
log.Fatal(fmt.Errorf("unsupported network scheme %s in address %s", bindUri.Scheme, address))
}
return "", address
}
func setupListener(network string, address string) (net.Listener, string) {
formattedAddress := ""
if network == "" {
// keep compatibility
network, address = parseBindNetFromAddr(address)
}
switch network {
case "unix":
formattedAddress = "unix:" + address
@@ -185,7 +141,7 @@ func setupListener(network string, address string) (net.Listener, string) {
return listener, formattedAddress
}
func makeReverseProxy(target string, targetSNI string, targetHost string, insecureSkipVerify bool) (http.Handler, error) {
func makeReverseProxy(target string, targetHost string, insecureSkipVerify bool) (http.Handler, error) {
targetUri, err := url.Parse(target)
if err != nil {
return nil, fmt.Errorf("failed to parse target URL: %w", err)
@@ -207,14 +163,10 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu
transport.RegisterProtocol("unix", libanubis.UnixRoundTripper{Transport: transport})
}
if insecureSkipVerify || targetSNI != "" {
transport.TLSClientConfig = &tls.Config{}
if insecureSkipVerify {
slog.Warn("TARGET_INSECURE_SKIP_VERIFY is set to true, TLS certificate validation will not be performed", "target", target)
transport.TLSClientConfig.InsecureSkipVerify = true
}
if targetSNI != "" {
transport.TLSClientConfig.ServerName = targetSNI
if insecureSkipVerify {
slog.Warn("TARGET_INSECURE_SKIP_VERIFY is set to true, TLS certificate validation will not be performed", "target", target)
transport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: true,
}
}
@@ -232,25 +184,25 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu
return rp, nil
}
func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) {
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for {
select {
case <-ticker.C:
s.CleanupDecayMap()
case <-ctx.Done():
return
}
}
}
func main() {
flagenv.Parse()
flag.Parse()
if *versionFlag {
fmt.Println("Anubis", anubis.Version)
return
}
internal.InitSlog(*slogLevel)
internal.SetHealth("anubis", healthv1.HealthCheckResponse_NOT_SERVING)
if *healthcheck {
log.Println("running healthcheck")
if err := doHealthCheck(); err != nil {
log.Fatal(err)
}
return
}
if *extractResources != "" {
if err := extractEmbedFS(data.BotPolicies, ".", *extractResources); err != nil {
@@ -263,40 +215,21 @@ func main() {
return
}
// install signal handler
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
wg := new(sync.WaitGroup)
if *metricsBind != "" {
wg.Add(1)
go metricsServer(ctx, wg.Done)
}
var rp http.Handler
// when using anubis via Systemd and environment variables, then it is not possible to set targe to an empty string but only to space
if strings.TrimSpace(*target) != "" {
var err error
rp, err = makeReverseProxy(*target, *targetSNI, *targetHost, *targetInsecureSkipVerify)
rp, err = makeReverseProxy(*target, *targetHost, *targetInsecureSkipVerify)
if err != nil {
log.Fatalf("can't make reverse proxy: %v", err)
}
}
if *cookieDomain != "" && *cookieDynamicDomain {
log.Fatalf("you can't set COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN at the same time")
}
ctx := context.Background()
// Thoth configuration
switch {
case *thothURL != "" && *thothToken == "":
slog.Warn("THOTH_URL is set but no THOTH_TOKEN is set")
case *thothURL == "" && *thothToken != "":
slog.Warn("THOTH_TOKEN is set but no THOTH_URL is set")
case *thothURL != "" && *thothToken != "":
if *thothURL != "" && *thothToken != "" {
slog.Debug("connecting to Thoth")
thothClient, err := thoth.New(ctx, *thothURL, *thothToken, *thothInsecure)
thothClient, err := thoth.New(ctx, *thothURL, *thothToken)
if err != nil {
log.Fatalf("can't dial thoth at %s: %v", *thothURL, err)
}
@@ -332,20 +265,12 @@ func main() {
} else if strings.HasSuffix(*basePrefix, "/") {
log.Fatalf("[misconfiguration] base-prefix must not end with a slash")
}
if *stripBasePrefix && *basePrefix == "" {
log.Fatalf("[misconfiguration] strip-base-prefix is set to true, but base-prefix is not set, " +
"this may result in unexpected behavior")
}
var ed25519Priv ed25519.PrivateKey
if *hs512Secret != "" && (*ed25519PrivateKeyHex != "" || *ed25519PrivateKeyHexFile != "") {
log.Fatal("do not specify both HS512 and ED25519 secrets")
} else if *hs512Secret != "" {
ed25519Priv = ed25519.PrivateKey(*hs512Secret)
} else if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
var priv ed25519.PrivateKey
if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
log.Fatal("do not specify both ED25519_PRIVATE_KEY_HEX and ED25519_PRIVATE_KEY_HEX_FILE")
} else if *ed25519PrivateKeyHex != "" {
ed25519Priv, err = keyFromHex(*ed25519PrivateKeyHex)
priv, err = keyFromHex(*ed25519PrivateKeyHex)
if err != nil {
log.Fatalf("failed to parse and validate ED25519_PRIVATE_KEY_HEX: %v", err)
}
@@ -355,12 +280,12 @@ func main() {
log.Fatalf("failed to read ED25519_PRIVATE_KEY_HEX_FILE %s: %v", *ed25519PrivateKeyHexFile, err)
}
ed25519Priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
if err != nil {
log.Fatalf("failed to parse and validate content of ED25519_PRIVATE_KEY_HEX_FILE: %v", err)
}
} else {
_, ed25519Priv, err = ed25519.GenerateKey(rand.Reader)
_, priv, err = ed25519.GenerateKey(rand.Reader)
if err != nil {
log.Fatalf("failed to generate ed25519 key: %v", err)
}
@@ -382,47 +307,42 @@ func main() {
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
}
anubis.CookieName = *cookiePrefix + "-auth"
anubis.TestCookieName = *cookiePrefix + "-cookie-verification"
anubis.ForcedLanguage = *forcedLanguage
// If OpenGraph configuration values are not set in the config file, use the
// values from flags / envvars.
if !policy.OpenGraph.Enabled {
policy.OpenGraph.Enabled = *ogPassthrough
policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
policy.OpenGraph.TimeToLive = *ogTimeToLive
policy.OpenGraph.Override = map[string]string{}
}
s, err := libanubis.New(libanubis.Options{
BasePrefix: *basePrefix,
StripBasePrefix: *stripBasePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,
ED25519PrivateKey: ed25519Priv,
HS512Secret: []byte(*hs512Secret),
CookieDomain: *cookieDomain,
CookieDynamicDomain: *cookieDynamicDomain,
CookieExpiration: *cookieExpiration,
CookiePartitioned: *cookiePartitioned,
RedirectDomains: redirectDomainsList,
Target: *target,
WebmasterEmail: *webmasterEmail,
OpenGraph: policy.OpenGraph,
CookieSecure: *cookieSecure,
BasePrefix: *basePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,
PrivateKey: priv,
CookieDomain: *cookieDomain,
CookieExpiration: *cookieExpiration,
CookiePartitioned: *cookiePartitioned,
OGPassthrough: *ogPassthrough,
OGTimeToLive: *ogTimeToLive,
RedirectDomains: redirectDomainsList,
Target: *target,
WebmasterEmail: *webmasterEmail,
OGCacheConsidersHost: *ogCacheConsiderHost,
})
if err != nil {
log.Fatalf("can't construct libanubis.Server: %v", err)
}
wg := new(sync.WaitGroup)
// install signal handler
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
if *metricsBind != "" {
wg.Add(1)
go metricsServer(ctx, wg.Done)
}
go startDecayMapCleanup(ctx, s)
var h http.Handler
h = s
h = internal.RemoteXRealIP(*useRemoteAddress, *bindNetwork, h)
h = internal.XForwardedForToXRealIP(h)
h = internal.XForwardedForUpdate(*xffStripPrivate, h)
h = internal.JA4H(h)
h = internal.XForwardedForUpdate(h)
srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()}
listener, listenerUrl := setupListener(*bindNetwork, *bind)
@@ -451,8 +371,6 @@ func main() {
}
}()
internal.SetHealth("anubis", healthv1.HealthCheckResponse_SERVING)
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
log.Fatal(err)
}
@@ -463,30 +381,20 @@ func metricsServer(ctx context.Context, done func()) {
defer done()
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
st, ok := internal.GetHealth("anubis")
if !ok {
slog.Error("health service anubis does not exist, file a bug")
}
switch st {
case healthv1.HealthCheckResponse_NOT_SERVING:
http.Error(w, "NOT OK", http.StatusInternalServerError)
return
case healthv1.HealthCheckResponse_SERVING:
fmt.Fprintln(w, "OK")
return
default:
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
return
}
})
mux.Handle(anubis.BasePrefix+"/metrics", promhttp.Handler())
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
slog.Debug("listening for metrics", "url", metricsUrl)
if *healthcheck {
log.Println("running healthcheck")
if err := doHealthCheck(); err != nil {
log.Fatal(err)
}
return
}
go func() {
<-ctx.Done()
c, cancel := context.WithTimeout(context.Background(), 5*time.Second)
@@ -518,11 +426,11 @@ func extractEmbedFS(fsys embed.FS, root string, destDir string) error {
return os.MkdirAll(destPath, 0o700)
}
embeddedData, err := fs.ReadFile(fsys, path)
data, err := fs.ReadFile(fsys, path)
if err != nil {
return err
}
return os.WriteFile(destPath, embeddedData, 0o644)
return os.WriteFile(destPath, data, 0o644)
})
}

View File

@@ -1,39 +0,0 @@
package config
import (
"errors"
"fmt"
"net"
)
var (
ErrInvalidHostpost = errors.New("bind: invalid host:port")
)
type Bind struct {
HTTP string `hcl:"http"`
HTTPS string `hcl:"https"`
Metrics string `hcl:"metrics"`
}
func (b *Bind) Valid() error {
var errs []error
if _, _, err := net.SplitHostPort(b.HTTP); err != nil {
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.HTTP, err))
}
if _, _, err := net.SplitHostPort(b.HTTPS); err != nil {
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.HTTPS, err))
}
if _, _, err := net.SplitHostPort(b.Metrics); err != nil {
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.Metrics, err))
}
if len(errs) != 0 {
return errors.Join(errs...)
}
return nil
}

View File

@@ -1,55 +0,0 @@
package config
import (
"errors"
"net"
"testing"
)
func TestBindValid(t *testing.T) {
for _, tt := range []struct {
name string
precondition func(t *testing.T)
bind Bind
err error
}{
{
name: "basic",
precondition: nil,
bind: Bind{
HTTP: ":8081",
HTTPS: ":8082",
Metrics: ":8083",
},
err: nil,
},
{
name: "invalid ports",
precondition: func(t *testing.T) {
ln, err := net.Listen("tcp", ":8081")
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { ln.Close() })
},
bind: Bind{
HTTP: "",
HTTPS: "",
Metrics: "",
},
err: ErrInvalidHostpost,
},
} {
t.Run(tt.name, func(t *testing.T) {
if tt.precondition != nil {
tt.precondition(t)
}
if err := tt.bind.Valid(); !errors.Is(err, tt.err) {
t.Logf("want: %v", tt.err)
t.Logf("got: %v", err)
t.Error("got wrong error from validation function")
}
})
}
}

View File

@@ -1,31 +0,0 @@
package config
import (
"errors"
"fmt"
)
type Toplevel struct {
Bind Bind `hcl:"bind,block"`
Domains []Domain `hcl:"domain,block"`
}
func (t *Toplevel) Valid() error {
var errs []error
if err := t.Bind.Valid(); err != nil {
errs = append(errs, fmt.Errorf("invalid bind block:\n%w", err))
}
for _, d := range t.Domains {
if err := d.Valid(); err != nil {
errs = append(errs, fmt.Errorf("when parsing domain %s: %w", d.Name, err))
}
}
if len(errs) != 0 {
return fmt.Errorf("invalid configuration file:\n%w", errors.Join(errs...))
}
return nil
}

View File

@@ -1,66 +0,0 @@
package config
import (
"errors"
"fmt"
"net/url"
"golang.org/x/net/idna"
)
var (
ErrInvalidDomainName = errors.New("domain: name is invalid")
ErrInvalidDomainTLSConfig = errors.New("domain: TLS config is invalid")
ErrInvalidURL = errors.New("invalid URL")
ErrInvalidURLScheme = errors.New("URL has invalid scheme")
)
type Domain struct {
Name string `hcl:"name,label"`
TLS TLS `hcl:"tls,block"`
Target string `hcl:"target"`
InsecureSkipVerify bool `hcl:"insecure_skip_verify,optional"`
HealthTarget string `hcl:"health_target"`
}
func (d Domain) Valid() error {
var errs []error
if _, err := idna.Lookup.ToASCII(d.Name); err != nil {
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidDomainName, d.Name, err))
}
if err := d.TLS.Valid(); err != nil {
errs = append(errs, fmt.Errorf("%w: %w", ErrInvalidDomainTLSConfig, err))
}
if err := isURLValid(d.Target); err != nil {
errs = append(errs, fmt.Errorf("target has %w %q: %w", ErrInvalidURL, d.Target, err))
}
if err := isURLValid(d.HealthTarget); err != nil {
errs = append(errs, fmt.Errorf("health_target has %w %q: %w", ErrInvalidURL, d.HealthTarget, err))
}
if len(errs) != 0 {
return errors.Join(errs...)
}
return nil
}
func isURLValid(input string) error {
u, err := url.Parse(input)
if err != nil {
return err
}
switch u.Scheme {
case "http", "https", "h2c", "unix":
// do nothing
default:
return fmt.Errorf("%w %s has scheme %s (want http, https, h2c, unix)", ErrInvalidURLScheme, input, u.Scheme)
}
return nil
}

View File

@@ -1,89 +0,0 @@
package config
import (
"errors"
"testing"
)
func TestDomainValid(t *testing.T) {
for _, tt := range []struct {
name string
input Domain
err error
}{
{
name: "simple happy path",
input: Domain{
Name: "anubis.techaro.lol",
TLS: TLS{
Cert: "./testdata/tls/selfsigned.crt",
Key: "./testdata/tls/selfsigned.key",
},
Target: "http://localhost:3000",
HealthTarget: "http://localhost:9091/healthz",
},
},
{
name: "invalid domain name",
input: Domain{
Name: "\uFFFD.techaro.lol",
TLS: TLS{
Cert: "./testdata/tls/selfsigned.crt",
Key: "./testdata/tls/selfsigned.key",
},
Target: "http://localhost:3000",
HealthTarget: "http://localhost:9091/healthz",
},
err: ErrInvalidDomainName,
},
{
name: "invalid tls config",
input: Domain{
Name: "anubis.techaro.lol",
TLS: TLS{
Cert: "./testdata/tls/invalid.crt",
Key: "./testdata/tls/invalid.key",
},
Target: "http://localhost:3000",
HealthTarget: "http://localhost:9091/healthz",
},
err: ErrInvalidDomainTLSConfig,
},
{
name: "invalid URL",
input: Domain{
Name: "anubis.techaro.lol",
TLS: TLS{
Cert: "./testdata/tls/selfsigned.crt",
Key: "./testdata/tls/selfsigned.key",
},
Target: "file://[::1:3000",
HealthTarget: "file://[::1:9091/healthz",
},
err: ErrInvalidURL,
},
{
name: "wrong URL scheme",
input: Domain{
Name: "anubis.techaro.lol",
TLS: TLS{
Cert: "./testdata/tls/selfsigned.crt",
Key: "./testdata/tls/selfsigned.key",
},
Target: "file://localhost:3000",
HealthTarget: "file://localhost:9091/healthz",
},
err: ErrInvalidURLScheme,
},
} {
t.Run(tt.name, func(t *testing.T) {
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
t.Logf("want: %v", tt.err)
t.Logf("got: %v", err)
t.Error("got wrong error from validation function")
} else {
t.Log(err)
}
})
}
}

View File

@@ -1 +0,0 @@
aorsentaeiorsntoiearnstoieanrsoietnaioresntoeiar

View File

@@ -1 +0,0 @@
aorsentaeiorsntoiearnstoieanrsoietnaioresntoeiar

View File

@@ -1,11 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIBnzCCAVGgAwIBAgIUAw8funCpiB3ZAAPoWdSCWnzbsFIwBQYDK2VwMEUxCzAJ
BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
dCBXaWRnaXRzIFB0eSBMdGQwHhcNMjUwNzE4MTkwMjM1WhcNMjUwODE3MTkwMjM1
WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwY
SW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEAcXDHXV3vgpvjtTaz
s0Oj/73rMr06bhyGGhleYS1MNoWjUzBRMB0GA1UdDgQWBBQwmfKPthucFHB6Wfgz
2Nj5nkMQOjAfBgNVHSMEGDAWgBQwmfKPthucFHB6Wfgz2Nj5nkMQOjAPBgNVHRMB
Af8EBTADAQH/MAUGAytlcANBALBYbULlGwB7Ro0UTgUoQDNxEvayn3qzVFHIt7lC
/2/NzNBkk4yPT+a4mbRuydxLkv+JIvmQbarZxpksYnWlCAM=
-----END CERTIFICATE-----

View File

@@ -1,3 +0,0 @@
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEIOHKoX22Mha6SnnpLm34fSSfTUDbRiDCi6N1nOgTOlds
-----END PRIVATE KEY-----

View File

@@ -1,40 +0,0 @@
package config
import (
"crypto/tls"
"errors"
"fmt"
"os"
)
var (
ErrCantReadTLS = errors.New("tls: can't read TLS")
ErrInvalidTLSKeypair = errors.New("tls: can't parse TLS keypair")
)
type TLS struct {
Cert string `hcl:"cert"`
Key string `hcl:"key"`
}
func (t TLS) Valid() error {
var errs []error
if _, err := os.Stat(t.Cert); err != nil {
errs = append(errs, fmt.Errorf("%w certificate %s: %w", ErrCantReadTLS, t.Cert, err))
}
if _, err := os.Stat(t.Key); err != nil {
errs = append(errs, fmt.Errorf("%w key %s: %w", ErrCantReadTLS, t.Key, err))
}
if _, err := tls.LoadX509KeyPair(t.Cert, t.Key); err != nil {
errs = append(errs, fmt.Errorf("%w (%s, %s): %w", ErrInvalidTLSKeypair, t.Cert, t.Key, err))
}
if len(errs) != 0 {
return errors.Join(errs...)
}
return nil
}

View File

@@ -1,48 +0,0 @@
package config
import (
"errors"
"testing"
)
func TestTLSValid(t *testing.T) {
for _, tt := range []struct {
name string
input TLS
err error
}{
{
name: "simple selfsigned",
input: TLS{
Cert: "./testdata/tls/selfsigned.crt",
Key: "./testdata/tls/selfsigned.key",
},
},
{
name: "files don't exist",
input: TLS{
Cert: "./testdata/tls/nonexistent.crt",
Key: "./testdata/tls/nonexistent.key",
},
err: ErrCantReadTLS,
},
{
name: "invalid keypair",
input: TLS{
Cert: "./testdata/tls/invalid.crt",
Key: "./testdata/tls/invalid.key",
},
err: ErrInvalidTLSKeypair,
},
} {
t.Run(tt.name, func(t *testing.T) {
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
t.Logf("want: %v", tt.err)
t.Logf("got: %v", err)
t.Error("got wrong error from validation function")
} else {
t.Log(err)
}
})
}
}

View File

@@ -1,85 +0,0 @@
package entrypoint
import (
"context"
"fmt"
"log/slog"
"net"
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
"github.com/TecharoHQ/anubis/internal"
"github.com/hashicorp/hcl/v2/hclsimple"
"golang.org/x/sync/errgroup"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
type Options struct {
ConfigFname string
}
func Main(ctx context.Context, opts Options) error {
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
var cfg config.Toplevel
if err := hclsimple.DecodeFile(opts.ConfigFname, nil, &cfg); err != nil {
return fmt.Errorf("can't read configuration file %s:\n\n%w", opts.ConfigFname, err)
}
if err := cfg.Valid(); err != nil {
return fmt.Errorf("configuration file %s is invalid:\n\n%w", opts.ConfigFname, err)
}
rtr, err := NewRouter(cfg)
if err != nil {
return err
}
rtr.opts = opts
go rtr.backgroundReloadConfig(ctx)
g, gCtx := errgroup.WithContext(ctx)
// HTTP
g.Go(func() error {
ln, err := net.Listen("tcp", cfg.Bind.HTTP)
if err != nil {
return fmt.Errorf("(HTTP) can't bind to tcp %s: %w", cfg.Bind.HTTP, err)
}
defer ln.Close()
go func(ctx context.Context) {
<-ctx.Done()
ln.Close()
}(ctx)
slog.Info("listening", "for", "http", "bind", cfg.Bind.HTTP)
return rtr.HandleHTTP(gCtx, ln)
})
// HTTPS
g.Go(func() error {
ln, err := net.Listen("tcp", cfg.Bind.HTTPS)
if err != nil {
return fmt.Errorf("(https) can't bind to tcp %s: %w", cfg.Bind.HTTPS, err)
}
defer ln.Close()
go func(ctx context.Context) {
<-ctx.Done()
ln.Close()
}(ctx)
slog.Info("listening", "for", "https", "bind", cfg.Bind.HTTPS)
return rtr.HandleHTTPS(gCtx, ln)
})
// Metrics
g.Go(func() error {
return rtr.ListenAndServeMetrics(gCtx, cfg.Bind.Metrics)
})
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
return g.Wait()
}

View File

@@ -1,93 +0,0 @@
package entrypoint
import (
"context"
"errors"
"net"
"net/http"
"os"
"path/filepath"
"testing"
"time"
)
func TestMainGoodConfig(t *testing.T) {
files, err := os.ReadDir("./testdata/good")
if err != nil {
t.Fatal(err)
}
for _, st := range files {
t.Run(st.Name(), func(t *testing.T) {
ctx, cancel := context.WithCancel(t.Context())
cfg := loadConfig(t, filepath.Join("testdata", "good", st.Name()))
go func(ctx context.Context) {
if err := Main(ctx, Options{
ConfigFname: filepath.Join("testdata", "good", st.Name()),
}); err != nil {
var netOpErr *net.OpError
switch {
case errors.Is(err, context.Canceled):
// Context was canceled, this is expected
return
case errors.As(err, &netOpErr):
// Network operation error occurred
t.Logf("Network operation error: %v", netOpErr)
return
case errors.Is(err, http.ErrServerClosed):
// Server was closed, this is expected
return
default:
// Other unexpected error
panic(err)
}
}
}(ctx)
wait := 5 * time.Millisecond
for i := range make([]struct{}, 10) {
if i != 0 {
time.Sleep(wait)
wait = wait * 2
}
t.Logf("try %d (wait=%s)", i+1, wait)
resp, err := http.Get("http://localhost" + cfg.Bind.Metrics + "/readyz")
if err != nil {
continue
}
if resp.StatusCode != http.StatusOK {
continue
}
cancel()
return
}
t.Fatal("router initialization did not work")
})
}
}
func TestMainBadConfig(t *testing.T) {
files, err := os.ReadDir("./testdata/bad")
if err != nil {
t.Fatal(err)
}
for _, st := range files {
t.Run(st.Name(), func(t *testing.T) {
if err := Main(t.Context(), Options{
ConfigFname: filepath.Join("testdata", "bad", st.Name()),
}); err == nil {
t.Error("wanted an error but got none")
} else {
t.Log(err)
}
})
}
}

View File

@@ -1,35 +0,0 @@
package entrypoint
import (
"crypto/tls"
"net"
"net/http"
"net/http/httputil"
"net/url"
"golang.org/x/net/http2"
)
func newH2CReverseProxy(target *url.URL) *httputil.ReverseProxy {
target.Scheme = "http"
director := func(req *http.Request) {
req.URL.Scheme = target.Scheme
req.URL.Host = target.Host
req.Host = target.Host
}
// Use h2c transport
transport := &http2.Transport{
AllowHTTP: true,
DialTLS: func(network, addr string, cfg *tls.Config) (net.Conn, error) {
// Just do plain TCP (h2c)
return net.Dial(network, addr)
},
}
return &httputil.ReverseProxy{
Director: director,
Transport: transport,
}
}

View File

@@ -1,51 +0,0 @@
package entrypoint
import (
"net/http"
"net/http/httptest"
"net/url"
"testing"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
)
func newH2cServer(t *testing.T, h http.Handler) *httptest.Server {
t.Helper()
h2s := &http2.Server{}
srv := httptest.NewServer(h2c.NewHandler(h, h2s))
t.Cleanup(func() {
srv.Close()
})
return srv
}
func TestH2CReverseProxy(t *testing.T) {
h := &ackHandler{}
srv := newH2cServer(t, h)
u, err := url.Parse(srv.URL)
if err != nil {
t.Fatal(err)
}
rp := httptest.NewServer(newH2CReverseProxy(u))
defer rp.Close()
resp, err := rp.Client().Get(rp.URL)
if err != nil {
t.Fatal(err)
}
if resp.StatusCode != http.StatusOK {
t.Errorf("wrong status code from reverse proxy: %d", resp.StatusCode)
}
if !h.ack {
t.Error("h2c handler was not executed")
}
}

View File

@@ -1,72 +0,0 @@
package entrypoint
import (
"bytes"
"fmt"
"log/slog"
"net/http"
"sort"
"github.com/TecharoHQ/anubis/internal"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
func healthz(w http.ResponseWriter, r *http.Request) {
services, err := internal.HealthSrv.List(r.Context(), nil)
if err != nil {
slog.Error("can't get list of services", "err", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
var keys []string
for k := range services.Statuses {
if k == "" {
continue
}
keys = append(keys, k)
}
sort.Strings(keys)
var msg bytes.Buffer
var healthy bool = true
for _, k := range keys {
st := services.Statuses[k].GetStatus()
fmt.Fprintf(&msg, "%s: %s\n", k, st)
switch st {
case healthv1.HealthCheckResponse_SERVING:
// do nothing
default:
healthy = false
}
}
if !healthy {
w.WriteHeader(http.StatusInternalServerError)
}
w.Write(msg.Bytes())
}
func readyz(w http.ResponseWriter, r *http.Request) {
st, ok := internal.GetHealth("osiris")
if !ok {
slog.Error("health service osiris does not exist, file a bug")
http.Error(w, "health service osiris does not exist", http.StatusExpectationFailed)
}
switch st {
case healthv1.HealthCheckResponse_NOT_SERVING:
http.Error(w, "NOT OK", http.StatusInternalServerError)
return
case healthv1.HealthCheckResponse_SERVING:
fmt.Fprintln(w, "OK")
return
default:
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
return
}
}

View File

@@ -1,66 +0,0 @@
package entrypoint
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/TecharoHQ/anubis/internal"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
func TestHealthz(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(healthz))
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
resp, err := srv.Client().Get(srv.URL)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK {
t.Errorf("wanted not ready but got %d", resp.StatusCode)
}
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
resp, err = srv.Client().Get(srv.URL)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Errorf("wanted ready but got %d", resp.StatusCode)
}
}
func TestReadyz(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(readyz))
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
resp, err := srv.Client().Get(srv.URL)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK {
t.Errorf("wanted not ready but got %d", resp.StatusCode)
}
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
resp, err = srv.Client().Get(srv.URL)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Errorf("wanted ready but got %d", resp.StatusCode)
}
}

View File

@@ -1,320 +0,0 @@
package entrypoint
import (
"context"
"crypto/tls"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"net/http/httputil"
"net/url"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/fingerprint"
"github.com/felixge/httpsnoop"
"github.com/hashicorp/hcl/v2/hclsimple"
"github.com/lum8rjack/go-ja4h"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var (
ErrTargetInvalid = errors.New("[unexpected] target invalid")
ErrNoHandler = errors.New("[unexpected] no handler for domain")
ErrInvalidTLSKeypair = errors.New("[unexpected] invalid TLS keypair")
ErrNoCert = errors.New("this server does not have a certificate for that domain")
requestsPerDomain = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "techaro",
Subsystem: "osiris",
Name: "request_count",
}, []string{"domain", "method", "response_code"})
responseTime = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "techaro",
Subsystem: "osiris",
Name: "response_time",
}, []string{"domain"})
unresolvedRequests = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "techaro",
Subsystem: "osiris",
Name: "unresolved_requests",
})
)
type Router struct {
lock sync.RWMutex
routes map[string]http.Handler
tlsCerts map[string]*tls.Certificate
opts Options
}
func (rtr *Router) setConfig(c config.Toplevel) error {
var errs []error
newMap := map[string]http.Handler{}
newCerts := map[string]*tls.Certificate{}
for _, d := range c.Domains {
var domainErrs []error
u, err := url.Parse(d.Target)
if err != nil {
domainErrs = append(domainErrs, fmt.Errorf("%w %q: %v", ErrTargetInvalid, d.Target, err))
}
var h http.Handler
if u != nil {
switch u.Scheme {
case "http", "https":
rp := httputil.NewSingleHostReverseProxy(u)
if d.InsecureSkipVerify {
rp.Transport = &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true,
},
}
}
h = rp
case "h2c":
h = newH2CReverseProxy(u)
case "unix":
h = &httputil.ReverseProxy{
Director: func(r *http.Request) {
r.URL.Scheme = "http"
r.URL.Host = d.Name
r.Host = d.Name
},
Transport: &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", strings.TrimPrefix(d.Target, "unix://"))
},
},
}
}
}
if h == nil {
domainErrs = append(domainErrs, ErrNoHandler)
}
newMap[d.Name] = h
cert, err := tls.LoadX509KeyPair(d.TLS.Cert, d.TLS.Key)
if err != nil {
domainErrs = append(domainErrs, fmt.Errorf("%w: %w", ErrInvalidTLSKeypair, err))
}
newCerts[d.Name] = &cert
if len(domainErrs) != 0 {
errs = append(errs, fmt.Errorf("invalid domain %s: %w", d.Name, errors.Join(domainErrs...)))
}
}
if len(errs) != 0 {
return fmt.Errorf("can't compile config to routing map: %w", errors.Join(errs...))
}
rtr.lock.Lock()
rtr.routes = newMap
rtr.tlsCerts = newCerts
rtr.lock.Unlock()
return nil
}
func (rtr *Router) GetCertificate(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
rtr.lock.RLock()
cert, ok := rtr.tlsCerts[hello.ServerName]
rtr.lock.RUnlock()
if !ok {
return nil, ErrNoCert
}
return cert, nil
}
func (rtr *Router) loadConfig() error {
slog.Info("reloading config", "fname", rtr.opts.ConfigFname)
var cfg config.Toplevel
if err := hclsimple.DecodeFile(rtr.opts.ConfigFname, nil, &cfg); err != nil {
return err
}
if err := cfg.Valid(); err != nil {
return err
}
if err := rtr.setConfig(cfg); err != nil {
return err
}
slog.Info("done!")
return nil
}
func (rtr *Router) backgroundReloadConfig(ctx context.Context) {
t := time.NewTicker(time.Hour)
defer t.Stop()
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGHUP)
for {
select {
case <-ctx.Done():
return
case <-t.C:
if err := rtr.loadConfig(); err != nil {
slog.Error("can't reload config", "fname", rtr.opts.ConfigFname, "err", err)
}
case <-ch:
if err := rtr.loadConfig(); err != nil {
slog.Error("can't reload config", "fname", rtr.opts.ConfigFname, "err", err)
}
}
}
}
func NewRouter(c config.Toplevel) (*Router, error) {
result := &Router{
routes: map[string]http.Handler{},
}
if err := result.setConfig(c); err != nil {
return nil, err
}
return result, nil
}
func (rtr *Router) HandleHTTP(ctx context.Context, ln net.Listener) error {
srv := http.Server{
Handler: rtr,
ErrorLog: internal.GetFilteredHTTPLogger(),
}
go func(ctx context.Context) {
<-ctx.Done()
srv.Close()
}(ctx)
return srv.Serve(ln)
}
func (rtr *Router) HandleHTTPS(ctx context.Context, ln net.Listener) error {
tc := &tls.Config{
GetCertificate: rtr.GetCertificate,
}
srv := &http.Server{
Handler: rtr,
ErrorLog: internal.GetFilteredHTTPLogger(),
TLSConfig: tc,
}
go func(ctx context.Context) {
<-ctx.Done()
srv.Close()
}(ctx)
fingerprint.ApplyTLSFingerprinter(srv)
return srv.ServeTLS(ln, "", "")
}
func (rtr *Router) ListenAndServeMetrics(ctx context.Context, addr string) error {
ln, err := net.Listen("tcp", addr)
if err != nil {
return fmt.Errorf("(metrics) can't bind to tcp %s: %w", addr, err)
}
defer ln.Close()
go func(ctx context.Context) {
<-ctx.Done()
ln.Close()
}(ctx)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
mux.HandleFunc("/readyz", readyz)
mux.HandleFunc("/healthz", healthz)
slog.Info("listening", "for", "metrics", "bind", addr)
srv := http.Server{
Addr: addr,
Handler: mux,
ErrorLog: internal.GetFilteredHTTPLogger(),
}
go func(ctx context.Context) {
<-ctx.Done()
srv.Close()
}(ctx)
return srv.Serve(ln)
}
func (rtr *Router) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var host = r.Host
if strings.Contains(host, ":") {
host, _, _ = net.SplitHostPort(host)
}
var h http.Handler
var ok bool
ja4hFP := ja4h.JA4H(r)
slog.Info("got request", "method", r.Method, "host", host, "path", r.URL.Path)
rtr.lock.RLock()
h, ok = rtr.routes[host]
rtr.lock.RUnlock()
if !ok {
unresolvedRequests.Inc()
http.NotFound(w, r) // TODO(Xe): brand this
return
}
r.Header.Set("X-Http-Ja4h-Fingerprint", ja4hFP)
if fp := fingerprint.GetTLSFingerprint(r); fp != nil {
if ja3n := fp.JA3N(); ja3n != nil {
r.Header.Set("X-Tls-Ja3n-Fingerprint", ja3n.String())
}
if ja4 := fp.JA4(); ja4 != nil {
r.Header.Set("X-Tls-Ja4-Fingerprint", ja4.String())
}
}
if tcpFP := fingerprint.GetTCPFingerprint(r); tcpFP != nil {
r.Header.Set("X-Tcp-Ja4t-Fingerprint", tcpFP.String())
}
m := httpsnoop.CaptureMetrics(h, w, r)
requestsPerDomain.WithLabelValues(host, r.Method, fmt.Sprint(m.Code)).Inc()
responseTime.WithLabelValues(host).Observe(float64(m.Duration.Milliseconds()))
slog.Info("request completed", "host", host, "method", r.Method, "response_code", m.Code, "duration_ms", m.Duration.Milliseconds())
}

View File

@@ -1,319 +0,0 @@
package entrypoint
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
"github.com/hashicorp/hcl/v2/hclsimple"
)
func loadConfig(t *testing.T, fname string) config.Toplevel {
t.Helper()
var cfg config.Toplevel
if err := hclsimple.DecodeFile(fname, nil, &cfg); err != nil {
t.Fatalf("can't read configuration file %s: %v", fname, err)
}
if err := cfg.Valid(); err != nil {
t.Errorf("configuration file %s is invalid: %v", "./testdata/selfsigned.hcl", err)
}
return cfg
}
func newRouter(t *testing.T, cfg config.Toplevel) *Router {
t.Helper()
rtr, err := NewRouter(cfg)
if err != nil {
t.Fatal(err)
}
return rtr
}
func TestNewRouter(t *testing.T) {
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
rtr := newRouter(t, cfg)
srv := httptest.NewServer(rtr)
defer srv.Close()
}
func TestNewRouterFails(t *testing.T) {
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "test1.internal",
TLS: config.TLS{
Cert: "./testdata/tls/invalid.crt",
Key: "./testdata/tls/invalid.key",
},
Target: cfg.Domains[0].Target,
HealthTarget: cfg.Domains[0].HealthTarget,
})
rtr, err := NewRouter(cfg)
if err == nil {
t.Fatal("wanted an error but got none")
}
srv := httptest.NewServer(rtr)
defer srv.Close()
}
func TestRouterSetConfig(t *testing.T) {
for _, tt := range []struct {
name string
configFname string
mutation func(cfg config.Toplevel) config.Toplevel
err error
}{
{
name: "basic",
configFname: "./testdata/good/selfsigned.hcl",
mutation: func(cfg config.Toplevel) config.Toplevel {
return cfg
},
},
{
name: "all schemes",
configFname: "./testdata/good/selfsigned.hcl",
mutation: func(cfg config.Toplevel) config.Toplevel {
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "http.internal",
TLS: cfg.Domains[0].TLS,
Target: "http://[::1]:3000",
HealthTarget: cfg.Domains[0].HealthTarget,
})
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "https.internal",
TLS: cfg.Domains[0].TLS,
Target: "https://[::1]:3000",
HealthTarget: cfg.Domains[0].HealthTarget,
})
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "h2c.internal",
TLS: cfg.Domains[0].TLS,
Target: "h2c://[::1]:3000",
HealthTarget: cfg.Domains[0].HealthTarget,
})
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "unix.internal",
TLS: cfg.Domains[0].TLS,
Target: "unix://foo.sock",
HealthTarget: cfg.Domains[0].HealthTarget,
})
return cfg
},
},
{
name: "invalid TLS",
configFname: "./testdata/good/selfsigned.hcl",
mutation: func(cfg config.Toplevel) config.Toplevel {
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "test1.internal",
TLS: config.TLS{
Cert: "./testdata/tls/invalid.crt",
Key: "./testdata/tls/invalid.key",
},
Target: cfg.Domains[0].Target,
HealthTarget: cfg.Domains[0].HealthTarget,
})
return cfg
},
err: ErrInvalidTLSKeypair,
},
{
name: "target is not a valid URL",
configFname: "./testdata/good/selfsigned.hcl",
mutation: func(cfg config.Toplevel) config.Toplevel {
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "test1.internal",
TLS: cfg.Domains[0].TLS,
Target: "http://[::1:443",
HealthTarget: cfg.Domains[0].HealthTarget,
})
return cfg
},
err: ErrTargetInvalid,
},
{
name: "invalid target scheme",
configFname: "./testdata/good/selfsigned.hcl",
mutation: func(cfg config.Toplevel) config.Toplevel {
cfg.Domains = append(cfg.Domains, config.Domain{
Name: "test1.internal",
TLS: cfg.Domains[0].TLS,
Target: "foo://",
HealthTarget: cfg.Domains[0].HealthTarget,
})
return cfg
},
err: ErrNoHandler,
},
} {
t.Run(tt.name, func(t *testing.T) {
cfg := loadConfig(t, tt.configFname)
rtr := newRouter(t, cfg)
cfg = tt.mutation(cfg)
if err := rtr.setConfig(cfg); !errors.Is(err, tt.err) {
t.Logf("want: %v", tt.err)
t.Logf("got: %v", err)
t.Error("got wrong error from rtr.setConfig function")
}
})
}
}
type ackHandler struct {
ack bool
}
func (ah *ackHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
ah.ack = true
fmt.Fprintln(w, "OK")
}
func (ah *ackHandler) Reset() {
ah.ack = false
}
func newUnixServer(t *testing.T, h http.Handler) string {
sockName := filepath.Join(t.TempDir(), "s")
ln, err := net.Listen("unix", sockName)
if err != nil {
t.Fatalf("can't listen on %s: %v", sockName, err)
}
t.Cleanup(func() {
ln.Close()
os.Remove(sockName)
})
go func(ctx context.Context) {
srv := &http.Server{
Handler: h,
}
go func() {
<-ctx.Done()
srv.Close()
}()
srv.Serve(ln)
}(t.Context())
return "unix://" + sockName
}
func TestRouterGetCertificate(t *testing.T) {
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
rtr := newRouter(t, cfg)
for _, tt := range []struct {
domainName string
err error
}{
{
domainName: "osiris.local.cetacean.club",
},
{
domainName: "whacky-fun.local",
err: ErrNoCert,
},
} {
t.Run(tt.domainName, func(t *testing.T) {
if _, err := rtr.GetCertificate(&tls.ClientHelloInfo{ServerName: tt.domainName}); !errors.Is(err, tt.err) {
t.Logf("want: %v", tt.err)
t.Logf("got: %v", err)
t.Error("got wrong error from rtr.GetCertificate")
}
})
}
}
func TestRouterServeAllProtocols(t *testing.T) {
cfg := loadConfig(t, "./testdata/good/all_protocols.hcl")
httpAckHandler := &ackHandler{}
httpsAckHandler := &ackHandler{}
h2cAckHandler := &ackHandler{}
unixAckHandler := &ackHandler{}
httpSrv := httptest.NewServer(httpAckHandler)
httpsSrv := httptest.NewTLSServer(httpsAckHandler)
h2cSrv := newH2cServer(t, h2cAckHandler)
unixPath := newUnixServer(t, unixAckHandler)
cfg.Domains[0].Target = httpSrv.URL
cfg.Domains[1].Target = httpsSrv.URL
cfg.Domains[2].Target = strings.ReplaceAll(h2cSrv.URL, "http:", "h2c:")
cfg.Domains[3].Target = unixPath
// enc := json.NewEncoder(os.Stderr)
// enc.SetIndent("", " ")
// enc.Encode(cfg)
rtr := newRouter(t, cfg)
cli := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true,
},
},
}
t.Run("plain http", func(t *testing.T) {
ln, err := net.Listen("tcp", ":0")
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
ln.Close()
})
go rtr.HandleHTTP(t.Context(), ln)
serverURL := "http://" + ln.Addr().String()
t.Log(serverURL)
for _, d := range cfg.Domains {
t.Run(d.Name, func(t *testing.T) {
req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, serverURL, nil)
if err != nil {
t.Fatal(err)
}
req.Host = d.Name
resp, err := cli.Do(req)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Fatalf("wrong status code %d", resp.StatusCode)
}
})
}
})
}

View File

@@ -1,15 +0,0 @@
bind {
http = ":65530"
https = ":65531"
metrics = ":65532"
}
domain "osiris.local.cetacean.club" {
tls {
cert = "./testdata/invalid.crt"
key = "./testdata/invalid.key"
}
target = "http://localhost:3000"
health_target = "http://localhost:9091/healthz"
}

View File

@@ -1,46 +0,0 @@
bind {
http = ":65520"
https = ":65521"
metrics = ":65522"
}
domain "http.internal" {
tls {
cert = "./testdata/selfsigned.crt"
key = "./testdata/selfsigned.key"
}
target = "http://localhost:65510" # XXX(Xe) this is overwritten
health_target = "http://localhost:9091/healthz"
}
domain "https.internal" {
tls {
cert = "./testdata/selfsigned.crt"
key = "./testdata/selfsigned.key"
}
target = "https://localhost:65511" # XXX(Xe) this is overwritten
insecure_skip_verify = true
health_target = "http://localhost:9091/healthz"
}
domain "h2c.internal" {
tls {
cert = "./testdata/selfsigned.crt"
key = "./testdata/selfsigned.key"
}
target = "h2c://localhost:65511" # XXX(Xe) this is overwritten
health_target = "http://localhost:9091/healthz"
}
domain "unix.internal" {
tls {
cert = "./testdata/selfsigned.crt"
key = "./testdata/selfsigned.key"
}
target = "http://localhost:65511" # XXX(Xe) this is overwritten
health_target = "http://localhost:9091/healthz"
}

View File

@@ -1,15 +0,0 @@
bind {
http = ":65530"
https = ":65531"
metrics = ":65532"
}
domain "osiris.local.cetacean.club" {
tls {
cert = "./testdata/selfsigned.crt"
key = "./testdata/selfsigned.key"
}
target = "http://localhost:3000"
health_target = "http://localhost:9091/healthz"
}

View File

@@ -1,11 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIBnzCCAVGgAwIBAgIUOLTjSYOjFk00IemtFTC4oEZs988wBQYDK2VwMEUxCzAJ
BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
dCBXaWRnaXRzIFB0eSBMdGQwHhcNMjUwNzE4MjEyNDIzWhcNMjUwODE3MjEyNDIz
WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwY
SW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEAPHphABS15+4VV6R1
vYzBQYIycQmOmlbA8QcfwzuB2VajUzBRMB0GA1UdDgQWBBT2s+MQ4AR6cbK4V0+d
XZnok1orhDAfBgNVHSMEGDAWgBT2s+MQ4AR6cbK4V0+dXZnok1orhDAPBgNVHRMB
Af8EBTADAQH/MAUGAytlcANBAOdoJbRMnHmkEETzVtXP+jkAI9yQNRXujnglApGP
8I5pvIYVgYCgoQrnb4haVWFldHM1T9H698n19e/egfFb+w4=
-----END CERTIFICATE-----

View File

@@ -1,3 +0,0 @@
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEIBop42tiZ0yzhaKo9NAc0PlAyBsE8NAE0i9Z7s2lgZuR
-----END PRIVATE KEY-----

View File

@@ -1,43 +0,0 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/cmd/osiris/internal/entrypoint"
"github.com/TecharoHQ/anubis/internal"
"github.com/facebookgo/flagenv"
)
var (
configFname = flag.String("config", "./osiris.hcl", "Configuration file (HCL), see docs")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
versionFlag = flag.Bool("version", false, "if true, show version information then quit")
)
func main() {
flagenv.Parse()
flag.Parse()
if *versionFlag {
fmt.Println("Osiris", anubis.Version)
return
}
internal.InitSlog(*slogLevel)
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer cancel()
if err := entrypoint.Main(ctx, entrypoint.Options{
ConfigFname: *configFname,
}); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
}

View File

@@ -1,15 +0,0 @@
bind {
http = ":3004"
https = ":3005"
metrics = ":9091"
}
domain "osiris.local.cetacean.club" {
tls {
cert = "./internal/config/testdata/tls/selfsigned.crt"
key = "./internal/config/testdata/tls/selfsigned.key"
}
target = "http://localhost:3000"
health_target = "http://localhost:9091/healthz"
}

View File

@@ -1,78 +0,0 @@
/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
*/
package main
import (
"fmt"
"io/fs"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
func main() {
if len(os.Args) < 2 {
fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
fmt.Println("Example: go run batch_process.go ./cleaned")
os.Exit(1)
}
cleanedDir := os.Args[1]
outputDir := "generated_policies"
// Create output directory
if err := os.MkdirAll(outputDir, 0755); err != nil {
log.Fatalf("Failed to create output directory: %v", err)
}
count := 0
err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
// Skip directories
if d.IsDir() {
return nil
}
// Generate policy name from file path
relPath, _ := filepath.Rel(cleanedDir, path)
policyName := strings.ReplaceAll(relPath, "/", "-")
policyName = strings.TrimSuffix(policyName, "-robots.txt")
policyName = strings.ReplaceAll(policyName, ".", "-")
outputFile := filepath.Join(outputDir, policyName+".yaml")
cmd := exec.Command("go", "run", "main.go",
"-input", path,
"-output", outputFile,
"-name", policyName,
"-format", "yaml")
if err := cmd.Run(); err != nil {
fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
return nil // Continue processing other files
}
count++
if count%100 == 0 {
fmt.Printf("Processed %d files...\n", count)
} else if count%10 == 0 {
fmt.Print(".")
}
return nil
})
if err != nil {
log.Fatalf("Error walking directory: %v", err)
}
fmt.Printf("Successfully processed %d robots.txt files\n", count)
fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}

View File

@@ -1,313 +0,0 @@
package main
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"regexp"
"strings"
"github.com/TecharoHQ/anubis/lib/policy/config"
"sigs.k8s.io/yaml"
)
var (
inputFile = flag.String("input", "", "path to robots.txt file (use - for stdin)")
outputFile = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
outputFormat = flag.String("format", "yaml", "output format: yaml or json")
baseAction = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
crawlDelay = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
policyName = flag.String("name", "robots-txt-policy", "name for the generated policy")
userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
helpFlag = flag.Bool("help", false, "show help")
)
type RobotsRule struct {
UserAgent string
Disallows []string
Allows []string
CrawlDelay int
IsBlacklist bool // true if this is a specifically denied user agent
}
type AnubisRule struct {
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
Name string `yaml:"name" json:"name"`
Action string `yaml:"action" json:"action"`
}
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, "%s [options] -input <robots.txt>\n\n", os.Args[0])
flag.PrintDefaults()
fmt.Fprintln(os.Stderr, "\nExamples:")
fmt.Fprintln(os.Stderr, " # Convert local robots.txt file")
fmt.Fprintln(os.Stderr, " robots2policy -input robots.txt -output policy.yaml")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " # Convert from URL")
fmt.Fprintln(os.Stderr, " robots2policy -input https://example.com/robots.txt -format json")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " # Read from stdin, write to stdout")
fmt.Fprintln(os.Stderr, " curl https://example.com/robots.txt | robots2policy -input -")
os.Exit(2)
}
}
func main() {
flag.Parse()
if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
flag.Usage()
}
// Read robots.txt
var input io.Reader
if *inputFile == "-" {
input = os.Stdin
} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
resp, err := http.Get(*inputFile)
if err != nil {
log.Fatalf("failed to fetch robots.txt from URL: %v", err)
}
defer resp.Body.Close()
input = resp.Body
} else {
file, err := os.Open(*inputFile)
if err != nil {
log.Fatalf("failed to open input file: %v", err)
}
defer file.Close()
input = file
}
// Parse robots.txt
rules, err := parseRobotsTxt(input)
if err != nil {
log.Fatalf("failed to parse robots.txt: %v", err)
}
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Check if any rules were generated
if len(anubisRules) == 0 {
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
}
// Generate output
var output []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
output, err = yaml.Marshal(anubisRules)
case "json":
output, err = json.MarshalIndent(anubisRules, "", " ")
default:
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
}
if err != nil {
log.Fatalf("failed to marshal output: %v", err)
}
// Write output
if *outputFile == "" || *outputFile == "-" {
fmt.Print(string(output))
} else {
err = os.WriteFile(*outputFile, output, 0644)
if err != nil {
log.Fatalf("failed to write output file: %v", err)
}
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
}
}
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
scanner := bufio.NewScanner(input)
var rules []RobotsRule
var currentRule *RobotsRule
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// Skip empty lines and comments
if line == "" || strings.HasPrefix(line, "#") {
continue
}
// Split on first colon
parts := strings.SplitN(line, ":", 2)
if len(parts) != 2 {
continue
}
directive := strings.TrimSpace(strings.ToLower(parts[0]))
value := strings.TrimSpace(parts[1])
switch directive {
case "user-agent":
// Start a new rule section
if currentRule != nil {
rules = append(rules, *currentRule)
}
currentRule = &RobotsRule{
UserAgent: value,
Disallows: make([]string, 0),
Allows: make([]string, 0),
}
case "disallow":
if currentRule != nil && value != "" {
currentRule.Disallows = append(currentRule.Disallows, value)
}
case "allow":
if currentRule != nil && value != "" {
currentRule.Allows = append(currentRule.Allows, value)
}
case "crawl-delay":
if currentRule != nil {
if delay, err := parseIntSafe(value); err == nil {
currentRule.CrawlDelay = delay
}
}
}
}
// Don't forget the last rule
if currentRule != nil {
rules = append(rules, *currentRule)
}
// Mark blacklisted user agents (those with "Disallow: /")
for i := range rules {
for _, disallow := range rules[i].Disallows {
if disallow == "/" {
rules[i].IsBlacklist = true
break
}
}
}
return rules, scanner.Err()
}
func parseIntSafe(s string) (int, error) {
var result int
_, err := fmt.Sscanf(s, "%d", &result)
return result, err
}
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
var anubisRules []AnubisRule
ruleCounter := 0
for _, robotsRule := range robotsRules {
userAgent := robotsRule.UserAgent
// Handle crawl delay as weight adjustment (do this first before any continues)
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
Action: "WEIGH",
Weight: &config.Weight{Adjust: *crawlDelay},
}
if userAgent == "*" {
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
}
anubisRules = append(anubisRules, rule)
}
// Handle blacklisted user agents (complete deny/challenge)
if robotsRule.IsBlacklist {
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
Action: *userAgentDeny,
}
if userAgent == "*" {
// This would block everything - convert to a weight adjustment instead
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
rule.Action = "WEIGH"
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
}
anubisRules = append(anubisRules, rule)
continue
}
// Handle specific disallow rules
for _, disallow := range robotsRule.Disallows {
if disallow == "/" {
continue // Already handled as blacklist above
}
ruleCounter++
rule := AnubisRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
}
// Build CEL expression
var conditions []string
// Add user agent condition if not wildcard
if userAgent != "*" {
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
}
// Add path condition
pathCondition := buildPathCondition(disallow)
conditions = append(conditions, pathCondition)
rule.Expression = &config.ExpressionOrList{
All: conditions,
}
anubisRules = append(anubisRules, rule)
}
}
return anubisRules
}
func buildPathCondition(robotsPath string) string {
// Handle wildcards in robots.txt paths
if strings.Contains(robotsPath, "*") || strings.Contains(robotsPath, "?") {
// Convert robots.txt wildcards to regex
regex := regexp.QuoteMeta(robotsPath)
regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
regex = strings.ReplaceAll(regex, `\?`, `.`) // ? becomes .
regex = "^" + regex
return fmt.Sprintf("path.matches(%q)", regex)
}
// Simple prefix match for most cases
return fmt.Sprintf("path.startsWith(%q)", robotsPath)
}

View File

@@ -1,418 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"gopkg.in/yaml.v3"
)
type TestCase struct {
name string
robotsFile string
expectedFile string
options TestOptions
}
type TestOptions struct {
format string
action string
crawlDelayWeight int
policyName string
deniedAction string
}
func TestDataFileConversion(t *testing.T) {
testCases := []TestCase{
{
name: "simple_default",
robotsFile: "simple.robots.txt",
expectedFile: "simple.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "simple_json",
robotsFile: "simple.robots.txt",
expectedFile: "simple.json",
options: TestOptions{format: "json"},
},
{
name: "simple_deny_action",
robotsFile: "simple.robots.txt",
expectedFile: "deny-action.yaml",
options: TestOptions{format: "yaml", action: "DENY"},
},
{
name: "simple_custom_name",
robotsFile: "simple.robots.txt",
expectedFile: "custom-name.yaml",
options: TestOptions{format: "yaml", policyName: "my-custom-policy"},
},
{
name: "blacklist_with_crawl_delay",
robotsFile: "blacklist.robots.txt",
expectedFile: "blacklist.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
},
{
name: "wildcards",
robotsFile: "wildcards.robots.txt",
expectedFile: "wildcards.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "empty_file",
robotsFile: "empty.robots.txt",
expectedFile: "empty.yaml",
options: TestOptions{format: "yaml"},
},
{
name: "complex_scenario",
robotsFile: "complex.robots.txt",
expectedFile: "complex.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
robotsPath := filepath.Join("testdata", tc.robotsFile)
expectedPath := filepath.Join("testdata", tc.expectedFile)
// Read robots.txt input
robotsFile, err := os.Open(robotsPath)
if err != nil {
t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
}
defer robotsFile.Close()
// Parse robots.txt
rules, err := parseRobotsTxt(robotsFile)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
// Set test options
oldFormat := *outputFormat
oldAction := *baseAction
oldCrawlDelay := *crawlDelay
oldPolicyName := *policyName
oldDeniedAction := *userAgentDeny
if tc.options.format != "" {
*outputFormat = tc.options.format
}
if tc.options.action != "" {
*baseAction = tc.options.action
}
if tc.options.crawlDelayWeight > 0 {
*crawlDelay = tc.options.crawlDelayWeight
}
if tc.options.policyName != "" {
*policyName = tc.options.policyName
}
if tc.options.deniedAction != "" {
*userAgentDeny = tc.options.deniedAction
}
// Restore options after test
defer func() {
*outputFormat = oldFormat
*baseAction = oldAction
*crawlDelay = oldCrawlDelay
*policyName = oldPolicyName
*userAgentDeny = oldDeniedAction
}()
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Generate output
var actualOutput []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
actualOutput, err = yaml.Marshal(anubisRules)
case "json":
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
}
if err != nil {
t.Fatalf("Failed to marshal output: %v", err)
}
// Read expected output
expectedOutput, err := os.ReadFile(expectedPath)
if err != nil {
t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
}
if strings.ToLower(*outputFormat) == "yaml" {
var actualData []interface{}
var expectedData []interface{}
err = yaml.Unmarshal(actualOutput, &actualData)
if err != nil {
t.Fatalf("Failed to unmarshal actual output: %v", err)
}
err = yaml.Unmarshal(expectedOutput, &expectedData)
if err != nil {
t.Fatalf("Failed to unmarshal expected output: %v", err)
}
// Compare data structures
if !compareData(actualData, expectedData) {
actualStr := strings.TrimSpace(string(actualOutput))
expectedStr := strings.TrimSpace(string(expectedOutput))
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
}
} else {
var actualData []interface{}
var expectedData []interface{}
err = json.Unmarshal(actualOutput, &actualData)
if err != nil {
t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
}
err = json.Unmarshal(expectedOutput, &expectedData)
if err != nil {
t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
}
// Compare data structures
if !compareData(actualData, expectedData) {
actualStr := strings.TrimSpace(string(actualOutput))
expectedStr := strings.TrimSpace(string(expectedOutput))
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
}
}
})
}
}
func TestCaseInsensitiveParsing(t *testing.T) {
robotsTxt := `User-Agent: *
Disallow: /admin
Crawl-Delay: 10
User-agent: TestBot
disallow: /test
crawl-delay: 5
USER-AGENT: UpperBot
DISALLOW: /upper
CRAWL-DELAY: 20`
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
}
expectedRules := 3
if len(rules) != expectedRules {
t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
}
// Check that all crawl delays were parsed
for i, rule := range rules {
expectedDelays := []int{10, 5, 20}
if rule.CrawlDelay != expectedDelays[i] {
t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
}
}
}
func TestVariousOutputFormats(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin`
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldPolicyName := *policyName
*policyName = "test-policy"
defer func() { *policyName = oldPolicyName }()
anubisRules := convertToAnubisRules(rules)
// Test YAML output
yamlOutput, err := yaml.Marshal(anubisRules)
if err != nil {
t.Fatalf("Failed to marshal YAML: %v", err)
}
if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
t.Errorf("YAML output doesn't contain expected rule name")
}
// Test JSON output
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
if err != nil {
t.Fatalf("Failed to marshal JSON: %v", err)
}
if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
t.Errorf("JSON output doesn't contain expected rule name")
}
}
func TestDifferentActions(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin`
testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}
for _, action := range testActions {
t.Run("action_"+action, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldAction := *baseAction
*baseAction = action
defer func() { *baseAction = oldAction }()
anubisRules := convertToAnubisRules(rules)
if len(anubisRules) != 1 {
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
}
if anubisRules[0].Action != action {
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
}
})
}
}
func TestPolicyNaming(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin
Disallow: /private
User-agent: BadBot
Disallow: /`
testNames := []string{"custom-policy", "my-rules", "site-protection"}
for _, name := range testNames {
t.Run("name_"+name, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldName := *policyName
*policyName = name
defer func() { *policyName = oldName }()
anubisRules := convertToAnubisRules(rules)
// Check that all rule names use the custom prefix
for _, rule := range anubisRules {
if !strings.HasPrefix(rule.Name, name+"-") {
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
}
}
})
}
}
func TestCrawlDelayWeights(t *testing.T) {
robotsTxt := `User-agent: *
Disallow: /admin
Crawl-delay: 10
User-agent: SlowBot
Disallow: /slow
Crawl-delay: 60`
testWeights := []int{1, 5, 10, 25}
for _, weight := range testWeights {
t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldWeight := *crawlDelay
*crawlDelay = weight
defer func() { *crawlDelay = oldWeight }()
anubisRules := convertToAnubisRules(rules)
// Count weight rules and verify they have correct weight
weightRules := 0
for _, rule := range anubisRules {
if rule.Action == "WEIGH" && rule.Weight != nil {
weightRules++
if rule.Weight.Adjust != weight {
t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
}
}
}
expectedWeightRules := 2 // One for *, one for SlowBot
if weightRules != expectedWeightRules {
t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
}
})
}
}
func TestBlacklistActions(t *testing.T) {
robotsTxt := `User-agent: BadBot
Disallow: /
User-agent: SpamBot
Disallow: /`
testActions := []string{"DENY", "CHALLENGE"}
for _, action := range testActions {
t.Run("blacklist_"+action, func(t *testing.T) {
reader := strings.NewReader(robotsTxt)
rules, err := parseRobotsTxt(reader)
if err != nil {
t.Fatalf("Failed to parse robots.txt: %v", err)
}
oldAction := *userAgentDeny
*userAgentDeny = action
defer func() { *userAgentDeny = oldAction }()
anubisRules := convertToAnubisRules(rules)
// All rules should be blacklist rules with the specified action
for _, rule := range anubisRules {
if !strings.Contains(rule.Name, "blacklist") {
t.Errorf("Expected blacklist rule, got %s", rule.Name)
}
if rule.Action != action {
t.Errorf("Expected action %s, got %s", action, rule.Action)
}
}
})
}
}
// compareData performs a deep comparison of two data structures,
// ignoring differences that are semantically equivalent in YAML/JSON
func compareData(actual, expected interface{}) bool {
return reflect.DeepEqual(actual, expected)
}

View File

@@ -1,15 +0,0 @@
# Test with blacklisted user agents
User-agent: *
Disallow: /admin
Crawl-delay: 10
User-agent: BadBot
Disallow: /
User-agent: SpamBot
Disallow: /
Crawl-delay: 60
User-agent: Googlebot
Disallow: /search
Crawl-delay: 5

View File

@@ -1,30 +0,0 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 3
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-2
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-3
- action: WEIGH
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-crawl-delay-4
weight:
adjust: 3
- action: DENY
expression: userAgent.contains("SpamBot")
name: robots-txt-policy-blacklist-5
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-6
weight:
adjust: 3
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search")
name: robots-txt-policy-disallow-7

View File

@@ -1,30 +0,0 @@
# Complex real-world example
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /api/internal/
Allow: /api/public/
Crawl-delay: 5
User-agent: Googlebot
Disallow: /search/
Allow: /api/
Crawl-delay: 2
User-agent: Bingbot
Disallow: /search/
Disallow: /admin/
Crawl-delay: 10
User-agent: BadBot
Disallow: /
User-agent: SeoBot
Disallow: /
Crawl-delay: 300
# Test with various patterns
User-agent: TestBot
Disallow: /*/admin
Disallow: /temp*.html
Disallow: /file?.log

View File

@@ -1,71 +0,0 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 5
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.startsWith("/private/")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.startsWith("/api/internal/")
name: robots-txt-policy-disallow-4
- action: WEIGH
expression: userAgent.contains("Googlebot")
name: robots-txt-policy-crawl-delay-5
weight:
adjust: 5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Googlebot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-6
- action: WEIGH
expression: userAgent.contains("Bingbot")
name: robots-txt-policy-crawl-delay-7
weight:
adjust: 5
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/search/")
name: robots-txt-policy-disallow-8
- action: CHALLENGE
expression:
all:
- userAgent.contains("Bingbot")
- path.startsWith("/admin/")
name: robots-txt-policy-disallow-9
- action: DENY
expression: userAgent.contains("BadBot")
name: robots-txt-policy-blacklist-10
- action: WEIGH
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-crawl-delay-11
weight:
adjust: 5
- action: DENY
expression: userAgent.contains("SeoBot")
name: robots-txt-policy-blacklist-12
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/.*/admin")
name: robots-txt-policy-disallow-13
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html")
name: robots-txt-policy-disallow-14
- action: CHALLENGE
expression:
all:
- userAgent.contains("TestBot")
- path.matches("^/file.\\.log")
name: robots-txt-policy-disallow-15

View File

@@ -1,6 +0,0 @@
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: my-custom-policy-disallow-1
- action: CHALLENGE
expression: path.startsWith("/private")
name: my-custom-policy-disallow-2

View File

@@ -1,6 +0,0 @@
- action: DENY
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-1
- action: DENY
expression: path.startsWith("/private")
name: robots-txt-policy-disallow-2

View File

@@ -1,2 +0,0 @@
# Empty robots.txt (comments only)
# No actual rules

View File

@@ -1 +0,0 @@
[]

View File

@@ -1,12 +0,0 @@
[
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/admin/\")",
"name": "robots-txt-policy-disallow-1"
},
{
"action": "CHALLENGE",
"expression": "path.startsWith(\"/private\")",
"name": "robots-txt-policy-disallow-2"
}
]

View File

@@ -1,5 +0,0 @@
# Simple robots.txt test
User-agent: *
Disallow: /admin/
Disallow: /private
Allow: /public

View File

@@ -1,6 +0,0 @@
- action: CHALLENGE
expression: path.startsWith("/admin/")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.startsWith("/private")
name: robots-txt-policy-disallow-2

View File

@@ -1,6 +0,0 @@
# Test wildcard patterns
User-agent: *
Disallow: /search*
Disallow: /*/private
Disallow: /file?.txt
Disallow: /admin/*?action=delete

View File

@@ -1,12 +0,0 @@
- action: CHALLENGE
expression: path.matches("^/search.*")
name: robots-txt-policy-disallow-1
- action: CHALLENGE
expression: path.matches("^/.*/private")
name: robots-txt-policy-disallow-2
- action: CHALLENGE
expression: path.matches("^/file.\\.txt")
name: robots-txt-policy-disallow-3
- action: CHALLENGE
expression: path.matches("^/admin/.*.action=delete")
name: robots-txt-policy-disallow-4

View File

@@ -4,7 +4,7 @@
"import": "(data)/bots/_deny-pathological.yaml"
},
{
"import": "(data)/meta/ai-block-aggressive.yaml"
"import": "(data)/bots/ai-robots-txt.yaml"
},
{
"import": "(data)/crawlers/_allow-good.yaml"

View File

@@ -11,217 +11,51 @@
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
bots:
# Pathological bots to deny
- # This correlates to data/bots/deny-pathological.yaml in the source tree
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
import: (data)/bots/_deny-pathological.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
# Pathological bots to deny
- # This correlates to data/bots/deny-pathological.yaml in the source tree
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
import: (data)/bots/_deny-pathological.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
# Aggressively block AI/LLM related bots/agents by default
- import: (data)/meta/ai-block-aggressive.yaml
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
- import: (data)/bots/ai-robots-txt.yaml
# Consider replacing the aggressive AI policy with more selective policies:
# - import: (data)/meta/ai-block-moderate.yaml
# - import: (data)/meta/ai-block-permissive.yaml
# Search engine crawlers to allow, defaults to:
# - Google (so they don't try to bypass Anubis)
# - Bing
# - DuckDuckGo
# - Qwant
# - The Internet Archive
# - Kagi
# - Marginalia
# - Mojeek
- import: (data)/crawlers/_allow-good.yaml
# Search engine crawlers to allow, defaults to:
# - Google (so they don't try to bypass Anubis)
# - Apple
# - Bing
# - DuckDuckGo
# - Qwant
# - The Internet Archive
# - Kagi
# - Marginalia
# - Mojeek
- import: (data)/crawlers/_allow-good.yaml
# Challenge Firefox AI previews
- import: (data)/clients/x-firefox-ai.yaml
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
- import: (data)/common/keep-internet-working.yaml
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
- import: (data)/common/keep-internet-working.yaml
# # Punish any bot with "bot" in the user-agent string
# # This is known to have a high false-positive rate, use at your own risk
# - name: generic-bot-catchall
# user_agent_regex: (?i:bot|crawler)
# action: CHALLENGE
# challenge:
# difficulty: 16 # impossible
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# # Punish any bot with "bot" in the user-agent string
# # This is known to have a high false-positive rate, use at your own risk
# - name: generic-bot-catchall
# user_agent_regex: (?i:bot|crawler)
# action: CHALLENGE
# challenge:
# difficulty: 16 # impossible
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
- name: countries-with-aggressive-scrapers
action: WEIGH
geoip:
countries:
- BR
- CN
weight:
adjust: 10
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
- name: aggressive-asns-without-functional-abuse-contact
action: WEIGH
asns:
match:
- 13335 # Cloudflare
- 136907 # Huawei Cloud
- 45102 # Alibaba Cloud
weight:
adjust: 10
# ## System load based checks.
# # If the system is under high load, add weight.
# - name: high-load-average
# action: WEIGH
# expression: load_1m >= 10.0 # make sure to end the load comparison in a .0
# weight:
# adjust: 20
## If your backend service is running on the same operating system as Anubis,
## you can uncomment this rule to make the challenge easier when the system is
## under low load.
##
## If it is not, remove weight.
# - name: low-load-average
# action: WEIGH
# expression: load_15m <= 4.0 # make sure to end the load comparison in a .0
# weight:
# adjust: -10
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: WEIGH
weight:
adjust: 10
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: CHALLENGE
dnsbl: false
# #
# impressum:
# # Displayed at the bottom of every page rendered by Anubis.
# footer: >-
# This website is hosted by Zombocom. If you have any complaints or notes
# about the service, please contact
# <a href="mailto:contact@domainhere.example">contact@domainhere.example</a>
# and we will assist you as soon as possible.
# # The imprint page that will be linked to at the footer of every Anubis page.
# page:
# # The HTML <title> of the page
# title: Imprint and Privacy Policy
# # The HTML contents of the page. The exact contents of this page can
# # and will vary by locale. Please consult with a lawyer if you are not
# # sure what to put here
# body: >-
# <p>Last updated: June 2025</p>
# <h2>Information that is gathered from visitors</h2>
# <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
# <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
# <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
# <!-- ... -->
# Open Graph passthrough configuration, see here for more information:
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
openGraph:
# Enables Open Graph passthrough
enabled: false
# Enables the use of the HTTP host in the cache key, this enables
# caching metadata for multiple http hosts at once.
considerHost: false
# How long cached OpenGraph metadata should last in memory
ttl: 24h
# # If set, return these opengraph values instead of looking them up with
# # the target service.
# #
# # Correlates to properties in https://ogp.me/
# override:
# # og:title is required, it is the title of the website
# "og:title": "Techaro Anubis"
# "og:description": >-
# Anubis is a Web AI Firewall Utility that helps you fight the bots
# away so that you can maintain uptime at work!
# "description": >-
# Anubis is a Web AI Firewall Utility that helps you fight the bots
# away so that you can maintain uptime at work!
# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
DENY: 200
# Anubis can store temporary data in one of a few backends. See the storage
# backends section of the docs for more information:
#
# https://anubis.techaro.lol/docs/admin/policies#storage-backends
store:
backend: memory
parameters: {}
# The weight thresholds for when to trigger individual challenges. Any
# CHALLENGE will take precedence over this.
#
# A threshold has four configuration options:
#
# - name: the name that is reported down the stack and used for metrics
# - expression: A CEL expression with the request weight in the variable
# weight
# - action: the Anubis action to apply, similar to in a bot policy
# - challenge: which challenge to send to the user, similar to in a bot policy
#
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
# information.
thresholds:
# By default Anubis ships with the following thresholds:
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
expression: weight <= 0 # a feather weighs zero units
action: ALLOW # Allow the traffic through
# For clients that had some weight reduced through custom rules, give them a
# lightweight challenge.
- name: mild-suspicion
expression:
all:
- weight > 0
- weight < 10
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
algorithm: metarefresh
difficulty: 1
report_as: 1
# For clients that are browser-like but have either gained points from custom rules or
# report as a standard browser.
- name: moderate-suspicion
expression:
all:
- weight >= 10
- weight < 20
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
algorithm: fast
difficulty: 2 # two leading zeros, very fast for most clients
report_as: 2
# For clients that are browser like and have gained many points from custom rules
- name: extreme-suspicion
expression: weight >= 20
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
algorithm: fast
difficulty: 4
report_as: 4
DENY: 200

View File

@@ -1,26 +1,28 @@
- name: deny-aggressive-brazilian-scrapers
action: WEIGH
weight:
adjust: 20
action: DENY
expression:
any:
# Internet Explorer should be out of support
- userAgent.contains("MSIE")
# Trident is the Internet Explorer browser engine
- userAgent.contains("Trident")
# Opera is a fork of chrome now
- userAgent.contains("Presto")
# Windows CE is discontinued
- userAgent.contains("Windows CE")
# Windows 95 is discontinued
- userAgent.contains("Windows 95")
# Windows 98 is discontinued
- userAgent.contains("Windows 98")
# Windows 9.x is discontinued
- userAgent.contains("Win 9x")
# Amazon does not have an Alexa Toolbar.
- userAgent.contains("Alexa Toolbar")
# This is not released, even Windows 11 calls itself Windows 10
- userAgent.contains("Windows NT 11.0")
# iPods are not in common use
- userAgent.contains("iPod")
# Internet Explorer should be out of support
- userAgent.contains("MSIE")
# Trident is the Internet Explorer browser engine
- userAgent.contains("Trident")
# Opera is a fork of chrome now
- userAgent.contains("Presto")
# Windows CE is discontinued
- userAgent.contains("Windows CE")
# Windows 95 is discontinued
- userAgent.contains("Windows 95")
# Windows 98 is discontinued
- userAgent.contains("Windows 98")
# Windows 9.x is discontinued
- userAgent.contains("Win 9x")
# Amazon does not have an Alexa Toolbar.
- userAgent.contains("Alexa Toolbar")
- name: challenge-aggressive-brazilian-scrapers
action: CHALLENGE
expression:
any:
# This is not released, even Windows 11 calls itself Windows 10
- userAgent.contains("Windows NT 11.0")
# iPods are not in common use
- userAgent.contains("iPod")

View File

@@ -1,11 +0,0 @@
# Extensive list of AI-affiliated agents based on https://github.com/ai-robots-txt/ai.robots.txt
# Add new/undocumented agents here. Where documentation exists, consider moving to dedicated policy files.
# Notes on various agents:
# - Amazonbot: Well documented, but they refuse to state which agent collects training data.
# - anthropic-ai/Claude-Web: Undocumented by Anthropic. Possibly deprecated or hallucinations?
# - Perplexity*: Well documented, but they refuse to state which agent collects training data.
# Warning: May contain user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
- name: "ai-catchall"
user_agent_regex: >-
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|Claude-Web|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|GoogleOther|GoogleOther-Image|GoogleOther-Video|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
action: DENY

View File

@@ -1,8 +1,4 @@
# Warning: Contains user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
# Note: Blocks human-directed/non-training user agents
#
# CCBot is allowed because if Common Crawl is allowed, then scrapers don't need to scrape to get the data.
- name: "ai-robots-txt"
user_agent_regex: >-
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
action: DENY

View File

@@ -1,6 +1,4 @@
- name: cloudflare-workers
headers_regex:
CF-Worker: .*
action: WEIGH
weight:
adjust: 15
action: DENY

View File

@@ -1,8 +0,0 @@
# User agents that act on behalf of humans in AI tools, e.g. searching the web.
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
# Exceptions:
# - Claude-User: No published IP allowlist
- name: "ai-clients"
user_agent_regex: >-
ChatGPT-User|Claude-User|MistralAI-User
action: DENY

View File

@@ -1,10 +0,0 @@
# Acts on behalf of user requests
# https://docs.mistral.ai/robots/
- name: mistral-mistralai-user
user_agent_regex: MistralAI-User/.+; \+https\://docs\.mistral\.ai/robots
action: ALLOW
# https://mistral.ai/mistralai-user-ips.json
remote_addresses: [
"20.240.160.161/32",
"20.240.160.1/32",
]

View File

@@ -1,93 +0,0 @@
# Acts on behalf of user requests
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
- name: openai-chatgpt-user
user_agent_regex: ChatGPT-User/.+; \+https\://openai\.com/bot
action: ALLOW
# https://openai.com/chatgpt-user.json
# curl 'https://openai.com/chatgpt-user.json' | jq '.prefixes.[].ipv4Prefix' | sed 's/$/,/'
remote_addresses: [
"13.65.138.112/28",
"23.98.179.16/28",
"13.65.138.96/28",
"172.183.222.128/28",
"20.102.212.144/28",
"40.116.73.208/28",
"172.183.143.224/28",
"52.190.190.16/28",
"13.83.237.176/28",
"51.8.155.64/28",
"74.249.86.176/28",
"51.8.155.48/28",
"20.55.229.144/28",
"135.237.131.208/28",
"135.237.133.48/28",
"51.8.155.112/28",
"135.237.133.112/28",
"52.159.249.96/28",
"52.190.137.16/28",
"52.255.111.112/28",
"40.84.181.32/28",
"172.178.141.112/28",
"52.190.142.64/28",
"172.178.140.144/28",
"52.190.137.144/28",
"172.178.141.128/28",
"57.154.187.32/28",
"4.196.118.112/28",
"20.193.50.32/28",
"20.215.188.192/28",
"20.215.214.16/28",
"4.197.22.112/28",
"4.197.115.112/28",
"172.213.21.16/28",
"172.213.11.144/28",
"172.213.12.112/28",
"172.213.21.144/28",
"20.90.7.144/28",
"57.154.175.0/28",
"57.154.174.112/28",
"52.236.94.144/28",
"137.135.191.176/28",
"23.98.186.192/28",
"23.98.186.96/28",
"23.98.186.176/28",
"23.98.186.64/28",
"68.221.67.192/28",
"68.221.67.160/28",
"13.83.167.128/28",
"20.228.106.176/28",
"52.159.227.32/28",
"68.220.57.64/28",
"172.213.21.112/28",
"68.221.67.224/28",
"68.221.75.16/28",
"20.97.189.96/28",
"52.252.113.240/28",
"52.230.163.32/28",
"172.212.159.64/28",
"52.255.111.80/28",
"52.255.111.0/28",
"4.151.241.240/28",
"52.255.111.32/28",
"52.255.111.48/28",
"52.255.111.16/28",
"52.230.164.176/28",
"52.176.139.176/28",
"52.173.234.16/28",
"4.151.71.176/28",
"4.151.119.48/28",
"52.255.109.112/28",
"52.255.109.80/28",
"20.161.75.208/28",
"68.154.28.96/28",
"52.255.109.128/28",
"52.225.75.208/28",
"52.190.139.48/28",
"68.221.67.240/28",
"52.156.77.144/28",
"52.148.129.32/28",
"40.84.221.208/28",
"104.210.139.224/28",
"40.84.221.224/28",
"104.210.139.192/28",
]

View File

@@ -1,2 +0,0 @@
- import: (data)/clients/small-internet-browsers/netsurf.yaml
- import: (data)/clients/small-internet-browsers/palemoon.yaml

View File

@@ -1,5 +0,0 @@
- name: "reduce-weight-netsurf"
user_agent_regex: "NetSurf"
action: WEIGH
weight:
adjust: -5

View File

@@ -1,5 +0,0 @@
- name: "reduce-weight-palemoon"
user_agent_regex: "PaleMoon"
action: WEIGH
weight:
adjust: -5

View File

@@ -1,6 +0,0 @@
# https://connect.mozilla.org/t5/firefox-labs/try-out-link-previews-in-firefox-labs-138-and-share-your/td-p/92012
- name: x-firefox-ai
action: WEIGH
expression: '"X-Firefox-Ai" in headers'
weight:
adjust: 5

View File

@@ -1,15 +1,15 @@
- name: ipv4-rfc-1918
action: ALLOW
remote_addresses:
- 10.0.0.0/8
- 172.16.0.0/12
- 192.168.0.0/16
- 100.64.0.0/10
- 10.0.0.0/8
- 172.16.0.0/12
- 192.168.0.0/16
- 100.64.0.0/10
- name: ipv6-ula
action: ALLOW
remote_addresses:
- fc00::/7
- fc00::/7
- name: ipv6-link-local
action: ALLOW
remote_addresses:
- fe80::/10
- fe80::/10

View File

@@ -1,10 +1,8 @@
- import: (data)/crawlers/googlebot.yaml
- import: (data)/crawlers/applebot.yaml
- import: (data)/crawlers/bingbot.yaml
- import: (data)/crawlers/duckduckbot.yaml
- import: (data)/crawlers/qwantbot.yaml
- import: (data)/crawlers/internet-archive.yaml
- import: (data)/crawlers/kagibot.yaml
- import: (data)/crawlers/marginalia.yaml
- import: (data)/crawlers/mojeekbot.yaml
- import: (data)/crawlers/commoncrawl.yaml
- import: (data)/crawlers/mojeekbot.yaml

View File

@@ -1,8 +0,0 @@
# User agents that index exclusively for search in for AI systems.
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
# Exceptions:
# - Claude-SearchBot: No published IP allowlist
- name: "ai-crawlers-search"
user_agent_regex: >-
OAI-SearchBot|Claude-SearchBot
action: DENY

View File

@@ -1,8 +0,0 @@
# User agents that crawl for training AI/LLM systems
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
# Exceptions:
# - ClaudeBot: No published IP allowlist
- name: "ai-crawlers-training"
user_agent_regex: >-
GPTBot|ClaudeBot
action: DENY

View File

@@ -1,20 +0,0 @@
# Indexing for search and Siri
# https://support.apple.com/en-us/119829
- name: applebot
user_agent_regex: Applebot
action: ALLOW
# https://search.developer.apple.com/applebot.json
remote_addresses: [
"17.241.208.160/27",
"17.241.193.160/27",
"17.241.200.160/27",
"17.22.237.0/24",
"17.22.245.0/24",
"17.22.253.0/24",
"17.241.75.0/24",
"17.241.219.0/24",
"17.241.227.0/24",
"17.246.15.0/24",
"17.246.19.0/24",
"17.246.23.0/24",
]

View File

@@ -1,12 +0,0 @@
- name: common-crawl
user_agent_regex: CCBot
action: ALLOW
# https://index.commoncrawl.org/ccbot.json
remote_addresses:
[
"2600:1f28:365:80b0::/60",
"18.97.9.168/29",
"18.97.14.80/29",
"18.97.14.88/30",
"98.85.178.216/32",
]

View File

@@ -1,16 +0,0 @@
# Collects AI training data
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
- name: openai-gptbot
user_agent_regex: GPTBot/1\.1; \+https\://openai\.com/gptbot
action: ALLOW
# https://openai.com/gptbot.json
remote_addresses: [
"52.230.152.0/24",
"20.171.206.0/24",
"20.171.207.0/24",
"4.227.36.0/25",
"20.125.66.80/28",
"172.182.204.0/24",
"172.182.214.0/24",
"172.182.215.0/24",
]

View File

@@ -1,13 +0,0 @@
# Indexing for search, does not collect training data
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
- name: openai-searchbot
user_agent_regex: OAI-SearchBot/1\.0; \+https\://openai\.com/searchbot
action: ALLOW
# https://openai.com/searchbot.json
remote_addresses: [
"20.42.10.176/28",
"172.203.190.128/28",
"104.210.140.128/28",
"51.8.102.0/24",
"135.234.64.0/24"
]

View File

@@ -3,6 +3,6 @@ package data
import "embed"
var (
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers all:meta
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers
BotPolicies embed.FS
)

View File

@@ -1,5 +0,0 @@
# meta policies
Contains policies that exclusively reference policies in _multiple_ other data folders.
Akin to "stances" that the administrator can take, with reference to various topics, such as AI/LLM systems.

View File

@@ -1,6 +0,0 @@
# Blocks all AI/LLM associated user agents, regardless of purpose or human agency
# Warning: To completely block some AI/LLM training, such as with Google, you _must_ place flags in robots.txt.
- import: (data)/bots/ai-catchall.yaml
- import: (data)/clients/ai.yaml
- import: (data)/crawlers/ai-search.yaml
- import: (data)/crawlers/ai-training.yaml

Some files were not shown because too many files have changed in this diff Show More