mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-05 16:28:17 +00:00
Compare commits
213 Commits
Xe/v1.19.0
...
Xe/osiris
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4092180626 | ||
|
|
03758405d3 | ||
|
|
eb78ccc30c | ||
|
|
4156f84020 | ||
|
|
76dcd21582 | ||
|
|
153da4f5ac | ||
|
|
89b6af05a3 | ||
|
|
9a711f1635 | ||
|
|
dabbe63bb6 | ||
|
|
0aed7d3688 | ||
|
|
2af731033c | ||
|
|
6b639cd911 | ||
|
|
a0aba2d74a | ||
|
|
d9c4e37978 | ||
|
|
1eafebedbc | ||
|
|
115ee97d1d | ||
|
|
b485499125 | ||
|
|
300720f030 | ||
|
|
d6298adc6d | ||
|
|
1a9d8fb0cf | ||
|
|
36e25ff5f3 | ||
|
|
c59b7179c3 | ||
|
|
59515ed669 | ||
|
|
4d6b578f93 | ||
|
|
2915c1d209 | ||
|
|
68b653b099 | ||
|
|
509a4f3ce8 | ||
|
|
5c4d8480e6 | ||
|
|
132b2ed853 | ||
|
|
d28991ce8d | ||
|
|
0fd4bb81b8 | ||
|
|
603c68fd54 | ||
|
|
c8f2eb1185 | ||
|
|
f6b94dca98 | ||
|
|
6d8b98eb3d | ||
|
|
b9d8275234 | ||
|
|
c2cc1df172 | ||
|
|
735b2ceb14 | ||
|
|
2cb57fc247 | ||
|
|
61ce581f36 | ||
|
|
3f6750ac7d | ||
|
|
25d75b352a | ||
|
|
de17823bc7 | ||
|
|
29622e605d | ||
|
|
9fa1795db7 | ||
|
|
fbf69680f5 | ||
|
|
c74de19532 | ||
|
|
6dc726013a | ||
|
|
02304e8f3c | ||
|
|
607c9791d8 | ||
|
|
6b67be86a1 | ||
|
|
e02f017153 | ||
|
|
66b39f64af | ||
|
|
944fd25924 | ||
|
|
fa3fbfb0a5 | ||
|
|
3c739c1305 | ||
|
|
cc56baa5c7 | ||
|
|
053d29e0b6 | ||
|
|
a668095c22 | ||
|
|
1c4a1aec4a | ||
|
|
5b8b6d1c94 | ||
|
|
0cb6ef76e1 | ||
|
|
a900e98b8b | ||
|
|
e79cd93b61 | ||
|
|
d17fc6a174 | ||
|
|
95768cb70f | ||
|
|
ca61b8a05f | ||
|
|
1ea1157cd7 | ||
|
|
44ae5f2e2b | ||
|
|
ea2e76c6ee | ||
|
|
4ea0add50d | ||
|
|
289c802a0b | ||
|
|
543b942be1 | ||
|
|
edbe1dcfd6 | ||
|
|
94db16c0df | ||
|
|
c2f46907a1 | ||
|
|
6fa5b8e4e0 | ||
|
|
f98750b038 | ||
|
|
7d0c58d1a8 | ||
|
|
e870ede120 | ||
|
|
592d1e3dfc | ||
|
|
f6254b4b98 | ||
|
|
d19026d693 | ||
|
|
7b72c790ab | ||
|
|
719a1409ca | ||
|
|
890f21bf47 | ||
|
|
93bfe910d8 | ||
|
|
19d8de784b | ||
|
|
dff2176beb | ||
|
|
506d8817d5 | ||
|
|
d0fae02d05 | ||
|
|
845095c3f6 | ||
|
|
2f1e78cc6c | ||
|
|
7c0996448a | ||
|
|
d7a758f805 | ||
|
|
c121896f9c | ||
|
|
888b7d6e77 | ||
|
|
0e43138324 | ||
|
|
c981c23f7e | ||
|
|
9f0c5e974e | ||
|
|
292c470ada | ||
|
|
12453fdc00 | ||
|
|
f5b3bf81bc | ||
|
|
1820649987 | ||
|
|
14eeeb56d6 | ||
|
|
d9e0fbe905 | ||
|
|
6aa17532da | ||
|
|
b1edf84a7c | ||
|
|
d47a3406db | ||
|
|
ff5991b5cf | ||
|
|
19f78f37ad | ||
|
|
b0b0a5c08a | ||
|
|
261306dc63 | ||
|
|
3520421757 | ||
|
|
ad5430612f | ||
|
|
c2423d0688 | ||
|
|
a1b7d2ccda | ||
|
|
7cf6ac5de6 | ||
|
|
59f5b07281 | ||
|
|
1562f88c35 | ||
|
|
15bd9b6a44 | ||
|
|
1ca531b930 | ||
|
|
f9259299b9 | ||
|
|
16a4e04027 | ||
|
|
8c79870edb | ||
|
|
060b10ea2d | ||
|
|
4c74934e9f | ||
|
|
5870f7072c | ||
|
|
3c1d95d61e | ||
|
|
ab801a3597 | ||
|
|
ecc716940e | ||
|
|
4948036f39 | ||
|
|
7aa732c700 | ||
|
|
226cf36bf7 | ||
|
|
1d5fa49eb0 | ||
|
|
97c1d4f353 | ||
|
|
244f1c505a | ||
|
|
ae4d3b0ce5 | ||
|
|
e60c43cdd2 | ||
|
|
b2b2679bae | ||
|
|
e2b46fc5e7 | ||
|
|
3437e575d4 | ||
|
|
ae064be710 | ||
|
|
e3826df3ab | ||
|
|
823d1be5d1 | ||
|
|
0c6a820372 | ||
|
|
81f6380dd4 | ||
|
|
e5455c02d8 | ||
|
|
1d8033d69e | ||
|
|
e0781e4560 | ||
|
|
7a195f1595 | ||
|
|
2904ff974b | ||
|
|
3b3080d497 | ||
|
|
60ba8e9557 | ||
|
|
14c80483a9 | ||
|
|
d1452b6d39 | ||
|
|
5e95da6b6c | ||
|
|
988fc0941b | ||
|
|
f5140ae57b | ||
|
|
bbdee34f37 | ||
|
|
6e2eeb9e65 | ||
|
|
c638653172 | ||
|
|
0fe46b48cf | ||
|
|
d6e5561768 | ||
|
|
6594ae0eef | ||
|
|
ad09f82c3c | ||
|
|
372b797f64 | ||
|
|
6eaf0e13a2 | ||
|
|
281b6c5c00 | ||
|
|
9539668049 | ||
|
|
8eff57fcb6 | ||
|
|
4ac59c3a79 | ||
|
|
bee1c22b96 | ||
|
|
5a7499ea3b | ||
|
|
5f3861ab37 | ||
|
|
9f1d791991 | ||
|
|
76fa3e01a5 | ||
|
|
f2db43ad4b | ||
|
|
ba4412c907 | ||
|
|
f184cd81e7 | ||
|
|
59bfced8bf | ||
|
|
780a935cb8 | ||
|
|
f4bc1df797 | ||
|
|
b496c90e86 | ||
|
|
ec73bcbaf1 | ||
|
|
8d19eed200 | ||
|
|
ec733e93a5 | ||
|
|
51c384eefd | ||
|
|
44d5ec0b6e | ||
|
|
3bc9040a96 | ||
|
|
de7dbfe6d6 | ||
|
|
77e0bbbce9 | ||
|
|
b4b5d2f82e | ||
|
|
988fff77f1 | ||
|
|
0d9ebebff6 | ||
|
|
ba00cdacd2 | ||
|
|
68a71c6a99 | ||
|
|
fbbab5a035 | ||
|
|
28ab29389c | ||
|
|
497005ce3e | ||
|
|
669eb4ba4b | ||
|
|
6c4e739b0b | ||
|
|
c8635357dc | ||
|
|
0ed905fd4e | ||
|
|
cd8a7eb2e2 | ||
|
|
22c47f40d1 | ||
|
|
669671bd46 | ||
|
|
6c247cdec8 | ||
|
|
eeae28f459 | ||
|
|
9ba10262e3 | ||
|
|
a28a3d155a | ||
|
|
086f43e3ca | ||
|
|
fa1f2355ea |
@@ -9,4 +9,4 @@ exclude_dir = ["var", "vendor", "docs", "node_modules"]
|
||||
|
||||
[logger]
|
||||
time = true
|
||||
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
|
||||
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
|
||||
|
||||
12
.devcontainer/Dockerfile
Normal file
12
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
FROM ghcr.io/xe/devcontainer-base/pre/go
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY go.mod go.sum package.json package-lock.json ./
|
||||
RUN apt-get update \
|
||||
&& apt-get -y install zstd brotli redis \
|
||||
&& mkdir -p /home/vscode/.local/share/fish \
|
||||
&& chown -R vscode:vscode /home/vscode/.local/share/fish \
|
||||
&& chown -R vscode:vscode /go
|
||||
|
||||
CMD ["/usr/bin/sleep", "infinity"]
|
||||
13
.devcontainer/README.md
Normal file
13
.devcontainer/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# Anubis Dev Container
|
||||
|
||||
Anubis offers a [development container](https://containers.dev/) image in order to make it easier to contribute to the project. This image is based on [Xe/devcontainer-base/go](https://github.com/Xe/devcontainer-base/tree/main/src/go), which is based on Debian Bookworm with the following customizations:
|
||||
|
||||
- [Fish](https://fishshell.com/) as the shell complete with a custom theme
|
||||
- [Go](https://go.dev) at the most recent stable version
|
||||
- [Node.js](https://nodejs.org/en) at the most recent stable version
|
||||
- [Atuin](https://atuin.sh/) to sync shell history between your host OS and the development container
|
||||
- [Docker](https://docker.com) to manage and build Anubis container images from inside the development container
|
||||
- [Ko](https://ko.build/) to build production-ready Anubis container images
|
||||
- [Neovim](https://neovim.io/) for use with Git
|
||||
|
||||
This development container is tested and known to work with [Visual Studio Code](https://code.visualstudio.com/). If you run into problems with it outside of VS Code, please file an issue and let us know what editor you are using.
|
||||
30
.devcontainer/devcontainer.json
Normal file
30
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,30 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/debian
|
||||
{
|
||||
"name": "Dev",
|
||||
"dockerComposeFile": [
|
||||
"./docker-compose.yaml"
|
||||
],
|
||||
"service": "workspace",
|
||||
"workspaceFolder": "/workspace/anubis",
|
||||
"postStartCommand": "bash ./.devcontainer/poststart.sh",
|
||||
"features": {
|
||||
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {},
|
||||
"ghcr.io/devcontainers/features/github-cli:1": {}
|
||||
},
|
||||
"initializeCommand": "mkdir -p ${localEnv:HOME}${localEnv:USERPROFILE}/.local/share/atuin",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"esbenp.prettier-vscode",
|
||||
"ms-azuretools.vscode-containers",
|
||||
"golang.go",
|
||||
"unifiedjs.vscode-mdx",
|
||||
"a-h.templ",
|
||||
"redhat.vscode-yaml",
|
||||
"hashicorp.hcl",
|
||||
"fredwangwang.vscode-hcl-format"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
26
.devcontainer/docker-compose.yaml
Normal file
26
.devcontainer/docker-compose.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
services:
|
||||
playwright:
|
||||
image: mcr.microsoft.com/playwright:v1.52.0-noble
|
||||
init: true
|
||||
network_mode: service:workspace
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- npx -y playwright@1.52.0 run-server --port 9001 --host 0.0.0.0
|
||||
|
||||
valkey:
|
||||
image: valkey/valkey:8
|
||||
pull_policy: always
|
||||
|
||||
# VS Code workspace service
|
||||
workspace:
|
||||
image: ghcr.io/techarohq/anubis/devcontainer
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
volumes:
|
||||
- ../:/workspace/anubis:cached
|
||||
environment:
|
||||
VALKEY_URL: redis://valkey:6379/0
|
||||
#entrypoint: ["/usr/bin/sleep", "infinity"]
|
||||
user: vscode
|
||||
9
.devcontainer/poststart.sh
Normal file
9
.devcontainer/poststart.sh
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
pwd
|
||||
|
||||
npm ci &
|
||||
go mod download &
|
||||
go install ./utils/cmd/... &
|
||||
|
||||
wait
|
||||
25
.dockerignore
Normal file
25
.dockerignore
Normal file
@@ -0,0 +1,25 @@
|
||||
.env
|
||||
*.deb
|
||||
*.rpm
|
||||
|
||||
# Additional package locks
|
||||
pnpm-lock.yaml
|
||||
yarn.lock
|
||||
|
||||
# Go binaries and test artifacts
|
||||
main
|
||||
*.test
|
||||
|
||||
node_modules
|
||||
|
||||
# MacOS
|
||||
.DS_store
|
||||
|
||||
# Intellij
|
||||
.idea
|
||||
|
||||
# how does this get here
|
||||
doc/VERSION
|
||||
|
||||
web/static/js/*
|
||||
!web/static/js/.gitignore
|
||||
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1 +1 @@
|
||||
web/index_templ.go linguist-generated
|
||||
**/*_templ.go linguist-generated=true
|
||||
|
||||
3
.github/actions/spelling/allow.txt
vendored
3
.github/actions/spelling/allow.txt
vendored
@@ -2,4 +2,5 @@ github
|
||||
https
|
||||
ssh
|
||||
ubuntu
|
||||
workarounds
|
||||
workarounds
|
||||
rjack
|
||||
7
.github/actions/spelling/excludes.txt
vendored
7
.github/actions/spelling/excludes.txt
vendored
@@ -83,6 +83,13 @@
|
||||
^\Q.github/FUNDING.yml\E$
|
||||
^\Q.github/workflows/spelling.yml\E$
|
||||
^data/crawlers/
|
||||
^docs/blog/tags\.yml$
|
||||
^docs/docs/user/known-instances.md$
|
||||
^docs/manifest/.*$
|
||||
^docs/static/\.nojekyll$
|
||||
^lib/policy/config/testdata/bad/unparseable\.json$
|
||||
ignore$
|
||||
robots.txt
|
||||
^lib/localization/locales/.*\.json$
|
||||
^lib/localization/.*_test.go$
|
||||
^test/.*$
|
||||
|
||||
147
.github/actions/spelling/expect.txt
vendored
147
.github/actions/spelling/expect.txt
vendored
@@ -1,143 +1,215 @@
|
||||
acs
|
||||
aeacus
|
||||
Aibrew
|
||||
alrest
|
||||
amazonbot
|
||||
anthro
|
||||
anubis
|
||||
anubistest
|
||||
Applebot
|
||||
archlinux
|
||||
asnc
|
||||
asnchecker
|
||||
asns
|
||||
aspirational
|
||||
atuin
|
||||
azuretools
|
||||
badregexes
|
||||
bbolt
|
||||
bdba
|
||||
berr
|
||||
bingbot
|
||||
Bitcoin
|
||||
blogging
|
||||
bitrate
|
||||
Bluesky
|
||||
blueskybot
|
||||
boi
|
||||
botnet
|
||||
botstopper
|
||||
BPort
|
||||
Brightbot
|
||||
broked
|
||||
byteslice
|
||||
Bytespider
|
||||
cachebuster
|
||||
cachediptoasn
|
||||
Caddyfile
|
||||
caninetools
|
||||
Cardyb
|
||||
celchecker
|
||||
CELPHASE
|
||||
celphase
|
||||
cerr
|
||||
certresolver
|
||||
cespare
|
||||
CGNAT
|
||||
cgr
|
||||
chainguard
|
||||
chall
|
||||
challengemozilla
|
||||
challengetest
|
||||
checkpath
|
||||
checkresult
|
||||
chen
|
||||
chibi
|
||||
cidranger
|
||||
ckie
|
||||
ckies
|
||||
cloudflare
|
||||
Codespaces
|
||||
confd
|
||||
connnection
|
||||
containerbuild
|
||||
coreutils
|
||||
CRDs
|
||||
Cotoyogi
|
||||
Cromite
|
||||
crt
|
||||
Cscript
|
||||
daemonizing
|
||||
DDOS
|
||||
Debian
|
||||
debrpm
|
||||
decaymap
|
||||
decompiling
|
||||
devcontainers
|
||||
Diffbot
|
||||
discordapp
|
||||
discordbot
|
||||
distros
|
||||
dnf
|
||||
dnsbl
|
||||
dnserr
|
||||
domainhere
|
||||
dracula
|
||||
dronebl
|
||||
droneblresponse
|
||||
dropin
|
||||
duckduckbot
|
||||
eerror
|
||||
ellenjoe
|
||||
emacs
|
||||
enbyware
|
||||
etld
|
||||
everyones
|
||||
evilbot
|
||||
evilsite
|
||||
expressionorlist
|
||||
externalagent
|
||||
externalfetcher
|
||||
extldflags
|
||||
facebookgo
|
||||
Factset
|
||||
fastcgi
|
||||
fediverse
|
||||
ffprobe
|
||||
finfos
|
||||
Firecrawl
|
||||
flagenv
|
||||
Fordola
|
||||
forgejo
|
||||
fsys
|
||||
fullchain
|
||||
gaissmai
|
||||
Galvus
|
||||
geoip
|
||||
geoipchecker
|
||||
gha
|
||||
gipc
|
||||
gitea
|
||||
godotenv
|
||||
goland
|
||||
gomod
|
||||
goodbot
|
||||
googlebot
|
||||
gopsutil
|
||||
govulncheck
|
||||
goyaml
|
||||
GPG
|
||||
GPT
|
||||
gptbot
|
||||
grpcprom
|
||||
grw
|
||||
Hashcash
|
||||
hashrate
|
||||
headermap
|
||||
healthcheck
|
||||
healthz
|
||||
hec
|
||||
hmc
|
||||
hostable
|
||||
htmlc
|
||||
htmx
|
||||
httpdebug
|
||||
Huawei
|
||||
hypertext
|
||||
iaskspider
|
||||
iat
|
||||
ifm
|
||||
Imagesift
|
||||
imgproxy
|
||||
impressum
|
||||
inp
|
||||
internets
|
||||
IPTo
|
||||
iptoasn
|
||||
iss
|
||||
isset
|
||||
ivh
|
||||
Jenomis
|
||||
JGit
|
||||
joho
|
||||
journalctl
|
||||
jshelter
|
||||
JWTs
|
||||
kagi
|
||||
kagibot
|
||||
keikaku
|
||||
Keyfunc
|
||||
keypair
|
||||
KHTML
|
||||
kinda
|
||||
KUBECONFIG
|
||||
lcj
|
||||
ldflags
|
||||
letsencrypt
|
||||
Lexentale
|
||||
lgbt
|
||||
licend
|
||||
licstart
|
||||
lightpanda
|
||||
LIMSA
|
||||
limsa
|
||||
Linting
|
||||
linuxbrew
|
||||
LLU
|
||||
loadbalancer
|
||||
lol
|
||||
LOMINSA
|
||||
lominsa
|
||||
maintainership
|
||||
malware
|
||||
mcr
|
||||
memes
|
||||
metarefresh
|
||||
metrix
|
||||
mimi
|
||||
minica
|
||||
Minfilia
|
||||
mistralai
|
||||
Mojeek
|
||||
mojeekbot
|
||||
mozilla
|
||||
nbf
|
||||
nepeat
|
||||
netsurf
|
||||
nginx
|
||||
nicksnyder
|
||||
nobots
|
||||
NONINFRINGEMENT
|
||||
nosleep
|
||||
OCOB
|
||||
ogtags
|
||||
onionservice
|
||||
omgili
|
||||
omgilibot
|
||||
openai
|
||||
opengraph
|
||||
openrc
|
||||
oswald
|
||||
pag
|
||||
palemoon
|
||||
Pangu
|
||||
parseable
|
||||
passthrough
|
||||
Patreon
|
||||
@@ -149,71 +221,105 @@ pipefail
|
||||
pki
|
||||
podkova
|
||||
podman
|
||||
poststart
|
||||
prebaked
|
||||
privkey
|
||||
promauto
|
||||
promhttp
|
||||
proofofwork
|
||||
publicsuffix
|
||||
pwcmd
|
||||
pwuser
|
||||
qualys
|
||||
qwant
|
||||
qwantbot
|
||||
rac
|
||||
rawler
|
||||
rcvar
|
||||
rdb
|
||||
redhat
|
||||
redir
|
||||
redirectscheme
|
||||
relayd
|
||||
refactors
|
||||
reputational
|
||||
reqmeta
|
||||
risc
|
||||
ruleset
|
||||
runlevels
|
||||
RUnlock
|
||||
runtimedir
|
||||
sas
|
||||
sasl
|
||||
Scumm
|
||||
searchbot
|
||||
searx
|
||||
sebest
|
||||
secretplans
|
||||
selfsigned
|
||||
Semrush
|
||||
Seo
|
||||
setsebool
|
||||
shellcheck
|
||||
shirou
|
||||
Sidetrade
|
||||
simprint
|
||||
sitemap
|
||||
sls
|
||||
sni
|
||||
Sourceware
|
||||
Spambot
|
||||
sparkline
|
||||
spyderbot
|
||||
srv
|
||||
stackoverflow
|
||||
startprecmd
|
||||
stoppostcmd
|
||||
storetest
|
||||
subgrid
|
||||
subr
|
||||
subrequest
|
||||
SVCNAME
|
||||
tagline
|
||||
tarballs
|
||||
tarrif
|
||||
tbn
|
||||
tbr
|
||||
techaro
|
||||
techarohq
|
||||
templ
|
||||
templruntime
|
||||
testarea
|
||||
torproject
|
||||
Thancred
|
||||
thoth
|
||||
thothmock
|
||||
Tik
|
||||
Timpibot
|
||||
traefik
|
||||
unixhttpd
|
||||
uberspace
|
||||
Unbreak
|
||||
unbreakdocker
|
||||
unifiedjs
|
||||
unmarshal
|
||||
unparseable
|
||||
uvx
|
||||
UXP
|
||||
valkey
|
||||
Varis
|
||||
Velen
|
||||
vendored
|
||||
vhosts
|
||||
videotest
|
||||
VKE
|
||||
Vultr
|
||||
waitloop
|
||||
weblate
|
||||
webmaster
|
||||
webpage
|
||||
websecure
|
||||
websites
|
||||
Webzio
|
||||
wildbase
|
||||
withthothmock
|
||||
wordpress
|
||||
Workaround
|
||||
workdir
|
||||
xcaddy
|
||||
wpbot
|
||||
Xeact
|
||||
xeiaso
|
||||
xeserv
|
||||
@@ -222,6 +328,7 @@ xess
|
||||
xff
|
||||
XForwarded
|
||||
XNG
|
||||
XOB
|
||||
XReal
|
||||
yae
|
||||
YAMLTo
|
||||
@@ -229,6 +336,8 @@ yeet
|
||||
yeetfile
|
||||
yourdomain
|
||||
yoursite
|
||||
yyz
|
||||
Zenos
|
||||
zizmor
|
||||
zombocom
|
||||
zos
|
||||
|
||||
@@ -273,14 +273,6 @@
|
||||
# Most people only have two hands. Reword.
|
||||
\b(?i)on the third hand\b
|
||||
|
||||
# Should be `Open Graph`
|
||||
# unless talking about a specific Open Graph implementation:
|
||||
# - Java
|
||||
# - Node
|
||||
# - Py
|
||||
# - Ruby
|
||||
\bOpenGraph\b
|
||||
|
||||
# Should be `OpenShift`
|
||||
\bOpenshift\b
|
||||
|
||||
|
||||
2
.github/actions/spelling/patterns.txt
vendored
2
.github/actions/spelling/patterns.txt
vendored
@@ -131,4 +131,4 @@ go install(?:\s+[a-z]+\.[-@\w/.]+)+
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# microsoft
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
|
||||
31
.github/workflows/docker-pr.yml
vendored
31
.github/workflows/docker-pr.yml
vendored
@@ -2,7 +2,7 @@ name: Docker image builds (pull requests)
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
branches: ["main"]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -11,7 +11,32 @@ permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
buildx-bake:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/bake-action@76f9fa3a758507623da19f6092dc4089a7e61592 # v6.6.0
|
||||
with:
|
||||
source: .
|
||||
push: true
|
||||
sbom: true
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
set: |
|
||||
osiris.tags=ttl.sh/techaro/pr-${{ github.event.number }}/osiris:24h
|
||||
|
||||
containerbuild:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -22,7 +47,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
44
.github/workflows/docker.yml
vendored
44
.github/workflows/docker.yml
vendored
@@ -3,8 +3,8 @@ name: Docker image builds
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
tags: [ "v*" ]
|
||||
branches: ["main"]
|
||||
tags: ["v*"]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -17,7 +17,38 @@ permissions:
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
build:
|
||||
buildx-bake:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/bake-action@76f9fa3a758507623da19f6092dc4089a7e61592 # v6.6.0
|
||||
with:
|
||||
source: .
|
||||
push: true
|
||||
sbom: true
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
set: ""
|
||||
|
||||
containerbuild:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -32,7 +63,7 @@ jobs:
|
||||
echo "IMAGE=ghcr.io/${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
@@ -55,7 +86,7 @@ jobs:
|
||||
run: |
|
||||
brew bundle
|
||||
|
||||
- name: Log into registry
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
@@ -77,9 +108,8 @@ jobs:
|
||||
DOCKER_REPO: ${{ env.IMAGE }}
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@db473fddc028af60658334401dc6fa3ffd8669fd # v2.3.0
|
||||
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
|
||||
with:
|
||||
subject-name: ${{ env.IMAGE }}
|
||||
subject-digest: ${{ steps.build.outputs.digest }}
|
||||
|
||||
15
.github/workflows/docs-deploy.yml
vendored
15
.github/workflows/docs-deploy.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
@@ -36,10 +36,13 @@ jobs:
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
|
||||
main
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
|
||||
with:
|
||||
context: ./docs
|
||||
cache-to: type=gha
|
||||
@@ -49,15 +52,15 @@ jobs:
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
args: apply -k docs/manifest
|
||||
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@f632a31512a74cb35940627c49c20f67723cbaaf # v1.33.1
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
|
||||
11
.github/workflows/docs-test.yml
vendored
11
.github/workflows/docs-test.yml
vendored
@@ -2,7 +2,7 @@ name: Docs test build
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -18,17 +18,20 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository }}/docs
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
|
||||
main
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
|
||||
with:
|
||||
context: ./docs
|
||||
cache-to: type=gha
|
||||
|
||||
4
.github/workflows/go.yml
vendored
4
.github/workflows/go.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
run: npm run test
|
||||
|
||||
- name: Lint with staticcheck
|
||||
uses: dominikh/staticcheck-action@fe1dd0c3658873b46f8c9bb3291096a617310ca6 # v1.3.1
|
||||
uses: dominikh/staticcheck-action@024238d2898c874f26d723e7d0ff4308c35589a2 # v1.4.0
|
||||
with:
|
||||
version: "latest"
|
||||
|
||||
|
||||
117
.github/workflows/package-builds-stable.yml
vendored
117
.github/workflows/package-builds-stable.yml
vendored
@@ -1,8 +1,9 @@
|
||||
name: Package builds (stable)
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
# release:
|
||||
# types: [published]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -13,67 +14,67 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
/home/linuxbrew/.linuxbrew/bin
|
||||
/home/linuxbrew/.linuxbrew/etc
|
||||
/home/linuxbrew/.linuxbrew/include
|
||||
/home/linuxbrew/.linuxbrew/lib
|
||||
/home/linuxbrew/.linuxbrew/opt
|
||||
/home/linuxbrew/.linuxbrew/sbin
|
||||
/home/linuxbrew/.linuxbrew/share
|
||||
/home/linuxbrew/.linuxbrew/var
|
||||
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-homebrew-cellar-
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
/home/linuxbrew/.linuxbrew/bin
|
||||
/home/linuxbrew/.linuxbrew/etc
|
||||
/home/linuxbrew/.linuxbrew/include
|
||||
/home/linuxbrew/.linuxbrew/lib
|
||||
/home/linuxbrew/.linuxbrew/opt
|
||||
/home/linuxbrew/.linuxbrew/sbin
|
||||
/home/linuxbrew/.linuxbrew/share
|
||||
/home/linuxbrew/.linuxbrew/var
|
||||
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-homebrew-cellar-
|
||||
|
||||
- name: Install Brew dependencies
|
||||
run: |
|
||||
brew bundle
|
||||
- name: Install Brew dependencies
|
||||
run: |
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-golang-
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-golang-
|
||||
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
|
||||
- name: Build Packages
|
||||
run: |
|
||||
go tool yeet
|
||||
- name: Build Packages
|
||||
run: |
|
||||
go tool yeet
|
||||
|
||||
- name: Upload released artifacts
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.TOKEN }}
|
||||
RELEASE_VERSION: ${{github.event.release.tag_name}}
|
||||
shell: bash
|
||||
run: |
|
||||
RELEASE="${RELEASE_VERSION}"
|
||||
cd var
|
||||
for file in *; do
|
||||
gh release upload $RELEASE $file
|
||||
done
|
||||
- name: Upload released artifacts
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.TOKEN }}
|
||||
RELEASE_VERSION: ${{github.event.release.tag_name}}
|
||||
shell: bash
|
||||
run: |
|
||||
RELEASE="${RELEASE_VERSION}"
|
||||
cd var
|
||||
for file in *; do
|
||||
gh release upload $RELEASE $file
|
||||
done
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
|
||||
45
.github/workflows/smoke-tests.yml
vendored
Normal file
45
.github/workflows/smoke-tests.yml
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
name: Smoke tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
smoke-test:
|
||||
strategy:
|
||||
matrix:
|
||||
test:
|
||||
- git-clone
|
||||
- git-push
|
||||
- healthcheck
|
||||
- i18n
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: latest
|
||||
|
||||
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
|
||||
|
||||
- name: Install utils
|
||||
run: |
|
||||
go install ./utils/cmd/...
|
||||
|
||||
- name: Run test
|
||||
run: |
|
||||
cd test/${{ matrix.test }}
|
||||
backoff-retry --try-count 10 ./test.sh
|
||||
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
Normal file
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
name: Regenerate ssh ci runner image
|
||||
|
||||
on:
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
schedule:
|
||||
- cron: "0 0 1,8,15,22 * *"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: write
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
ssh-ci-rebuild:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
- name: Build and push
|
||||
run: |
|
||||
cd ./test/ssh-ci
|
||||
docker buildx bake --push
|
||||
43
.github/workflows/ssh-ci.yml
vendored
Normal file
43
.github/workflows/ssh-ci.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
name: SSH CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
ssh:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
host:
|
||||
- ubuntu@riscv64.techaro.lol
|
||||
- ci@ppc64le.techaro.lol
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install CI target SSH key
|
||||
uses: shimataro/ssh-key-action@d4fffb50872869abe2d9a9098a6d9c5aa7d16be4 # v2.7.0
|
||||
with:
|
||||
key: ${{ secrets.CI_SSH_KEY }}
|
||||
name: id_rsa
|
||||
known_hosts: ${{ secrets.CI_SSH_KNOWN_HOSTS }}
|
||||
|
||||
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- name: Run CI
|
||||
run: go run ./utils/cmd/backoff-retry bash test/ssh-ci/rigging.sh ${{ matrix.host }}
|
||||
env:
|
||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||
4
.github/workflows/zizmor.yml
vendored
4
.github/workflows/zizmor.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
|
||||
uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1
|
||||
|
||||
- name: Run zizmor 🌈
|
||||
run: uvx zizmor --format sarif . > results.sarif
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18
|
||||
uses: github/codeql-action/upload-sarif@181d5eefc20863364f96762470ba6f862bdef56b # v3.29.2
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -20,3 +20,5 @@ node_modules
|
||||
|
||||
# how does this get here
|
||||
doc/VERSION
|
||||
|
||||
web/static/locales/*.json
|
||||
12
.vscode/extensions.json
vendored
Normal file
12
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"esbenp.prettier-vscode",
|
||||
"ms-azuretools.vscode-containers",
|
||||
"golang.go",
|
||||
"unifiedjs.vscode-mdx",
|
||||
"a-h.templ",
|
||||
"redhat.vscode-yaml",
|
||||
"hashicorp.hcl",
|
||||
"fredwangwang.vscode-hcl-format"
|
||||
]
|
||||
}
|
||||
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch Package",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "auto",
|
||||
"program": "${fileDirname}"
|
||||
},
|
||||
{
|
||||
"name": "Anubis [dev]",
|
||||
"command": "npm run dev",
|
||||
"request": "launch",
|
||||
"type": "node-terminal"
|
||||
},
|
||||
{
|
||||
"name": "Start Docs",
|
||||
"command": "cd docs && npm ci && npm run start",
|
||||
"request": "launch",
|
||||
"type": "node-terminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
19
.vscode/settings.json
vendored
19
.vscode/settings.json
vendored
@@ -11,5 +11,24 @@
|
||||
"zig": false,
|
||||
"javascript": false,
|
||||
"properties": false
|
||||
},
|
||||
"[markdown]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[mdx]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[nunjucks]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"cSpell.enabledFileTypes": {
|
||||
"mdx": true,
|
||||
"md": true
|
||||
}
|
||||
}
|
||||
|
||||
2
Makefile
2
Makefile
@@ -18,6 +18,7 @@ assets: deps
|
||||
|
||||
build: assets
|
||||
$(GO) build -o ./var/anubis ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy ./cmd/robots2policy
|
||||
@echo "Anubis is now built to ./var/anubis"
|
||||
|
||||
lint: assets
|
||||
@@ -27,6 +28,7 @@ lint: assets
|
||||
|
||||
prebaked-build:
|
||||
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/robots2policy
|
||||
|
||||
test: assets
|
||||
$(GO) test ./...
|
||||
|
||||
47
README.md
47
README.md
@@ -9,19 +9,56 @@
|
||||

|
||||

|
||||

|
||||
[](https://github.com/sponsors/Xe)
|
||||
|
||||
## Sponsors
|
||||
|
||||
Anubis is brought to you by sponsors and donors like:
|
||||
|
||||
[](https://distrust.co?utm_campaign=github&utm_medium=referral&utm_content=anubis)
|
||||
[](https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh)
|
||||
[](https://canine.tools?utm_campaign=github&utm_medium=referral&utm_content=anubis)
|
||||
[](https://weblate.org/?utm_campaign=github&utm_medium=referral&utm_content=anubis)
|
||||
### Diamond Tier
|
||||
|
||||
<a href="https://www.raptorcs.com/content/base/products.html">
|
||||
<img src="./docs/static/img/sponsors/raptor-computing-logo.webp" alt="Raptor Computing Systems" height=64 />
|
||||
</a>
|
||||
|
||||
### Gold Tier
|
||||
|
||||
<a href="https://distrust.co?utm_campaign=github&utm_medium=referral&utm_content=anubis">
|
||||
<img src="./docs/static/img/sponsors/distrust-logo.webp" alt="Distrust" height="64">
|
||||
</a>
|
||||
<a href="https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh">
|
||||
<img src="./docs/static/img/sponsors/terminal-trove.webp" alt="Terminal Trove" height="64">
|
||||
</a>
|
||||
<a href="https://canine.tools?utm_campaign=github&utm_medium=referral&utm_content=anubis">
|
||||
<img src="./docs/static/img/sponsors/caninetools-logo.webp" alt="canine.tools" height="64">
|
||||
</a>
|
||||
<a href="https://weblate.org/">
|
||||
<img src="./docs/static/img/sponsors/weblate-logo.webp" alt="Weblate" height="64">
|
||||
</a>
|
||||
<a href="https://uberspace.de/">
|
||||
<img src="./docs/static/img/sponsors/uberspace-logo.webp" alt="Uberspace" height="64">
|
||||
</a>
|
||||
<a href="https://wildbase.xyz/">
|
||||
<img src="./docs/static/img/sponsors/wildbase-logo.webp" alt="Wildbase" height="64">
|
||||
</a>
|
||||
<a href="https://emma.pet">
|
||||
<img
|
||||
src="./docs/static/img/sponsors/nepeat-logo.webp"
|
||||
alt="Cat eyes over the word Emma in a serif font"
|
||||
height="64"
|
||||
/>
|
||||
</a>
|
||||
<a href="https://fabulous.systems/">
|
||||
<img
|
||||
src="./docs/static/img/sponsors/fabulous-systems.webp"
|
||||
alt="fabulous.systems"
|
||||
height="64"
|
||||
/>
|
||||
</a>
|
||||
|
||||
## Overview
|
||||
|
||||
Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a proof-of-work challenge in order to protect upstream resources from scraper bots.
|
||||
Anubis is a Web AI Firewall Utility that [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using one or more challenges in order to protect upstream resources from scraper bots.
|
||||
|
||||
This program is designed to help protect the small internet from the endless storm of requests that flood in from AI companies. Anubis is as lightweight as possible to ensure that everyone can afford to protect the communities closest to them.
|
||||
|
||||
|
||||
13
anubis.go
13
anubis.go
@@ -11,12 +11,11 @@ var Version = "devel"
|
||||
|
||||
// CookieName is the name of the cookie that Anubis uses in order to validate
|
||||
// access.
|
||||
const CookieName = "techaro.lol-anubis-auth"
|
||||
var CookieName = "techaro.lol-anubis-auth"
|
||||
|
||||
// WithDomainCookieName is the name that is prepended to the per-domain cookie used when COOKIE_DOMAIN is set.
|
||||
const WithDomainCookieName = "techaro.lol-anubis-auth-for-"
|
||||
|
||||
const TestCookieName = "techaro.lol-anubis-cookie-test-if-you-block-this-anubis-wont-work"
|
||||
// TestCookieName is the name of the cookie that Anubis uses in order to check
|
||||
// if cookies are enabled on the client's browser.
|
||||
var TestCookieName = "techaro.lol-anubis-cookie-verification"
|
||||
|
||||
// CookieDefaultExpirationTime is the amount of time before the cookie/JWT expires.
|
||||
const CookieDefaultExpirationTime = 7 * 24 * time.Hour
|
||||
@@ -33,3 +32,7 @@ const APIPrefix = "/.within.website/x/cmd/anubis/api/"
|
||||
// DefaultDifficulty is the default "difficulty" (number of leading zeroes)
|
||||
// that must be met by the client in order to pass the challenge.
|
||||
const DefaultDifficulty = 4
|
||||
|
||||
// ForcedLanguage, if set, is the language used instead of the one from the
|
||||
// request's Accept-Language header.
|
||||
var ForcedLanguage = ""
|
||||
|
||||
@@ -30,12 +30,15 @@ import (
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/thoth"
|
||||
libanubis "github.com/TecharoHQ/anubis/lib"
|
||||
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/web"
|
||||
"github.com/facebookgo/flagenv"
|
||||
_ "github.com/joho/godotenv/autoload"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -44,8 +47,13 @@ var (
|
||||
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
|
||||
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
|
||||
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
|
||||
cookieDynamicDomain = flag.Bool("cookie-dynamic-domain", false, "if set, automatically set the cookie Domain value based on the request domain")
|
||||
cookieExpiration = flag.Duration("cookie-expiration-time", anubis.CookieDefaultExpirationTime, "The amount of time the authorization cookie is valid for")
|
||||
cookiePrefix = flag.String("cookie-prefix", "techaro.lol-anubis", "prefix for browser cookies created by Anubis")
|
||||
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
|
||||
forcedLanguage = flag.String("forced-language", "", "if set, this language is being used instead of the one from the request's Accept-Language header")
|
||||
hs512Secret = flag.String("hs512-secret", "", "secret used to sign JWTs, uses ed25519 if not set")
|
||||
cookieSecure = flag.Bool("cookie-secure", true, "if true, sets the secure flag on Anubis cookies")
|
||||
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
|
||||
ed25519PrivateKeyHexFile = flag.String("ed25519-private-key-hex-file", "", "file name containing value for ed25519-private-key-hex")
|
||||
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
|
||||
@@ -55,6 +63,7 @@ var (
|
||||
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
|
||||
redirectDomains = flag.String("redirect-domains", "", "list of domains separated by commas which anubis is allowed to redirect to. Leaving this unset allows any domain.")
|
||||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
stripBasePrefix = flag.Bool("strip-base-prefix", false, "if true, strips the base prefix from requests forwarded to the target server")
|
||||
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to, set to an empty string to disable proxying when only using auth request")
|
||||
targetSNI = flag.String("target-sni", "", "if set, the value of the TLS handshake hostname when forwarding requests to the target")
|
||||
targetHost = flag.String("target-host", "", "if set, the value of the Host header when forwarding requests to the target")
|
||||
@@ -67,6 +76,12 @@ var (
|
||||
ogCacheConsiderHost = flag.Bool("og-cache-consider-host", false, "enable or disable the use of the host in the Open Graph tag cache")
|
||||
extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder")
|
||||
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
|
||||
versionFlag = flag.Bool("version", false, "print Anubis version")
|
||||
xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For")
|
||||
|
||||
thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to")
|
||||
thothURL = flag.String("thoth-url", "", "if set, URL for Thoth, the IP reputation database for Anubis")
|
||||
thothToken = flag.String("thoth-token", "", "if set, API token for Thoth, the IP reputation database for Anubis")
|
||||
)
|
||||
|
||||
func keyFromHex(value string) (ed25519.PrivateKey, error) {
|
||||
@@ -83,7 +98,7 @@ func keyFromHex(value string) (ed25519.PrivateKey, error) {
|
||||
}
|
||||
|
||||
func doHealthCheck() error {
|
||||
resp, err := http.Get("http://localhost" + *metricsBind + anubis.BasePrefix + "/metrics")
|
||||
resp, err := http.Get("http://localhost" + *metricsBind + "/healthz")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch metrics: %w", err)
|
||||
}
|
||||
@@ -96,8 +111,41 @@ func doHealthCheck() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseBindNetFromAddr determines the bind network and address from the given address.
|
||||
func parseBindNetFromAddr(address string) (string, string) {
|
||||
defaultScheme := "http://"
|
||||
if !strings.Contains(address, "://") {
|
||||
if strings.HasPrefix(address, ":") {
|
||||
address = defaultScheme + "localhost" + address
|
||||
} else {
|
||||
address = defaultScheme + address
|
||||
}
|
||||
}
|
||||
|
||||
bindUri, err := url.Parse(address)
|
||||
if err != nil {
|
||||
log.Fatal(fmt.Errorf("failed to parse bind URL: %w", err))
|
||||
}
|
||||
|
||||
switch bindUri.Scheme {
|
||||
case "unix":
|
||||
return "unix", bindUri.Path
|
||||
case "tcp", "http", "https":
|
||||
return "tcp", bindUri.Host
|
||||
default:
|
||||
log.Fatal(fmt.Errorf("unsupported network scheme %s in address %s", bindUri.Scheme, address))
|
||||
}
|
||||
return "", address
|
||||
}
|
||||
|
||||
func setupListener(network string, address string) (net.Listener, string) {
|
||||
formattedAddress := ""
|
||||
|
||||
if network == "" {
|
||||
// keep compatibility
|
||||
network, address = parseBindNetFromAddr(address)
|
||||
}
|
||||
|
||||
switch network {
|
||||
case "unix":
|
||||
formattedAddress = "unix:" + address
|
||||
@@ -184,25 +232,25 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu
|
||||
return rp, nil
|
||||
}
|
||||
|
||||
func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) {
|
||||
ticker := time.NewTicker(1 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
s.CleanupDecayMap()
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flagenv.Parse()
|
||||
flag.Parse()
|
||||
|
||||
if *versionFlag {
|
||||
fmt.Println("Anubis", anubis.Version)
|
||||
return
|
||||
}
|
||||
|
||||
internal.InitSlog(*slogLevel)
|
||||
internal.SetHealth("anubis", healthv1.HealthCheckResponse_NOT_SERVING)
|
||||
|
||||
if *healthcheck {
|
||||
log.Println("running healthcheck")
|
||||
if err := doHealthCheck(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if *extractResources != "" {
|
||||
if err := extractEmbedFS(data.BotPolicies, ".", *extractResources); err != nil {
|
||||
@@ -215,6 +263,17 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
// install signal handler
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
wg := new(sync.WaitGroup)
|
||||
|
||||
if *metricsBind != "" {
|
||||
wg.Add(1)
|
||||
go metricsServer(ctx, wg.Done)
|
||||
}
|
||||
|
||||
var rp http.Handler
|
||||
// When running Anubis via systemd with environment variables, it is not possible to set target to an empty string, only to a space.
|
||||
if strings.TrimSpace(*target) != "" {
|
||||
@@ -225,7 +284,27 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
policy, err := libanubis.LoadPoliciesOrDefault(*policyFname, *challengeDifficulty)
|
||||
if *cookieDomain != "" && *cookieDynamicDomain {
|
||||
log.Fatalf("you can't set COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN at the same time")
|
||||
}
|
||||
|
||||
// Thoth configuration
|
||||
switch {
|
||||
case *thothURL != "" && *thothToken == "":
|
||||
slog.Warn("THOTH_URL is set but no THOTH_TOKEN is set")
|
||||
case *thothURL == "" && *thothToken != "":
|
||||
slog.Warn("THOTH_TOKEN is set but no THOTH_URL is set")
|
||||
case *thothURL != "" && *thothToken != "":
|
||||
slog.Debug("connecting to Thoth")
|
||||
thothClient, err := thoth.New(ctx, *thothURL, *thothToken, *thothInsecure)
|
||||
if err != nil {
|
||||
log.Fatalf("can't dial thoth at %s: %v", *thothURL, err)
|
||||
}
|
||||
|
||||
ctx = thoth.With(ctx, thothClient)
|
||||
}
|
||||
|
||||
policy, err := libanubis.LoadPoliciesOrDefault(ctx, *policyFname, *challengeDifficulty)
|
||||
if err != nil {
|
||||
log.Fatalf("can't parse policy file: %v", err)
|
||||
}
|
||||
@@ -253,12 +332,20 @@ func main() {
|
||||
} else if strings.HasSuffix(*basePrefix, "/") {
|
||||
log.Fatalf("[misconfiguration] base-prefix must not end with a slash")
|
||||
}
|
||||
if *stripBasePrefix && *basePrefix == "" {
|
||||
log.Fatalf("[misconfiguration] strip-base-prefix is set to true, but base-prefix is not set, " +
|
||||
"this may result in unexpected behavior")
|
||||
}
|
||||
|
||||
var priv ed25519.PrivateKey
|
||||
if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
|
||||
var ed25519Priv ed25519.PrivateKey
|
||||
if *hs512Secret != "" && (*ed25519PrivateKeyHex != "" || *ed25519PrivateKeyHexFile != "") {
|
||||
log.Fatal("do not specify both HS512 and ED25519 secrets")
|
||||
} else if *hs512Secret != "" {
|
||||
ed25519Priv = ed25519.PrivateKey(*hs512Secret)
|
||||
} else if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
|
||||
log.Fatal("do not specify both ED25519_PRIVATE_KEY_HEX and ED25519_PRIVATE_KEY_HEX_FILE")
|
||||
} else if *ed25519PrivateKeyHex != "" {
|
||||
priv, err = keyFromHex(*ed25519PrivateKeyHex)
|
||||
ed25519Priv, err = keyFromHex(*ed25519PrivateKeyHex)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse and validate ED25519_PRIVATE_KEY_HEX: %v", err)
|
||||
}
|
||||
@@ -268,12 +355,12 @@ func main() {
|
||||
log.Fatalf("failed to read ED25519_PRIVATE_KEY_HEX_FILE %s: %v", *ed25519PrivateKeyHexFile, err)
|
||||
}
|
||||
|
||||
priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
|
||||
ed25519Priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse and validate content of ED25519_PRIVATE_KEY_HEX_FILE: %v", err)
|
||||
}
|
||||
} else {
|
||||
_, priv, err = ed25519.GenerateKey(rand.Reader)
|
||||
_, ed25519Priv, err = ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to generate ed25519 key: %v", err)
|
||||
}
|
||||
@@ -295,42 +382,47 @@ func main() {
|
||||
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
|
||||
}
|
||||
|
||||
anubis.CookieName = *cookiePrefix + "-auth"
|
||||
anubis.TestCookieName = *cookiePrefix + "-cookie-verification"
|
||||
anubis.ForcedLanguage = *forcedLanguage
|
||||
|
||||
// If OpenGraph configuration values are not set in the config file, use the
|
||||
// values from flags / envvars.
|
||||
if !policy.OpenGraph.Enabled {
|
||||
policy.OpenGraph.Enabled = *ogPassthrough
|
||||
policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
|
||||
policy.OpenGraph.TimeToLive = *ogTimeToLive
|
||||
policy.OpenGraph.Override = map[string]string{}
|
||||
}
|
||||
|
||||
s, err := libanubis.New(libanubis.Options{
|
||||
BasePrefix: *basePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
OGPassthrough: *ogPassthrough,
|
||||
OGTimeToLive: *ogTimeToLive,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
OGCacheConsidersHost: *ogCacheConsiderHost,
|
||||
BasePrefix: *basePrefix,
|
||||
StripBasePrefix: *stripBasePrefix,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
ED25519PrivateKey: ed25519Priv,
|
||||
HS512Secret: []byte(*hs512Secret),
|
||||
CookieDomain: *cookieDomain,
|
||||
CookieDynamicDomain: *cookieDynamicDomain,
|
||||
CookieExpiration: *cookieExpiration,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
RedirectDomains: redirectDomainsList,
|
||||
Target: *target,
|
||||
WebmasterEmail: *webmasterEmail,
|
||||
OpenGraph: policy.OpenGraph,
|
||||
CookieSecure: *cookieSecure,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
}
|
||||
|
||||
wg := new(sync.WaitGroup)
|
||||
// install signal handler
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
if *metricsBind != "" {
|
||||
wg.Add(1)
|
||||
go metricsServer(ctx, wg.Done)
|
||||
}
|
||||
go startDecayMapCleanup(ctx, s)
|
||||
|
||||
var h http.Handler
|
||||
h = s
|
||||
h = internal.RemoteXRealIP(*useRemoteAddress, *bindNetwork, h)
|
||||
h = internal.XForwardedForToXRealIP(h)
|
||||
h = internal.XForwardedForUpdate(h)
|
||||
h = internal.XForwardedForUpdate(*xffStripPrivate, h)
|
||||
h = internal.JA4H(h)
|
||||
|
||||
srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()}
|
||||
listener, listenerUrl := setupListener(*bindNetwork, *bind)
|
||||
@@ -359,6 +451,8 @@ func main() {
|
||||
}
|
||||
}()
|
||||
|
||||
internal.SetHealth("anubis", healthv1.HealthCheckResponse_SERVING)
|
||||
|
||||
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@@ -369,20 +463,30 @@ func metricsServer(ctx context.Context, done func()) {
|
||||
defer done()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle(anubis.BasePrefix+"/metrics", promhttp.Handler())
|
||||
mux.Handle("/metrics", promhttp.Handler())
|
||||
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
|
||||
st, ok := internal.GetHealth("anubis")
|
||||
if !ok {
|
||||
slog.Error("health service anubis does not exist, file a bug")
|
||||
}
|
||||
|
||||
switch st {
|
||||
case healthv1.HealthCheckResponse_NOT_SERVING:
|
||||
http.Error(w, "NOT OK", http.StatusInternalServerError)
|
||||
return
|
||||
case healthv1.HealthCheckResponse_SERVING:
|
||||
fmt.Fprintln(w, "OK")
|
||||
return
|
||||
default:
|
||||
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
|
||||
return
|
||||
}
|
||||
})
|
||||
|
||||
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
|
||||
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
|
||||
slog.Debug("listening for metrics", "url", metricsUrl)
|
||||
|
||||
if *healthcheck {
|
||||
log.Println("running healthcheck")
|
||||
if err := doHealthCheck(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
c, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
@@ -414,11 +518,11 @@ func extractEmbedFS(fsys embed.FS, root string, destDir string) error {
|
||||
return os.MkdirAll(destPath, 0o700)
|
||||
}
|
||||
|
||||
data, err := fs.ReadFile(fsys, path)
|
||||
embeddedData, err := fs.ReadFile(fsys, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(destPath, data, 0o644)
|
||||
return os.WriteFile(destPath, embeddedData, 0o644)
|
||||
})
|
||||
}
|
||||
|
||||
39
cmd/osiris/internal/config/bind.go
Normal file
39
cmd/osiris/internal/config/bind.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInvalidHostpost = errors.New("bind: invalid host:port")
|
||||
)
|
||||
|
||||
type Bind struct {
|
||||
HTTP string `hcl:"http"`
|
||||
HTTPS string `hcl:"https"`
|
||||
Metrics string `hcl:"metrics"`
|
||||
}
|
||||
|
||||
func (b *Bind) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if _, _, err := net.SplitHostPort(b.HTTP); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.HTTP, err))
|
||||
}
|
||||
|
||||
if _, _, err := net.SplitHostPort(b.HTTPS); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.HTTPS, err))
|
||||
}
|
||||
|
||||
if _, _, err := net.SplitHostPort(b.Metrics); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidHostpost, b.Metrics, err))
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
55
cmd/osiris/internal/config/bind_test.go
Normal file
55
cmd/osiris/internal/config/bind_test.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBindValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
precondition func(t *testing.T)
|
||||
bind Bind
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic",
|
||||
precondition: nil,
|
||||
bind: Bind{
|
||||
HTTP: ":8081",
|
||||
HTTPS: ":8082",
|
||||
Metrics: ":8083",
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "invalid ports",
|
||||
precondition: func(t *testing.T) {
|
||||
ln, err := net.Listen("tcp", ":8081")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() { ln.Close() })
|
||||
},
|
||||
bind: Bind{
|
||||
HTTP: "",
|
||||
HTTPS: "",
|
||||
Metrics: "",
|
||||
},
|
||||
err: ErrInvalidHostpost,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.precondition != nil {
|
||||
tt.precondition(t)
|
||||
}
|
||||
|
||||
if err := tt.bind.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong error from validation function")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
31
cmd/osiris/internal/config/config.go
Normal file
31
cmd/osiris/internal/config/config.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Toplevel struct {
|
||||
Bind Bind `hcl:"bind,block"`
|
||||
Domains []Domain `hcl:"domain,block"`
|
||||
}
|
||||
|
||||
func (t *Toplevel) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if err := t.Bind.Valid(); err != nil {
|
||||
errs = append(errs, fmt.Errorf("invalid bind block:\n%w", err))
|
||||
}
|
||||
|
||||
for _, d := range t.Domains {
|
||||
if err := d.Valid(); err != nil {
|
||||
errs = append(errs, fmt.Errorf("when parsing domain %s: %w", d.Name, err))
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("invalid configuration file:\n%w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
66
cmd/osiris/internal/config/domain.go
Normal file
66
cmd/osiris/internal/config/domain.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInvalidDomainName = errors.New("domain: name is invalid")
|
||||
ErrInvalidDomainTLSConfig = errors.New("domain: TLS config is invalid")
|
||||
ErrInvalidURL = errors.New("invalid URL")
|
||||
ErrInvalidURLScheme = errors.New("URL has invalid scheme")
|
||||
)
|
||||
|
||||
type Domain struct {
|
||||
Name string `hcl:"name,label"`
|
||||
TLS TLS `hcl:"tls,block"`
|
||||
Target string `hcl:"target"`
|
||||
InsecureSkipVerify bool `hcl:"insecure_skip_verify,optional"`
|
||||
HealthTarget string `hcl:"health_target"`
|
||||
}
|
||||
|
||||
func (d Domain) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if _, err := idna.Lookup.ToASCII(d.Name); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w %q: %w", ErrInvalidDomainName, d.Name, err))
|
||||
}
|
||||
|
||||
if err := d.TLS.Valid(); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w: %w", ErrInvalidDomainTLSConfig, err))
|
||||
}
|
||||
|
||||
if err := isURLValid(d.Target); err != nil {
|
||||
errs = append(errs, fmt.Errorf("target has %w %q: %w", ErrInvalidURL, d.Target, err))
|
||||
}
|
||||
|
||||
if err := isURLValid(d.HealthTarget); err != nil {
|
||||
errs = append(errs, fmt.Errorf("health_target has %w %q: %w", ErrInvalidURL, d.HealthTarget, err))
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isURLValid(input string) error {
|
||||
u, err := url.Parse(input)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch u.Scheme {
|
||||
case "http", "https", "h2c", "unix":
|
||||
// do nothing
|
||||
default:
|
||||
return fmt.Errorf("%w %s has scheme %s (want http, https, h2c, unix)", ErrInvalidURLScheme, input, u.Scheme)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
89
cmd/osiris/internal/config/domain_test.go
Normal file
89
cmd/osiris/internal/config/domain_test.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDomainValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input Domain
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "simple happy path",
|
||||
input: Domain{
|
||||
Name: "anubis.techaro.lol",
|
||||
TLS: TLS{
|
||||
Cert: "./testdata/tls/selfsigned.crt",
|
||||
Key: "./testdata/tls/selfsigned.key",
|
||||
},
|
||||
Target: "http://localhost:3000",
|
||||
HealthTarget: "http://localhost:9091/healthz",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid domain name",
|
||||
input: Domain{
|
||||
Name: "\uFFFD.techaro.lol",
|
||||
TLS: TLS{
|
||||
Cert: "./testdata/tls/selfsigned.crt",
|
||||
Key: "./testdata/tls/selfsigned.key",
|
||||
},
|
||||
Target: "http://localhost:3000",
|
||||
HealthTarget: "http://localhost:9091/healthz",
|
||||
},
|
||||
err: ErrInvalidDomainName,
|
||||
},
|
||||
{
|
||||
name: "invalid tls config",
|
||||
input: Domain{
|
||||
Name: "anubis.techaro.lol",
|
||||
TLS: TLS{
|
||||
Cert: "./testdata/tls/invalid.crt",
|
||||
Key: "./testdata/tls/invalid.key",
|
||||
},
|
||||
Target: "http://localhost:3000",
|
||||
HealthTarget: "http://localhost:9091/healthz",
|
||||
},
|
||||
err: ErrInvalidDomainTLSConfig,
|
||||
},
|
||||
{
|
||||
name: "invalid URL",
|
||||
input: Domain{
|
||||
Name: "anubis.techaro.lol",
|
||||
TLS: TLS{
|
||||
Cert: "./testdata/tls/selfsigned.crt",
|
||||
Key: "./testdata/tls/selfsigned.key",
|
||||
},
|
||||
Target: "file://[::1:3000",
|
||||
HealthTarget: "file://[::1:9091/healthz",
|
||||
},
|
||||
err: ErrInvalidURL,
|
||||
},
|
||||
{
|
||||
name: "wrong URL scheme",
|
||||
input: Domain{
|
||||
Name: "anubis.techaro.lol",
|
||||
TLS: TLS{
|
||||
Cert: "./testdata/tls/selfsigned.crt",
|
||||
Key: "./testdata/tls/selfsigned.key",
|
||||
},
|
||||
Target: "file://localhost:3000",
|
||||
HealthTarget: "file://localhost:9091/healthz",
|
||||
},
|
||||
err: ErrInvalidURLScheme,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong error from validation function")
|
||||
} else {
|
||||
t.Log(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
1
cmd/osiris/internal/config/testdata/tls/invalid.crt
vendored
Normal file
1
cmd/osiris/internal/config/testdata/tls/invalid.crt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
aorsentaeiorsntoiearnstoieanrsoietnaioresntoeiar
|
||||
1
cmd/osiris/internal/config/testdata/tls/invalid.key
vendored
Normal file
1
cmd/osiris/internal/config/testdata/tls/invalid.key
vendored
Normal file
@@ -0,0 +1 @@
|
||||
aorsentaeiorsntoiearnstoieanrsoietnaioresntoeiar
|
||||
11
cmd/osiris/internal/config/testdata/tls/selfsigned.crt
vendored
Normal file
11
cmd/osiris/internal/config/testdata/tls/selfsigned.crt
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIBnzCCAVGgAwIBAgIUAw8funCpiB3ZAAPoWdSCWnzbsFIwBQYDK2VwMEUxCzAJ
|
||||
BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
|
||||
dCBXaWRnaXRzIFB0eSBMdGQwHhcNMjUwNzE4MTkwMjM1WhcNMjUwODE3MTkwMjM1
|
||||
WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwY
|
||||
SW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEAcXDHXV3vgpvjtTaz
|
||||
s0Oj/73rMr06bhyGGhleYS1MNoWjUzBRMB0GA1UdDgQWBBQwmfKPthucFHB6Wfgz
|
||||
2Nj5nkMQOjAfBgNVHSMEGDAWgBQwmfKPthucFHB6Wfgz2Nj5nkMQOjAPBgNVHRMB
|
||||
Af8EBTADAQH/MAUGAytlcANBALBYbULlGwB7Ro0UTgUoQDNxEvayn3qzVFHIt7lC
|
||||
/2/NzNBkk4yPT+a4mbRuydxLkv+JIvmQbarZxpksYnWlCAM=
|
||||
-----END CERTIFICATE-----
|
||||
3
cmd/osiris/internal/config/testdata/tls/selfsigned.key
vendored
Normal file
3
cmd/osiris/internal/config/testdata/tls/selfsigned.key
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MC4CAQAwBQYDK2VwBCIEIOHKoX22Mha6SnnpLm34fSSfTUDbRiDCi6N1nOgTOlds
|
||||
-----END PRIVATE KEY-----
|
||||
40
cmd/osiris/internal/config/tls.go
Normal file
40
cmd/osiris/internal/config/tls.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Sentinel errors wrapped by TLS.Valid.
var (
	// ErrCantReadTLS is reported when the certificate or key file cannot be
	// stat-ed.
	ErrCantReadTLS = errors.New("tls: can't read TLS")

	// ErrInvalidTLSKeypair is reported when the certificate and key cannot be
	// loaded as an X.509 key pair.
	ErrInvalidTLSKeypair = errors.New("tls: can't parse TLS keypair")
)
|
||||
|
||||
// TLS names the on-disk certificate and private key for a domain.
type TLS struct {
	// Cert is the path of the certificate file.
	Cert string `hcl:"cert"`

	// Key is the path of the private key file.
	Key string `hcl:"key"`
}
|
||||
|
||||
func (t TLS) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if _, err := os.Stat(t.Cert); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w certificate %s: %w", ErrCantReadTLS, t.Cert, err))
|
||||
}
|
||||
|
||||
if _, err := os.Stat(t.Key); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w key %s: %w", ErrCantReadTLS, t.Key, err))
|
||||
}
|
||||
|
||||
if _, err := tls.LoadX509KeyPair(t.Cert, t.Key); err != nil {
|
||||
errs = append(errs, fmt.Errorf("%w (%s, %s): %w", ErrInvalidTLSKeypair, t.Cert, t.Key, err))
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
48
cmd/osiris/internal/config/tls_test.go
Normal file
48
cmd/osiris/internal/config/tls_test.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTLSValid(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input TLS
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "simple selfsigned",
|
||||
input: TLS{
|
||||
Cert: "./testdata/tls/selfsigned.crt",
|
||||
Key: "./testdata/tls/selfsigned.key",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "files don't exist",
|
||||
input: TLS{
|
||||
Cert: "./testdata/tls/nonexistent.crt",
|
||||
Key: "./testdata/tls/nonexistent.key",
|
||||
},
|
||||
err: ErrCantReadTLS,
|
||||
},
|
||||
{
|
||||
name: "invalid keypair",
|
||||
input: TLS{
|
||||
Cert: "./testdata/tls/invalid.crt",
|
||||
Key: "./testdata/tls/invalid.key",
|
||||
},
|
||||
err: ErrInvalidTLSKeypair,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong error from validation function")
|
||||
} else {
|
||||
t.Log(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
85
cmd/osiris/internal/entrypoint/entrypoint.go
Normal file
85
cmd/osiris/internal/entrypoint/entrypoint.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
|
||||
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
"golang.org/x/sync/errgroup"
|
||||
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
// Options carries the settings Main is started with.
type Options struct {
	// ConfigFname is the path of the HCL configuration file. Main decodes it
	// at startup.
	ConfigFname string
}
|
||||
|
||||
func Main(ctx context.Context, opts Options) error {
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
|
||||
|
||||
var cfg config.Toplevel
|
||||
if err := hclsimple.DecodeFile(opts.ConfigFname, nil, &cfg); err != nil {
|
||||
return fmt.Errorf("can't read configuration file %s:\n\n%w", opts.ConfigFname, err)
|
||||
}
|
||||
|
||||
if err := cfg.Valid(); err != nil {
|
||||
return fmt.Errorf("configuration file %s is invalid:\n\n%w", opts.ConfigFname, err)
|
||||
}
|
||||
|
||||
rtr, err := NewRouter(cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rtr.opts = opts
|
||||
go rtr.backgroundReloadConfig(ctx)
|
||||
|
||||
g, gCtx := errgroup.WithContext(ctx)
|
||||
|
||||
// HTTP
|
||||
g.Go(func() error {
|
||||
ln, err := net.Listen("tcp", cfg.Bind.HTTP)
|
||||
if err != nil {
|
||||
return fmt.Errorf("(HTTP) can't bind to tcp %s: %w", cfg.Bind.HTTP, err)
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
ln.Close()
|
||||
}(ctx)
|
||||
|
||||
slog.Info("listening", "for", "http", "bind", cfg.Bind.HTTP)
|
||||
|
||||
return rtr.HandleHTTP(gCtx, ln)
|
||||
})
|
||||
|
||||
// HTTPS
|
||||
g.Go(func() error {
|
||||
ln, err := net.Listen("tcp", cfg.Bind.HTTPS)
|
||||
if err != nil {
|
||||
return fmt.Errorf("(https) can't bind to tcp %s: %w", cfg.Bind.HTTPS, err)
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
ln.Close()
|
||||
}(ctx)
|
||||
|
||||
slog.Info("listening", "for", "https", "bind", cfg.Bind.HTTPS)
|
||||
|
||||
return rtr.HandleHTTPS(gCtx, ln)
|
||||
})
|
||||
|
||||
// Metrics
|
||||
g.Go(func() error {
|
||||
return rtr.ListenAndServeMetrics(gCtx, cfg.Bind.Metrics)
|
||||
})
|
||||
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
|
||||
|
||||
return g.Wait()
|
||||
}
|
||||
93
cmd/osiris/internal/entrypoint/entrypoint_test.go
Normal file
93
cmd/osiris/internal/entrypoint/entrypoint_test.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestMainGoodConfig(t *testing.T) {
|
||||
files, err := os.ReadDir("./testdata/good")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, st := range files {
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(t.Context())
|
||||
cfg := loadConfig(t, filepath.Join("testdata", "good", st.Name()))
|
||||
|
||||
go func(ctx context.Context) {
|
||||
if err := Main(ctx, Options{
|
||||
ConfigFname: filepath.Join("testdata", "good", st.Name()),
|
||||
}); err != nil {
|
||||
var netOpErr *net.OpError
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
// Context was canceled, this is expected
|
||||
return
|
||||
case errors.As(err, &netOpErr):
|
||||
// Network operation error occurred
|
||||
t.Logf("Network operation error: %v", netOpErr)
|
||||
return
|
||||
case errors.Is(err, http.ErrServerClosed):
|
||||
// Server was closed, this is expected
|
||||
return
|
||||
default:
|
||||
// Other unexpected error
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}(ctx)
|
||||
|
||||
wait := 5 * time.Millisecond
|
||||
|
||||
for i := range make([]struct{}, 10) {
|
||||
if i != 0 {
|
||||
time.Sleep(wait)
|
||||
wait = wait * 2
|
||||
}
|
||||
|
||||
t.Logf("try %d (wait=%s)", i+1, wait)
|
||||
|
||||
resp, err := http.Get("http://localhost" + cfg.Bind.Metrics + "/readyz")
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
continue
|
||||
}
|
||||
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
||||
t.Fatal("router initialization did not work")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMainBadConfig(t *testing.T) {
|
||||
files, err := os.ReadDir("./testdata/bad")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, st := range files {
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
if err := Main(t.Context(), Options{
|
||||
ConfigFname: filepath.Join("testdata", "bad", st.Name()),
|
||||
}); err == nil {
|
||||
t.Error("wanted an error but got none")
|
||||
} else {
|
||||
t.Log(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
35
cmd/osiris/internal/entrypoint/h2c.go
Normal file
35
cmd/osiris/internal/entrypoint/h2c.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
|
||||
"golang.org/x/net/http2"
|
||||
)
|
||||
|
||||
func newH2CReverseProxy(target *url.URL) *httputil.ReverseProxy {
|
||||
target.Scheme = "http"
|
||||
|
||||
director := func(req *http.Request) {
|
||||
req.URL.Scheme = target.Scheme
|
||||
req.URL.Host = target.Host
|
||||
req.Host = target.Host
|
||||
}
|
||||
|
||||
// Use h2c transport
|
||||
transport := &http2.Transport{
|
||||
AllowHTTP: true,
|
||||
DialTLS: func(network, addr string, cfg *tls.Config) (net.Conn, error) {
|
||||
// Just do plain TCP (h2c)
|
||||
return net.Dial(network, addr)
|
||||
},
|
||||
}
|
||||
|
||||
return &httputil.ReverseProxy{
|
||||
Director: director,
|
||||
Transport: transport,
|
||||
}
|
||||
}
|
||||
51
cmd/osiris/internal/entrypoint/h2c_test.go
Normal file
51
cmd/osiris/internal/entrypoint/h2c_test.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/net/http2"
|
||||
"golang.org/x/net/http2/h2c"
|
||||
)
|
||||
|
||||
func newH2cServer(t *testing.T, h http.Handler) *httptest.Server {
|
||||
t.Helper()
|
||||
|
||||
h2s := &http2.Server{}
|
||||
|
||||
srv := httptest.NewServer(h2c.NewHandler(h, h2s))
|
||||
t.Cleanup(func() {
|
||||
srv.Close()
|
||||
})
|
||||
|
||||
return srv
|
||||
}
|
||||
|
||||
func TestH2CReverseProxy(t *testing.T) {
|
||||
h := &ackHandler{}
|
||||
|
||||
srv := newH2cServer(t, h)
|
||||
|
||||
u, err := url.Parse(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rp := httptest.NewServer(newH2CReverseProxy(u))
|
||||
defer rp.Close()
|
||||
|
||||
resp, err := rp.Client().Get(rp.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("wrong status code from reverse proxy: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
if !h.ack {
|
||||
t.Error("h2c handler was not executed")
|
||||
}
|
||||
}
|
||||
72
cmd/osiris/internal/entrypoint/metrics.go
Normal file
72
cmd/osiris/internal/entrypoint/metrics.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"sort"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
func healthz(w http.ResponseWriter, r *http.Request) {
|
||||
services, err := internal.HealthSrv.List(r.Context(), nil)
|
||||
if err != nil {
|
||||
slog.Error("can't get list of services", "err", err)
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
var keys []string
|
||||
for k := range services.Statuses {
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
sort.Strings(keys)
|
||||
|
||||
var msg bytes.Buffer
|
||||
|
||||
var healthy bool = true
|
||||
|
||||
for _, k := range keys {
|
||||
st := services.Statuses[k].GetStatus()
|
||||
fmt.Fprintf(&msg, "%s: %s\n", k, st)
|
||||
switch st {
|
||||
case healthv1.HealthCheckResponse_SERVING:
|
||||
// do nothing
|
||||
default:
|
||||
healthy = false
|
||||
}
|
||||
}
|
||||
|
||||
if !healthy {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
w.Write(msg.Bytes())
|
||||
}
|
||||
|
||||
func readyz(w http.ResponseWriter, r *http.Request) {
|
||||
st, ok := internal.GetHealth("osiris")
|
||||
if !ok {
|
||||
slog.Error("health service osiris does not exist, file a bug")
|
||||
http.Error(w, "health service osiris does not exist", http.StatusExpectationFailed)
|
||||
}
|
||||
|
||||
switch st {
|
||||
case healthv1.HealthCheckResponse_NOT_SERVING:
|
||||
http.Error(w, "NOT OK", http.StatusInternalServerError)
|
||||
return
|
||||
case healthv1.HealthCheckResponse_SERVING:
|
||||
fmt.Fprintln(w, "OK")
|
||||
return
|
||||
default:
|
||||
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
|
||||
return
|
||||
}
|
||||
}
|
||||
66
cmd/osiris/internal/entrypoint/metrics_test.go
Normal file
66
cmd/osiris/internal/entrypoint/metrics_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
func TestHealthz(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(healthz))
|
||||
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
|
||||
|
||||
resp, err := srv.Client().Get(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
t.Errorf("wanted not ready but got %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
|
||||
|
||||
resp, err = srv.Client().Get(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("wanted ready but got %d", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadyz(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(readyz))
|
||||
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_NOT_SERVING)
|
||||
|
||||
resp, err := srv.Client().Get(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
t.Errorf("wanted not ready but got %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
internal.SetHealth("osiris", healthv1.HealthCheckResponse_SERVING)
|
||||
|
||||
resp, err = srv.Client().Get(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("wanted ready but got %d", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
320
cmd/osiris/internal/entrypoint/router.go
Normal file
320
cmd/osiris/internal/entrypoint/router.go
Normal file
@@ -0,0 +1,320 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/fingerprint"
|
||||
"github.com/felixge/httpsnoop"
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
"github.com/lum8rjack/go-ja4h"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrTargetInvalid = errors.New("[unexpected] target invalid")
|
||||
ErrNoHandler = errors.New("[unexpected] no handler for domain")
|
||||
ErrInvalidTLSKeypair = errors.New("[unexpected] invalid TLS keypair")
|
||||
ErrNoCert = errors.New("this server does not have a certificate for that domain")
|
||||
|
||||
requestsPerDomain = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "techaro",
|
||||
Subsystem: "osiris",
|
||||
Name: "request_count",
|
||||
}, []string{"domain", "method", "response_code"})
|
||||
|
||||
responseTime = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "techaro",
|
||||
Subsystem: "osiris",
|
||||
Name: "response_time",
|
||||
}, []string{"domain"})
|
||||
|
||||
unresolvedRequests = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: "techaro",
|
||||
Subsystem: "osiris",
|
||||
Name: "unresolved_requests",
|
||||
})
|
||||
)
|
||||
|
||||
type Router struct {
|
||||
lock sync.RWMutex
|
||||
routes map[string]http.Handler
|
||||
tlsCerts map[string]*tls.Certificate
|
||||
opts Options
|
||||
}
|
||||
|
||||
func (rtr *Router) setConfig(c config.Toplevel) error {
|
||||
var errs []error
|
||||
newMap := map[string]http.Handler{}
|
||||
newCerts := map[string]*tls.Certificate{}
|
||||
|
||||
for _, d := range c.Domains {
|
||||
var domainErrs []error
|
||||
|
||||
u, err := url.Parse(d.Target)
|
||||
if err != nil {
|
||||
domainErrs = append(domainErrs, fmt.Errorf("%w %q: %v", ErrTargetInvalid, d.Target, err))
|
||||
}
|
||||
|
||||
var h http.Handler
|
||||
|
||||
if u != nil {
|
||||
switch u.Scheme {
|
||||
case "http", "https":
|
||||
rp := httputil.NewSingleHostReverseProxy(u)
|
||||
|
||||
if d.InsecureSkipVerify {
|
||||
rp.Transport = &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
h = rp
|
||||
case "h2c":
|
||||
h = newH2CReverseProxy(u)
|
||||
case "unix":
|
||||
h = &httputil.ReverseProxy{
|
||||
Director: func(r *http.Request) {
|
||||
r.URL.Scheme = "http"
|
||||
r.URL.Host = d.Name
|
||||
r.Host = d.Name
|
||||
},
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
||||
return net.Dial("unix", strings.TrimPrefix(d.Target, "unix://"))
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if h == nil {
|
||||
domainErrs = append(domainErrs, ErrNoHandler)
|
||||
}
|
||||
|
||||
newMap[d.Name] = h
|
||||
|
||||
cert, err := tls.LoadX509KeyPair(d.TLS.Cert, d.TLS.Key)
|
||||
if err != nil {
|
||||
domainErrs = append(domainErrs, fmt.Errorf("%w: %w", ErrInvalidTLSKeypair, err))
|
||||
}
|
||||
|
||||
newCerts[d.Name] = &cert
|
||||
|
||||
if len(domainErrs) != 0 {
|
||||
errs = append(errs, fmt.Errorf("invalid domain %s: %w", d.Name, errors.Join(domainErrs...)))
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("can't compile config to routing map: %w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
rtr.lock.Lock()
|
||||
rtr.routes = newMap
|
||||
rtr.tlsCerts = newCerts
|
||||
rtr.lock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rtr *Router) GetCertificate(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||
rtr.lock.RLock()
|
||||
cert, ok := rtr.tlsCerts[hello.ServerName]
|
||||
rtr.lock.RUnlock()
|
||||
|
||||
if !ok {
|
||||
return nil, ErrNoCert
|
||||
}
|
||||
|
||||
return cert, nil
|
||||
}
|
||||
|
||||
func (rtr *Router) loadConfig() error {
|
||||
slog.Info("reloading config", "fname", rtr.opts.ConfigFname)
|
||||
var cfg config.Toplevel
|
||||
if err := hclsimple.DecodeFile(rtr.opts.ConfigFname, nil, &cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := cfg.Valid(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := rtr.setConfig(cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
slog.Info("done!")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rtr *Router) backgroundReloadConfig(ctx context.Context) {
|
||||
t := time.NewTicker(time.Hour)
|
||||
defer t.Stop()
|
||||
ch := make(chan os.Signal, 1)
|
||||
signal.Notify(ch, syscall.SIGHUP)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
if err := rtr.loadConfig(); err != nil {
|
||||
slog.Error("can't reload config", "fname", rtr.opts.ConfigFname, "err", err)
|
||||
}
|
||||
case <-ch:
|
||||
if err := rtr.loadConfig(); err != nil {
|
||||
slog.Error("can't reload config", "fname", rtr.opts.ConfigFname, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func NewRouter(c config.Toplevel) (*Router, error) {
|
||||
result := &Router{
|
||||
routes: map[string]http.Handler{},
|
||||
}
|
||||
|
||||
if err := result.setConfig(c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (rtr *Router) HandleHTTP(ctx context.Context, ln net.Listener) error {
|
||||
srv := http.Server{
|
||||
Handler: rtr,
|
||||
ErrorLog: internal.GetFilteredHTTPLogger(),
|
||||
}
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
srv.Close()
|
||||
}(ctx)
|
||||
|
||||
return srv.Serve(ln)
|
||||
}
|
||||
|
||||
func (rtr *Router) HandleHTTPS(ctx context.Context, ln net.Listener) error {
|
||||
tc := &tls.Config{
|
||||
GetCertificate: rtr.GetCertificate,
|
||||
}
|
||||
|
||||
srv := &http.Server{
|
||||
Handler: rtr,
|
||||
ErrorLog: internal.GetFilteredHTTPLogger(),
|
||||
TLSConfig: tc,
|
||||
}
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
srv.Close()
|
||||
}(ctx)
|
||||
|
||||
fingerprint.ApplyTLSFingerprinter(srv)
|
||||
|
||||
return srv.ServeTLS(ln, "", "")
|
||||
}
|
||||
|
||||
func (rtr *Router) ListenAndServeMetrics(ctx context.Context, addr string) error {
|
||||
ln, err := net.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("(metrics) can't bind to tcp %s: %w", addr, err)
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
ln.Close()
|
||||
}(ctx)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
|
||||
mux.Handle("/metrics", promhttp.Handler())
|
||||
mux.HandleFunc("/readyz", readyz)
|
||||
mux.HandleFunc("/healthz", healthz)
|
||||
|
||||
slog.Info("listening", "for", "metrics", "bind", addr)
|
||||
|
||||
srv := http.Server{
|
||||
Addr: addr,
|
||||
Handler: mux,
|
||||
ErrorLog: internal.GetFilteredHTTPLogger(),
|
||||
}
|
||||
|
||||
go func(ctx context.Context) {
|
||||
<-ctx.Done()
|
||||
srv.Close()
|
||||
}(ctx)
|
||||
|
||||
return srv.Serve(ln)
|
||||
}
|
||||
|
||||
func (rtr *Router) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
var host = r.Host
|
||||
|
||||
if strings.Contains(host, ":") {
|
||||
host, _, _ = net.SplitHostPort(host)
|
||||
}
|
||||
|
||||
var h http.Handler
|
||||
var ok bool
|
||||
|
||||
ja4hFP := ja4h.JA4H(r)
|
||||
|
||||
slog.Info("got request", "method", r.Method, "host", host, "path", r.URL.Path)
|
||||
|
||||
rtr.lock.RLock()
|
||||
h, ok = rtr.routes[host]
|
||||
rtr.lock.RUnlock()
|
||||
|
||||
if !ok {
|
||||
unresolvedRequests.Inc()
|
||||
http.NotFound(w, r) // TODO(Xe): brand this
|
||||
return
|
||||
}
|
||||
|
||||
r.Header.Set("X-Http-Ja4h-Fingerprint", ja4hFP)
|
||||
|
||||
if fp := fingerprint.GetTLSFingerprint(r); fp != nil {
|
||||
if ja3n := fp.JA3N(); ja3n != nil {
|
||||
r.Header.Set("X-Tls-Ja3n-Fingerprint", ja3n.String())
|
||||
}
|
||||
if ja4 := fp.JA4(); ja4 != nil {
|
||||
r.Header.Set("X-Tls-Ja4-Fingerprint", ja4.String())
|
||||
}
|
||||
}
|
||||
|
||||
if tcpFP := fingerprint.GetTCPFingerprint(r); tcpFP != nil {
|
||||
r.Header.Set("X-Tcp-Ja4t-Fingerprint", tcpFP.String())
|
||||
}
|
||||
|
||||
m := httpsnoop.CaptureMetrics(h, w, r)
|
||||
|
||||
requestsPerDomain.WithLabelValues(host, r.Method, fmt.Sprint(m.Code)).Inc()
|
||||
responseTime.WithLabelValues(host).Observe(float64(m.Duration.Milliseconds()))
|
||||
|
||||
slog.Info("request completed", "host", host, "method", r.Method, "response_code", m.Code, "duration_ms", m.Duration.Milliseconds())
|
||||
}
|
||||
319
cmd/osiris/internal/entrypoint/router_test.go
Normal file
319
cmd/osiris/internal/entrypoint/router_test.go
Normal file
@@ -0,0 +1,319 @@
|
||||
package entrypoint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/cmd/osiris/internal/config"
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
)
|
||||
|
||||
func loadConfig(t *testing.T, fname string) config.Toplevel {
|
||||
t.Helper()
|
||||
|
||||
var cfg config.Toplevel
|
||||
if err := hclsimple.DecodeFile(fname, nil, &cfg); err != nil {
|
||||
t.Fatalf("can't read configuration file %s: %v", fname, err)
|
||||
}
|
||||
|
||||
if err := cfg.Valid(); err != nil {
|
||||
t.Errorf("configuration file %s is invalid: %v", "./testdata/selfsigned.hcl", err)
|
||||
}
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
func newRouter(t *testing.T, cfg config.Toplevel) *Router {
|
||||
t.Helper()
|
||||
|
||||
rtr, err := NewRouter(cfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return rtr
|
||||
}
|
||||
|
||||
func TestNewRouter(t *testing.T) {
|
||||
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
|
||||
rtr := newRouter(t, cfg)
|
||||
|
||||
srv := httptest.NewServer(rtr)
|
||||
defer srv.Close()
|
||||
}
|
||||
|
||||
func TestNewRouterFails(t *testing.T) {
|
||||
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
|
||||
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "test1.internal",
|
||||
TLS: config.TLS{
|
||||
Cert: "./testdata/tls/invalid.crt",
|
||||
Key: "./testdata/tls/invalid.key",
|
||||
},
|
||||
Target: cfg.Domains[0].Target,
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
|
||||
rtr, err := NewRouter(cfg)
|
||||
if err == nil {
|
||||
t.Fatal("wanted an error but got none")
|
||||
}
|
||||
|
||||
srv := httptest.NewServer(rtr)
|
||||
defer srv.Close()
|
||||
}
|
||||
|
||||
func TestRouterSetConfig(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
configFname string
|
||||
mutation func(cfg config.Toplevel) config.Toplevel
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "basic",
|
||||
configFname: "./testdata/good/selfsigned.hcl",
|
||||
mutation: func(cfg config.Toplevel) config.Toplevel {
|
||||
return cfg
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "all schemes",
|
||||
configFname: "./testdata/good/selfsigned.hcl",
|
||||
mutation: func(cfg config.Toplevel) config.Toplevel {
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "http.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "http://[::1]:3000",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "https.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "https://[::1]:3000",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "h2c.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "h2c://[::1]:3000",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "unix.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "unix://foo.sock",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
|
||||
return cfg
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid TLS",
|
||||
configFname: "./testdata/good/selfsigned.hcl",
|
||||
mutation: func(cfg config.Toplevel) config.Toplevel {
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "test1.internal",
|
||||
TLS: config.TLS{
|
||||
Cert: "./testdata/tls/invalid.crt",
|
||||
Key: "./testdata/tls/invalid.key",
|
||||
},
|
||||
Target: cfg.Domains[0].Target,
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
|
||||
return cfg
|
||||
},
|
||||
err: ErrInvalidTLSKeypair,
|
||||
},
|
||||
{
|
||||
name: "target is not a valid URL",
|
||||
configFname: "./testdata/good/selfsigned.hcl",
|
||||
mutation: func(cfg config.Toplevel) config.Toplevel {
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "test1.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "http://[::1:443",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
|
||||
return cfg
|
||||
},
|
||||
err: ErrTargetInvalid,
|
||||
},
|
||||
{
|
||||
name: "invalid target scheme",
|
||||
configFname: "./testdata/good/selfsigned.hcl",
|
||||
mutation: func(cfg config.Toplevel) config.Toplevel {
|
||||
cfg.Domains = append(cfg.Domains, config.Domain{
|
||||
Name: "test1.internal",
|
||||
TLS: cfg.Domains[0].TLS,
|
||||
Target: "foo://",
|
||||
HealthTarget: cfg.Domains[0].HealthTarget,
|
||||
})
|
||||
|
||||
return cfg
|
||||
},
|
||||
err: ErrNoHandler,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cfg := loadConfig(t, tt.configFname)
|
||||
rtr := newRouter(t, cfg)
|
||||
|
||||
cfg = tt.mutation(cfg)
|
||||
|
||||
if err := rtr.setConfig(cfg); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong error from rtr.setConfig function")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ackHandler is a minimal http.Handler for tests: it remembers whether it has
// served a request since the last Reset.
type ackHandler struct {
	ack bool // set by ServeHTTP, cleared by Reset
}

// ServeHTTP flags the handler as acknowledged and answers with the body
// "OK" followed by a newline.
func (ah *ackHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ah.ack = true
	fmt.Fprint(w, "OK\n")
}

// Reset clears the acknowledgement flag so the handler can be reused.
func (ah *ackHandler) Reset() {
	ah.ack = false
}
|
||||
|
||||
// newUnixServer serves h over HTTP on a Unix domain socket created inside the
// test's temporary directory and returns the socket location as a "unix://"
// URL. The listener is closed and the socket file removed during test
// cleanup; the server is closed once the test context is done.
func newUnixServer(t *testing.T, h http.Handler) string {
	socketPath := filepath.Join(t.TempDir(), "s")

	listener, err := net.Listen("unix", socketPath)
	if err != nil {
		t.Fatalf("can't listen on %s: %v", socketPath, err)
	}
	t.Cleanup(func() {
		listener.Close()
		os.Remove(socketPath)
	})

	ctx := t.Context()
	server := &http.Server{Handler: h}

	// Stop the server as soon as the test context is cancelled.
	go func() {
		<-ctx.Done()
		server.Close()
	}()

	// Serve blocks until the listener or the server is closed, so it runs in
	// its own goroutine; its error is intentionally not inspected.
	go server.Serve(listener)

	return "unix://" + socketPath
}
|
||||
|
||||
func TestRouterGetCertificate(t *testing.T) {
|
||||
cfg := loadConfig(t, "./testdata/good/selfsigned.hcl")
|
||||
rtr := newRouter(t, cfg)
|
||||
|
||||
for _, tt := range []struct {
|
||||
domainName string
|
||||
err error
|
||||
}{
|
||||
{
|
||||
domainName: "osiris.local.cetacean.club",
|
||||
},
|
||||
{
|
||||
domainName: "whacky-fun.local",
|
||||
err: ErrNoCert,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.domainName, func(t *testing.T) {
|
||||
if _, err := rtr.GetCertificate(&tls.ClientHelloInfo{ServerName: tt.domainName}); !errors.Is(err, tt.err) {
|
||||
t.Logf("want: %v", tt.err)
|
||||
t.Logf("got: %v", err)
|
||||
t.Error("got wrong error from rtr.GetCertificate")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRouterServeAllProtocols(t *testing.T) {
|
||||
cfg := loadConfig(t, "./testdata/good/all_protocols.hcl")
|
||||
|
||||
httpAckHandler := &ackHandler{}
|
||||
httpsAckHandler := &ackHandler{}
|
||||
h2cAckHandler := &ackHandler{}
|
||||
unixAckHandler := &ackHandler{}
|
||||
|
||||
httpSrv := httptest.NewServer(httpAckHandler)
|
||||
httpsSrv := httptest.NewTLSServer(httpsAckHandler)
|
||||
h2cSrv := newH2cServer(t, h2cAckHandler)
|
||||
unixPath := newUnixServer(t, unixAckHandler)
|
||||
|
||||
cfg.Domains[0].Target = httpSrv.URL
|
||||
cfg.Domains[1].Target = httpsSrv.URL
|
||||
cfg.Domains[2].Target = strings.ReplaceAll(h2cSrv.URL, "http:", "h2c:")
|
||||
cfg.Domains[3].Target = unixPath
|
||||
|
||||
// enc := json.NewEncoder(os.Stderr)
|
||||
// enc.SetIndent("", " ")
|
||||
// enc.Encode(cfg)
|
||||
|
||||
rtr := newRouter(t, cfg)
|
||||
|
||||
cli := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
t.Run("plain http", func(t *testing.T) {
|
||||
ln, err := net.Listen("tcp", ":0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
ln.Close()
|
||||
})
|
||||
|
||||
go rtr.HandleHTTP(t.Context(), ln)
|
||||
|
||||
serverURL := "http://" + ln.Addr().String()
|
||||
t.Log(serverURL)
|
||||
|
||||
for _, d := range cfg.Domains {
|
||||
t.Run(d.Name, func(t *testing.T) {
|
||||
req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, serverURL, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
req.Host = d.Name
|
||||
|
||||
resp, err := cli.Do(req)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("wrong status code %d", resp.StatusCode)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
0
cmd/osiris/internal/entrypoint/testdata/bad/empty.hcl
vendored
Normal file
0
cmd/osiris/internal/entrypoint/testdata/bad/empty.hcl
vendored
Normal file
15
cmd/osiris/internal/entrypoint/testdata/bad/invalid.hcl
vendored
Normal file
15
cmd/osiris/internal/entrypoint/testdata/bad/invalid.hcl
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
bind {
|
||||
http = ":65530"
|
||||
https = ":65531"
|
||||
metrics = ":65532"
|
||||
}
|
||||
|
||||
domain "osiris.local.cetacean.club" {
|
||||
tls {
|
||||
cert = "./testdata/invalid.crt"
|
||||
key = "./testdata/invalid.key"
|
||||
}
|
||||
|
||||
target = "http://localhost:3000"
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
46
cmd/osiris/internal/entrypoint/testdata/good/all_protocols.hcl
vendored
Normal file
46
cmd/osiris/internal/entrypoint/testdata/good/all_protocols.hcl
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
bind {
|
||||
http = ":65520"
|
||||
https = ":65521"
|
||||
metrics = ":65522"
|
||||
}
|
||||
|
||||
domain "http.internal" {
|
||||
tls {
|
||||
cert = "./testdata/selfsigned.crt"
|
||||
key = "./testdata/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "http://localhost:65510" # XXX(Xe) this is overwritten
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
|
||||
domain "https.internal" {
|
||||
tls {
|
||||
cert = "./testdata/selfsigned.crt"
|
||||
key = "./testdata/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "https://localhost:65511" # XXX(Xe) this is overwritten
|
||||
insecure_skip_verify = true
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
|
||||
domain "h2c.internal" {
|
||||
tls {
|
||||
cert = "./testdata/selfsigned.crt"
|
||||
key = "./testdata/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "h2c://localhost:65511" # XXX(Xe) this is overwritten
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
|
||||
domain "unix.internal" {
|
||||
tls {
|
||||
cert = "./testdata/selfsigned.crt"
|
||||
key = "./testdata/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "http://localhost:65511" # XXX(Xe) this is overwritten
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
15
cmd/osiris/internal/entrypoint/testdata/good/selfsigned.hcl
vendored
Normal file
15
cmd/osiris/internal/entrypoint/testdata/good/selfsigned.hcl
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
bind {
|
||||
http = ":65530"
|
||||
https = ":65531"
|
||||
metrics = ":65532"
|
||||
}
|
||||
|
||||
domain "osiris.local.cetacean.club" {
|
||||
tls {
|
||||
cert = "./testdata/selfsigned.crt"
|
||||
key = "./testdata/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "http://localhost:3000"
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
11
cmd/osiris/internal/entrypoint/testdata/selfsigned.crt
vendored
Normal file
11
cmd/osiris/internal/entrypoint/testdata/selfsigned.crt
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIBnzCCAVGgAwIBAgIUOLTjSYOjFk00IemtFTC4oEZs988wBQYDK2VwMEUxCzAJ
|
||||
BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
|
||||
dCBXaWRnaXRzIFB0eSBMdGQwHhcNMjUwNzE4MjEyNDIzWhcNMjUwODE3MjEyNDIz
|
||||
WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwY
|
||||
SW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEAPHphABS15+4VV6R1
|
||||
vYzBQYIycQmOmlbA8QcfwzuB2VajUzBRMB0GA1UdDgQWBBT2s+MQ4AR6cbK4V0+d
|
||||
XZnok1orhDAfBgNVHSMEGDAWgBT2s+MQ4AR6cbK4V0+dXZnok1orhDAPBgNVHRMB
|
||||
Af8EBTADAQH/MAUGAytlcANBAOdoJbRMnHmkEETzVtXP+jkAI9yQNRXujnglApGP
|
||||
8I5pvIYVgYCgoQrnb4haVWFldHM1T9H698n19e/egfFb+w4=
|
||||
-----END CERTIFICATE-----
|
||||
3
cmd/osiris/internal/entrypoint/testdata/selfsigned.key
vendored
Normal file
3
cmd/osiris/internal/entrypoint/testdata/selfsigned.key
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MC4CAQAwBQYDK2VwBCIEIBop42tiZ0yzhaKo9NAc0PlAyBsE8NAE0i9Z7s2lgZuR
|
||||
-----END PRIVATE KEY-----
|
||||
43
cmd/osiris/main.go
Normal file
43
cmd/osiris/main.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/cmd/osiris/internal/entrypoint"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/facebookgo/flagenv"
|
||||
)
|
||||
|
||||
var (
|
||||
configFname = flag.String("config", "./osiris.hcl", "Configuration file (HCL), see docs")
|
||||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
versionFlag = flag.Bool("version", false, "if true, show version information then quit")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flagenv.Parse()
|
||||
flag.Parse()
|
||||
|
||||
if *versionFlag {
|
||||
fmt.Println("Osiris", anubis.Version)
|
||||
return
|
||||
}
|
||||
|
||||
internal.InitSlog(*slogLevel)
|
||||
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
if err := entrypoint.Main(ctx, entrypoint.Options{
|
||||
ConfigFname: *configFname,
|
||||
}); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
15
cmd/osiris/osiris.hcl
Normal file
15
cmd/osiris/osiris.hcl
Normal file
@@ -0,0 +1,15 @@
|
||||
bind {
|
||||
http = ":3004"
|
||||
https = ":3005"
|
||||
metrics = ":9091"
|
||||
}
|
||||
|
||||
domain "osiris.local.cetacean.club" {
|
||||
tls {
|
||||
cert = "./internal/config/testdata/tls/selfsigned.crt"
|
||||
key = "./internal/config/testdata/tls/selfsigned.key"
|
||||
}
|
||||
|
||||
target = "http://localhost:3000"
|
||||
health_target = "http://localhost:9091/healthz"
|
||||
}
|
||||
78
cmd/robots2policy/batch/batch_process.go
Normal file
78
cmd/robots2policy/batch/batch_process.go
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
|
||||
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
|
||||
*/
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// main walks the directory given as the first command-line argument and, for
// every non-directory entry, runs `go run main.go` to convert that file into
// a YAML policy under ./generated_policies. The policy name is derived from
// the file's path relative to the input directory. Files that fail to
// convert are reported and skipped; a failure to walk the directory is fatal.
func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
		fmt.Println("Example: go run batch_process.go ./cleaned")
		os.Exit(1)
	}

	cleanedDir := os.Args[1]
	outputDir := "generated_policies"

	// Create output directory
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}

	count := 0
	err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Generate policy name from file path. A failure here would otherwise
		// yield an empty name and an output file called ".yaml".
		relPath, err := filepath.Rel(cleanedDir, path)
		if err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			return nil // Continue processing other files
		}

		// Normalise to forward slashes first so the name is the same on
		// platforms whose path separator is not "/".
		policyName := strings.ReplaceAll(filepath.ToSlash(relPath), "/", "-")
		policyName = strings.TrimSuffix(policyName, "-robots.txt")
		policyName = strings.ReplaceAll(policyName, ".", "-")

		outputFile := filepath.Join(outputDir, policyName+".yaml")

		cmd := exec.Command("go", "run", "main.go",
			"-input", path,
			"-output", outputFile,
			"-name", policyName,
			"-format", "yaml")

		// Capture the child's output so the reason for a failure is visible;
		// on success it is discarded, as before.
		if out, err := cmd.CombinedOutput(); err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			if msg := strings.TrimSpace(string(out)); msg != "" {
				fmt.Println(msg)
			}
			return nil // Continue processing other files
		}

		count++
		if count%100 == 0 {
			fmt.Printf("Processed %d files...\n", count)
		} else if count%10 == 0 {
			fmt.Print(".")
		}

		return nil
	})

	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	fmt.Printf("Successfully processed %d robots.txt files\n", count)
	fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}
|
||||
313
cmd/robots2policy/main.go
Normal file
313
cmd/robots2policy/main.go
Normal file
@@ -0,0 +1,313 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
// Command-line flags controlling how a robots.txt file is read and how the
// resulting policy is named, shaped and written.
var (
	inputFile     = flag.String("input", "", "path to robots.txt file (use - for stdin)")
	outputFile    = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
	outputFormat  = flag.String("format", "yaml", "output format: yaml or json")
	baseAction    = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
	crawlDelay    = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
	policyName    = flag.String("name", "robots-txt-policy", "name for the generated policy")
	userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
	helpFlag      = flag.Bool("help", false, "show help")
)
|
||||
|
||||
// RobotsRule is one user-agent section of a parsed robots.txt file.
type RobotsRule struct {
	UserAgent   string   // value of the User-agent line that opened the section
	Disallows   []string // non-empty Disallow values, in file order
	Allows      []string // non-empty Allow values, in file order
	CrawlDelay  int      // last successfully parsed Crawl-delay value; 0 if none
	IsBlacklist bool     // true if this is a specifically denied user agent
}
|
||||
|
||||
type AnubisRule struct {
|
||||
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
|
||||
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
|
||||
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Action string `yaml:"action" json:"action"`
|
||||
}
|
||||
|
||||
// init replaces flag.Usage with a function that prints the usage banner, the
// flag defaults and a few example invocations to stderr, then exits with
// status 2.
func init() {
	flag.Usage = func() {
		prog := os.Args[0]
		fmt.Fprintf(os.Stderr, "Usage of %s:\n", prog)
		fmt.Fprintf(os.Stderr, "%s [options] -input <robots.txt>\n\n", prog)
		flag.PrintDefaults()

		examples := []string{
			"\nExamples:",
			" # Convert local robots.txt file",
			" robots2policy -input robots.txt -output policy.yaml",
			"",
			" # Convert from URL",
			" robots2policy -input https://example.com/robots.txt -format json",
			"",
			" # Read from stdin, write to stdout",
			" curl https://example.com/robots.txt | robots2policy -input -",
		}
		for _, line := range examples {
			fmt.Fprintln(os.Stderr, line)
		}

		os.Exit(2)
	}
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
|
||||
flag.Usage()
|
||||
}
|
||||
|
||||
// Read robots.txt
|
||||
var input io.Reader
|
||||
if *inputFile == "-" {
|
||||
input = os.Stdin
|
||||
} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
|
||||
resp, err := http.Get(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to fetch robots.txt from URL: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
input = resp.Body
|
||||
} else {
|
||||
file, err := os.Open(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open input file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
input = file
|
||||
}
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(input)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check if any rules were generated
|
||||
if len(anubisRules) == 0 {
|
||||
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
|
||||
}
|
||||
|
||||
// Generate output
|
||||
var output []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
output, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
output, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
default:
|
||||
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Write output
|
||||
if *outputFile == "" || *outputFile == "-" {
|
||||
fmt.Print(string(output))
|
||||
} else {
|
||||
err = os.WriteFile(*outputFile, output, 0644)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to write output file: %v", err)
|
||||
}
|
||||
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
scanner := bufio.NewScanner(input)
|
||||
var rules []RobotsRule
|
||||
var currentRule *RobotsRule
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Skip empty lines and comments
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Split on first colon
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
directive := strings.TrimSpace(strings.ToLower(parts[0]))
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
switch directive {
|
||||
case "user-agent":
|
||||
// Start a new rule section
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
currentRule = &RobotsRule{
|
||||
UserAgent: value,
|
||||
Disallows: make([]string, 0),
|
||||
Allows: make([]string, 0),
|
||||
}
|
||||
|
||||
case "disallow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Disallows = append(currentRule.Disallows, value)
|
||||
}
|
||||
|
||||
case "allow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Allows = append(currentRule.Allows, value)
|
||||
}
|
||||
|
||||
case "crawl-delay":
|
||||
if currentRule != nil {
|
||||
if delay, err := parseIntSafe(value); err == nil {
|
||||
currentRule.CrawlDelay = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last rule
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
|
||||
// Mark blacklisted user agents (those with "Disallow: /")
|
||||
for i := range rules {
|
||||
for _, disallow := range rules[i].Disallows {
|
||||
if disallow == "/" {
|
||||
rules[i].IsBlacklist = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rules, scanner.Err()
|
||||
}
|
||||
|
||||
// parseIntSafe scans a leading decimal integer from s using fmt.Sscanf with
// the "%d" verb. Text following the number is not examined. On failure the
// scan error is returned together with the zero value.
func parseIntSafe(s string) (n int, err error) {
	_, err = fmt.Sscanf(s, "%d", &n)
	return n, err
}
|
||||
|
||||
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
ruleCounter := 0
|
||||
|
||||
for _, robotsRule := range robotsRules {
|
||||
userAgent := robotsRule.UserAgent
|
||||
|
||||
// Handle crawl delay as weight adjustment (do this first before any continues)
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
|
||||
Action: "WEIGH",
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents (complete deny/challenge)
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
for _, disallow := range robotsRule.Disallows {
|
||||
if disallow == "/" {
|
||||
continue // Already handled as blacklist above
|
||||
}
|
||||
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
}
|
||||
|
||||
// Build CEL expression
|
||||
var conditions []string
|
||||
|
||||
// Add user agent condition if not wildcard
|
||||
if userAgent != "*" {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
|
||||
}
|
||||
|
||||
// Add path condition
|
||||
pathCondition := buildPathCondition(disallow)
|
||||
conditions = append(conditions, pathCondition)
|
||||
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: conditions,
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
}
|
||||
|
||||
// buildPathCondition translates a robots.txt path pattern into a CEL
// condition on the request path.
//
// A pattern without special characters becomes a prefix test,
// path.startsWith("..."). A pattern containing "*" or "?" becomes a regular
// expression anchored at the start, path.matches("^..."), in which "*"
// matches any run of characters, "?" matches exactly one character and all
// other characters match literally. A trailing "$" marks the end of the
// path: it is removed from the pattern and the expression is additionally
// anchored at the end, so "/page$" matches only "/page".
func buildPathCondition(robotsPath string) string {
	// "$" is only special as the very last character of the pattern.
	anchored := strings.HasSuffix(robotsPath, "$")
	if anchored {
		robotsPath = strings.TrimSuffix(robotsPath, "$")
	}

	if anchored || strings.ContainsAny(robotsPath, "*?") {
		// Escape everything, then re-enable the two wildcard characters.
		regex := regexp.QuoteMeta(robotsPath)
		regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
		regex = strings.ReplaceAll(regex, `\?`, `.`)  // ? becomes .
		regex = "^" + regex
		if anchored {
			regex += "$"
		}
		return fmt.Sprintf("path.matches(%q)", regex)
	}

	// Simple prefix match for most cases
	return fmt.Sprintf("path.startsWith(%q)", robotsPath)
}
|
||||
418
cmd/robots2policy/robots2policy_test.go
Normal file
418
cmd/robots2policy/robots2policy_test.go
Normal file
@@ -0,0 +1,418 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// TestCase describes one fixture-driven conversion: a robots.txt input file
// and the file holding the expected output, both relative to testdata, plus
// the flag overrides to apply while converting.
type TestCase struct {
	name         string
	robotsFile   string
	expectedFile string
	options      TestOptions
}

// TestOptions lists the flag values a test case overrides. Zero values
// (empty string, non-positive weight) leave the corresponding flag untouched.
type TestOptions struct {
	format           string
	action           string
	crawlDelayWeight int
	policyName       string
	deniedAction     string
}
|
||||
|
||||
func TestDataFileConversion(t *testing.T) {
|
||||
|
||||
testCases := []TestCase{
|
||||
{
|
||||
name: "simple_default",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "simple_json",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.json",
|
||||
options: TestOptions{format: "json"},
|
||||
},
|
||||
{
|
||||
name: "simple_deny_action",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "deny-action.yaml",
|
||||
options: TestOptions{format: "yaml", action: "DENY"},
|
||||
},
|
||||
{
|
||||
name: "simple_custom_name",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "custom-name.yaml",
|
||||
options: TestOptions{format: "yaml", policyName: "my-custom-policy"},
|
||||
},
|
||||
{
|
||||
name: "blacklist_with_crawl_delay",
|
||||
robotsFile: "blacklist.robots.txt",
|
||||
expectedFile: "blacklist.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
{
|
||||
name: "wildcards",
|
||||
robotsFile: "wildcards.robots.txt",
|
||||
expectedFile: "wildcards.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "empty_file",
|
||||
robotsFile: "empty.robots.txt",
|
||||
expectedFile: "empty.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "complex_scenario",
|
||||
robotsFile: "complex.robots.txt",
|
||||
expectedFile: "complex.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
robotsPath := filepath.Join("testdata", tc.robotsFile)
|
||||
expectedPath := filepath.Join("testdata", tc.expectedFile)
|
||||
|
||||
// Read robots.txt input
|
||||
robotsFile, err := os.Open(robotsPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
|
||||
}
|
||||
defer robotsFile.Close()
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(robotsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Set test options
|
||||
oldFormat := *outputFormat
|
||||
oldAction := *baseAction
|
||||
oldCrawlDelay := *crawlDelay
|
||||
oldPolicyName := *policyName
|
||||
oldDeniedAction := *userAgentDeny
|
||||
|
||||
if tc.options.format != "" {
|
||||
*outputFormat = tc.options.format
|
||||
}
|
||||
if tc.options.action != "" {
|
||||
*baseAction = tc.options.action
|
||||
}
|
||||
if tc.options.crawlDelayWeight > 0 {
|
||||
*crawlDelay = tc.options.crawlDelayWeight
|
||||
}
|
||||
if tc.options.policyName != "" {
|
||||
*policyName = tc.options.policyName
|
||||
}
|
||||
if tc.options.deniedAction != "" {
|
||||
*userAgentDeny = tc.options.deniedAction
|
||||
}
|
||||
|
||||
// Restore options after test
|
||||
defer func() {
|
||||
*outputFormat = oldFormat
|
||||
*baseAction = oldAction
|
||||
*crawlDelay = oldCrawlDelay
|
||||
*policyName = oldPolicyName
|
||||
*userAgentDeny = oldDeniedAction
|
||||
}()
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Generate output
|
||||
var actualOutput []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
actualOutput, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Read expected output
|
||||
expectedOutput, err := os.ReadFile(expectedPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
|
||||
}
|
||||
|
||||
if strings.ToLower(*outputFormat) == "yaml" {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = yaml.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual output: %v", err)
|
||||
}
|
||||
|
||||
err = yaml.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
} else {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = json.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCaseInsensitiveParsing(t *testing.T) {
|
||||
robotsTxt := `User-Agent: *
|
||||
Disallow: /admin
|
||||
Crawl-Delay: 10
|
||||
|
||||
User-agent: TestBot
|
||||
disallow: /test
|
||||
crawl-delay: 5
|
||||
|
||||
USER-AGENT: UpperBot
|
||||
DISALLOW: /upper
|
||||
CRAWL-DELAY: 20`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
|
||||
}
|
||||
|
||||
expectedRules := 3
|
||||
if len(rules) != expectedRules {
|
||||
t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
|
||||
}
|
||||
|
||||
// Check that all crawl delays were parsed
|
||||
for i, rule := range rules {
|
||||
expectedDelays := []int{10, 5, 20}
|
||||
if rule.CrawlDelay != expectedDelays[i] {
|
||||
t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVariousOutputFormats(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldPolicyName := *policyName
|
||||
*policyName = "test-policy"
|
||||
defer func() { *policyName = oldPolicyName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Test YAML output
|
||||
yamlOutput, err := yaml.Marshal(anubisRules)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal YAML: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
|
||||
t.Errorf("YAML output doesn't contain expected rule name")
|
||||
}
|
||||
|
||||
// Test JSON output
|
||||
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal JSON: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
|
||||
t.Errorf("JSON output doesn't contain expected rule name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDifferentActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("action_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *baseAction
|
||||
*baseAction = action
|
||||
defer func() { *baseAction = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
if len(anubisRules) != 1 {
|
||||
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
|
||||
}
|
||||
|
||||
if anubisRules[0].Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPolicyNaming(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Disallow: /private
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /`
|
||||
|
||||
testNames := []string{"custom-policy", "my-rules", "site-protection"}
|
||||
|
||||
for _, name := range testNames {
|
||||
t.Run("name_"+name, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldName := *policyName
|
||||
*policyName = name
|
||||
defer func() { *policyName = oldName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check that all rule names use the custom prefix
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.HasPrefix(rule.Name, name+"-") {
|
||||
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlDelayWeights(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: SlowBot
|
||||
Disallow: /slow
|
||||
Crawl-delay: 60`
|
||||
|
||||
testWeights := []int{1, 5, 10, 25}
|
||||
|
||||
for _, weight := range testWeights {
|
||||
t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldWeight := *crawlDelay
|
||||
*crawlDelay = weight
|
||||
defer func() { *crawlDelay = oldWeight }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Count weight rules and verify they have correct weight
|
||||
weightRules := 0
|
||||
for _, rule := range anubisRules {
|
||||
if rule.Action == "WEIGH" && rule.Weight != nil {
|
||||
weightRules++
|
||||
if rule.Weight.Adjust != weight {
|
||||
t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expectedWeightRules := 2 // One for *, one for SlowBot
|
||||
if weightRules != expectedWeightRules {
|
||||
t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacklistActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /`
|
||||
|
||||
testActions := []string{"DENY", "CHALLENGE"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("blacklist_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *userAgentDeny
|
||||
*userAgentDeny = action
|
||||
defer func() { *userAgentDeny = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// All rules should be blacklist rules with the specified action
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.Contains(rule.Name, "blacklist") {
|
||||
t.Errorf("Expected blacklist rule, got %s", rule.Name)
|
||||
}
|
||||
if rule.Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, rule.Action)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// compareData reports whether actual and expected are deeply equal.
//
// The comparison is a plain reflect.DeepEqual: no normalization is applied, so
// two values must match in both dynamic type and content to count as equal.
func compareData(actual, expected interface{}) bool {
	equal := reflect.DeepEqual(actual, expected)
	return equal
}
|
||||
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
Normal file
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# Test with blacklisted user agents
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /
|
||||
Crawl-delay: 60
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search
|
||||
Crawl-delay: 5
|
||||
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
Normal file
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-crawl-delay-4
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-blacklist-5
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-6
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
Normal file
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
# Complex real-world example
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private/
|
||||
Disallow: /api/internal/
|
||||
Allow: /api/public/
|
||||
Crawl-delay: 5
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search/
|
||||
Allow: /api/
|
||||
Crawl-delay: 2
|
||||
|
||||
User-agent: Bingbot
|
||||
Disallow: /search/
|
||||
Disallow: /admin/
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SeoBot
|
||||
Disallow: /
|
||||
Crawl-delay: 300
|
||||
|
||||
# Test with various patterns
|
||||
User-agent: TestBot
|
||||
Disallow: /*/admin
|
||||
Disallow: /temp*.html
|
||||
Disallow: /file?.log
|
||||
71
cmd/robots2policy/testdata/complex.yaml
vendored
Normal file
71
cmd/robots2policy/testdata/complex.yaml
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private/")
|
||||
name: robots-txt-policy-disallow-3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/api/internal/")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-6
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Bingbot")
|
||||
name: robots-txt-policy-crawl-delay-7
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-9
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-10
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-crawl-delay-11
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-blacklist-12
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
name: robots-txt-policy-disallow-13
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
name: robots-txt-policy-disallow-14
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
name: robots-txt-policy-disallow-15
|
||||
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
Normal file
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: my-custom-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: my-custom-policy-disallow-2
|
||||
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
Normal file
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
- action: DENY
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: DENY
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
Normal file
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Empty robots.txt (comments only)
|
||||
# No actual rules
|
||||
1
cmd/robots2policy/testdata/empty.yaml
vendored
Normal file
1
cmd/robots2policy/testdata/empty.yaml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
12
cmd/robots2policy/testdata/simple.json
vendored
Normal file
12
cmd/robots2policy/testdata/simple.json
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
[
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/admin/\")",
|
||||
"name": "robots-txt-policy-disallow-1"
|
||||
},
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/private\")",
|
||||
"name": "robots-txt-policy-disallow-2"
|
||||
}
|
||||
]
|
||||
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
Normal file
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
# Simple robots.txt test
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private
|
||||
Allow: /public
|
||||
6
cmd/robots2policy/testdata/simple.yaml
vendored
Normal file
6
cmd/robots2policy/testdata/simple.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
6
cmd/robots2policy/testdata/wildcards.robots.txt
vendored
Normal file
6
cmd/robots2policy/testdata/wildcards.robots.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Test wildcard patterns
|
||||
User-agent: *
|
||||
Disallow: /search*
|
||||
Disallow: /*/private
|
||||
Disallow: /file?.txt
|
||||
Disallow: /admin/*?action=delete
|
||||
12
cmd/robots2policy/testdata/wildcards.yaml
vendored
Normal file
12
cmd/robots2policy/testdata/wildcards.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/search.*")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/.*/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/file.\\.txt")
|
||||
name: robots-txt-policy-disallow-3
|
||||
- action: CHALLENGE
|
||||
expression: path.matches("^/admin/.*.action=delete")
|
||||
name: robots-txt-policy-disallow-4
|
||||
@@ -4,7 +4,7 @@
|
||||
"import": "(data)/bots/_deny-pathological.yaml"
|
||||
},
|
||||
{
|
||||
"import": "(data)/bots/ai-robots-txt.yaml"
|
||||
"import": "(data)/meta/ai-block-aggressive.yaml"
|
||||
},
|
||||
{
|
||||
"import": "(data)/crawlers/_allow-good.yaml"
|
||||
|
||||
@@ -11,51 +11,217 @@
|
||||
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
|
||||
|
||||
bots:
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
# Pathological bots to deny
|
||||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
|
||||
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
|
||||
- import: (data)/bots/ai-robots-txt.yaml
|
||||
# Aggressively block AI/LLM related bots/agents by default
|
||||
- import: (data)/meta/ai-block-aggressive.yaml
|
||||
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
# - The Internet Archive
|
||||
# - Kagi
|
||||
# - Marginalia
|
||||
# - Mojeek
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
# Consider replacing the aggressive AI policy with more selective policies:
|
||||
# - import: (data)/meta/ai-block-moderate.yaml
|
||||
# - import: (data)/meta/ai-block-permissive.yaml
|
||||
|
||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
# Search engine crawlers to allow, defaults to:
|
||||
# - Google (so they don't try to bypass Anubis)
|
||||
# - Apple
|
||||
# - Bing
|
||||
# - DuckDuckGo
|
||||
# - Qwant
|
||||
# - The Internet Archive
|
||||
# - Kagi
|
||||
# - Marginalia
|
||||
# - Mojeek
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
# Challenge Firefox AI previews
|
||||
- import: (data)/clients/x-firefox-ai.yaml
|
||||
|
||||
# # Punish any bot with "bot" in the user-agent string
|
||||
# # This is known to have a high false-positive rate, use at your own risk
|
||||
# - name: generic-bot-catchall
|
||||
# user_agent_regex: (?i:bot|crawler)
|
||||
# action: CHALLENGE
|
||||
# challenge:
|
||||
# difficulty: 16 # impossible
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: CHALLENGE
|
||||
# # Punish any bot with "bot" in the user-agent string
|
||||
# # This is known to have a high false-positive rate, use at your own risk
|
||||
# - name: generic-bot-catchall
|
||||
# user_agent_regex: (?i:bot|crawler)
|
||||
# action: CHALLENGE
|
||||
# challenge:
|
||||
# difficulty: 16 # impossible
|
||||
# report_as: 4 # lie to the operator
|
||||
# algorithm: slow # intentionally waste CPU cycles and time
|
||||
|
||||
# Requires a subscription to Thoth to use, see
|
||||
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
|
||||
- name: countries-with-aggressive-scrapers
|
||||
action: WEIGH
|
||||
geoip:
|
||||
countries:
|
||||
- BR
|
||||
- CN
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
# Requires a subscription to Thoth to use, see
|
||||
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
|
||||
- name: aggressive-asns-without-functional-abuse-contact
|
||||
action: WEIGH
|
||||
asns:
|
||||
match:
|
||||
- 13335 # Cloudflare
|
||||
- 136907 # Huawei Cloud
|
||||
- 45102 # Alibaba Cloud
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
# ## System load based checks.
|
||||
# # If the system is under high load, add weight.
|
||||
# - name: high-load-average
|
||||
# action: WEIGH
|
||||
# expression: load_1m >= 10.0 # make sure to end the load comparison in a .0
|
||||
# weight:
|
||||
# adjust: 20
|
||||
|
||||
## If your backend service is running on the same operating system as Anubis,
|
||||
## you can uncomment this rule to make the challenge easier when the system is
|
||||
## under low load.
|
||||
##
|
||||
## If it is not, remove weight.
|
||||
# - name: low-load-average
|
||||
# action: WEIGH
|
||||
# expression: load_15m <= 4.0 # make sure to end the load comparison in a .0
|
||||
# weight:
|
||||
# adjust: -10
|
||||
|
||||
# Generic catchall rule
|
||||
- name: generic-browser
|
||||
user_agent_regex: >-
|
||||
Mozilla|Opera
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 10
|
||||
|
||||
dnsbl: false
|
||||
|
||||
# #
|
||||
# impressum:
|
||||
# # Displayed at the bottom of every page rendered by Anubis.
|
||||
# footer: >-
|
||||
# This website is hosted by Zombocom. If you have any complaints or notes
|
||||
# about the service, please contact
|
||||
# <a href="mailto:contact@domainhere.example">contact@domainhere.example</a>
|
||||
# and we will assist you as soon as possible.
|
||||
|
||||
# # The imprint page that will be linked to at the footer of every Anubis page.
|
||||
# page:
|
||||
# # The HTML <title> of the page
|
||||
# title: Imprint and Privacy Policy
|
||||
# # The HTML contents of the page. The exact contents of this page can
|
||||
# # and will vary by locale. Please consult with a lawyer if you are not
|
||||
# # sure what to put here
|
||||
# body: >-
|
||||
# <p>Last updated: June 2025</p>
|
||||
|
||||
# <h2>Information that is gathered from visitors</h2>
|
||||
|
||||
# <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
|
||||
|
||||
# <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
|
||||
|
||||
# <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
|
||||
|
||||
# <!-- ... -->
|
||||
|
||||
# Open Graph passthrough configuration, see here for more information:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
|
||||
openGraph:
|
||||
# Enables Open Graph passthrough
|
||||
enabled: false
|
||||
# Enables the use of the HTTP host in the cache key, this enables
|
||||
# caching metadata for multiple http hosts at once.
|
||||
considerHost: false
|
||||
# How long cached OpenGraph metadata should last in memory
|
||||
ttl: 24h
|
||||
# # If set, return these opengraph values instead of looking them up with
|
||||
# # the target service.
|
||||
# #
|
||||
# # Correlates to properties in https://ogp.me/
|
||||
# override:
|
||||
# # og:title is required, it is the title of the website
|
||||
# "og:title": "Techaro Anubis"
|
||||
# "og:description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
# "description": >-
|
||||
# Anubis is a Web AI Firewall Utility that helps you fight the bots
|
||||
# away so that you can maintain uptime at work!
|
||||
|
||||
# By default, send HTTP 200 back to clients that either get issued a challenge
|
||||
# or a denial. This seems weird, but this is load-bearing due to the fact that
|
||||
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
|
||||
# will stop sending requests once they get it.
|
||||
status_codes:
|
||||
CHALLENGE: 200
|
||||
DENY: 200
|
||||
DENY: 200
|
||||
|
||||
# Anubis can store temporary data in one of a few backends. See the storage
|
||||
# backends section of the docs for more information:
|
||||
#
|
||||
# https://anubis.techaro.lol/docs/admin/policies#storage-backends
|
||||
store:
|
||||
backend: memory
|
||||
parameters: {}
|
||||
|
||||
# The weight thresholds for when to trigger individual challenges. Any
|
||||
# CHALLENGE will take precedence over this.
|
||||
#
|
||||
# A threshold has four configuration options:
|
||||
#
|
||||
# - name: the name that is reported down the stack and used for metrics
|
||||
# - expression: A CEL expression with the request weight in the variable
|
||||
# weight
|
||||
# - action: the Anubis action to apply, similar to in a bot policy
|
||||
# - challenge: which challenge to send to the user, similar to in a bot policy
|
||||
#
|
||||
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
|
||||
# information.
|
||||
thresholds:
|
||||
# By default Anubis ships with the following thresholds:
|
||||
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
|
||||
expression: weight <= 0 # a feather weighs zero units
|
||||
action: ALLOW # Allow the traffic through
|
||||
# For clients that had some weight reduced through custom rules, give them a
|
||||
# lightweight challenge.
|
||||
- name: mild-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight > 0
|
||||
- weight < 10
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
|
||||
algorithm: metarefresh
|
||||
difficulty: 1
|
||||
report_as: 1
|
||||
# For clients that are browser-like but have either gained points from custom rules or
|
||||
# report as a standard browser.
|
||||
- name: moderate-suspicion
|
||||
expression:
|
||||
all:
|
||||
- weight >= 10
|
||||
- weight < 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 2 # two leading zeros, very fast for most clients
|
||||
report_as: 2
|
||||
# For clients that are browser-like and have gained many points from custom rules
|
||||
- name: extreme-suspicion
|
||||
expression: weight >= 20
|
||||
action: CHALLENGE
|
||||
challenge:
|
||||
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
|
||||
algorithm: fast
|
||||
difficulty: 4
|
||||
report_as: 4
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
- name: deny-aggressive-brazilian-scrapers
|
||||
action: DENY
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 20
|
||||
expression:
|
||||
any:
|
||||
# Internet Explorer should be out of support
|
||||
- userAgent.contains("MSIE")
|
||||
# Trident is the Internet Explorer browser engine
|
||||
- userAgent.contains("Trident")
|
||||
# Opera is a fork of chrome now
|
||||
- userAgent.contains("Presto")
|
||||
# Windows CE is discontinued
|
||||
- userAgent.contains("Windows CE")
|
||||
# Windows 95 is discontinued
|
||||
- userAgent.contains("Windows 95")
|
||||
# Windows 98 is discontinued
|
||||
- userAgent.contains("Windows 98")
|
||||
# Windows 9.x is discontinued
|
||||
- userAgent.contains("Win 9x")
|
||||
# Amazon does not have an Alexa Toolbar.
|
||||
- userAgent.contains("Alexa Toolbar")
|
||||
- name: challenge-aggressive-brazilian-scrapers
|
||||
action: CHALLENGE
|
||||
expression:
|
||||
any:
|
||||
# This is not released, even Windows 11 calls itself Windows 10
|
||||
- userAgent.contains("Windows NT 11.0")
|
||||
# iPods are not in common use
|
||||
- userAgent.contains("iPod")
|
||||
# Internet Explorer should be out of support
|
||||
- userAgent.contains("MSIE")
|
||||
# Trident is the Internet Explorer browser engine
|
||||
- userAgent.contains("Trident")
|
||||
# Opera is a fork of chrome now
|
||||
- userAgent.contains("Presto")
|
||||
# Windows CE is discontinued
|
||||
- userAgent.contains("Windows CE")
|
||||
# Windows 95 is discontinued
|
||||
- userAgent.contains("Windows 95")
|
||||
# Windows 98 is discontinued
|
||||
- userAgent.contains("Windows 98")
|
||||
# Windows 9.x is discontinued
|
||||
- userAgent.contains("Win 9x")
|
||||
# Amazon does not have an Alexa Toolbar.
|
||||
- userAgent.contains("Alexa Toolbar")
|
||||
# This is not released, even Windows 11 calls itself Windows 10
|
||||
- userAgent.contains("Windows NT 11.0")
|
||||
# iPods are not in common use
|
||||
- userAgent.contains("iPod")
|
||||
|
||||
11
data/bots/ai-catchall.yaml
Normal file
11
data/bots/ai-catchall.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Extensive list of AI-affiliated agents based on https://github.com/ai-robots-txt/ai.robots.txt
|
||||
# Add new/undocumented agents here. Where documentation exists, consider moving to dedicated policy files.
|
||||
# Notes on various agents:
|
||||
# - Amazonbot: Well documented, but they refuse to state which agent collects training data.
|
||||
# - anthropic-ai/Claude-Web: Undocumented by Anthropic. Possibly deprecated or hallucinations?
|
||||
# - Perplexity*: Well documented, but they refuse to state which agent collects training data.
|
||||
# Warning: May contain user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
|
||||
- name: "ai-catchall"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Brightbot 1.0|Bytespider|Claude-Web|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|GoogleOther|GoogleOther-Image|GoogleOther-Video|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
action: DENY
|
||||
@@ -1,4 +1,8 @@
|
||||
# Warning: Contains user agents that _must_ be blocked in robots.txt, or the opt-out will have no effect.
|
||||
# Note: Blocks human-directed/non-training user agents
|
||||
#
|
||||
# CCBot is allowed because if Common Crawl is allowed, then scrapers don't need to scrape to get the data.
|
||||
- name: "ai-robots-txt"
|
||||
user_agent_regex: >-
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|QualifiedBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YouBot
|
||||
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
|
||||
action: DENY
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
- name: cloudflare-workers
|
||||
headers_regex:
|
||||
CF-Worker: .*
|
||||
action: DENY
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: 15
|
||||
|
||||
8
data/clients/ai.yaml
Normal file
8
data/clients/ai.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# User agents that act on behalf of humans in AI tools, e.g. searching the web.
|
||||
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
|
||||
# Exceptions:
|
||||
# - Claude-User: No published IP allowlist
|
||||
- name: "ai-clients"
|
||||
user_agent_regex: >-
|
||||
ChatGPT-User|Claude-User|MistralAI-User
|
||||
action: DENY
|
||||
10
data/clients/mistral-mistralai-user.yaml
Normal file
10
data/clients/mistral-mistralai-user.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
# Acts on behalf of user requests
|
||||
# https://docs.mistral.ai/robots/
|
||||
- name: mistral-mistralai-user
|
||||
user_agent_regex: MistralAI-User/.+; \+https\://docs\.mistral\.ai/robots
|
||||
action: ALLOW
|
||||
# https://mistral.ai/mistralai-user-ips.json
|
||||
remote_addresses: [
|
||||
"20.240.160.161/32",
|
||||
"20.240.160.1/32",
|
||||
]
|
||||
93
data/clients/openai-chatgpt-user.yaml
Normal file
93
data/clients/openai-chatgpt-user.yaml
Normal file
@@ -0,0 +1,93 @@
|
||||
# Acts on behalf of user requests
|
||||
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||
- name: openai-chatgpt-user
|
||||
user_agent_regex: ChatGPT-User/.+; \+https\://openai\.com/bot
|
||||
action: ALLOW
|
||||
# https://openai.com/chatgpt-user.json
|
||||
# curl 'https://openai.com/chatgpt-user.json' | jq '.prefixes.[].ipv4Prefix' | sed 's/$/,/'
|
||||
remote_addresses: [
|
||||
"13.65.138.112/28",
|
||||
"23.98.179.16/28",
|
||||
"13.65.138.96/28",
|
||||
"172.183.222.128/28",
|
||||
"20.102.212.144/28",
|
||||
"40.116.73.208/28",
|
||||
"172.183.143.224/28",
|
||||
"52.190.190.16/28",
|
||||
"13.83.237.176/28",
|
||||
"51.8.155.64/28",
|
||||
"74.249.86.176/28",
|
||||
"51.8.155.48/28",
|
||||
"20.55.229.144/28",
|
||||
"135.237.131.208/28",
|
||||
"135.237.133.48/28",
|
||||
"51.8.155.112/28",
|
||||
"135.237.133.112/28",
|
||||
"52.159.249.96/28",
|
||||
"52.190.137.16/28",
|
||||
"52.255.111.112/28",
|
||||
"40.84.181.32/28",
|
||||
"172.178.141.112/28",
|
||||
"52.190.142.64/28",
|
||||
"172.178.140.144/28",
|
||||
"52.190.137.144/28",
|
||||
"172.178.141.128/28",
|
||||
"57.154.187.32/28",
|
||||
"4.196.118.112/28",
|
||||
"20.193.50.32/28",
|
||||
"20.215.188.192/28",
|
||||
"20.215.214.16/28",
|
||||
"4.197.22.112/28",
|
||||
"4.197.115.112/28",
|
||||
"172.213.21.16/28",
|
||||
"172.213.11.144/28",
|
||||
"172.213.12.112/28",
|
||||
"172.213.21.144/28",
|
||||
"20.90.7.144/28",
|
||||
"57.154.175.0/28",
|
||||
"57.154.174.112/28",
|
||||
"52.236.94.144/28",
|
||||
"137.135.191.176/28",
|
||||
"23.98.186.192/28",
|
||||
"23.98.186.96/28",
|
||||
"23.98.186.176/28",
|
||||
"23.98.186.64/28",
|
||||
"68.221.67.192/28",
|
||||
"68.221.67.160/28",
|
||||
"13.83.167.128/28",
|
||||
"20.228.106.176/28",
|
||||
"52.159.227.32/28",
|
||||
"68.220.57.64/28",
|
||||
"172.213.21.112/28",
|
||||
"68.221.67.224/28",
|
||||
"68.221.75.16/28",
|
||||
"20.97.189.96/28",
|
||||
"52.252.113.240/28",
|
||||
"52.230.163.32/28",
|
||||
"172.212.159.64/28",
|
||||
"52.255.111.80/28",
|
||||
"52.255.111.0/28",
|
||||
"4.151.241.240/28",
|
||||
"52.255.111.32/28",
|
||||
"52.255.111.48/28",
|
||||
"52.255.111.16/28",
|
||||
"52.230.164.176/28",
|
||||
"52.176.139.176/28",
|
||||
"52.173.234.16/28",
|
||||
"4.151.71.176/28",
|
||||
"4.151.119.48/28",
|
||||
"52.255.109.112/28",
|
||||
"52.255.109.80/28",
|
||||
"20.161.75.208/28",
|
||||
"68.154.28.96/28",
|
||||
"52.255.109.128/28",
|
||||
"52.225.75.208/28",
|
||||
"52.190.139.48/28",
|
||||
"68.221.67.240/28",
|
||||
"52.156.77.144/28",
|
||||
"52.148.129.32/28",
|
||||
"40.84.221.208/28",
|
||||
"104.210.139.224/28",
|
||||
"40.84.221.224/28",
|
||||
"104.210.139.192/28",
|
||||
]
|
||||
2
data/clients/small-internet-browsers/_permissive.yaml
Normal file
2
data/clients/small-internet-browsers/_permissive.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
- import: (data)/clients/small-internet-browsers/netsurf.yaml
|
||||
- import: (data)/clients/small-internet-browsers/palemoon.yaml
|
||||
5
data/clients/small-internet-browsers/netsurf.yaml
Normal file
5
data/clients/small-internet-browsers/netsurf.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
- name: "reduce-weight-netsurf"
|
||||
user_agent_regex: "NetSurf"
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: -5
|
||||
5
data/clients/small-internet-browsers/palemoon.yaml
Normal file
5
data/clients/small-internet-browsers/palemoon.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
- name: "reduce-weight-palemoon"
|
||||
user_agent_regex: "PaleMoon"
|
||||
action: WEIGH
|
||||
weight:
|
||||
adjust: -5
|
||||
6
data/clients/x-firefox-ai.yaml
Normal file
6
data/clients/x-firefox-ai.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# https://connect.mozilla.org/t5/firefox-labs/try-out-link-previews-in-firefox-labs-138-and-share-your/td-p/92012
|
||||
- name: x-firefox-ai
|
||||
action: WEIGH
|
||||
expression: '"X-Firefox-Ai" in headers'
|
||||
weight:
|
||||
adjust: 5
|
||||
@@ -1,15 +1,15 @@
|
||||
- name: ipv4-rfc-1918
|
||||
action: ALLOW
|
||||
remote_addresses:
|
||||
- 10.0.0.0/8
|
||||
- 172.16.0.0/12
|
||||
- 192.168.0.0/16
|
||||
- 100.64.0.0/10
|
||||
- 10.0.0.0/8
|
||||
- 172.16.0.0/12
|
||||
- 192.168.0.0/16
|
||||
- 100.64.0.0/10
|
||||
- name: ipv6-ula
|
||||
action: ALLOW
|
||||
remote_addresses:
|
||||
- fc00::/7
|
||||
- fc00::/7
|
||||
- name: ipv6-link-local
|
||||
action: ALLOW
|
||||
remote_addresses:
|
||||
- fe80::/10
|
||||
- fe80::/10
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
- import: (data)/crawlers/googlebot.yaml
|
||||
- import: (data)/crawlers/applebot.yaml
|
||||
- import: (data)/crawlers/bingbot.yaml
|
||||
- import: (data)/crawlers/duckduckbot.yaml
|
||||
- import: (data)/crawlers/qwantbot.yaml
|
||||
- import: (data)/crawlers/internet-archive.yaml
|
||||
- import: (data)/crawlers/kagibot.yaml
|
||||
- import: (data)/crawlers/marginalia.yaml
|
||||
- import: (data)/crawlers/mojeekbot.yaml
|
||||
- import: (data)/crawlers/mojeekbot.yaml
|
||||
- import: (data)/crawlers/commoncrawl.yaml
|
||||
|
||||
8
data/crawlers/ai-search.yaml
Normal file
8
data/crawlers/ai-search.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# User agents that index exclusively for search in AI systems.
|
||||
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
|
||||
# Exceptions:
|
||||
# - Claude-SearchBot: No published IP allowlist
|
||||
- name: "ai-crawlers-search"
|
||||
user_agent_regex: >-
|
||||
OAI-SearchBot|Claude-SearchBot
|
||||
action: DENY
|
||||
8
data/crawlers/ai-training.yaml
Normal file
8
data/crawlers/ai-training.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# User agents that crawl for training AI/LLM systems
|
||||
# Each entry should have a positive/ALLOW entry created as well, with further documentation.
|
||||
# Exceptions:
|
||||
# - ClaudeBot: No published IP allowlist
|
||||
- name: "ai-crawlers-training"
|
||||
user_agent_regex: >-
|
||||
GPTBot|ClaudeBot
|
||||
action: DENY
|
||||
20
data/crawlers/applebot.yaml
Normal file
20
data/crawlers/applebot.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Indexing for search and Siri
|
||||
# https://support.apple.com/en-us/119829
|
||||
- name: applebot
|
||||
user_agent_regex: Applebot
|
||||
action: ALLOW
|
||||
# https://search.developer.apple.com/applebot.json
|
||||
remote_addresses: [
|
||||
"17.241.208.160/27",
|
||||
"17.241.193.160/27",
|
||||
"17.241.200.160/27",
|
||||
"17.22.237.0/24",
|
||||
"17.22.245.0/24",
|
||||
"17.22.253.0/24",
|
||||
"17.241.75.0/24",
|
||||
"17.241.219.0/24",
|
||||
"17.241.227.0/24",
|
||||
"17.246.15.0/24",
|
||||
"17.246.19.0/24",
|
||||
"17.246.23.0/24",
|
||||
]
|
||||
12
data/crawlers/commoncrawl.yaml
Normal file
12
data/crawlers/commoncrawl.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
- name: common-crawl
|
||||
user_agent_regex: CCBot
|
||||
action: ALLOW
|
||||
# https://index.commoncrawl.org/ccbot.json
|
||||
remote_addresses:
|
||||
[
|
||||
"2600:1f28:365:80b0::/60",
|
||||
"18.97.9.168/29",
|
||||
"18.97.14.80/29",
|
||||
"18.97.14.88/30",
|
||||
"98.85.178.216/32",
|
||||
]
|
||||
16
data/crawlers/openai-gptbot.yaml
Normal file
16
data/crawlers/openai-gptbot.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
# Collects AI training data
|
||||
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||
- name: openai-gptbot
|
||||
user_agent_regex: GPTBot/1\.1; \+https\://openai\.com/gptbot
|
||||
action: ALLOW
|
||||
# https://openai.com/gptbot.json
|
||||
remote_addresses: [
|
||||
"52.230.152.0/24",
|
||||
"20.171.206.0/24",
|
||||
"20.171.207.0/24",
|
||||
"4.227.36.0/25",
|
||||
"20.125.66.80/28",
|
||||
"172.182.204.0/24",
|
||||
"172.182.214.0/24",
|
||||
"172.182.215.0/24",
|
||||
]
|
||||
13
data/crawlers/openai-searchbot.yaml
Normal file
13
data/crawlers/openai-searchbot.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Indexing for search, does not collect training data
|
||||
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||
- name: openai-searchbot
|
||||
user_agent_regex: OAI-SearchBot/1\.0; \+https\://openai\.com/searchbot
|
||||
action: ALLOW
|
||||
# https://openai.com/searchbot.json
|
||||
remote_addresses: [
|
||||
"20.42.10.176/28",
|
||||
"172.203.190.128/28",
|
||||
"104.210.140.128/28",
|
||||
"51.8.102.0/24",
|
||||
"135.234.64.0/24"
|
||||
]
|
||||
@@ -3,6 +3,6 @@ package data
|
||||
import "embed"
|
||||
|
||||
var (
|
||||
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers
|
||||
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers all:meta
|
||||
BotPolicies embed.FS
|
||||
)
|
||||
|
||||
5
data/meta/README.md
Normal file
5
data/meta/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# meta policies
|
||||
|
||||
Contains policies that exclusively reference policies in _multiple_ other data folders.
|
||||
|
||||
These are akin to "stances" that the administrator can take on various topics, such as AI/LLM systems.
|
||||
6
data/meta/ai-block-aggressive.yaml
Normal file
6
data/meta/ai-block-aggressive.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# Blocks all AI/LLM associated user agents, regardless of purpose or human agency
|
||||
# Warning: To completely block some AI/LLM training, such as with Google, you _must_ place flags in robots.txt.
|
||||
- import: (data)/bots/ai-catchall.yaml
|
||||
- import: (data)/clients/ai.yaml
|
||||
- import: (data)/crawlers/ai-search.yaml
|
||||
- import: (data)/crawlers/ai-training.yaml
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user