Compare commits

..

1 Commits

Author SHA1 Message Date
Xe Iaso
ffa67fc46a cmd/anubis: allow Internet Archive by default
This is based on the IP ranges advertised by AS7941

Also adds comments about the other IP rangesets and where they come
from.

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-03-22 15:00:38 -04:00
261 changed files with 2768 additions and 13036 deletions

View File

@@ -1,12 +0,0 @@
root = "."
tmp_dir = "var"
[build]
cmd = "go build -o ./var/main ./cmd/anubis"
bin = "./var/main"
args = ["--use-remote-address"]
exclude_dir = ["var", "vendor", "docs", "node_modules"]
[logger]
time = true
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address

1
.gitattributes vendored
View File

@@ -1 +0,0 @@
web/index_templ.go linguist-generated

3
.github/FUNDING.yml vendored
View File

@@ -1,2 +1 @@
patreon: cadey
github: xe
patreon: cadey

View File

@@ -1,11 +0,0 @@
<!--
delete me and describe your change here, give enough context for a maintainer to understand what and why
See https://anubis.techaro.lol/docs/developer/code-quality for more information
-->
Checklist:
- [ ] Added a description of the changes to the `[Unreleased]` section of docs/docs/CHANGELOG.md
- [ ] Added test cases to [the relevant parts of the codebase](https://anubis.techaro.lol/docs/developer/code-quality)
- [ ] Ran integration tests `npm run test:integration` (unsupported on Windows, please use WSL)

View File

@@ -1,28 +0,0 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
groups:
github-actions:
patterns:
- "*"
- package-ecosystem: gomod
directory: /
schedule:
interval: weekly
groups:
gomod:
patterns:
- "*"
- package-ecosystem: npm
directory: /
schedule:
interval: weekly
groups:
npm:
patterns:
- "*"

View File

@@ -12,57 +12,33 @@ permissions:
jobs:
build:
runs-on: ubuntu-24.04
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
- uses: actions/setup-go@v5
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
go-version: '1.24.x'
- name: Install Brew dependencies
run: |
brew bundle
- uses: ko-build/setup-ko@v0.8
- name: Docker meta
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
uses: docker/metadata-action@v5
with:
images: ghcr.io/techarohq/anubis
- name: Build and push
id: build
run: |
npm ci
npm run container
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
env:
PULL_REQUEST_ID: ${{ github.event.number }}
DOCKER_REPO: ghcr.io/techarohq/anubis
SLOG_LEVEL: debug
- run: |
echo "Test this with:"
echo "docker pull ${DOCKER_IMAGE}"
env:
DOCKER_IMAGE: ${{ steps.build.outputs.docker_image }}
echo "docker pull ${{ steps.build.outputs.docker_image }}"

View File

@@ -18,41 +18,22 @@ permissions:
jobs:
build:
runs-on: ubuntu-24.04
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v4
with:
fetch-tags: true
fetch-depth: 0
persist-credentials: false
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
- uses: actions/setup-go@v5
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
go-version: '1.24.x'
- name: Install Brew dependencies
run: |
brew bundle
- uses: ko-build/setup-ko@v0.8
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
registry: ghcr.io
username: techarohq
@@ -60,22 +41,19 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
uses: docker/metadata-action@v5
with:
images: ghcr.io/techarohq/anubis
- name: Build and push
id: build
run: |
npm ci
npm run container
env:
DOCKER_REPO: ghcr.io/techarohq/anubis
SLOG_LEVEL: debug
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
- name: Generate artifact attestation
uses: actions/attest-build-provenance@db473fddc028af60658334401dc6fa3ffd8669fd # v2.3.0
uses: actions/attest-build-provenance@v2
if: ${{github.event_name == 'pull_request'}}
with:
subject-name: ghcr.io/techarohq/anubis
subject-digest: ${{ steps.build.outputs.digest }}
push-to-registry: true
push-to-registry: true

View File

@@ -13,18 +13,16 @@ permissions:
jobs:
build:
runs-on: ubuntu-24.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
uses: docker/setup-buildx-action@v3
- name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@v3
with:
registry: ghcr.io
username: techarohq
@@ -32,13 +30,13 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
uses: docker/metadata-action@v5
with:
images: ghcr.io/techarohq/anubis/docs
- name: Build and push
id: build
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
uses: docker/build-push-action@v6
with:
context: ./docs
cache-to: type=gha
@@ -49,14 +47,14 @@ jobs:
push: true
- name: Apply k8s manifests to aeacus
uses: actions-hub/kubectl@e81783053d902f50d752d21a6d99cf9689a652e1 # v1.33.0
uses: actions-hub/kubectl@master
env:
KUBE_CONFIG: ${{ secrets.AEACUS_KUBECONFIG }}
with:
args: apply -k docs/manifest
- name: Apply k8s manifests to aeacus
uses: actions-hub/kubectl@e81783053d902f50d752d21a6d99cf9689a652e1 # v1.33.0
uses: actions-hub/kubectl@master
env:
KUBE_CONFIG: ${{ secrets.AEACUS_KUBECONFIG }}
with:

View File

@@ -1,39 +0,0 @@
name: Docs test build
on:
pull_request:
branches: [ "main" ]
permissions:
contents: read
actions: write
jobs:
build:
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Docker meta
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ghcr.io/techarohq/anubis/docs
- name: Build and push
id: build
uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v6.16.0
with:
context: ./docs
cache-to: type=gha
cache-from: type=gha
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64
push: false

View File

@@ -11,13 +11,11 @@ permissions:
actions: write
jobs:
go_tests:
build:
#runs-on: alrest-techarohq
runs-on: ubuntu-24.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- uses: actions/checkout@v4
- name: build essential
run: |
@@ -28,7 +26,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
uses: actions/cache@v4
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
@@ -49,7 +47,7 @@ jobs:
brew bundle
- name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
uses: actions/cache@v4
with:
path: |
~/.cache/go-build
@@ -58,34 +56,8 @@ jobs:
restore-keys: |
${{ runner.os }}-golang-
- name: Cache playwright binaries
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
id: playwright-cache
with:
path: |
~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('**/go.sum') }}
- name: install node deps
run: |
npm ci
- name: install playwright browsers
run: |
npx --no-install playwright@1.52.0 install --with-deps
npx --no-install playwright@1.52.0 run-server --port 9001 &
- name: Build
run: npm run build
run: go build ./...
- name: Test
run: npm run test
- name: Lint with staticcheck
uses: dominikh/staticcheck-action@fe1dd0c3658873b46f8c9bb3291096a617310ca6 # v1.3.1
with:
version: "latest"
- name: Govulncheck
run: |
go tool govulncheck ./...
run: go test ./...

View File

@@ -1,82 +0,0 @@
name: Package builds (stable)
on:
release:
types: [published]
permissions:
contents: write
actions: write
jobs:
package_builds:
#runs-on: alrest-techarohq
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
fetch-tags: true
fetch-depth: 0
- name: build essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
- name: Install Brew dependencies
run: |
brew bundle
- name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-golang-
- name: install node deps
run: |
npm ci
- name: Build Packages
run: |
wget https://github.com/TecharoHQ/yeet/releases/download/v0.2.2/yeet_0.2.2_amd64.deb -O var/yeet.deb
sudo apt -y install -f ./var/yeet.deb
rm ./var/yeet.deb
yeet
- name: Upload released artifacts
env:
GITHUB_TOKEN: ${{ github.TOKEN }}
RELEASE_VERSION: ${{github.event.release.tag_name}}
shell: bash
run: |
RELEASE="${RELEASE_VERSION}"
cd var
for file in *; do
gh release upload $RELEASE $file
done

View File

@@ -1,77 +0,0 @@
name: Package builds (unstable)
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
permissions:
contents: read
actions: write
jobs:
package_builds:
#runs-on: alrest-techarohq
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
fetch-tags: true
fetch-depth: 0
- name: build essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Set up Homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
/home/linuxbrew/.linuxbrew/Cellar
/home/linuxbrew/.linuxbrew/bin
/home/linuxbrew/.linuxbrew/etc
/home/linuxbrew/.linuxbrew/include
/home/linuxbrew/.linuxbrew/lib
/home/linuxbrew/.linuxbrew/opt
/home/linuxbrew/.linuxbrew/sbin
/home/linuxbrew/.linuxbrew/share
/home/linuxbrew/.linuxbrew/var
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
restore-keys: |
${{ runner.os }}-go-homebrew-cellar-
- name: Install Brew dependencies
run: |
brew bundle
- name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-golang-
- name: install node deps
run: |
npm ci
- name: Build Packages
run: |
wget https://github.com/TecharoHQ/yeet/releases/download/v0.2.2/yeet_0.2.2_amd64.deb -O var/yeet.deb
sudo apt -y install -f ./var/yeet.deb
rm ./var/yeet.deb
yeet
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: packages
path: var/*

View File

@@ -1,35 +0,0 @@
name: zizmor
on:
push:
paths:
- '.github/workflows/*.ya?ml'
pull_request:
paths:
- '.github/workflows/*.ya?ml'
jobs:
zizmor:
name: zizmor latest via PyPI
runs-on: ubuntu-24.04
permissions:
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Install the latest version of uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
- name: Run zizmor 🌈
run: uvx zizmor --format sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
with:
sarif_file: results.sarif
category: zizmor

5
.github/zizmor.yml vendored
View File

@@ -1,5 +0,0 @@
rules:
unpinned-uses:
config:
policies:
Homebrew/actions/*: any

22
.gitignore vendored
View File

@@ -1,22 +1,2 @@
.env
*.deb
*.rpm
# Additional package locks
pnpm-lock.yaml
yarn.lock
# Go binaries and test artifacts
main
*.test
node_modules
# MacOS
.DS_store
# Intellij
.idea
# how does this get here
doc/VERSION
*.rpm

15
.vscode/settings.json vendored
View File

@@ -1,15 +0,0 @@
{
"github.copilot.enable": {
"*": false,
"plaintext": false,
"markdown": false,
"mdx": false,
"json": false,
"scminput": false,
"yaml": false,
"go": false,
"zig": false,
"javascript": false,
"properties": false
}
}

View File

@@ -1,7 +1,4 @@
# programming languages
brew "go@1.24"
brew "node"
brew "ko"
brew "esbuild"
brew "zstd"
brew "brotli"
brew "ko"

23
Dockerfile Normal file
View File

@@ -0,0 +1,23 @@
FROM docker.io/library/golang:1.24 AS build
ARG BUILDKIT_SBOM_SCAN_CONTEXT=true BUILDKIT_SBOM_SCAN_STAGE=true
WORKDIR /app
COPY go.mod go.sum /app/
RUN go mod download
COPY . .
RUN --mount=type=cache,target=/root/.cache \
VERSION=$(git describe --tags --always --dirty) \
&& go build -o /app/bin/anubis -ldflags="-X github.com/TecharoHQ/anubis.Version=${VERSION}" ./cmd/anubis
FROM docker.io/library/debian:bookworm AS runtime
ARG BUILDKIT_SBOM_SCAN_STAGE=true
RUN apt-get update \
&& apt-get -y install ca-certificates
COPY --from=build /app/bin/anubis /app/bin/anubis
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 CMD ["/app/bin/anubis", "--healthcheck"]
CMD ["/app/bin/anubis"]
LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis"

View File

@@ -1,32 +0,0 @@
VERSION= $(shell cat ./VERSION)
GO?= go
NPM?= npm
.PHONY: build assets deps lint prebaked-build test
all: build
deps:
$(NPM) ci
$(GO) mod download
assets: PATH:=$(PWD)/node_modules/.bin:$(PATH)
assets: deps
$(GO) generate ./...
./web/build.sh
./xess/build.sh
build: assets
$(GO) build -o ./var/anubis ./cmd/anubis
@echo "Anubis is now built to ./var/anubis"
lint: assets
$(GO) vet ./...
$(GO) tool staticcheck ./...
$(GO) tool govulncheck ./...
prebaked-build:
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
test: assets
$(GO) test ./...

6
PULL_REQUEST_TEMPLATE.md Normal file
View File

@@ -0,0 +1,6 @@
<!-- delete me and describe your change here -->
Checklist:
- [ ] Added a description of the changes to the `[Unreleased]` section of docs/docs/CHANGELOG.md
- [ ] Tested this at least manually

View File

@@ -1,7 +1,7 @@
# Anubis
<center>
<img width=256 src="./web/static/img/happy.webp" alt="A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up" />
<img width=256 src="./cmd/anubis/static/img/happy.webp" alt="A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up" />
</center>
![enbyware](https://pride-badges.pony.workers.dev/static/v1?label=enbyware&labelColor=%23555&stripeWidth=8&stripeColors=FCF434%2CFFFFFF%2C9C59D1%2C2C2C2C)
@@ -10,21 +10,11 @@
![language count](https://img.shields.io/github/languages/count/TecharoHQ/anubis)
![repo size](https://img.shields.io/github/repo-size/TecharoHQ/anubis)
## Sponsors
Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots.
Anubis is brought to you by sponsors and donors like:
Installing and using this will likely result in your website not being indexed by some search engines. This is considered a feature of Anubis, not a bug.
[![Distrust](./docs/static/img/sponsors/distrust-logo.webp)](https://distrust.co)
[![Terminal Trove](./docs/static/img/sponsors/terminal-trove.webp)](https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh)
[![canine.tools](./docs/static/img/sponsors/caninetools-logo.webp)](https://canine.tools)
## Overview
Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a proof-of-work challenge in order to protect upstream resources from scraper bots.
This program is designed to help protect the small internet from the endless storm of requests that flood in from AI companies. Anubis is as lightweight as possible to ensure that everyone can afford to protect the communities closest to them.
Anubis is a bit of a nuclear response. This will result in your website being blocked from smaller scrapers and may inhibit "good bots" like the Internet Archive. You can configure [bot policy definitions](./docs/docs/admin/policies.mdx) to explicitly allowlist them and we are working on a curated set of "known good" bots to allow for a compromise between discoverability and uptime.
This is a bit of a nuclear response, but AI scraper bots scraping so aggressively have forced my hand. I hate that I have to do this, but this is what we get for the modern Internet because bots don't conform to standards like robots.txt, even when they claim to.
In most cases, you should not need this and can probably get by using Cloudflare to protect a given origin. However, for circumstances where you can't or won't use Cloudflare, Anubis is there for you.
@@ -32,28 +22,10 @@ If you want to try this out, connect to [anubis.techaro.lol](https://anubis.tech
## Support
If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue). Please include all the information I would need to diagnose your issue.
If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue) and tag it with the Anubis tag. Please include all the information I would need to diagnose your issue.
For live chat, please join the [Patreon](https://patreon.com/cadey) and ask in the Patron discord in the channel `#anubis`.
## Star History
<a href="https://www.star-history.com/#TecharoHQ/anubis&Date">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date&theme=dark" />
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date" />
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date" />
</picture>
</a>
## Packaging Status
[![Packaging status](https://repology.org/badge/vertical-allrepos/anubis-anti-crawler.svg?columns=3)](https://repology.org/project/anubis-anti-crawler/versions)
## Contributors
<a href="https://github.com/TecharoHQ/anubis/graphs/contributors">
<img src="https://contrib.rocks/image?repo=TecharoHQ/anubis" />
</a>
Made with [contrib.rocks](https://contrib.rocks).
[![Star History Chart](https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date)](https://www.star-history.com/#TecharoHQ/anubis&Date)

View File

@@ -1 +1 @@
1.18.0
1.14.2

View File

@@ -1,30 +0,0 @@
// Package anubis contains the version number of Anubis.
package anubis
import "time"
// Version is the current version of Anubis.
//
// This variable is set at build time using the -X linker flag. If not set,
// it defaults to "devel".
var Version = "devel"
// CookieName is the name of the cookie that Anubis uses in order to validate
// access.
const CookieName = "within.website-x-cmd-anubis-auth"
// CookieDefaultExpirationTime is the amount of time before the cookie/JWT expires.
const CookieDefaultExpirationTime = 7 * 24 * time.Hour
// BasePrefix is a global prefix for all Anubis endpoints. Can be emptied to remove the prefix entirely.
var BasePrefix = ""
// StaticPath is the location where all static Anubis assets are located.
const StaticPath = "/.within.website/x/cmd/anubis/"
// APIPrefix is the location where all Anubis API endpoints are located.
const APIPrefix = "/.within.website/x/cmd/anubis/api/"
// DefaultDifficulty is the default "difficulty" (number of leading zeroes)
// that must be met by the client in order to pass the challenge.
const DefaultDifficulty = 4

5
cmd/anubis/CHANGELOG.md Normal file
View File

@@ -0,0 +1,5 @@
# CHANGELOG
## 2025-01-24
- Added support for custom bot policy documentation, allowing administrators to change how Anubis works to meet their needs.

409
cmd/anubis/botPolicies.json Normal file
View File

@@ -0,0 +1,409 @@
{
"bots": [
{
"name": "amazonbot",
"user_agent_regex": "Amazonbot",
"action": "DENY"
},
{
"_comment": "This is based on the BGP routes advertised by AS7941",
"name": "internet-archive",
"action": "ALLOW",
"remote_addresses": [
"207.241.224.0/20",
"208.70.24.0/21",
"2620:0:9c0::/48"
]
},
{
"_comment": "Based on: https://developers.google.com/static/search/apis/ipranges/googlebot.json",
"name": "googlebot",
"user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
"action": "ALLOW",
"remote_addresses": [
"2001:4860:4801:10::/64",
"2001:4860:4801:11::/64",
"2001:4860:4801:12::/64",
"2001:4860:4801:13::/64",
"2001:4860:4801:14::/64",
"2001:4860:4801:15::/64",
"2001:4860:4801:16::/64",
"2001:4860:4801:17::/64",
"2001:4860:4801:18::/64",
"2001:4860:4801:19::/64",
"2001:4860:4801:1a::/64",
"2001:4860:4801:1b::/64",
"2001:4860:4801:1c::/64",
"2001:4860:4801:1d::/64",
"2001:4860:4801:1e::/64",
"2001:4860:4801:1f::/64",
"2001:4860:4801:20::/64",
"2001:4860:4801:21::/64",
"2001:4860:4801:22::/64",
"2001:4860:4801:23::/64",
"2001:4860:4801:24::/64",
"2001:4860:4801:25::/64",
"2001:4860:4801:26::/64",
"2001:4860:4801:27::/64",
"2001:4860:4801:28::/64",
"2001:4860:4801:29::/64",
"2001:4860:4801:2::/64",
"2001:4860:4801:2a::/64",
"2001:4860:4801:2b::/64",
"2001:4860:4801:2c::/64",
"2001:4860:4801:2d::/64",
"2001:4860:4801:2e::/64",
"2001:4860:4801:2f::/64",
"2001:4860:4801:31::/64",
"2001:4860:4801:32::/64",
"2001:4860:4801:33::/64",
"2001:4860:4801:34::/64",
"2001:4860:4801:35::/64",
"2001:4860:4801:36::/64",
"2001:4860:4801:37::/64",
"2001:4860:4801:38::/64",
"2001:4860:4801:39::/64",
"2001:4860:4801:3a::/64",
"2001:4860:4801:3b::/64",
"2001:4860:4801:3c::/64",
"2001:4860:4801:3d::/64",
"2001:4860:4801:3e::/64",
"2001:4860:4801:40::/64",
"2001:4860:4801:41::/64",
"2001:4860:4801:42::/64",
"2001:4860:4801:43::/64",
"2001:4860:4801:44::/64",
"2001:4860:4801:45::/64",
"2001:4860:4801:46::/64",
"2001:4860:4801:47::/64",
"2001:4860:4801:48::/64",
"2001:4860:4801:49::/64",
"2001:4860:4801:4a::/64",
"2001:4860:4801:4b::/64",
"2001:4860:4801:4c::/64",
"2001:4860:4801:50::/64",
"2001:4860:4801:51::/64",
"2001:4860:4801:52::/64",
"2001:4860:4801:53::/64",
"2001:4860:4801:54::/64",
"2001:4860:4801:55::/64",
"2001:4860:4801:56::/64",
"2001:4860:4801:60::/64",
"2001:4860:4801:61::/64",
"2001:4860:4801:62::/64",
"2001:4860:4801:63::/64",
"2001:4860:4801:64::/64",
"2001:4860:4801:65::/64",
"2001:4860:4801:66::/64",
"2001:4860:4801:67::/64",
"2001:4860:4801:68::/64",
"2001:4860:4801:69::/64",
"2001:4860:4801:6a::/64",
"2001:4860:4801:6b::/64",
"2001:4860:4801:6c::/64",
"2001:4860:4801:6d::/64",
"2001:4860:4801:6e::/64",
"2001:4860:4801:6f::/64",
"2001:4860:4801:70::/64",
"2001:4860:4801:71::/64",
"2001:4860:4801:72::/64",
"2001:4860:4801:73::/64",
"2001:4860:4801:74::/64",
"2001:4860:4801:75::/64",
"2001:4860:4801:76::/64",
"2001:4860:4801:77::/64",
"2001:4860:4801:78::/64",
"2001:4860:4801:79::/64",
"2001:4860:4801:80::/64",
"2001:4860:4801:81::/64",
"2001:4860:4801:82::/64",
"2001:4860:4801:83::/64",
"2001:4860:4801:84::/64",
"2001:4860:4801:85::/64",
"2001:4860:4801:86::/64",
"2001:4860:4801:87::/64",
"2001:4860:4801:88::/64",
"2001:4860:4801:90::/64",
"2001:4860:4801:91::/64",
"2001:4860:4801:92::/64",
"2001:4860:4801:93::/64",
"2001:4860:4801:94::/64",
"2001:4860:4801:95::/64",
"2001:4860:4801:96::/64",
"2001:4860:4801:a0::/64",
"2001:4860:4801:a1::/64",
"2001:4860:4801:a2::/64",
"2001:4860:4801:a3::/64",
"2001:4860:4801:a4::/64",
"2001:4860:4801:a5::/64",
"2001:4860:4801:c::/64",
"2001:4860:4801:f::/64",
"192.178.5.0/27",
"192.178.6.0/27",
"192.178.6.128/27",
"192.178.6.160/27",
"192.178.6.192/27",
"192.178.6.32/27",
"192.178.6.64/27",
"192.178.6.96/27",
"34.100.182.96/28",
"34.101.50.144/28",
"34.118.254.0/28",
"34.118.66.0/28",
"34.126.178.96/28",
"34.146.150.144/28",
"34.147.110.144/28",
"34.151.74.144/28",
"34.152.50.64/28",
"34.154.114.144/28",
"34.155.98.32/28",
"34.165.18.176/28",
"34.175.160.64/28",
"34.176.130.16/28",
"34.22.85.0/27",
"34.64.82.64/28",
"34.65.242.112/28",
"34.80.50.80/28",
"34.88.194.0/28",
"34.89.10.80/28",
"34.89.198.80/28",
"34.96.162.48/28",
"35.247.243.240/28",
"66.249.64.0/27",
"66.249.64.128/27",
"66.249.64.160/27",
"66.249.64.224/27",
"66.249.64.32/27",
"66.249.64.64/27",
"66.249.64.96/27",
"66.249.65.0/27",
"66.249.65.128/27",
"66.249.65.160/27",
"66.249.65.192/27",
"66.249.65.224/27",
"66.249.65.32/27",
"66.249.65.64/27",
"66.249.65.96/27",
"66.249.66.0/27",
"66.249.66.128/27",
"66.249.66.160/27",
"66.249.66.192/27",
"66.249.66.224/27",
"66.249.66.32/27",
"66.249.66.64/27",
"66.249.66.96/27",
"66.249.68.0/27",
"66.249.68.128/27",
"66.249.68.32/27",
"66.249.68.64/27",
"66.249.68.96/27",
"66.249.69.0/27",
"66.249.69.128/27",
"66.249.69.160/27",
"66.249.69.192/27",
"66.249.69.224/27",
"66.249.69.32/27",
"66.249.69.64/27",
"66.249.69.96/27",
"66.249.70.0/27",
"66.249.70.128/27",
"66.249.70.160/27",
"66.249.70.192/27",
"66.249.70.224/27",
"66.249.70.32/27",
"66.249.70.64/27",
"66.249.70.96/27",
"66.249.71.0/27",
"66.249.71.128/27",
"66.249.71.160/27",
"66.249.71.192/27",
"66.249.71.224/27",
"66.249.71.32/27",
"66.249.71.64/27",
"66.249.71.96/27",
"66.249.72.0/27",
"66.249.72.128/27",
"66.249.72.160/27",
"66.249.72.192/27",
"66.249.72.224/27",
"66.249.72.32/27",
"66.249.72.64/27",
"66.249.72.96/27",
"66.249.73.0/27",
"66.249.73.128/27",
"66.249.73.160/27",
"66.249.73.192/27",
"66.249.73.224/27",
"66.249.73.32/27",
"66.249.73.64/27",
"66.249.73.96/27",
"66.249.74.0/27",
"66.249.74.128/27",
"66.249.74.160/27",
"66.249.74.192/27",
"66.249.74.32/27",
"66.249.74.64/27",
"66.249.74.96/27",
"66.249.75.0/27",
"66.249.75.128/27",
"66.249.75.160/27",
"66.249.75.192/27",
"66.249.75.224/27",
"66.249.75.32/27",
"66.249.75.64/27",
"66.249.75.96/27",
"66.249.76.0/27",
"66.249.76.128/27",
"66.249.76.160/27",
"66.249.76.192/27",
"66.249.76.224/27",
"66.249.76.32/27",
"66.249.76.64/27",
"66.249.76.96/27",
"66.249.77.0/27",
"66.249.77.128/27",
"66.249.77.160/27",
"66.249.77.192/27",
"66.249.77.224/27",
"66.249.77.32/27",
"66.249.77.64/27",
"66.249.77.96/27",
"66.249.78.0/27",
"66.249.78.32/27",
"66.249.79.0/27",
"66.249.79.128/27",
"66.249.79.160/27",
"66.249.79.192/27",
"66.249.79.224/27",
"66.249.79.32/27",
"66.249.79.64/27",
"66.249.79.96/27"
]
},
{
"_comment": "Based on: https://www.bing.com/toolbox/bingbot.json",
"name": "bingbot",
"user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
"action": "ALLOW",
"remote_addresses": [
"157.55.39.0/24",
"207.46.13.0/24",
"40.77.167.0/24",
"13.66.139.0/24",
"13.66.144.0/24",
"52.167.144.0/24",
"13.67.10.16/28",
"13.69.66.240/28",
"13.71.172.224/28",
"139.217.52.0/28",
"191.233.204.224/28",
"20.36.108.32/28",
"20.43.120.16/28",
"40.79.131.208/28",
"40.79.186.176/28",
"52.231.148.0/28",
"20.79.107.240/28",
"51.105.67.0/28",
"20.125.163.80/28",
"40.77.188.0/22",
"65.55.210.0/24",
"199.30.24.0/23",
"40.77.202.0/24",
"40.77.139.0/25",
"20.74.197.0/28",
"20.15.133.160/27",
"40.77.177.0/24",
"40.77.178.0/23"
]
},
{
"_comment": "Based on: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json",
"name": "qwantbot",
"user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
"action": "ALLOW",
"remote_addresses": [
"91.242.162.0/24"
]
},
{
"_comment": "Based on: https://kagi.com/bot",
"name": "kagibot",
"user_agent_regex": "\\+https\\://kagi\\.com/bot",
"action": "ALLOW",
"remote_addresses": [
"216.18.205.234/32",
"35.212.27.76/32",
"104.254.65.50/32",
"209.151.156.194/32"
]
},
{
"_comment": "Received over email from marginalia operator",
"name": "marginalia",
"user_agent_regex": "search\\.marginalia\\.nu",
"action": "ALLOW",
"remote_addresses": [
"193.183.0.162/31",
"193.183.0.164/30",
"193.183.0.168/30",
"193.183.0.172/31",
"193.183.0.174/32"
]
},
{
"_comment": "Based on: https://www.mojeek.com/bot.html and manual admin confirmation in a GitHub thread: https://github.com/TecharoHQ/anubis/issues/47#issuecomment-2743815019",
"name": "mojeekbot",
"user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
"action": "ALLOW",
"remote_addresses": [
"5.102.173.71/32"
]
},
{
"name": "us-artificial-intelligence-scraper",
"user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
"action": "DENY"
},
{
"name": "well-known",
"path_regex": "^/.well-known/.*$",
"action": "ALLOW"
},
{
"name": "favicon",
"path_regex": "^/favicon.ico$",
"action": "ALLOW"
},
{
"name": "robots-txt",
"path_regex": "^/robots.txt$",
"action": "ALLOW"
},
{
"name": "lightpanda",
"user_agent_regex": "^Lightpanda/.*$",
"action": "DENY"
},
{
"name": "headless-chrome",
"user_agent_regex": "(?i:headlesschrom(e|ium))",
"action": "DENY"
},
{
"name": "generic-bot-catchall",
"user_agent_regex": "(?i:bot|crawler)",
"action": "CHALLENGE",
"challenge": {
"difficulty": 16,
"report_as": 4,
"algorithm": "slow"
}
},
{
"name": "generic-browser",
"user_agent_regex": "Mozilla",
"action": "CHALLENGE"
}
],
"dnsbl": true
}

View File

@@ -1,17 +1,17 @@
package decaymap
package main
import (
"sync"
"time"
)
func Zilch[T any]() T {
func zilch[T any]() T {
var zero T
return zero
}
// Impl is a lazy key->value map. It's a wrapper around a map and a mutex. If values exceed their time-to-live, they are pruned at Get time.
type Impl[K comparable, V any] struct {
// DecayMap is a lazy key->value map. It's a wrapper around a map and a mutex. If values exceed their time-to-live, they are pruned at Get time.
type DecayMap[K comparable, V any] struct {
data map[K]decayMapEntry[V]
lock sync.RWMutex
}
@@ -21,17 +21,17 @@ type decayMapEntry[V any] struct {
expiry time.Time
}
// New creates a new DecayMap of key type K and value type V.
// NewDecayMap creates a new DecayMap of key type K and value type V.
//
// Key types must be comparable to work with maps.
func New[K comparable, V any]() *Impl[K, V] {
return &Impl[K, V]{
func NewDecayMap[K comparable, V any]() *DecayMap[K, V] {
return &DecayMap[K, V]{
data: make(map[K]decayMapEntry[V]),
}
}
// expire forcibly expires a key by setting its time-to-live one second in the past.
func (m *Impl[K, V]) expire(key K) bool {
func (m *DecayMap[K, V]) expire(key K) bool {
m.lock.RLock()
val, ok := m.data[key]
m.lock.RUnlock()
@@ -51,32 +51,32 @@ func (m *Impl[K, V]) expire(key K) bool {
// Get gets a value from the DecayMap by key.
//
// If a value has expired, forcibly delete it if it was not updated.
func (m *Impl[K, V]) Get(key K) (V, bool) {
func (m *DecayMap[K, V]) Get(key K) (V, bool) {
m.lock.RLock()
value, ok := m.data[key]
m.lock.RUnlock()
if !ok {
return Zilch[V](), false
return zilch[V](), false
}
if time.Now().After(value.expiry) {
m.lock.Lock()
// Since previously reading m.data[key], the value may have been updated.
// Delete the entry only if the expiry time is still the same.
if m.data[key].expiry.Equal(value.expiry) {
if m.data[key].expiry == value.expiry {
delete(m.data, key)
}
m.lock.Unlock()
return Zilch[V](), false
return zilch[V](), false
}
return value.Value, true
}
// Set sets a key value pair in the map.
func (m *Impl[K, V]) Set(key K, value V, ttl time.Duration) {
func (m *DecayMap[K, V]) Set(key K, value V, ttl time.Duration) {
m.lock.Lock()
defer m.lock.Unlock()
@@ -85,23 +85,3 @@ func (m *Impl[K, V]) Set(key K, value V, ttl time.Duration) {
expiry: time.Now().Add(ttl),
}
}
// Cleanup removes all expired entries from the DecayMap.
func (m *Impl[K, V]) Cleanup() {
m.lock.Lock()
defer m.lock.Unlock()
now := time.Now()
for key, entry := range m.data {
if now.After(entry.expiry) {
delete(m.data, key)
}
}
}
// Len returns the number of entries in the DecayMap.
func (m *Impl[K, V]) Len() int {
m.lock.RLock()
defer m.lock.RUnlock()
return len(m.data)
}

View File

@@ -0,0 +1,31 @@
package main
import (
"testing"
"time"
)
func TestDecayMap(t *testing.T) {
dm := NewDecayMap[string, string]()
dm.Set("test", "hi", 5*time.Minute)
val, ok := dm.Get("test")
if !ok {
t.Error("somehow the test key was not set")
}
if val != "hi" {
t.Errorf("wanted value %q, got: %q", "hi", val)
}
ok = dm.expire("test")
if !ok {
t.Error("somehow could not force-expire the test key")
}
_, ok = dm.Get("test")
if ok {
t.Error("got value even though it was supposed to be expired")
}
}

217
cmd/anubis/index.templ Normal file
View File

@@ -0,0 +1,217 @@
package main
import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/xess"
)
templ base(title string, body templ.Component) {
<!DOCTYPE html>
<html>
<head>
<title>{ title }</title>
<link rel="stylesheet" href={ xess.URL }/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<style>
body,
html {
height: 100%;
display: flex;
justify-content: center;
align-items: center;
margin-left: auto;
margin-right: auto;
}
.centered-div {
text-align: center;
}
.lds-roller,
.lds-roller div,
.lds-roller div:after {
box-sizing: border-box;
}
.lds-roller {
display: inline-block;
position: relative;
width: 80px;
height: 80px;
}
.lds-roller div {
animation: lds-roller 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;
transform-origin: 40px 40px;
}
.lds-roller div:after {
content: " ";
display: block;
position: absolute;
width: 7.2px;
height: 7.2px;
border-radius: 50%;
background: currentColor;
margin: -3.6px 0 0 -3.6px;
}
.lds-roller div:nth-child(1) {
animation-delay: -0.036s;
}
.lds-roller div:nth-child(1):after {
top: 62.62742px;
left: 62.62742px;
}
.lds-roller div:nth-child(2) {
animation-delay: -0.072s;
}
.lds-roller div:nth-child(2):after {
top: 67.71281px;
left: 56px;
}
.lds-roller div:nth-child(3) {
animation-delay: -0.108s;
}
.lds-roller div:nth-child(3):after {
top: 70.90963px;
left: 48.28221px;
}
.lds-roller div:nth-child(4) {
animation-delay: -0.144s;
}
.lds-roller div:nth-child(4):after {
top: 72px;
left: 40px;
}
.lds-roller div:nth-child(5) {
animation-delay: -0.18s;
}
.lds-roller div:nth-child(5):after {
top: 70.90963px;
left: 31.71779px;
}
.lds-roller div:nth-child(6) {
animation-delay: -0.216s;
}
.lds-roller div:nth-child(6):after {
top: 67.71281px;
left: 24px;
}
.lds-roller div:nth-child(7) {
animation-delay: -0.252s;
}
.lds-roller div:nth-child(7):after {
top: 62.62742px;
left: 17.37258px;
}
.lds-roller div:nth-child(8) {
animation-delay: -0.288s;
}
.lds-roller div:nth-child(8):after {
top: 56px;
left: 12.28719px;
}
@keyframes lds-roller {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
</style>
@templ.JSONScript("anubis_version", anubis.Version)
</head>
<body id="top">
<main>
<center>
<h1 id="title" class=".centered-div">{ title }</h1>
</center>
@body
<footer>
<center>
<p>
Protected by <a href="https://github.com/TecharoHQ/anubis">Anubis</a> from <a
href="https://techaro.lol"
>Techaro</a>. Made with ❤️ in 🇨🇦.
</p>
</center>
</footer>
</main>
</body>
</html>
}
templ index() {
<div class="centered-div">
<img
id="image"
style="width:100%;max-width:256px;"
src={ "/.within.website/x/cmd/anubis/static/img/pensive.webp?cacheBuster=" +
anubis.Version }
/>
<img
style="display:none;"
style="width:100%;max-width:256px;"
src={ "/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" +
anubis.Version }
/>
<p id="status">Loading...</p>
<script async type="module" src={ "/.within.website/x/cmd/anubis/static/js/main.mjs?cacheBuster=" + anubis.Version }></script>
<div id="spinner" class="lds-roller">
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
</div>
<details>
<summary>Why am I seeing this?</summary>
<p>You are seeing this because the administrator of this website has set up <a href="https://github.com/TecharoHQ/anubis">Anubis</a> to protect the server against the scourge of <a href="https://thelibre.news/foss-infrastructure-is-under-attack-by-ai-companies/">AI companies aggressively scraping websites</a>. This can and does cause downtime for the websites, which makes their resources inaccessible for everyone.</p>
<p>Anubis is a compromise. Anubis uses a <a href="https://anubis.techaro.lol/docs/design/why-proof-of-work">Proof-of-Work</a> scheme in the vein of <a href="https://en.wikipedia.org/wiki/Hashcash">Hashcash</a>, a proposed proof-of-work scheme for reducing email spam. The idea is that at individual scales the additional load is ignorable, but at mass scraper levels it adds up and makes scraping much more expensive.</p>
<p>Ultimately, this is a hack whose real purpose is to give a "good enough" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG: via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to users that are much more likely to be legitimate.</p>
<p>Please note that Anubis requires the use of modern JavaScript features that plugins like <a href="https://jshelter.org/">JShelter</a> will disable. Please disable JShelter or other such plugins for this domain.</p>
</details>
<noscript>
<p>
Sadly, you must enable JavaScript to get past this challenge. This is required because AI companies have changed
the social contract around how website hosting works. A no-JS solution is a work-in-progress.
</p>
</noscript>
<div id="testarea"></div>
</div>
}
templ errorPage(message string) {
<div class="centered-div">
<img
id="image"
style="width:100%;max-width:256px;"
src={ "/.within.website/x/cmd/anubis/static/img/sad.webp?cacheBuster=" + anubis.Version }
/>
<p>{ message }.</p>
<button onClick="window.location.reload();">Try again</button>
<p><a href="/">Go home</a></p>
</div>
}

225
cmd/anubis/index_templ.go Normal file
View File

@@ -0,0 +1,225 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.833
package main
//lint:file-ignore SA4006 This context is only used if a nested component is present.
import "github.com/a-h/templ"
import templruntime "github.com/a-h/templ/runtime"
import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/xess"
)
func base(title string, body templ.Component) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
return templ_7745c5c3_CtxErr
}
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
if !templ_7745c5c3_IsBuffer {
defer func() {
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
if templ_7745c5c3_Err == nil {
templ_7745c5c3_Err = templ_7745c5c3_BufErr
}
}()
}
ctx = templ.InitializeContext(ctx)
templ_7745c5c3_Var1 := templ.GetChildren(ctx)
if templ_7745c5c3_Var1 == nil {
templ_7745c5c3_Var1 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 1, "<!doctype html><html><head><title>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var2 string
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 12, Col: 17}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 2, "</title><link rel=\"stylesheet\" href=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var3 string
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(xess.URL)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 13, Col: 41}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 3, "\"><meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\"><style>\n body,\n html {\n height: 100%;\n display: flex;\n justify-content: center;\n align-items: center;\n margin-left: auto;\n margin-right: auto;\n }\n\n .centered-div {\n text-align: center;\n }\n\n .lds-roller,\n .lds-roller div,\n .lds-roller div:after {\n box-sizing: border-box;\n }\n\n .lds-roller {\n display: inline-block;\n position: relative;\n width: 80px;\n height: 80px;\n }\n\n .lds-roller div {\n animation: lds-roller 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;\n transform-origin: 40px 40px;\n }\n\n .lds-roller div:after {\n content: \" \";\n display: block;\n position: absolute;\n width: 7.2px;\n height: 7.2px;\n border-radius: 50%;\n background: currentColor;\n margin: -3.6px 0 0 -3.6px;\n }\n\n .lds-roller div:nth-child(1) {\n animation-delay: -0.036s;\n }\n\n .lds-roller div:nth-child(1):after {\n top: 62.62742px;\n left: 62.62742px;\n }\n\n .lds-roller div:nth-child(2) {\n animation-delay: -0.072s;\n }\n\n .lds-roller div:nth-child(2):after {\n top: 67.71281px;\n left: 56px;\n }\n\n .lds-roller div:nth-child(3) {\n animation-delay: -0.108s;\n }\n\n .lds-roller div:nth-child(3):after {\n top: 70.90963px;\n left: 48.28221px;\n }\n\n .lds-roller div:nth-child(4) {\n animation-delay: -0.144s;\n }\n\n .lds-roller div:nth-child(4):after {\n top: 72px;\n left: 40px;\n }\n\n .lds-roller div:nth-child(5) {\n animation-delay: -0.18s;\n }\n\n .lds-roller div:nth-child(5):after {\n top: 70.90963px;\n left: 31.71779px;\n }\n\n .lds-roller div:nth-child(6) {\n animation-delay: -0.216s;\n }\n\n .lds-roller div:nth-child(6):after {\n top: 67.71281px;\n left: 24px;\n }\n\n .lds-roller div:nth-child(7) {\n animation-delay: -0.252s;\n }\n\n .lds-roller div:nth-child(7):after {\n top: 62.62742px;\n left: 17.37258px;\n }\n\n .lds-roller div:nth-child(8) {\n animation-delay: -0.288s;\n }\n\n .lds-roller div:nth-child(8):after {\n top: 56px;\n left: 12.28719px;\n }\n\n @keyframes lds-roller {\n 0% {\n transform: rotate(0deg);\n }\n\n 100% {\n transform: rotate(360deg);\n }\n }\n </style>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templ.JSONScript("anubis_version", anubis.Version).Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 4, "</head><body id=\"top\"><main><center><h1 id=\"title\" class=\".centered-div\">")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var4 string
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 146, Col: 49}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 5, "</h1></center>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = body.Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "<footer><center><p>Protected by <a href=\"https://github.com/TecharoHQ/anubis\">Anubis</a> from <a href=\"https://techaro.lol\">Techaro</a>. Made with ❤️ in 🇨🇦.</p></center></footer></main></body></html>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
return nil
})
}
func index() templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
return templ_7745c5c3_CtxErr
}
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
if !templ_7745c5c3_IsBuffer {
defer func() {
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
if templ_7745c5c3_Err == nil {
templ_7745c5c3_Err = templ_7745c5c3_BufErr
}
}()
}
ctx = templ.InitializeContext(ctx)
templ_7745c5c3_Var5 := templ.GetChildren(ctx)
if templ_7745c5c3_Var5 == nil {
templ_7745c5c3_Var5 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 7, "<div class=\"centered-div\"><img id=\"image\" style=\"width:100%;max-width:256px;\" src=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var6 string
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/pensive.webp?cacheBuster=" +
anubis.Version)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 169, Col: 18}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 8, "\"> <img style=\"display:none;\" style=\"width:100%;max-width:256px;\" src=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var7 string
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" +
anubis.Version)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 175, Col: 18}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 9, "\"><p id=\"status\">Loading...</p><script async type=\"module\" src=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var8 string
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/js/main.mjs?cacheBuster=" + anubis.Version)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 178, Col: 116}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></script><div id=\"spinner\" class=\"lds-roller\"><div></div><div></div><div></div><div></div><div></div><div></div><div></div><div></div></div><details><summary>Why am I seeing this?</summary><p>You are seeing this because the administrator of this website has set up <a href=\"https://github.com/TecharoHQ/anubis\">Anubis</a> to protect the server against the scourge of <a href=\"https://thelibre.news/foss-infrastructure-is-under-attack-by-ai-companies/\">AI companies aggressively scraping websites</a>. This can and does cause downtime for the websites, which makes their resources inaccessible for everyone.</p><p>Anubis is a compromise. Anubis uses a <a href=\"https://anubis.techaro.lol/docs/design/why-proof-of-work\">Proof-of-Work</a> scheme in the vein of <a href=\"https://en.wikipedia.org/wiki/Hashcash\">Hashcash</a>, a proposed proof-of-work scheme for reducing email spam. The idea is that at individual scales the additional load is ignorable, but at mass scraper levels it adds up and makes scraping much more expensive.</p><p>Ultimately, this is a hack whose real purpose is to give a \"good enough\" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG: via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to users that are much more likely to be legitimate.</p><p>Please note that Anubis requires the use of modern JavaScript features that plugins like <a href=\"https://jshelter.org/\">JShelter</a> will disable. Please disable JShelter or other such plugins for this domain.</p></details><noscript><p>Sadly, you must enable JavaScript to get past this challenge. This is required because AI companies have changed the social contract around how website hosting works. A no-JS solution is a work-in-progress.</p></noscript><div id=\"testarea\"></div></div>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
return nil
})
}
func errorPage(message string) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
return templ_7745c5c3_CtxErr
}
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
if !templ_7745c5c3_IsBuffer {
defer func() {
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
if templ_7745c5c3_Err == nil {
templ_7745c5c3_Err = templ_7745c5c3_BufErr
}
}()
}
ctx = templ.InitializeContext(ctx)
templ_7745c5c3_Var9 := templ.GetChildren(ctx)
if templ_7745c5c3_Var9 == nil {
templ_7745c5c3_Var9 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 11, "<div class=\"centered-div\"><img id=\"image\" style=\"width:100%;max-width:256px;\" src=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var10 string
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/sad.webp?cacheBuster=" + anubis.Version)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 211, Col: 90}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, "\"><p>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var11 string
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(message)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 213, Col: 14}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, ".</p><button onClick=\"window.location.reload();\">Try again</button><p><a href=\"/\">Go home</a></p></div>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
return nil
})
}
var _ = templruntime.GeneratedTemplate

View File

@@ -0,0 +1,162 @@
package config
import (
"errors"
"fmt"
"net"
"regexp"
)
type Rule string
const (
RuleUnknown Rule = ""
RuleAllow Rule = "ALLOW"
RuleDeny Rule = "DENY"
RuleChallenge Rule = "CHALLENGE"
)
type Algorithm string
const (
AlgorithmUnknown Algorithm = ""
AlgorithmFast Algorithm = "fast"
AlgorithmSlow Algorithm = "slow"
)
type Bot struct {
Name string `json:"name"`
UserAgentRegex *string `json:"user_agent_regex"`
PathRegex *string `json:"path_regex"`
Action Rule `json:"action"`
RemoteAddr []string `json:"remote_addresses"`
Challenge *ChallengeRules `json:"challenge,omitempty"`
}
var (
ErrNoBotRulesDefined = errors.New("config: must define at least one (1) bot rule")
ErrBotMustHaveName = errors.New("config.Bot: must set name")
ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex, or remote_addresses")
ErrBotMustHaveUserAgentOrPathNotBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, and not both")
ErrUnknownAction = errors.New("config.Bot: unknown action")
ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex")
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
)
func (b Bot) Valid() error {
var errs []error
if b.Name == "" {
errs = append(errs, ErrBotMustHaveName)
}
if b.UserAgentRegex == nil && b.PathRegex == nil && (b.RemoteAddr == nil || len(b.RemoteAddr) == 0) {
errs = append(errs, ErrBotMustHaveUserAgentOrPath)
}
if b.UserAgentRegex != nil && b.PathRegex != nil {
errs = append(errs, ErrBotMustHaveUserAgentOrPathNotBoth)
}
if b.UserAgentRegex != nil {
if _, err := regexp.Compile(*b.UserAgentRegex); err != nil {
errs = append(errs, ErrInvalidUserAgentRegex, err)
}
}
if b.PathRegex != nil {
if _, err := regexp.Compile(*b.PathRegex); err != nil {
errs = append(errs, ErrInvalidPathRegex, err)
}
}
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
for _, cidr := range b.RemoteAddr {
if _, _, err := net.ParseCIDR(cidr); err != nil {
errs = append(errs, ErrInvalidCIDR, err)
}
}
}
switch b.Action {
case RuleAllow, RuleChallenge, RuleDeny:
// okay
default:
errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action))
}
if b.Action == RuleChallenge && b.Challenge != nil {
if err := b.Challenge.Valid(); err != nil {
errs = append(errs, err)
}
}
if len(errs) != 0 {
return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(errs...))
}
return nil
}
type ChallengeRules struct {
Difficulty int `json:"difficulty"`
ReportAs int `json:"report_as"`
Algorithm Algorithm `json:"algorithm"`
}
var (
ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid")
ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)")
ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)")
)
func (cr ChallengeRules) Valid() error {
var errs []error
if cr.Difficulty < 1 {
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
}
if cr.Difficulty > 64 {
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty))
}
switch cr.Algorithm {
case AlgorithmFast, AlgorithmSlow, AlgorithmUnknown:
// do nothing, it's all good
default:
errs = append(errs, fmt.Errorf("%w: %q", ErrChallengeRuleHasWrongAlgorithm, cr.Algorithm))
}
if len(errs) != 0 {
return fmt.Errorf("config: challenge rules entry is not valid:\n%w", errors.Join(errs...))
}
return nil
}
type Config struct {
Bots []Bot `json:"bots"`
DNSBL bool `json:"dnsbl"`
}
func (c Config) Valid() error {
var errs []error
if len(c.Bots) == 0 {
errs = append(errs, ErrNoBotRulesDefined)
}
for _, b := range c.Bots {
if err := b.Valid(); err != nil {
errs = append(errs, err)
}
}
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
return nil
}

View File

@@ -1,14 +1,11 @@
package config
import (
"encoding/json"
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/TecharoHQ/anubis/data"
"k8s.io/apimachinery/pkg/util/yaml"
)
func p[V any](v V) *V { return &v }
@@ -16,12 +13,12 @@ func p[V any](v V) *V { return &v }
func TestBotValid(t *testing.T) {
var tests = []struct {
name string
bot BotConfig
bot Bot
err error
}{
{
name: "simple user agent",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
UserAgentRegex: p("Mozilla"),
@@ -30,7 +27,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "simple path",
bot: BotConfig{
bot: Bot{
Name: "well-known-path",
Action: RuleAllow,
PathRegex: p("^/.well-known/.*$"),
@@ -39,7 +36,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "no rule name",
bot: BotConfig{
bot: Bot{
Action: RuleChallenge,
UserAgentRegex: p("Mozilla"),
},
@@ -47,7 +44,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "no rule matcher",
bot: BotConfig{
bot: Bot{
Name: "broken-rule",
Action: RuleAllow,
},
@@ -55,7 +52,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "both user-agent and path",
bot: BotConfig{
bot: Bot{
Name: "path-and-user-agent",
Action: RuleDeny,
UserAgentRegex: p("Mozilla"),
@@ -65,7 +62,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "unknown action",
bot: BotConfig{
bot: Bot{
Name: "Unknown action",
Action: RuleUnknown,
UserAgentRegex: p("Mozilla"),
@@ -74,7 +71,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "invalid user agent regex",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
UserAgentRegex: p("a(b"),
@@ -83,28 +80,16 @@ func TestBotValid(t *testing.T) {
},
{
name: "invalid path regex",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("a(b"),
},
err: ErrInvalidPathRegex,
},
{
name: "invalid headers regex",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
HeadersRegex: map[string]string{
"Content-Type": "a(b",
},
PathRegex: p("a(b"),
},
err: ErrInvalidHeadersRegex,
},
{
name: "challenge difficulty too low",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("Mozilla"),
@@ -118,7 +103,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "challenge difficulty too high",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("Mozilla"),
@@ -132,7 +117,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "challenge wrong algorithm",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("Mozilla"),
@@ -146,7 +131,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "invalid cidr range",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleAllow,
RemoteAddr: []string{"0.0.0.0/33"},
@@ -155,7 +140,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "only filter by IP range",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleAllow,
RemoteAddr: []string{"0.0.0.0/0"},
@@ -164,7 +149,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "filter by user agent and IP range",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleAllow,
UserAgentRegex: p("Mozilla"),
@@ -174,7 +159,7 @@ func TestBotValid(t *testing.T) {
},
{
name: "filter by path and IP range",
bot: BotConfig{
bot: Bot{
Name: "mozilla-ua",
Action: RuleAllow,
PathRegex: p("^.*$"),
@@ -221,69 +206,13 @@ func TestConfigValidKnownGood(t *testing.T) {
}
defer fin.Close()
c, err := Load(fin, st.Name())
if err != nil {
t.Fatal(err)
var c Config
if err := json.NewDecoder(fin).Decode(&c); err != nil {
t.Fatalf("can't decode file: %v", err)
}
if err := c.Valid(); err != nil {
t.Error(err)
}
if len(c.Bots) == 0 {
t.Error("wanted more than 0 bots, got zero")
}
})
}
}
func TestImportStatement(t *testing.T) {
type testCase struct {
name string
importPath string
err error
}
var tests []testCase
for _, folderName := range []string{
"apps",
"bots",
"common",
"crawlers",
} {
if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
tests = append(tests, testCase{
name: "(data)/" + path,
importPath: "(data)/" + path,
err: nil,
})
return nil
}); err != nil {
t.Fatal(err)
}
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
is := &ImportStatement{
Import: tt.importPath,
}
if err := is.Valid(); err != nil {
t.Errorf("validation error: %v", err)
}
if len(is.Bots) == 0 {
t.Error("wanted bot definitions, but got none")
t.Fatal(err)
}
})
}
@@ -304,8 +233,8 @@ func TestConfigValidBad(t *testing.T) {
}
defer fin.Close()
var c fileConfig
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
var c Config
if err := json.NewDecoder(fin).Decode(&c); err != nil {
t.Fatalf("can't decode file: %v", err)
}
@@ -317,49 +246,3 @@ func TestConfigValidBad(t *testing.T) {
})
}
}
func TestBotConfigZero(t *testing.T) {
var b BotConfig
if !b.Zero() {
t.Error("zero value BotConfig is not zero value")
}
b.Name = "hi"
if b.Zero() {
t.Error("BotConfig with name is zero value")
}
b.UserAgentRegex = p(".*")
if b.Zero() {
t.Error("BotConfig with user agent regex is zero value")
}
b.PathRegex = p(".*")
if b.Zero() {
t.Error("BotConfig with path regex is zero value")
}
b.HeadersRegex = map[string]string{"hi": "there"}
if b.Zero() {
t.Error("BotConfig with headers regex is zero value")
}
b.Action = RuleAllow
if b.Zero() {
t.Error("BotConfig with action is zero value")
}
b.RemoteAddr = []string{"::/0"}
if b.Zero() {
t.Error("BotConfig with remote addresses is zero value")
}
b.Challenge = &ChallengeRules{
Difficulty: 4,
ReportAs: 4,
Algorithm: AlgorithmFast,
}
if b.Zero() {
t.Error("BotConfig with challenge rules is zero value")
}
}

View File

@@ -9,13 +9,6 @@
"name": "user-agent-bad",
"user_agent_regex": "a(b",
"action": "DENY"
},
{
"name": "headers-bad",
"headers": {
"Accept-Encoding": "a(b"
},
"action": "DENY"
}
]
}

89
cmd/anubis/js/main.mjs Normal file
View File

@@ -0,0 +1,89 @@
import processFast from "./proof-of-work.mjs";
import processSlow from "./proof-of-work-slow.mjs";
import { testVideo } from "./video.mjs";
const algorithms = {
"fast": processFast,
"slow": processSlow,
}
// from Xeact
const u = (url = "", params = {}) => {
let result = new URL(url, window.location.href);
Object.entries(params).forEach((kv) => {
let [k, v] = kv;
result.searchParams.set(k, v);
});
return result.toString();
};
const imageURL = (mood, cacheBuster) =>
u(`/.within.website/x/cmd/anubis/static/img/${mood}.webp`, { cacheBuster });
(async () => {
const status = document.getElementById('status');
const image = document.getElementById('image');
const title = document.getElementById('title');
const spinner = document.getElementById('spinner');
const anubisVersion = JSON.parse(document.getElementById('anubis_version').textContent);
// const testarea = document.getElementById('testarea');
// const videoWorks = await testVideo(testarea);
// console.log(`videoWorks: ${videoWorks}`);
// if (!videoWorks) {
// title.innerHTML = "Oh no!";
// status.innerHTML = "Checks failed. Please check your browser's settings and try again.";
// image.src = imageURL("sad");
// spinner.innerHTML = "";
// spinner.style.display = "none";
// return;
// }
status.innerHTML = 'Calculating...';
const { challenge, rules } = await fetch("/.within.website/x/cmd/anubis/api/make-challenge", { method: "POST" })
.then(r => {
if (!r.ok) {
throw new Error("Failed to fetch config");
}
return r.json();
})
.catch(err => {
title.innerHTML = "Oh no!";
status.innerHTML = `Failed to fetch config: ${err.message}`;
image.src = imageURL("sad", anubisVersion);
spinner.innerHTML = "";
spinner.style.display = "none";
throw err;
});
const process = algorithms[rules.algorithm];
if (!process) {
title.innerHTML = "Oh no!";
status.innerHTML = `Failed to resolve check algorithm. You may want to reload the page.`;
image.src = imageURL("sad", anubisVersion);
spinner.innerHTML = "";
spinner.style.display = "none";
return;
}
status.innerHTML = `Calculating...<br/>Difficulty: ${rules.report_as}`;
const t0 = Date.now();
const { hash, nonce } = await process(challenge, rules.difficulty);
const t1 = Date.now();
console.log({ hash, nonce });
title.innerHTML = "Success!";
status.innerHTML = `Done! Took ${t1 - t0}ms, ${nonce} iterations`;
image.src = imageURL("happy", anubisVersion);
spinner.innerHTML = "";
spinner.style.display = "none";
setTimeout(() => {
const redir = window.location.href;
window.location.href = u("/.within.website/x/cmd/anubis/api/pass-challenge", { response: hash, nonce, redir, elapsedTime: t1 - t0 });
}, 250);
})();

View File

@@ -1,12 +1,6 @@
// https://dev.to/ratmd/simple-proof-of-work-in-javascript-3kgm
export default function process(
data,
difficulty = 5,
signal = null,
progressCallback = null,
_threads = 1,
) {
export default function process(data, difficulty = 5, _threads = 1) {
console.debug("slow algo");
return new Promise((resolve, reject) => {
let webWorkerURL = URL.createObjectURL(new Blob([
@@ -14,33 +8,15 @@ export default function process(
], { type: 'application/javascript' }));
let worker = new Worker(webWorkerURL);
const terminate = () => {
worker.terminate();
if (signal != null) {
// clean up listener to avoid memory leak
signal.removeEventListener("abort", terminate);
if (signal.aborted) {
console.log("PoW aborted");
reject(false);
}
}
};
if (signal != null) {
signal.addEventListener("abort", terminate, { once: true });
}
worker.onmessage = (event) => {
if (typeof event.data === "number") {
progressCallback?.(event.data);
} else {
terminate();
resolve(event.data);
}
worker.terminate();
resolve(event.data);
};
worker.onerror = (event) => {
terminate();
reject(event);
worker.terminate();
reject();
};
worker.postMessage({
@@ -71,9 +47,6 @@ function processTask() {
let hash;
let nonce = 0;
do {
if (nonce & 1023 === 0) {
postMessage(nonce);
}
hash = await sha256(data + nonce++);
} while (hash.substring(0, difficulty) !== Array(difficulty + 1).join('0'));

View File

@@ -1,10 +1,4 @@
export default function process(
data,
difficulty = 5,
signal = null,
progressCallback = null,
threads = (navigator.hardwareConcurrency || 1),
) {
export default function process(data, difficulty = 5, threads = (navigator.hardwareConcurrency || 1)) {
console.debug("fast algo");
return new Promise((resolve, reject) => {
let webWorkerURL = URL.createObjectURL(new Blob([
@@ -12,36 +6,19 @@ export default function process(
], { type: 'application/javascript' }));
const workers = [];
const terminate = () => {
workers.forEach((w) => w.terminate());
if (signal != null) {
// clean up listener to avoid memory leak
signal.removeEventListener("abort", terminate);
if (signal.aborted) {
console.log("PoW aborted");
reject(false);
}
}
};
if (signal != null) {
signal.addEventListener("abort", terminate, { once: true });
}
for (let i = 0; i < threads; i++) {
let worker = new Worker(webWorkerURL);
worker.onmessage = (event) => {
if (typeof event.data === "number") {
progressCallback?.(event.data);
} else {
terminate();
resolve(event.data);
}
workers.forEach(worker => worker.terminate());
worker.terminate();
resolve(event.data);
};
worker.onerror = (event) => {
terminate();
reject(event);
worker.terminate();
reject();
};
worker.postMessage({
@@ -78,8 +55,6 @@ function processTask() {
let nonce = event.data.nonce;
let threads = event.data.threads;
const threadId = nonce;
while (true) {
const currentHash = await sha256(data + nonce);
const thisHash = new Uint8Array(currentHash);
@@ -103,21 +78,7 @@ function processTask() {
break;
}
const oldNonce = nonce;
nonce += threads;
// send a progress update every 1024 iterations. since each thread checks
// separate values, one simple way to do this is by bit masking the
// nonce for multiples of 1024. unfortunately, if the number of threads
// is not prime, only some of the threads will be sending the status
// update and they will get behind the others. this is slightly more
// complicated but ensures an even distribution between threads.
if (
nonce > oldNonce | 1023 && // we've wrapped past 1024
(nonce >> 10) % threads === threadId // and it's our turn
) {
postMessage(nonce);
}
}
postMessage({

View File

@@ -1,25 +1,27 @@
package main
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"crypto/subtle"
"embed"
"encoding/hex"
"errors"
"encoding/json"
"flag"
"fmt"
"io/fs"
"io"
"log"
"log/slog"
"math"
mrand "math/rand"
"net"
"net/http"
"net/http/httputil"
"net/url"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"sync"
@@ -27,59 +29,76 @@ import (
"time"
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/cmd/anubis/internal/config"
"github.com/TecharoHQ/anubis/cmd/anubis/internal/dnsbl"
"github.com/TecharoHQ/anubis/internal"
libanubis "github.com/TecharoHQ/anubis/lib"
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/web"
"github.com/TecharoHQ/anubis/xess"
"github.com/a-h/templ"
"github.com/facebookgo/flagenv"
"github.com/golang-jwt/jwt/v5"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var (
basePrefix = flag.String("base-prefix", "", "base prefix (root URL) the application is served under e.g. /myapp")
bind = flag.String("bind", ":8923", "network address to bind HTTP to")
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
cookieExpiration = flag.Duration("cookie-expiration-time", anubis.CookieDefaultExpirationTime, "The amount of time the authorization cookie is valid for")
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
ed25519PrivateKeyHexFile = flag.String("ed25519-private-key-hex-file", "", "file name containing value for ed25519-private-key-hex")
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
metricsBindNetwork = flag.String("metrics-bind-network", "tcp", "network family for the metrics server to bind to")
socketMode = flag.String("socket-mode", "0770", "socket mode (permissions) for unix domain sockets.")
robotsTxt = flag.Bool("serve-robots-txt", false, "serve a robots.txt file that disallows all robots")
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
redirectDomains = flag.String("redirect-domains", "", "list of domains separated by commas which anubis is allowed to redirect to. Leaving this unset allows any domain.")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to, set to an empty string to disable proxying when only using auth request")
healthcheck = flag.Bool("healthcheck", false, "run a health check against Anubis")
useRemoteAddress = flag.Bool("use-remote-address", false, "read the client's IP address from the network request, useful for debugging and running Anubis on bare metal")
debugBenchmarkJS = flag.Bool("debug-benchmark-js", false, "respond to every request with a challenge for benchmarking hashrate")
ogPassthrough = flag.Bool("og-passthrough", false, "enable Open Graph tag passthrough")
ogTimeToLive = flag.Duration("og-expiry-time", 24*time.Hour, "Open Graph tag cache expiration time")
ogCacheConsiderHost = flag.Bool("og-cache-consider-host", false, "enable or disable the use of the host in the Open Graph tag cache")
extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder")
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
bind = flag.String("bind", ":8923", "network address to bind HTTP to")
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
challengeDifficulty = flag.Int("difficulty", defaultDifficulty, "difficulty of the challenge")
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
metricsBindNetwork = flag.String("metrics-bind-network", "tcp", "network family for the metrics server to bind to")
socketMode = flag.String("socket-mode", "0770", "socket mode (permissions) for unix domain sockets.")
robotsTxt = flag.Bool("serve-robots-txt", false, "serve a robots.txt file that disallows all robots")
policyFname = flag.String("policy-fname", "", "full path to anubis policy document (defaults to a sensible built-in policy)")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to")
healthcheck = flag.Bool("healthcheck", false, "run a health check against Anubis")
debugXRealIPDefault = flag.String("debug-x-real-ip-default", "", "If set, replace empty X-Real-Ip headers with this value, useful only for debugging Anubis and running it locally")
//go:embed static botPolicies.json
static embed.FS
challengesIssued = promauto.NewCounter(prometheus.CounterOpts{
Name: "anubis_challenges_issued",
Help: "The total number of challenges issued",
})
challengesValidated = promauto.NewCounter(prometheus.CounterOpts{
Name: "anubis_challenges_validated",
Help: "The total number of challenges validated",
})
droneBLHits = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "anubis_dronebl_hits",
Help: "The total number of hits from DroneBL",
}, []string{"status"})
failedValidations = promauto.NewCounter(prometheus.CounterOpts{
Name: "anubis_failed_validations",
Help: "The total number of failed validations",
})
timeTaken = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "anubis_time_taken",
Help: "The time taken for a browser to generate a response (milliseconds)",
Buckets: prometheus.ExponentialBucketsRange(1, math.Pow(2, 18), 19),
})
)
func keyFromHex(value string) (ed25519.PrivateKey, error) {
keyBytes, err := hex.DecodeString(value)
if err != nil {
return nil, fmt.Errorf("supplied key is not hex-encoded: %w", err)
}
const (
cookieName = "within.website-x-cmd-anubis-auth"
staticPath = "/.within.website/x/cmd/anubis/"
defaultDifficulty = 4
)
if len(keyBytes) != ed25519.SeedSize {
return nil, fmt.Errorf("supplied key is not %d bytes long, got %d bytes", ed25519.SeedSize, len(keyBytes))
}
return ed25519.NewKeyFromSeed(keyBytes), nil
}
//go:generate go tool github.com/a-h/templ/cmd/templ generate
//go:generate esbuild js/main.mjs --sourcemap --bundle --minify --outfile=static/js/main.mjs
//go:generate gzip -f -k static/js/main.mjs
//go:generate zstd -f -k --ultra -22 static/js/main.mjs
//go:generate brotli -fZk static/js/main.mjs
func doHealthCheck() error {
resp, err := http.Get("http://localhost" + *metricsBind + anubis.BasePrefix + "/metrics")
resp, err := http.Get("http://localhost" + *metricsBind + "/metrics")
if err != nil {
return fmt.Errorf("failed to fetch metrics: %w", err)
}
@@ -98,11 +117,7 @@ func setupListener(network string, address string) (net.Listener, string) {
case "unix":
formattedAddress = "unix:" + address
case "tcp":
if strings.HasPrefix(address, ":") { // assume it's just a port e.g. :4259
formattedAddress = "http://localhost" + address
} else {
formattedAddress = "http://" + address
}
formattedAddress = "http://localhost" + address
default:
formattedAddress = fmt.Sprintf(`(%s) %s`, network, address)
}
@@ -122,10 +137,7 @@ func setupListener(network string, address string) (net.Listener, string) {
err = os.Chmod(address, os.FileMode(mode))
if err != nil {
err := listener.Close()
if err != nil {
log.Printf("failed to close listener: %v", err)
}
listener.Close()
log.Fatal(fmt.Errorf("could not change socket mode: %w", err))
}
}
@@ -133,163 +145,58 @@ func setupListener(network string, address string) (net.Listener, string) {
return listener, formattedAddress
}
func makeReverseProxy(target string) (http.Handler, error) {
targetUri, err := url.Parse(target)
if err != nil {
return nil, fmt.Errorf("failed to parse target URL: %w", err)
}
transport := http.DefaultTransport.(*http.Transport).Clone()
// https://github.com/oauth2-proxy/oauth2-proxy/blob/4e2100a2879ef06aea1411790327019c1a09217c/pkg/upstream/http.go#L124
if targetUri.Scheme == "unix" {
// clean path up so we don't use the socket path in proxied requests
addr := targetUri.Path
targetUri.Path = ""
// tell transport how to dial unix sockets
transport.DialContext = func(ctx context.Context, _, _ string) (net.Conn, error) {
dialer := net.Dialer{}
return dialer.DialContext(ctx, "unix", addr)
}
// tell transport how to handle the unix url scheme
transport.RegisterProtocol("unix", libanubis.UnixRoundTripper{Transport: transport})
}
rp := httputil.NewSingleHostReverseProxy(targetUri)
rp.Transport = transport
return rp, nil
}
func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) {
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for {
select {
case <-ticker.C:
s.CleanupDecayMap()
case <-ctx.Done():
return
}
}
}
func main() {
flagenv.Parse()
flag.Parse()
internal.InitSlog(*slogLevel)
if *extractResources != "" {
if err := extractEmbedFS(data.BotPolicies, ".", *extractResources); err != nil {
if *healthcheck {
if err := doHealthCheck(); err != nil {
log.Fatal(err)
}
if err := extractEmbedFS(web.Static, "static", *extractResources); err != nil {
log.Fatal(err)
}
fmt.Printf("Extracted embedded static files to %s\n", *extractResources)
return
}
var rp http.Handler
// when using anubis via Systemd and environment variables, then it is not possible to set targe to an empty string but only to space
if strings.TrimSpace(*target) != "" {
var err error
rp, err = makeReverseProxy(*target)
if err != nil {
log.Fatalf("can't make reverse proxy: %v", err)
}
}
policy, err := libanubis.LoadPoliciesOrDefault(*policyFname, *challengeDifficulty)
s, err := New(*target, *policyFname)
if err != nil {
log.Fatalf("can't parse policy file: %v", err)
log.Fatal(err)
}
ruleErrorIDs := make(map[string]string)
for _, rule := range policy.Bots {
fmt.Println("Rule error IDs:")
for _, rule := range s.policy.Bots {
if rule.Action != config.RuleDeny {
continue
}
hash := rule.Hash()
ruleErrorIDs[rule.Name] = hash
}
// replace the bot policy rules with a single rule that always benchmarks
if *debugBenchmarkJS {
policy.Bots = []botPolicy.Bot{{
Name: "",
Rules: botPolicy.NewHeaderExistsChecker("User-Agent"),
Action: config.RuleBenchmark,
}}
}
if *basePrefix != "" && !strings.HasPrefix(*basePrefix, "/") {
log.Fatalf("[misconfiguration] base-prefix must start with a slash, eg: /%s", *basePrefix)
} else if strings.HasSuffix(*basePrefix, "/") {
log.Fatalf("[misconfiguration] base-prefix must not end with a slash")
}
var priv ed25519.PrivateKey
if *ed25519PrivateKeyHex != "" && *ed25519PrivateKeyHexFile != "" {
log.Fatal("do not specify both ED25519_PRIVATE_KEY_HEX and ED25519_PRIVATE_KEY_HEX_FILE")
} else if *ed25519PrivateKeyHex != "" {
priv, err = keyFromHex(*ed25519PrivateKeyHex)
hash, err := rule.Hash()
if err != nil {
log.Fatalf("failed to parse and validate ED25519_PRIVATE_KEY_HEX: %v", err)
}
} else if *ed25519PrivateKeyHexFile != "" {
hexFile, err := os.ReadFile(*ed25519PrivateKeyHexFile)
if err != nil {
log.Fatalf("failed to read ED25519_PRIVATE_KEY_HEX_FILE %s: %v", *ed25519PrivateKeyHexFile, err)
log.Fatalf("can't calculate checksum of rule %s: %v", rule.Name, err)
}
priv, err = keyFromHex(string(bytes.TrimSpace(hexFile)))
if err != nil {
log.Fatalf("failed to parse and validate content of ED25519_PRIVATE_KEY_HEX_FILE: %v", err)
}
} else {
_, priv, err = ed25519.GenerateKey(rand.Reader)
if err != nil {
log.Fatalf("failed to generate ed25519 key: %v", err)
}
slog.Warn("generating random key, Anubis will have strange behavior when multiple instances are behind the same load balancer target, for more information: see https://anubis.techaro.lol/docs/admin/installation#key-generation")
fmt.Printf("* %s: %s\n", rule.Name, hash)
}
fmt.Println()
var redirectDomainsList []string
if *redirectDomains != "" {
domains := strings.Split(*redirectDomains, ",")
for _, domain := range domains {
_, err = url.Parse(domain)
if err != nil {
log.Fatalf("cannot parse redirect-domain %q: %s", domain, err.Error())
}
redirectDomainsList = append(redirectDomainsList, strings.TrimSpace(domain))
}
} else {
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
}
mux := http.NewServeMux()
xess.Mount(mux)
s, err := libanubis.New(libanubis.Options{
BasePrefix: *basePrefix,
Next: rp,
Policy: policy,
ServeRobotsTXT: *robotsTxt,
PrivateKey: priv,
CookieDomain: *cookieDomain,
CookieExpiration: *cookieExpiration,
CookiePartitioned: *cookiePartitioned,
OGPassthrough: *ogPassthrough,
OGTimeToLive: *ogTimeToLive,
RedirectDomains: redirectDomainsList,
Target: *target,
WebmasterEmail: *webmasterEmail,
OGCacheConsidersHost: *ogCacheConsiderHost,
})
if err != nil {
log.Fatalf("can't construct libanubis.Server: %v", err)
mux.Handle(staticPath, internal.UnchangingCache(http.StripPrefix(staticPath, http.FileServerFS(static))))
// mux.HandleFunc("GET /.within.website/x/cmd/anubis/static/js/main.mjs", serveMainJSWithBestEncoding)
mux.HandleFunc("POST /.within.website/x/cmd/anubis/api/make-challenge", s.makeChallenge)
mux.HandleFunc("GET /.within.website/x/cmd/anubis/api/pass-challenge", s.passChallenge)
mux.HandleFunc("GET /.within.website/x/cmd/anubis/api/test-error", s.testError)
if *robotsTxt {
mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
http.ServeFileFS(w, r, static, "static/robots.txt")
})
mux.HandleFunc("/.well-known/robots.txt", func(w http.ResponseWriter, r *http.Request) {
http.ServeFileFS(w, r, static, "static/robots.txt")
})
}
wg := new(sync.WaitGroup)
@@ -301,30 +208,24 @@ func main() {
wg.Add(1)
go metricsServer(ctx, wg.Done)
}
go startDecayMapCleanup(ctx, s)
mux.HandleFunc("/", s.maybeReverseProxy)
var h http.Handler
h = s
h = internal.RemoteXRealIP(*useRemoteAddress, *bindNetwork, h)
h = mux
h = internal.DefaultXRealIP(*debugXRealIPDefault, h)
h = internal.XForwardedForToXRealIP(h)
h = internal.XForwardedForUpdate(h)
srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()}
listener, listenerUrl := setupListener(*bindNetwork, *bind)
srv := http.Server{Handler: h}
listener, url := setupListener(*bindNetwork, *bind)
slog.Info(
"listening",
"url", listenerUrl,
"url", url,
"difficulty", *challengeDifficulty,
"serveRobotsTXT", *robotsTxt,
"target", *target,
"version", anubis.Version,
"use-remote-address", *useRemoteAddress,
"debug-benchmark-js", *debugBenchmarkJS,
"og-passthrough", *ogPassthrough,
"og-expiry-time", *ogTimeToLive,
"base-prefix", *basePrefix,
"cookie-expiration-time", *cookieExpiration,
"rule-error-ids", ruleErrorIDs,
"debug-x-real-ip-default", *debugXRealIPDefault,
)
go func() {
@@ -336,7 +237,7 @@ func main() {
}
}()
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
if err := srv.Serve(listener); err != http.ErrServerClosed {
log.Fatal(err)
}
wg.Wait()
@@ -346,19 +247,11 @@ func metricsServer(ctx context.Context, done func()) {
defer done()
mux := http.NewServeMux()
mux.Handle(anubis.BasePrefix+"/metrics", promhttp.Handler())
mux.Handle("/metrics", promhttp.Handler())
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
slog.Debug("listening for metrics", "url", metricsUrl)
if *healthcheck {
log.Println("running healthcheck")
if err := doHealthCheck(); err != nil {
log.Fatal(err)
}
return
}
srv := http.Server{Handler: mux}
listener, url := setupListener(*metricsBindNetwork, *metricsBind)
slog.Debug("listening for metrics", "url", url)
go func() {
<-ctx.Done()
@@ -369,33 +262,450 @@ func metricsServer(ctx context.Context, done func()) {
}
}()
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
if err := srv.Serve(listener); err != http.ErrServerClosed {
log.Fatal(err)
}
}
func extractEmbedFS(fsys embed.FS, root string, destDir string) error {
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
func sha256sum(text string) string {
hash := sha256.New()
hash.Write([]byte(text))
return hex.EncodeToString(hash.Sum(nil))
}
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
fp := sha256.Sum256(s.priv.Seed())
data := fmt.Sprintf(
"Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
r.Header.Get("Accept-Language"),
r.Header.Get("X-Real-Ip"),
r.UserAgent(),
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
fp,
difficulty,
)
return sha256sum(data)
}
func New(target, policyFname string) (*Server, error) {
u, err := url.Parse(target)
if err != nil {
return nil, fmt.Errorf("failed to parse target URL: %w", err)
}
pub, priv, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return nil, fmt.Errorf("failed to generate ed25519 key: %w", err)
}
transport := http.DefaultTransport.(*http.Transport).Clone()
// https://github.com/oauth2-proxy/oauth2-proxy/blob/4e2100a2879ef06aea1411790327019c1a09217c/pkg/upstream/http.go#L124
if u.Scheme == "unix" {
// clean path up so we don't use the socket path in proxied requests
addr := u.Path
u.Path = ""
// tell transport how to dial unix sockets
transport.DialContext = func(ctx context.Context, _, _ string) (net.Conn, error) {
dialer := net.Dialer{}
return dialer.DialContext(ctx, "unix", addr)
}
// tell transport how to handle the unix url scheme
transport.RegisterProtocol("unix", unixRoundTripper{Transport: transport})
}
rp := httputil.NewSingleHostReverseProxy(u)
rp.Transport = transport
var fin io.ReadCloser
if policyFname != "" {
fin, err = os.Open(policyFname)
if err != nil {
return err
return nil, fmt.Errorf("can't parse policy file %s: %w", policyFname, err)
}
relPath, err := filepath.Rel(root, path)
} else {
policyFname = "(static)/botPolicies.json"
fin, err = static.Open("botPolicies.json")
if err != nil {
return err
return nil, fmt.Errorf("[unexpected] can't parse builtin policy file %s: %w", policyFname, err)
}
}
defer fin.Close()
policy, err := parseConfig(fin, policyFname, *challengeDifficulty)
if err != nil {
return nil, err // parseConfig sets a fancy error for us
}
return &Server{
rp: rp,
priv: priv,
pub: pub,
policy: policy,
dnsblCache: NewDecayMap[string, dnsbl.DroneBLResponse](),
}, nil
}
// https://github.com/oauth2-proxy/oauth2-proxy/blob/master/pkg/upstream/http.go#L124
type unixRoundTripper struct {
Transport *http.Transport
}
// set bare minimum stuff
func (t unixRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
req = req.Clone(req.Context())
if req.Host == "" {
req.Host = "localhost"
}
req.URL.Host = req.Host // proxy error: no Host in request URL
req.URL.Scheme = "http" // make http.Transport happy and avoid an infinite recursion
return t.Transport.RoundTrip(req)
}
type Server struct {
rp *httputil.ReverseProxy
priv ed25519.PrivateKey
pub ed25519.PublicKey
policy *ParsedConfig
dnsblCache *DecayMap[string, dnsbl.DroneBLResponse]
}
func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) {
lg := slog.With(
"user_agent", r.UserAgent(),
"accept_language", r.Header.Get("Accept-Language"),
"priority", r.Header.Get("Priority"),
"x-forwarded-for",
r.Header.Get("X-Forwarded-For"),
"x-real-ip", r.Header.Get("X-Real-Ip"),
)
cr, rule, err := s.check(r)
if err != nil {
lg.Error("check failed", "err", err)
templ.Handler(base("Oh noes!", errorPage("Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"maybeReverseProxy\"")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
r.Header.Add("X-Anubis-Rule", cr.Name)
r.Header.Add("X-Anubis-Action", string(cr.Rule))
lg = lg.With("check_result", cr)
policyApplications.WithLabelValues(cr.Name, string(cr.Rule)).Add(1)
ip := r.Header.Get("X-Real-Ip")
if s.policy.DNSBL && ip != "" {
resp, ok := s.dnsblCache.Get(ip)
if !ok {
lg.Debug("looking up ip in dnsbl")
resp, err := dnsbl.Lookup(ip)
if err != nil {
lg.Error("can't look up ip in dnsbl", "err", err)
}
s.dnsblCache.Set(ip, resp, 24*time.Hour)
droneBLHits.WithLabelValues(resp.String()).Inc()
}
destPath := filepath.Join(destDir, root, relPath)
if d.IsDir() {
return os.MkdirAll(destPath, 0o700)
if resp != dnsbl.AllGood {
lg.Info("DNSBL hit", "status", resp.String())
templ.Handler(base("Oh noes!", errorPage(fmt.Sprintf("DroneBL reported an entry: %s, see https://dronebl.org/lookup?ip=%s", resp.String(), ip))), templ.WithStatus(http.StatusOK)).ServeHTTP(w, r)
return
}
}
data, err := fs.ReadFile(fsys, path)
switch cr.Rule {
case config.RuleAllow:
lg.Debug("allowing traffic to origin (explicit)")
s.rp.ServeHTTP(w, r)
return
case config.RuleDeny:
clearCookie(w)
lg.Info("explicit deny")
if rule == nil {
lg.Error("rule is nil, cannot calculate checksum")
templ.Handler(base("Oh noes!", errorPage("Other internal server error (contact the admin)")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
hash, err := rule.Hash()
if err != nil {
return err
lg.Error("can't calculate checksum of rule", "err", err)
templ.Handler(base("Oh noes!", errorPage("Other internal server error (contact the admin)")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
lg.Debug("rule hash", "hash", hash)
templ.Handler(base("Oh noes!", errorPage(fmt.Sprintf("Access Denied: error code %s", hash))), templ.WithStatus(http.StatusOK)).ServeHTTP(w, r)
return
case config.RuleChallenge:
lg.Debug("challenge requested")
default:
clearCookie(w)
templ.Handler(base("Oh noes!", errorPage("Other internal server error (contact the admin)")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
return os.WriteFile(destPath, data, 0o644)
ckie, err := r.Cookie(cookieName)
if err != nil {
lg.Debug("cookie not found", "path", r.URL.Path)
clearCookie(w)
s.renderIndex(w, r)
return
}
if err := ckie.Valid(); err != nil {
lg.Debug("cookie is invalid", "err", err)
clearCookie(w)
s.renderIndex(w, r)
return
}
if time.Now().After(ckie.Expires) && !ckie.Expires.IsZero() {
lg.Debug("cookie expired", "path", r.URL.Path)
clearCookie(w)
s.renderIndex(w, r)
return
}
token, err := jwt.ParseWithClaims(ckie.Value, jwt.MapClaims{}, func(token *jwt.Token) (interface{}, error) {
return s.pub, nil
}, jwt.WithExpirationRequired(), jwt.WithStrictDecoding())
if err != nil || !token.Valid {
lg.Debug("invalid token", "path", r.URL.Path, "err", err)
clearCookie(w)
s.renderIndex(w, r)
return
}
if randomJitter() {
r.Header.Add("X-Anubis-Status", "PASS-BRIEF")
lg.Debug("cookie is not enrolled into secondary screening")
s.rp.ServeHTTP(w, r)
return
}
claims, ok := token.Claims.(jwt.MapClaims)
if !ok {
lg.Debug("invalid token claims type", "path", r.URL.Path)
clearCookie(w)
s.renderIndex(w, r)
return
}
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
if claims["challenge"] != challenge {
lg.Debug("invalid challenge", "path", r.URL.Path)
clearCookie(w)
s.renderIndex(w, r)
return
}
var nonce int
if v, ok := claims["nonce"].(float64); ok {
nonce = int(v)
}
calcString := fmt.Sprintf("%s%d", challenge, nonce)
calculated := sha256sum(calcString)
if subtle.ConstantTimeCompare([]byte(claims["response"].(string)), []byte(calculated)) != 1 {
lg.Debug("invalid response", "path", r.URL.Path)
failedValidations.Inc()
clearCookie(w)
s.renderIndex(w, r)
return
}
slog.Debug("all checks passed")
r.Header.Add("X-Anubis-Status", "PASS-FULL")
s.rp.ServeHTTP(w, r)
}
func (s *Server) renderIndex(w http.ResponseWriter, r *http.Request) {
templ.Handler(
base("Making sure you're not a bot!", index()),
).ServeHTTP(w, r)
}
func (s *Server) makeChallenge(w http.ResponseWriter, r *http.Request) {
lg := slog.With("user_agent", r.UserAgent(), "accept_language", r.Header.Get("Accept-Language"), "priority", r.Header.Get("Priority"), "x-forwarded-for", r.Header.Get("X-Forwarded-For"), "x-real-ip", r.Header.Get("X-Real-Ip"))
cr, rule, err := s.check(r)
if err != nil {
lg.Error("check failed", "err", err)
w.WriteHeader(http.StatusInternalServerError)
json.NewEncoder(w).Encode(struct {
Error string `json:"error"`
}{
Error: "Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"makeChallenge\"",
})
return
}
lg = lg.With("check_result", cr)
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
json.NewEncoder(w).Encode(struct {
Challenge string `json:"challenge"`
Rules *config.ChallengeRules `json:"rules"`
}{
Challenge: challenge,
Rules: rule.Challenge,
})
lg.Debug("made challenge", "challenge", challenge, "rules", rule.Challenge, "cr", cr)
challengesIssued.Inc()
}
func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) {
lg := slog.With(
"user_agent", r.UserAgent(),
"accept_language", r.Header.Get("Accept-Language"),
"priority", r.Header.Get("Priority"),
"x-forwarded-for", r.Header.Get("X-Forwarded-For"),
"x-real-ip", r.Header.Get("X-Real-Ip"),
)
cr, rule, err := s.check(r)
if err != nil {
lg.Error("check failed", "err", err)
templ.Handler(base("Oh noes!", errorPage("Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"passChallenge\".")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
lg = lg.With("check_result", cr)
nonceStr := r.FormValue("nonce")
if nonceStr == "" {
clearCookie(w)
lg.Debug("no nonce")
templ.Handler(base("Oh noes!", errorPage("missing nonce")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
elapsedTimeStr := r.FormValue("elapsedTime")
if elapsedTimeStr == "" {
clearCookie(w)
lg.Debug("no elapsedTime")
templ.Handler(base("Oh noes!", errorPage("missing elapsedTime")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
elapsedTime, err := strconv.ParseFloat(elapsedTimeStr, 64)
if err != nil {
clearCookie(w)
lg.Debug("elapsedTime doesn't parse", "err", err)
templ.Handler(base("Oh noes!", errorPage("invalid elapsedTime")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
lg.Info("challenge took", "elapsedTime", elapsedTime)
timeTaken.Observe(elapsedTime)
response := r.FormValue("response")
redir := r.FormValue("redir")
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
nonce, err := strconv.Atoi(nonceStr)
if err != nil {
clearCookie(w)
lg.Debug("nonce doesn't parse", "err", err)
templ.Handler(base("Oh noes!", errorPage("invalid nonce")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
calcString := fmt.Sprintf("%s%d", challenge, nonce)
calculated := sha256sum(calcString)
if subtle.ConstantTimeCompare([]byte(response), []byte(calculated)) != 1 {
clearCookie(w)
lg.Debug("hash does not match", "got", response, "want", calculated)
templ.Handler(base("Oh noes!", errorPage("invalid response")), templ.WithStatus(http.StatusForbidden)).ServeHTTP(w, r)
failedValidations.Inc()
return
}
// compare the leading zeroes
if !strings.HasPrefix(response, strings.Repeat("0", *challengeDifficulty)) {
clearCookie(w)
lg.Debug("difficulty check failed", "response", response, "difficulty", *challengeDifficulty)
templ.Handler(base("Oh noes!", errorPage("invalid response")), templ.WithStatus(http.StatusForbidden)).ServeHTTP(w, r)
failedValidations.Inc()
return
}
// generate JWT cookie
token := jwt.NewWithClaims(jwt.SigningMethodEdDSA, jwt.MapClaims{
"challenge": challenge,
"nonce": nonce,
"response": response,
"iat": time.Now().Unix(),
"nbf": time.Now().Add(-1 * time.Minute).Unix(),
"exp": time.Now().Add(24 * 7 * time.Hour).Unix(),
})
tokenString, err := token.SignedString(s.priv)
if err != nil {
lg.Error("failed to sign JWT", "err", err)
clearCookie(w)
templ.Handler(base("Oh noes!", errorPage("failed to sign JWT")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
return
}
http.SetCookie(w, &http.Cookie{
Name: cookieName,
Value: tokenString,
Expires: time.Now().Add(24 * 7 * time.Hour),
SameSite: http.SameSiteLaxMode,
Path: "/",
})
challengesValidated.Inc()
lg.Debug("challenge passed, redirecting to app")
http.Redirect(w, r, redir, http.StatusFound)
}
func (s *Server) testError(w http.ResponseWriter, r *http.Request) {
err := r.FormValue("err")
templ.Handler(base("Oh noes!", errorPage(err)), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
}
func ohNoes(w http.ResponseWriter, r *http.Request, err error) {
slog.Error("super fatal error", "err", err)
templ.Handler(base("Oh noes!", errorPage("An internal server error happened")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
}
func clearCookie(w http.ResponseWriter) {
http.SetCookie(w, &http.Cookie{
Name: cookieName,
Value: "",
Expires: time.Now().Add(-1 * time.Hour),
MaxAge: -1,
SameSite: http.SameSiteLaxMode,
})
}
func randomJitter() bool {
return mrand.Intn(100) > 10
}
func serveMainJSWithBestEncoding(w http.ResponseWriter, r *http.Request) {
priorityList := []string{"zstd", "br", "gzip"}
enc2ext := map[string]string{
"zstd": "zst",
"br": "br",
"gzip": "gz",
}
for _, enc := range priorityList {
if strings.Contains(r.Header.Get("Accept-Encoding"), enc) {
w.Header().Set("Content-Type", "text/javascript")
w.Header().Set("Content-Encoding", enc)
http.ServeFileFS(w, r, static, "static/js/main.mjs."+enc2ext[enc])
return
}
}
w.Header().Set("Content-Type", "text/javascript")
http.ServeFileFS(w, r, static, "static/js/main.mjs")
}

212
cmd/anubis/policy.go Normal file
View File

@@ -0,0 +1,212 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"io"
"log"
"log/slog"
"net"
"net/http"
"regexp"
"github.com/TecharoHQ/anubis/cmd/anubis/internal/config"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/yl2chen/cidranger"
)
var (
policyApplications = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "anubis_policy_results",
Help: "The results of each policy rule",
}, []string{"rule", "action"})
)
type ParsedConfig struct {
orig config.Config
Bots []Bot
DNSBL bool
}
type Bot struct {
Name string
UserAgent *regexp.Regexp
Path *regexp.Regexp
Action config.Rule `json:"action"`
Challenge *config.ChallengeRules
Ranger cidranger.Ranger
}
func (b Bot) Hash() (string, error) {
var pathRex string
if b.Path != nil {
pathRex = b.Path.String()
}
var userAgentRex string
if b.UserAgent != nil {
userAgentRex = b.UserAgent.String()
}
return sha256sum(fmt.Sprintf("%s::%s::%s", b.Name, pathRex, userAgentRex)), nil
}
func parseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
var c config.Config
if err := json.NewDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config JSON %s: %w", fname, err)
}
if err := c.Valid(); err != nil {
return nil, err
}
var err error
result := &ParsedConfig{
orig: c,
}
for _, b := range c.Bots {
if berr := b.Valid(); berr != nil {
err = errors.Join(err, berr)
continue
}
var botParseErr error
parsedBot := Bot{
Name: b.Name,
Action: b.Action,
}
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
parsedBot.Ranger = cidranger.NewPCTrieRanger()
for _, cidr := range b.RemoteAddr {
_, rng, err := net.ParseCIDR(cidr)
if err != nil {
return nil, fmt.Errorf("[unexpected] range %s not parsing: %w", cidr, err)
}
parsedBot.Ranger.Insert(cidranger.NewBasicRangerEntry(*rng))
}
}
if b.UserAgentRegex != nil {
userAgent, err := regexp.Compile(*b.UserAgentRegex)
if err != nil {
botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling user agent regexp: %w", err))
continue
} else {
parsedBot.UserAgent = userAgent
}
}
if b.PathRegex != nil {
path, err := regexp.Compile(*b.PathRegex)
if err != nil {
botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling path regexp: %w", err))
continue
} else {
parsedBot.Path = path
}
}
if b.Challenge == nil {
parsedBot.Challenge = &config.ChallengeRules{
Difficulty: defaultDifficulty,
ReportAs: defaultDifficulty,
Algorithm: config.AlgorithmFast,
}
} else {
parsedBot.Challenge = b.Challenge
if parsedBot.Challenge.Algorithm == config.AlgorithmUnknown {
parsedBot.Challenge.Algorithm = config.AlgorithmFast
}
}
result.Bots = append(result.Bots, parsedBot)
}
if err != nil {
return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, err)
}
result.DNSBL = c.DNSBL
return result, nil
}
type CheckResult struct {
Name string
Rule config.Rule
}
func (cr CheckResult) LogValue() slog.Value {
return slog.GroupValue(
slog.String("name", cr.Name),
slog.String("rule", string(cr.Rule)))
}
func cr(name string, rule config.Rule) CheckResult {
return CheckResult{
Name: name,
Rule: rule,
}
}
func (s *Server) checkRemoteAddress(b Bot, addr net.IP) bool {
if b.Ranger == nil {
return false
}
ok, err := b.Ranger.Contains(addr)
if err != nil {
log.Panicf("[unexpected] something very funky is going on, %q does not have a calculable network number: %v", addr.String(), err)
}
return ok
}
// Check evaluates the list of rules, and returns the result
func (s *Server) check(r *http.Request) (CheckResult, *Bot, error) {
host := r.Header.Get("X-Real-Ip")
if host == "" {
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] X-Real-Ip header is not set")
}
addr := net.ParseIP(host)
if addr == nil {
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host)
}
for _, b := range s.policy.Bots {
if b.UserAgent != nil {
if uaMatch := b.UserAgent.MatchString(r.UserAgent()); uaMatch || (uaMatch && s.checkRemoteAddress(b, addr)) {
return cr("bot/"+b.Name, b.Action), &b, nil
}
}
if b.Path != nil {
if pathMatch := b.Path.MatchString(r.URL.Path); pathMatch || (pathMatch && s.checkRemoteAddress(b, addr)) {
return cr("bot/"+b.Name, b.Action), &b, nil
}
}
if b.Ranger != nil {
if s.checkRemoteAddress(b, addr) {
return cr("bot/"+b.Name, b.Action), &b, nil
}
}
}
return cr("default/allow", config.RuleAllow), &Bot{
Challenge: &config.ChallengeRules{
Difficulty: defaultDifficulty,
ReportAs: defaultDifficulty,
Algorithm: config.AlgorithmFast,
},
}, nil
}

View File

@@ -1,28 +1,25 @@
package policy
package main
import (
"os"
"path/filepath"
"testing"
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
)
func TestDefaultPolicyMustParse(t *testing.T) {
fin, err := data.BotPolicies.Open("botPolicies.json")
fin, err := static.Open("botPolicies.json")
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(fin, "botPolicies.json", anubis.DefaultDifficulty); err != nil {
if _, err := parseConfig(fin, "botPolicies.json", defaultDifficulty); err != nil {
t.Fatalf("can't parse config: %v", err)
}
}
func TestGoodConfigs(t *testing.T) {
finfos, err := os.ReadDir("config/testdata/good")
finfos, err := os.ReadDir("internal/config/testdata/good")
if err != nil {
t.Fatal(err)
}
@@ -30,13 +27,13 @@ func TestGoodConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
fin, err := os.Open(filepath.Join("internal", "config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
if _, err := parseConfig(fin, fin.Name(), defaultDifficulty); err != nil {
t.Fatal(err)
}
})
@@ -44,7 +41,7 @@ func TestGoodConfigs(t *testing.T) {
}
func TestBadConfigs(t *testing.T) {
finfos, err := os.ReadDir("config/testdata/bad")
finfos, err := os.ReadDir("internal/config/testdata/bad")
if err != nil {
t.Fatal(err)
}
@@ -52,13 +49,13 @@ func TestBadConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "bad", st.Name()))
fin, err := os.Open(filepath.Join("internal", "config", "testdata", "bad", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err == nil {
if _, err := parseConfig(fin, fin.Name(), defaultDifficulty); err == nil {
t.Fatal(err)
} else {
t.Log(err)

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

View File

@@ -0,0 +1,2 @@
(()=>{function p(r,n=5,t=navigator.hardwareConcurrency||1){return console.debug("fast algo"),new Promise((e,o)=>{let s=URL.createObjectURL(new Blob(["(",y(),")()"],{type:"application/javascript"})),a=[];for(let i=0;i<t;i++){let c=new Worker(s);c.onmessage=d=>{a.forEach(u=>u.terminate()),c.terminate(),e(d.data)},c.onerror=d=>{c.terminate(),o()},c.postMessage({data:r,difficulty:n,nonce:i,threads:t}),a.push(c)}URL.revokeObjectURL(s)})}function y(){return function(){let r=t=>{let e=new TextEncoder().encode(t);return crypto.subtle.digest("SHA-256",e.buffer)};function n(t){return Array.from(t).map(e=>e.toString(16).padStart(2,"0")).join("")}addEventListener("message",async t=>{let e=t.data.data,o=t.data.difficulty,s,a=t.data.nonce,i=t.data.threads;for(;;){let c=await r(e+a),d=new Uint8Array(c),u=!0;for(let m=0;m<o;m++){let l=Math.floor(m/2),g=m%2;if((d[l]>>(g===0?4:0)&15)!==0){u=!1;break}}if(u){s=n(d),console.log(s);break}a+=i}postMessage({hash:s,data:e,difficulty:o,nonce:a})})}.toString()}function f(r,n=5,t=1){return console.debug("slow algo"),new Promise((e,o)=>{let s=URL.createObjectURL(new Blob(["(",b(),")()"],{type:"application/javascript"})),a=new Worker(s);a.onmessage=i=>{a.terminate(),e(i.data)},a.onerror=i=>{a.terminate(),o()},a.postMessage({data:r,difficulty:n}),URL.revokeObjectURL(s)})}function b(){return function(){let r=n=>{let t=new TextEncoder().encode(n);return crypto.subtle.digest("SHA-256",t.buffer).then(e=>Array.from(new Uint8Array(e)).map(o=>o.toString(16).padStart(2,"0")).join(""))};addEventListener("message",async n=>{let t=n.data.data,e=n.data.difficulty,o,s=0;do o=await r(t+s++);while(o.substring(0,e)!==Array(e+1).join("0"));s-=1,postMessage({hash:o,data:t,difficulty:e,nonce:s})})}.toString()}var L={fast:p,slow:f},w=(r="",n={})=>{let t=new URL(r,window.location.href);return Object.entries(n).forEach(e=>{let[o,s]=e;t.searchParams.set(o,s)}),t.toString()},h=(r,n)=>w(`/.within.website/x/cmd/anubis/static/img/${r}.webp`,{cacheBuster:n});(async()=>{let r=document.getElementById("status"),n=document.getElementById("image"),t=document.getElementById("title"),e=document.getElementById("spinner"),o=JSON.parse(document.getElementById("anubis_version").textContent);r.innerHTML="Calculating...";let{challenge:s,rules:a}=await fetch("/.within.website/x/cmd/anubis/api/make-challenge",{method:"POST"}).then(l=>{if(!l.ok)throw new Error("Failed to fetch config");return l.json()}).catch(l=>{throw t.innerHTML="Oh no!",r.innerHTML=`Failed to fetch config: ${l.message}`,n.src=h("sad",o),e.innerHTML="",e.style.display="none",l}),i=L[a.algorithm];if(!i){t.innerHTML="Oh no!",r.innerHTML="Failed to resolve check algorithm. You may want to reload the page.",n.src=h("sad",o),e.innerHTML="",e.style.display="none";return}r.innerHTML=`Calculating...<br/>Difficulty: ${a.report_as}`;let c=Date.now(),{hash:d,nonce:u}=await i(s,a.difficulty),m=Date.now();console.log({hash:d,nonce:u}),t.innerHTML="Success!",r.innerHTML=`Done! Took ${m-c}ms, ${u} iterations`,n.src=h("happy",o),e.innerHTML="",e.style.display="none",setTimeout(()=>{let l=window.location.href;window.location.href=w("/.within.website/x/cmd/anubis/api/pass-challenge",{response:d,nonce:u,redir:l,elapsedTime:m-c})},250)})();})();
//# sourceMappingURL=main.mjs.map

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -1,11 +1,9 @@
User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Brightbot 1.0
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
@@ -13,13 +11,9 @@ User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: FacebookBot
User-agent: Factset_spyderbot
User-agent: FirecrawlAgent
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GoogleOther
@@ -30,27 +24,19 @@ User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: imgproxy
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: Operator
User-agent: PanguBot
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SWA
User-agent: SemrushBot
User-agent: Sidetrade indexer bot
User-agent: TikTokSpider
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: Webzio-Extended
@@ -58,4 +44,4 @@ User-agent: YouBot
Disallow: /
User-agent: *
Disallow: /
Disallow: /

View File

@@ -19,6 +19,7 @@ var (
dockerLabels = flag.String("docker-labels", os.Getenv("DOCKER_METADATA_OUTPUT_LABELS"), "Docker image labels")
dockerRepo = flag.String("docker-repo", "registry.int.xeserv.us/techaro/anubis", "Docker image repository for Anubis")
dockerTags = flag.String("docker-tags", os.Getenv("DOCKER_METADATA_OUTPUT_TAGS"), "newline separated docker tags including the registry name")
githubActor = flag.String("github-actor", "", "GitHub actor")
githubEventName = flag.String("github-event-name", "", "GitHub event name")
pullRequestID = flag.Int("pull-request-id", -1, "GitHub pull request ID")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
@@ -30,7 +31,7 @@ func main() {
internal.InitSlog(*slogLevel)
koDockerRepo := strings.TrimSuffix(*dockerRepo, "/"+filepath.Base(*dockerRepo))
koDockerRepo := strings.TrimRight(*dockerRepo, "/"+filepath.Base(*dockerRepo))
if *githubEventName == "pull_request" && *pullRequestID != -1 {
*dockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis", *pullRequestID)
@@ -112,6 +113,11 @@ type image struct {
tag string
}
func newlineSep2Comma(inp string) string {
lines := strings.Split(inp, "\n")
return strings.Join(lines, ",")
}
func parseImageList(imageList string) ([]image, error) {
images := strings.Split(imageList, "\n")
var result []image
@@ -123,15 +129,15 @@ func parseImageList(imageList string) ([]image, error) {
// reg.xeiaso.net/techaro/anubis:latest
// repository: reg.xeiaso.net/techaro/anubis
// tag: latest
index := strings.LastIndex(img, ":")
parts := strings.SplitN(img, ":", 2)
result = append(result, image{
repository: img[:index],
tag: img[index+1:],
repository: parts[0],
tag: parts[1],
})
}
if len(result) == 0 {
return nil, fmt.Errorf("no images provided, bad flags")
return nil, fmt.Errorf("no images provided, bad flags??")
}
return result, nil

View File

@@ -1,6 +0,0 @@
- name: allow-api-routes
action: ALLOW
expression:
all:
- '!(method == "HEAD" || method == "GET")'
- path.startsWith("/api/")

View File

@@ -1,7 +0,0 @@
# By Aibrew: https://github.com/TecharoHQ/anubis/discussions/261#discussioncomment-12821065
- name: gitea-feed-atom
action: ALLOW
path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.atom$
- name: gitea-feed-rss
action: ALLOW
path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.rss$

View File

@@ -1,29 +0,0 @@
{
"bots": [
{
"import": "(data)/bots/_deny-pathological.yaml"
},
{
"import": "(data)/bots/ai-robots-txt.yaml"
},
{
"import": "(data)/crawlers/_allow-good.yaml"
},
{
"import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
},
{
"import": "(data)/common/keep-internet-working.yaml"
},
{
"name": "generic-browser",
"user_agent_regex": "Mozilla|Opera",
"action": "CHALLENGE"
}
],
"dnsbl": false,
"status_codes": {
"CHALLENGE": 200,
"DENY": 200
}
}

View File

@@ -1,61 +0,0 @@
## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths can either be prefixed with (data) to import from the common/shared
## rules in the data folder in the Anubis source tree or will point to absolute/relative
## paths in your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
bots:
# Pathological bots to deny
- # This correlates to data/bots/deny-pathological.yaml in the source tree
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
import: (data)/bots/_deny-pathological.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
- import: (data)/bots/ai-robots-txt.yaml
# Search engine crawlers to allow, defaults to:
# - Google (so they don't try to bypass Anubis)
# - Bing
# - DuckDuckGo
# - Qwant
# - The Internet Archive
# - Kagi
# - Marginalia
# - Mojeek
- import: (data)/crawlers/_allow-good.yaml
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
- import: (data)/common/keep-internet-working.yaml
# # Punish any bot with "bot" in the user-agent string
# # This is known to have a high false-positive rate, use at your own risk
# - name: generic-bot-catchall
# user_agent_regex: (?i:bot|crawler)
# action: CHALLENGE
# challenge:
# difficulty: 16 # impossible
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: CHALLENGE
dnsbl: false
# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
DENY: 200

View File

@@ -1,3 +0,0 @@
- import: (data)/bots/cloudflare-workers.yaml
- import: (data)/bots/headless-browsers.yaml
- import: (data)/bots/us-ai-scraper.yaml

View File

@@ -1,28 +0,0 @@
- name: deny-aggressive-brazilian-scrapers
action: DENY
expression:
any:
# Internet Explorer should be out of support
- userAgent.contains("MSIE")
# Trident is the Internet Explorer browser engine
- userAgent.contains("Trident")
# Opera is a fork of chrome now
- userAgent.contains("Presto")
# Windows CE is discontinued
- userAgent.contains("Windows CE")
# Windows 95 is discontinued
- userAgent.contains("Windows 95")
# Windows 98 is discontinued
- userAgent.contains("Windows 98")
# Windows 9.x is discontinued
- userAgent.contains("Win 9x")
# Amazon does not have an Alexa Toolbar.
- userAgent.contains("Alexa Toolbar")
- name: challenge-aggressive-brazilian-scrapers
action: CHALLENGE
expression:
any:
# This is not released, even Windows 11 calls itself Windows 10
- userAgent.contains("Windows NT 11.0")
# iPods are not in common use
- userAgent.contains("iPod")

View File

@@ -1,4 +0,0 @@
- name: "ai-robots-txt"
user_agent_regex: >-
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
action: DENY

View File

@@ -1,4 +0,0 @@
- name: cloudflare-workers
headers_regex:
CF-Worker: .*
action: DENY

View File

@@ -1,9 +0,0 @@
- name: lightpanda
user_agent_regex: ^LightPanda/.*$
action: DENY
- name: headless-chrome
user_agent_regex: HeadlessChrome
action: DENY
- name: headless-chromium
user_agent_regex: HeadlessChromium
action: DENY

View File

@@ -1,9 +0,0 @@
# phrik in the Arch Linux IRC channels
- name: archlinux-phrik
action: ALLOW
expression:
all:
- remoteAddress == "159.69.213.214"
- userAgent == "Mozilla/5.0 (compatible; utils.web Limnoria module)"
- '"X-Http-Version" in headers'
- headers["X-Http-Version"] == "HTTP/1.1"

View File

@@ -1,9 +0,0 @@
# chat in the gentoo IRC channels
- name: gentoo-chat
action: ALLOW
expression:
all:
- remoteAddress == "45.76.166.57"
- userAgent == "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"
- '"X-Http-Version" in headers'
- headers["X-Http-Version"] == "HTTP/1.1"

View File

@@ -1,3 +0,0 @@
- name: us-artificial-intelligence-scraper
user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper
action: DENY

View File

@@ -1,14 +0,0 @@
- name: allow-git-clients
action: ALLOW
expression:
all:
- >
(
userAgent.startsWith("git/") ||
userAgent.contains("libgit") ||
userAgent.startsWith("go-git") ||
userAgent.startsWith("JGit/") ||
userAgent.startsWith("JGit-")
)
- '"Git-Protocol" in headers'
- headers["Git-Protocol"] == "version=2"

View File

@@ -1,7 +0,0 @@
- name: go-get
action: ALLOW
expression:
all:
- userAgent.startsWith("Go-http-client/")
- '"go-get" in query'
- query["go-get"] == "1"

View File

@@ -1,6 +0,0 @@
- name: allow-api-routes
action: ALLOW
expression:
all:
- '!(method == "HEAD" || method == "GET")'
- path.startsWith("/api/")

View File

@@ -1,15 +0,0 @@
- name: ipv4-rfc-1918
action: ALLOW
remote_addresses:
- 10.0.0.0/8
- 172.16.0.0/12
- 192.168.0.0/16
- 100.64.0.0/10
- name: ipv6-ula
action: ALLOW
remote_addresses:
- fc00::/7
- name: ipv6-link-local
action: ALLOW
remote_addresses:
- fe80::/10

View File

@@ -1,7 +0,0 @@
- name: allow-api-requests
action: ALLOW
expression:
all:
- '"Accept" in headers'
- 'headers["Accept"] == "application/json"'
- 'path.startsWith("/api/")'

View File

@@ -1,13 +0,0 @@
# Common "keeping the internet working" routes
- name: well-known
path_regex: ^/.well-known/.*$
action: ALLOW
- name: favicon
path_regex: ^/favicon.ico$
action: ALLOW
- name: robots-txt
path_regex: ^/robots.txt$
action: ALLOW
- name: sitemap
path_regex: ^/sitemap.xml$
action: ALLOW

View File

@@ -1,3 +0,0 @@
- name: no-user-agent-string
action: DENY
expression: userAgent == ""

View File

@@ -1,8 +0,0 @@
- import: (data)/crawlers/googlebot.yaml
- import: (data)/crawlers/bingbot.yaml
- import: (data)/crawlers/duckduckbot.yaml
- import: (data)/crawlers/qwantbot.yaml
- import: (data)/crawlers/internet-archive.yaml
- import: (data)/crawlers/kagibot.yaml
- import: (data)/crawlers/marginalia.yaml
- import: (data)/crawlers/mojeekbot.yaml

View File

@@ -1,34 +0,0 @@
- name: bingbot
user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm
action: ALLOW
# https://www.bing.com/toolbox/bingbot.json
remote_addresses: [
"157.55.39.0/24",
"207.46.13.0/24",
"40.77.167.0/24",
"13.66.139.0/24",
"13.66.144.0/24",
"52.167.144.0/24",
"13.67.10.16/28",
"13.69.66.240/28",
"13.71.172.224/28",
"139.217.52.0/28",
"191.233.204.224/28",
"20.36.108.32/28",
"20.43.120.16/28",
"40.79.131.208/28",
"40.79.186.176/28",
"52.231.148.0/28",
"20.79.107.240/28",
"51.105.67.0/28",
"20.125.163.80/28",
"40.77.188.0/22",
"65.55.210.0/24",
"199.30.24.0/23",
"40.77.202.0/24",
"40.77.139.0/25",
"20.74.197.0/28",
"20.15.133.160/27",
"40.77.177.0/24",
"40.77.178.0/23"
]

View File

@@ -1,275 +0,0 @@
- name: duckduckbot
user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\)
action: ALLOW
# https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
remote_addresses: [
"57.152.72.128/32",
"51.8.253.152/32",
"40.80.242.63/32",
"20.12.141.99/32",
"20.49.136.28/32",
"51.116.131.221/32",
"51.107.40.209/32",
"20.40.133.240/32",
"20.50.168.91/32",
"51.120.48.122/32",
"20.193.45.113/32",
"40.76.173.151/32",
"40.76.163.7/32",
"20.185.79.47/32",
"52.142.26.175/32",
"20.185.79.15/32",
"52.142.24.149/32",
"40.76.162.208/32",
"40.76.163.23/32",
"40.76.162.191/32",
"40.76.162.247/32",
"40.88.21.235/32",
"20.191.45.212/32",
"52.146.59.12/32",
"52.146.59.156/32",
"52.146.59.154/32",
"52.146.58.236/32",
"20.62.224.44/32",
"51.104.180.53/32",
"51.104.180.47/32",
"51.104.180.26/32",
"51.104.146.225/32",
"51.104.146.235/32",
"20.73.202.147/32",
"20.73.132.240/32",
"20.71.12.143/32",
"20.56.197.58/32",
"20.56.197.63/32",
"20.43.150.93/32",
"20.43.150.85/32",
"20.44.222.1/32",
"40.89.243.175/32",
"13.89.106.77/32",
"52.143.242.6/32",
"52.143.241.111/32",
"52.154.60.82/32",
"20.197.209.11/32",
"20.197.209.27/32",
"20.226.133.105/32",
"191.234.216.4/32",
"191.234.216.178/32",
"20.53.92.211/32",
"20.53.91.2/32",
"20.207.99.197/32",
"20.207.97.190/32",
"40.81.250.205/32",
"40.64.106.11/32",
"40.64.105.247/32",
"20.72.242.93/32",
"20.99.255.235/32",
"20.113.3.121/32",
"52.224.16.221/32",
"52.224.21.53/32",
"52.224.20.204/32",
"52.224.21.19/32",
"52.224.20.249/32",
"52.224.20.203/32",
"52.224.20.190/32",
"52.224.16.229/32",
"52.224.21.20/32",
"52.146.63.80/32",
"52.224.20.227/32",
"52.224.20.193/32",
"52.190.37.160/32",
"52.224.21.23/32",
"52.224.20.223/32",
"52.224.20.181/32",
"52.224.21.49/32",
"52.224.21.55/32",
"52.224.21.61/32",
"52.224.19.152/32",
"52.224.20.186/32",
"52.224.21.27/32",
"52.224.21.51/32",
"52.224.20.174/32",
"52.224.21.4/32",
"51.104.164.109/32",
"51.104.167.71/32",
"51.104.160.177/32",
"51.104.162.149/32",
"51.104.167.95/32",
"51.104.167.54/32",
"51.104.166.111/32",
"51.104.167.88/32",
"51.104.161.32/32",
"51.104.163.250/32",
"51.104.164.189/32",
"51.104.167.19/32",
"51.104.160.167/32",
"51.104.167.110/32",
"20.191.44.119/32",
"51.104.167.104/32",
"20.191.44.234/32",
"51.104.164.215/32",
"51.104.167.52/32",
"20.191.44.22/32",
"51.104.167.87/32",
"51.104.167.96/32",
"20.191.44.16/32",
"51.104.167.61/32",
"51.104.164.147/32",
"20.50.48.159/32",
"40.114.182.172/32",
"20.50.50.130/32",
"20.50.50.163/32",
"20.50.50.46/32",
"40.114.182.153/32",
"20.50.50.118/32",
"20.50.49.55/32",
"20.50.49.25/32",
"40.114.183.251/32",
"20.50.50.123/32",
"20.50.49.237/32",
"20.50.48.192/32",
"20.50.50.134/32",
"51.138.90.233/32",
"40.114.183.196/32",
"20.50.50.146/32",
"40.114.183.88/32",
"20.50.50.145/32",
"20.50.50.121/32",
"20.50.49.40/32",
"51.138.90.206/32",
"40.114.182.45/32",
"51.138.90.161/32",
"20.50.49.0/32",
"40.119.232.215/32",
"104.43.55.167/32",
"40.119.232.251/32",
"40.119.232.50/32",
"40.119.232.146/32",
"40.119.232.218/32",
"104.43.54.127/32",
"104.43.55.117/32",
"104.43.55.116/32",
"104.43.55.166/32",
"52.154.169.50/32",
"52.154.171.70/32",
"52.154.170.229/32",
"52.154.170.113/32",
"52.154.171.44/32",
"52.154.172.2/32",
"52.143.244.81/32",
"52.154.171.87/32",
"52.154.171.250/32",
"52.154.170.28/32",
"52.154.170.122/32",
"52.143.243.117/32",
"52.143.247.235/32",
"52.154.171.235/32",
"52.154.171.196/32",
"52.154.171.0/32",
"52.154.170.243/32",
"52.154.170.26/32",
"52.154.169.200/32",
"52.154.170.96/32",
"52.154.170.88/32",
"52.154.171.150/32",
"52.154.171.205/32",
"52.154.170.117/32",
"52.154.170.209/32",
"191.235.202.48/32",
"191.233.3.202/32",
"191.235.201.214/32",
"191.233.3.197/32",
"191.235.202.38/32",
"20.53.78.144/32",
"20.193.24.10/32",
"20.53.78.236/32",
"20.53.78.138/32",
"20.53.78.123/32",
"20.53.78.106/32",
"20.193.27.215/32",
"20.193.25.197/32",
"20.193.12.126/32",
"20.193.24.251/32",
"20.204.242.101/32",
"20.207.72.113/32",
"20.204.242.19/32",
"20.219.45.67/32",
"20.207.72.11/32",
"20.219.45.190/32",
"20.204.243.55/32",
"20.204.241.148/32",
"20.207.72.110/32",
"20.204.240.172/32",
"20.207.72.21/32",
"20.204.246.81/32",
"20.207.107.181/32",
"20.204.246.254/32",
"20.219.43.246/32",
"52.149.25.43/32",
"52.149.61.51/32",
"52.149.58.139/32",
"52.149.60.38/32",
"52.148.165.38/32",
"52.143.95.162/32",
"52.149.56.151/32",
"52.149.30.45/32",
"52.149.58.173/32",
"52.143.95.204/32",
"52.149.28.83/32",
"52.149.58.69/32",
"52.148.161.87/32",
"52.149.58.27/32",
"52.149.28.18/32",
"20.79.226.26/32",
"20.79.239.66/32",
"20.79.238.198/32",
"20.113.14.159/32",
"20.75.144.152/32",
"20.43.172.120/32",
"20.53.134.160/32",
"20.201.15.208/32",
"20.93.28.24/32",
"20.61.34.40/32",
"52.242.224.168/32",
"20.80.129.80/32",
"20.195.108.47/32",
"4.195.133.120/32",
"4.228.76.163/32",
"4.182.131.108/32",
"4.209.224.56/32",
"108.141.83.74/32",
"4.213.46.14/32",
"172.169.17.165/32",
"51.8.71.117/32",
"20.3.1.178/32",
"52.149.56.151/32",
"52.149.30.45/32",
"52.149.58.173/32",
"52.143.95.204/32",
"52.149.28.83/32",
"52.149.58.69/32",
"52.148.161.87/32",
"52.149.58.27/32",
"52.149.28.18/32",
"20.79.226.26/32",
"20.79.239.66/32",
"20.79.238.198/32",
"20.113.14.159/32",
"20.75.144.152/32",
"20.43.172.120/32",
"20.53.134.160/32",
"20.201.15.208/32",
"20.93.28.24/32",
"20.61.34.40/32",
"52.242.224.168/32",
"20.80.129.80/32",
"20.195.108.47/32",
"4.195.133.120/32",
"4.228.76.163/32",
"4.182.131.108/32",
"4.209.224.56/32",
"108.141.83.74/32",
"4.213.46.14/32",
"172.169.17.165/32",
"51.8.71.117/32",
"20.3.1.178/32"
]

View File

@@ -1,263 +0,0 @@
- name: googlebot
user_agent_regex: \+http\://www\.google\.com/bot\.html
action: ALLOW
# https://developers.google.com/static/search/apis/ipranges/googlebot.json
remote_addresses: [
"2001:4860:4801:10::/64",
"2001:4860:4801:11::/64",
"2001:4860:4801:12::/64",
"2001:4860:4801:13::/64",
"2001:4860:4801:14::/64",
"2001:4860:4801:15::/64",
"2001:4860:4801:16::/64",
"2001:4860:4801:17::/64",
"2001:4860:4801:18::/64",
"2001:4860:4801:19::/64",
"2001:4860:4801:1a::/64",
"2001:4860:4801:1b::/64",
"2001:4860:4801:1c::/64",
"2001:4860:4801:1d::/64",
"2001:4860:4801:1e::/64",
"2001:4860:4801:1f::/64",
"2001:4860:4801:20::/64",
"2001:4860:4801:21::/64",
"2001:4860:4801:22::/64",
"2001:4860:4801:23::/64",
"2001:4860:4801:24::/64",
"2001:4860:4801:25::/64",
"2001:4860:4801:26::/64",
"2001:4860:4801:27::/64",
"2001:4860:4801:28::/64",
"2001:4860:4801:29::/64",
"2001:4860:4801:2::/64",
"2001:4860:4801:2a::/64",
"2001:4860:4801:2b::/64",
"2001:4860:4801:2c::/64",
"2001:4860:4801:2d::/64",
"2001:4860:4801:2e::/64",
"2001:4860:4801:2f::/64",
"2001:4860:4801:31::/64",
"2001:4860:4801:32::/64",
"2001:4860:4801:33::/64",
"2001:4860:4801:34::/64",
"2001:4860:4801:35::/64",
"2001:4860:4801:36::/64",
"2001:4860:4801:37::/64",
"2001:4860:4801:38::/64",
"2001:4860:4801:39::/64",
"2001:4860:4801:3a::/64",
"2001:4860:4801:3b::/64",
"2001:4860:4801:3c::/64",
"2001:4860:4801:3d::/64",
"2001:4860:4801:3e::/64",
"2001:4860:4801:40::/64",
"2001:4860:4801:41::/64",
"2001:4860:4801:42::/64",
"2001:4860:4801:43::/64",
"2001:4860:4801:44::/64",
"2001:4860:4801:45::/64",
"2001:4860:4801:46::/64",
"2001:4860:4801:47::/64",
"2001:4860:4801:48::/64",
"2001:4860:4801:49::/64",
"2001:4860:4801:4a::/64",
"2001:4860:4801:4b::/64",
"2001:4860:4801:4c::/64",
"2001:4860:4801:50::/64",
"2001:4860:4801:51::/64",
"2001:4860:4801:52::/64",
"2001:4860:4801:53::/64",
"2001:4860:4801:54::/64",
"2001:4860:4801:55::/64",
"2001:4860:4801:56::/64",
"2001:4860:4801:60::/64",
"2001:4860:4801:61::/64",
"2001:4860:4801:62::/64",
"2001:4860:4801:63::/64",
"2001:4860:4801:64::/64",
"2001:4860:4801:65::/64",
"2001:4860:4801:66::/64",
"2001:4860:4801:67::/64",
"2001:4860:4801:68::/64",
"2001:4860:4801:69::/64",
"2001:4860:4801:6a::/64",
"2001:4860:4801:6b::/64",
"2001:4860:4801:6c::/64",
"2001:4860:4801:6d::/64",
"2001:4860:4801:6e::/64",
"2001:4860:4801:6f::/64",
"2001:4860:4801:70::/64",
"2001:4860:4801:71::/64",
"2001:4860:4801:72::/64",
"2001:4860:4801:73::/64",
"2001:4860:4801:74::/64",
"2001:4860:4801:75::/64",
"2001:4860:4801:76::/64",
"2001:4860:4801:77::/64",
"2001:4860:4801:78::/64",
"2001:4860:4801:79::/64",
"2001:4860:4801:80::/64",
"2001:4860:4801:81::/64",
"2001:4860:4801:82::/64",
"2001:4860:4801:83::/64",
"2001:4860:4801:84::/64",
"2001:4860:4801:85::/64",
"2001:4860:4801:86::/64",
"2001:4860:4801:87::/64",
"2001:4860:4801:88::/64",
"2001:4860:4801:90::/64",
"2001:4860:4801:91::/64",
"2001:4860:4801:92::/64",
"2001:4860:4801:93::/64",
"2001:4860:4801:94::/64",
"2001:4860:4801:95::/64",
"2001:4860:4801:96::/64",
"2001:4860:4801:a0::/64",
"2001:4860:4801:a1::/64",
"2001:4860:4801:a2::/64",
"2001:4860:4801:a3::/64",
"2001:4860:4801:a4::/64",
"2001:4860:4801:a5::/64",
"2001:4860:4801:c::/64",
"2001:4860:4801:f::/64",
"192.178.5.0/27",
"192.178.6.0/27",
"192.178.6.128/27",
"192.178.6.160/27",
"192.178.6.192/27",
"192.178.6.32/27",
"192.178.6.64/27",
"192.178.6.96/27",
"34.100.182.96/28",
"34.101.50.144/28",
"34.118.254.0/28",
"34.118.66.0/28",
"34.126.178.96/28",
"34.146.150.144/28",
"34.147.110.144/28",
"34.151.74.144/28",
"34.152.50.64/28",
"34.154.114.144/28",
"34.155.98.32/28",
"34.165.18.176/28",
"34.175.160.64/28",
"34.176.130.16/28",
"34.22.85.0/27",
"34.64.82.64/28",
"34.65.242.112/28",
"34.80.50.80/28",
"34.88.194.0/28",
"34.89.10.80/28",
"34.89.198.80/28",
"34.96.162.48/28",
"35.247.243.240/28",
"66.249.64.0/27",
"66.249.64.128/27",
"66.249.64.160/27",
"66.249.64.224/27",
"66.249.64.32/27",
"66.249.64.64/27",
"66.249.64.96/27",
"66.249.65.0/27",
"66.249.65.128/27",
"66.249.65.160/27",
"66.249.65.192/27",
"66.249.65.224/27",
"66.249.65.32/27",
"66.249.65.64/27",
"66.249.65.96/27",
"66.249.66.0/27",
"66.249.66.128/27",
"66.249.66.160/27",
"66.249.66.192/27",
"66.249.66.224/27",
"66.249.66.32/27",
"66.249.66.64/27",
"66.249.66.96/27",
"66.249.68.0/27",
"66.249.68.128/27",
"66.249.68.32/27",
"66.249.68.64/27",
"66.249.68.96/27",
"66.249.69.0/27",
"66.249.69.128/27",
"66.249.69.160/27",
"66.249.69.192/27",
"66.249.69.224/27",
"66.249.69.32/27",
"66.249.69.64/27",
"66.249.69.96/27",
"66.249.70.0/27",
"66.249.70.128/27",
"66.249.70.160/27",
"66.249.70.192/27",
"66.249.70.224/27",
"66.249.70.32/27",
"66.249.70.64/27",
"66.249.70.96/27",
"66.249.71.0/27",
"66.249.71.128/27",
"66.249.71.160/27",
"66.249.71.192/27",
"66.249.71.224/27",
"66.249.71.32/27",
"66.249.71.64/27",
"66.249.71.96/27",
"66.249.72.0/27",
"66.249.72.128/27",
"66.249.72.160/27",
"66.249.72.192/27",
"66.249.72.224/27",
"66.249.72.32/27",
"66.249.72.64/27",
"66.249.72.96/27",
"66.249.73.0/27",
"66.249.73.128/27",
"66.249.73.160/27",
"66.249.73.192/27",
"66.249.73.224/27",
"66.249.73.32/27",
"66.249.73.64/27",
"66.249.73.96/27",
"66.249.74.0/27",
"66.249.74.128/27",
"66.249.74.160/27",
"66.249.74.192/27",
"66.249.74.32/27",
"66.249.74.64/27",
"66.249.74.96/27",
"66.249.75.0/27",
"66.249.75.128/27",
"66.249.75.160/27",
"66.249.75.192/27",
"66.249.75.224/27",
"66.249.75.32/27",
"66.249.75.64/27",
"66.249.75.96/27",
"66.249.76.0/27",
"66.249.76.128/27",
"66.249.76.160/27",
"66.249.76.192/27",
"66.249.76.224/27",
"66.249.76.32/27",
"66.249.76.64/27",
"66.249.76.96/27",
"66.249.77.0/27",
"66.249.77.128/27",
"66.249.77.160/27",
"66.249.77.192/27",
"66.249.77.224/27",
"66.249.77.32/27",
"66.249.77.64/27",
"66.249.77.96/27",
"66.249.78.0/27",
"66.249.78.32/27",
"66.249.79.0/27",
"66.249.79.128/27",
"66.249.79.160/27",
"66.249.79.192/27",
"66.249.79.224/27",
"66.249.79.32/27",
"66.249.79.64/27",
"66.249.79.96/27"
]

View File

@@ -1,8 +0,0 @@
- name: internet-archive
action: ALLOW
# https://ipinfo.io/AS7941
remote_addresses: [
"207.241.224.0/20",
"208.70.24.0/21",
"2620:0:9c0::/48"
]

View File

@@ -1,10 +0,0 @@
- name: kagibot
user_agent_regex: \+https\://kagi\.com/bot
action: ALLOW
# https://kagi.com/bot
remote_addresses: [
"216.18.205.234/32",
"35.212.27.76/32",
"104.254.65.50/32",
"209.151.156.194/32"
]

View File

@@ -1,11 +0,0 @@
- name: marginalia
user_agent_regex: search\.marginalia\.nu
action: ALLOW
# Received directly over email
remote_addresses: [
"193.183.0.162/31",
"193.183.0.164/30",
"193.183.0.168/30",
"193.183.0.172/31",
"193.183.0.174/32"
]

View File

@@ -1,5 +0,0 @@
- name: mojeekbot
user_agent_regex: \+https\://www\.mojeek\.com/bot\.html
action: ALLOW
# https://www.mojeek.com/bot.html
remote_addresses: [ "5.102.173.71/32" ]

View File

@@ -1,5 +0,0 @@
- name: qwantbot
user_agent_regex: \+https\://help\.qwant\.com/bot/
action: ALLOW
# https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
remote_addresses: [ "91.242.162.0/24" ]

View File

@@ -1,8 +0,0 @@
package data
import "embed"
var (
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers
BotPolicies embed.FS
)

View File

@@ -1,60 +0,0 @@
package decaymap
import (
"testing"
"time"
)
func TestImpl(t *testing.T) {
dm := New[string, string]()
dm.Set("test", "hi", 5*time.Minute)
val, ok := dm.Get("test")
if !ok {
t.Error("somehow the test key was not set")
}
if val != "hi" {
t.Errorf("wanted value %q, got: %q", "hi", val)
}
ok = dm.expire("test")
if !ok {
t.Error("somehow could not force-expire the test key")
}
_, ok = dm.Get("test")
if ok {
t.Error("got value even though it was supposed to be expired")
}
}
func TestCleanup(t *testing.T) {
dm := New[string, string]()
dm.Set("test1", "hi1", 1*time.Second)
dm.Set("test2", "hi2", 2*time.Second)
dm.Set("test3", "hi3", 3*time.Second)
dm.expire("test1") // Force expire test1
dm.expire("test2") // Force expire test2
dm.Cleanup()
finalLen := dm.Len() // Get the length after cleanup
if finalLen != 1 { // "test3" should be the only one left
t.Errorf("Cleanup failed to remove expired entries. Expected length 1, got %d", finalLen)
}
if _, ok := dm.Get("test1"); ok { // Verify Get still behaves correctly after Cleanup
t.Error("test1 should not be found after cleanup")
}
if _, ok := dm.Get("test2"); ok {
t.Error("test2 should not be found after cleanup")
}
if val, ok := dm.Get("test3"); !ok || val != "hi3" {
t.Error("test3 should still be found after cleanup")
}
}

8
doc.go Normal file
View File

@@ -0,0 +1,8 @@
// Package Anubis contains the version number of Anubis.
package anubis
// Version is the current version of Anubis.
//
// This variable is set at build time using the -X linker flag. If not set,
// it defaults to "devel".
var Version = "devel"

View File

@@ -0,0 +1,12 @@
---
slug: first-blog-post
title: First Blog Post
authors: [slorber, yangshun]
tags: [hola, docusaurus]
---
Lorem ipsum dolor sit amet...
<!-- truncate -->
...consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

View File

@@ -0,0 +1,44 @@
---
slug: long-blog-post
title: Long Blog Post
authors: yangshun
tags: [hello, docusaurus]
---
This is the summary of a very long blog post,
Use a `<!--` `truncate` `-->` comment to limit blog post size in the list view.
<!-- truncate -->
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

View File

@@ -0,0 +1,24 @@
---
slug: mdx-blog-post
title: MDX Blog Post
authors: [slorber]
tags: [docusaurus]
---
Blog posts support [Docusaurus Markdown features](https://docusaurus.io/docs/markdown-features), such as [MDX](https://mdxjs.com/).
:::tip
Use the power of React to create interactive blog posts.
:::
{/* truncate */}
For example, use JSX to create an interactive button:
```js
<button onClick={() => alert('button clicked!')}>Click me!</button>
```
<button onClick={() => alert('button clicked!')}>Click me!</button>

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

View File

@@ -0,0 +1,29 @@
---
slug: welcome
title: Welcome
authors: [slorber, yangshun]
tags: [facebook, hello, docusaurus]
---
[Docusaurus blogging features](https://docusaurus.io/docs/blog) are powered by the [blog plugin](https://docusaurus.io/docs/api/plugins/@docusaurus/plugin-content-blog).
Here are a few tips you might find useful.
<!-- truncate -->
Simply add Markdown files (or folders) to the `blog` directory.
Regular blog authors can be added to `authors.yml`.
The blog post date can be extracted from filenames, such as:
- `2019-05-30-welcome.md`
- `2019-05-30-welcome/index.md`
A blog post folder can be convenient to co-locate blog post images:
![Docusaurus Plushie](./docusaurus-plushie-banner.jpeg)
The blog supports tags as well!
**And if you don't want a blog**: just delete this directory, and use `blog: false` in your Docusaurus config.

23
docs/blog/authors.yml Normal file
View File

@@ -0,0 +1,23 @@
yangshun:
name: Yangshun Tay
title: Front End Engineer @ Facebook
url: https://github.com/yangshun
image_url: https://github.com/yangshun.png
page: true
socials:
x: yangshunz
github: yangshun
slorber:
name: Sébastien Lorber
title: Docusaurus maintainer
url: https://sebastienlorber.com
image_url: https://github.com/slorber.png
page:
# customize the url of the author page at /blog/authors/<permalink>
permalink: '/all-sebastien-lorber-articles'
socials:
x: sebastienlorber
linkedin: sebastienlorber
github: slorber
newsletter: https://thisweekinreact.com

19
docs/blog/tags.yml Normal file
View File

@@ -0,0 +1,19 @@
facebook:
label: Facebook
permalink: /facebook
description: Facebook tag description
hello:
label: Hello
permalink: /hello
description: Hello tag description
docusaurus:
label: Docusaurus
permalink: /docusaurus
description: Docusaurus tag description
hola:
label: Hola
permalink: /hola
description: Hola tag description

View File

@@ -11,187 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## v1.18.0: Varis zos Galvus
The big ticket feature in this release is [CEL expression matching support](https://anubis.techaro.lol/docs/admin/configuration/expressions). This allows you to tailor your approach for the individual services you are protecting.
These can be as simple as:
```yaml
- name: allow-api-requests
action: ALLOW
expression:
all:
- '"Accept" in headers'
- 'headers["Accept"] == "application/json"'
- 'path.startsWith("/api/")'
```
Or as complicated as:
```yaml
- name: allow-git-clients
action: ALLOW
expression:
all:
- >-
(
userAgent.startsWith("git/") ||
userAgent.contains("libgit") ||
userAgent.startsWith("go-git") ||
userAgent.startsWith("JGit/") ||
userAgent.startsWith("JGit-")
)
- '"Git-Protocol" in headers'
- headers["Git-Protocol"] == "version=2"
```
The docs have more information, but here's a tl;dr of the variables you have access to in expressions:
| Name | Type | Explanation | Example |
| :-------------- | :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- |
| `headers` | `map[string, string]` | The [headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers) of the request being processed. | `{"User-Agent": "Mozilla/5.0 Gecko/20100101 Firefox/137.0"}` |
| `host` | `string` | The [HTTP hostname](https://web.dev/articles/url-parts#host) the request is targeted to. | `anubis.techaro.lol` |
| `method` | `string` | The [HTTP method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Methods) in the request being processed. | `GET`, `POST`, `DELETE`, etc. |
| `path` | `string` | The [path](https://web.dev/articles/url-parts#pathname) of the request being processed. | `/`, `/api/memes/create` |
| `query` | `map[string, string]` | The [query parameters](https://web.dev/articles/url-parts#query) of the request being processed. | `?foo=bar` -> `{"foo": "bar"}` |
| `remoteAddress` | `string` | The IP address of the client. | `1.1.1.1` |
| `userAgent` | `string` | The [`User-Agent`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) string in the request being processed. | `Mozilla/5.0 Gecko/20100101 Firefox/137.0` |
This will be made more elaborate in the future. Give me time. This is a [simple, lovable, and complete](https://longform.asmartbear.com/slc/) implementation of this feature so that administrators can get hacking ASAP.
Other changes:
- Use CSS variables to deduplicate styles
- Fixed native packages not containing the stdlib and botPolicies.yaml
- Change import syntax to allow multi-level imports
- Changed the startup logging to use JSON formatting as all the other logs do.
- Added the ability to do [expression matching with CEL](./admin/configuration/expressions.mdx)
- Add a warning for clients that don't store cookies
- Disable Open Graph passthrough by default ([#435](https://github.com/TecharoHQ/anubis/issues/435))
- Clarify the license of the mascot images ([#442](https://github.com/TecharoHQ/anubis/issues/442))
- Started Suppressing 'Context canceled' errors from http in the logs ([#446](https://github.com/TecharoHQ/anubis/issues/446))
## v1.17.1: Asahi sas Brutus: Echo 1
- Added customization of authorization cookie expiration time with `--cookie-expiration-time` flag or envvar
- Updated the `OG_PASSTHROUGH` to be true by default, thereby allowing Open Graph tags to be passed through by default
- Added the ability to [customize Anubis' HTTP status codes](./admin/configuration/custom-status-codes.mdx) ([#355](https://github.com/TecharoHQ/anubis/issues/355))
## v1.17.0: Asahi sas Brutus
- Ensure regexes can't end in newlines ([#372](https://github.com/TecharoHQ/anubis/issues/372))
- Add documentation for default allow behavior (implicit rule)
- Enable [importing configuration snippets](./admin/configuration/import.mdx) ([#321](https://github.com/TecharoHQ/anubis/pull/321))
- Refactor check logic to be more generic and work on a Checker type
- Add more AI user agents based on the [ai.robots.txt](https://github.com/ai-robots-txt/ai.robots.txt) project
- Embedded challenge data in initial HTML response to improve performance
- Added support to use Nginx' `auth_request` directive with Anubis
- Added support to allow to restrict the allowed redirect domains
- Whitelisted [DuckDuckBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/) in botPolicies
- Improvements to build scripts to make them less independent of the build host
- Improved the Open Graph error logging
- Added `Opera` to the `generic-browser` bot policy rule
- Added FreeBSD rc.d script so can be run as a FreeBSD daemon
- Allow requests from the Internet Archive
- Added example nginx configuration to documentation
- Added example Apache configuration to the documentation [#277](https://github.com/TecharoHQ/anubis/issues/277)
- Move per-environment configuration details into their own pages
- Added support for running anubis behind a prefix (e.g. `/myapp`)
- Added headers support to bot policy rules
- Moved configuration file from JSON to YAML by default
- Added documentation on how to use Anubis with Traefik in Docker
- Improved error handling in some edge cases
- Disable `generic-bot-catchall` rule because of its high false positive rate in real-world scenarios
- Moved all CSS inline to the Xess package, changed colors to be CSS variables
- Set or append to `X-Forwarded-For` header unless the remote connects over a loopback address [#328](https://github.com/TecharoHQ/anubis/issues/328)
- Fixed mojeekbot user agent regex
- Added support for running anubis behind a base path (e.g. `/myapp`)
- Reduce Anubis' paranoia with user cookies ([#365](https://github.com/TecharoHQ/anubis/pull/365))
- Added support for Open Graph passthrough while using unix sockets
- The Open Graph subsystem now passes the HTTP `HOST` header through to the origin
- Updated the `OG_PASSTHROUGH` to be true by default, thereby allowing Open Graph tags to be passed through by default
## v1.16.0
Fordola rem Lupis
> I want to make them pay! All of them! Everyone who ever mocked or looked down on me -- I want the power to make them pay!
The following features are the "big ticket" items:
- Added support for native Debian, Red Hat, and tarball packaging strategies including installation and use directions
- A prebaked tarball has been added, allowing distros to build Anubis like they could in v1.15.x
- The placeholder Anubis mascot has been replaced with a design by [CELPHASE](https://bsky.app/profile/celphase.bsky.social)
- Verification page now shows hash rate and a progress bar for completion probability
- Added support for [Open Graph tags](https://ogp.me/) when rendering the challenge page. This allows for social previews to be generated when sharing the challenge page on social media platforms ([#195](https://github.com/TecharoHQ/anubis/pull/195))
- Added support for passing the ed25519 signing key in a file with `-ed25519-private-key-hex-file` or `ED25519_PRIVATE_KEY_HEX_FILE`
The other small fixes have been made:
- Added a periodic cleanup routine for the decaymap that removes expired entries, ensuring stale data is properly pruned
- Added a no-store Cache-Control header to the challenge page
- Hide the directory listings for Anubis' internal static content
- Changed `--debug-x-real-ip-default` to `--use-remote-address`, getting the IP address from the request's socket address instead
- DroneBL lookups have been disabled by default
- Static asset builds are now done on demand instead of the results being committed to source control
- The Dockerfile has been removed as it is no longer in use
- Developer documentation has been added to the docs site
- Show more errors when some predictable challenge page errors happen ([#150](https://github.com/TecharoHQ/anubis/issues/150))
- Added the `--debug-benchmark-js` flag for testing proof-of-work performance during development
- Use `TrimSuffix` instead of `TrimRight` on containerbuild
- Fix the startup logs to correctly show the address and port the server is listening on
- Add [LibreJS](https://www.gnu.org/software/librejs/) banner to Anubis JavaScript to allow LibreJS users to run the challenge
- Added a wait with button continue + 30 second auto continue after 30s if you click "Why am I seeing this?"
- Fixed a typo in the challenge page title
- Disabled running integration tests on Windows hosts due to it's reliance on posix features (see [#133](https://github.com/TecharoHQ/anubis/pull/133#issuecomment-2764732309))
- Fixed minor typos
- Added a Makefile to enable comfortable workflows for downstream packagers
- Added `zizmor` for GitHub Actions static analysis
- Fixed most `zizmor` findings
- Enabled Dependabot
- Added an air config for autoreload support in development ([#195](https://github.com/TecharoHQ/anubis/pull/195))
- Added an `--extract-resources` flag to extract static resources to a local folder
- Add noindex flag to all Anubis pages ([#227](https://github.com/TecharoHQ/anubis/issues/227))
- Added `WEBMASTER_EMAIL` variable, if it is present then display that email address on error pages ([#235](https://github.com/TecharoHQ/anubis/pull/235), [#115](https://github.com/TecharoHQ/anubis/issues/115))
- Hash pinned all GitHub Actions
## v1.15.1
Zenos yae Galvus: Echo 1
Fixes a recurrence of [CVE-2025-24369](https://github.com/Xe/x/security/advisories/GHSA-56w8-8ppj-2p4f)
due to an incorrect logic change in a refactor. This allows an attacker to mint a valid
access token by passing any SHA-256 hash instead of one that matches the proof-of-work
test.
This case has been added as a regression test. It was not when CVE-2025-24369 was released
due to the project not having the maturity required to enable this kind of regression testing.
## v1.15.0
Zenos yae Galvus
> Yes...the coming days promise to be most interesting. Most interesting.
Headline changes:
- ed25519 signing keys for Anubis can be stored in the flag `--ed25519-private-key-hex` or envvar `ED25519_PRIVATE_KEY_HEX`; if one is not provided when Anubis starts, a new one is generated and logged
- Add the ability to set the cookie domain with the envvar `COOKIE_DOMAIN=techaro.lol` for all domains under `techaro.lol`
- Add the ability to set the cookie partitioned flag with the envvar `COOKIE_PARTITIONED=true`
Many other small changes were made, including but not limited to:
- Fixed and clarified installation instructions
- Introduced integration tests using Playwright
- Refactor & Split up Anubis into cmd and lib.go
- Fixed bot check to only apply if address range matches
- Fix default difficulty setting that was broken in a refactor
- Linting fixes
- Make dark mode diff lines readable in the documentation
- Fix CI based browser smoke test
Users running Anubis' test suite may run into issues with the integration tests on Windows hosts. This is a known issue and will be fixed at some point in the future. In the meantime, use the Windows Subsystem for Linux (WSL).
## v1.14.2
@@ -237,7 +57,7 @@ Livia sas Junius
[#21](https://github.com/TecharoHQ/anubis/pull/21)
- Don't overflow the image when browser windows are small (eg. on phones)
[#27](https://github.com/TecharoHQ/anubis/pull/27)
- Lower the default difficulty to 5 from 4
- Lower the default difficulty to 4 from 5
- Don't duplicate work across multiple threads [#36](https://github.com/TecharoHQ/anubis/pull/36)
- Documentation has been moved to https://anubis.techaro.lol/ with sources in docs/
- Removed several visible AI artifacts (e.g., 6 fingers) [#37](https://github.com/TecharoHQ/anubis/pull/37)
@@ -280,4 +100,4 @@ Livia sas Junius
([fd6903a](https://github.com/TecharoHQ/anubis/commit/fd6903aeed315b8fddee32890d7458a9271e4798)).
- Footer links on the check page now point to Techaro's brand
([4ebccb1](https://github.com/TecharoHQ/anubis/commit/4ebccb197ec20d024328d7f92cad39bbbe4d6359))
- Anubis was imported from [Xe/x](https://github.com/Xe/x)
- Anubis was imported from [Xe/x](https://github.com/Xe/x).

View File

@@ -1,8 +0,0 @@
{
"label": "Configuration",
"position": 10,
"link": {
"type": "generated-index",
"description": "Detailed information about configuring parts of Anubis."
}
}

View File

@@ -1,19 +0,0 @@
# Custom status codes for Anubis errors
Out of the box, Anubis will reply with `HTTP 200` for challenge and denial pages. This is intended to make AI scrapers have a hard time with your website because when they are faced with a non-200 response, they will hammer the page over and over until they get a 200 response. This behavior may not be desirable, as such Anubis lets you customize what HTTP status codes are returned when Anubis throws challenge and denial pages.
This is configured in the `status_codes` block of your [bot policy file](../policies.mdx):
```yaml
status_codes:
CHALLENGE: 200
DENY: 200
```
To match CloudFlare's behavior, use a configuration like this:
```yaml
status_codes:
CHALLENGE: 403
DENY: 403
```

View File

@@ -1,150 +0,0 @@
# Expression-based rule matching
Most of the Anubis matchers let you match individual parts of a request and only those parts in isolation. In order to defend a service in depth, you often need the ability to match against multiple aspects of a request. Anubis implements [Common Expression Language (CEL)](https://cel.dev) to let administrators define these more advanced rules. This allows you to tailor your approach for the individual services you are protecting.
As an example, here is a rule that lets you allow JSON API requests through Anubis:
```yaml
- name: allow-api-requests
action: ALLOW
expression:
all:
- '"Accept" in headers'
- 'headers["Accept"] == "application/json"'
- 'path.startsWith("/api/")'
```
This is an advanced feature and as such it is easy to get yourself in trouble with it. Use this with care.
## Common Expression Language (CEL)
CEL is an expression language made by Google as a part of their access control lists system. As programs grow more complicated and users have the need to express more complicated security requirements, they often want the ability to just run a small bit of code to check things for themselves. CEL expressions are built for this. They are implicitly sandboxed so that they cannot affect the system they are running in and also designed to evaluate as fast as humanly possible.
Imagine a CEL expression as the contents of an `if` statement in JavaScript or the `WHERE` clause in SQL. Consider this example expression:
```python
userAgent == ""
```
This is roughly equivalent to the following in JavaScript:
```js
if (userAgent == "") {
// Do something
}
```
Using these expressions, you can define more elaborate rules as facts and circumstances demand. For more information about the syntax and grammar of CEL, take a look at [the language specification](https://github.com/google/cel-spec/blob/master/doc/langdef.md).
## How Anubis uses CEL
Anubis uses CEL to let administrators create complicated filter rules. Anubis has several modes of using CEL:
- Validating requests against single expressions
- Validating multiple expressions and ensuring at least one of them are true (`any`)
- Validating multiple expressions and ensuring all of them are true (`all`)
The common pattern is that every Anubis expression returns `true`, `false`, or raises an error.
### Single expressions
A single expression that returns either `true` or `false`. If the expression returns `true`, then the action specified in the rule will be taken. If it returns `false`, Anubis will move on to the next rule.
For example, consider this rule:
```yaml
- name: no-user-agent-string
action: DENY
expression: userAgent == ""
```
For this rule, if a request comes in without a [`User-Agent` string](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) set, Anubis will deny the request and return an error page.
### `any` blocks
An `any` block that contains a list of expressions. If any expression in the list returns `true`, then the action specified in the rule will be taken. If all expressions in that list return `false`, Anubis will move on to the next rule.
For example, consider this rule:
```yaml
- name: known-banned-user
action: DENY
expression:
any:
- remoteAddress == "8.8.8.8"
- remoteAddress == "1.1.1.1"
```
For this rule, if a request comes in from `8.8.8.8` or `1.1.1.1`, Anubis will deny the request and return an error page.
#### `all` blocks
An `all` block that contains a list of expressions. If all expressions in the list return `true`, then the action specified in the rule will be taken. If any of the expressions in the list returns `false`, Anubis will move on to the next rule.
For example, consider this rule:
```yaml
- name: go-get
action: ALLOW
expression:
all:
- userAgent.startsWith("Go-http-client/")
- '"go-get" in query'
- query["go-get"] == "1"
```
For this rule, if a request comes in matching [the signature of the `go get` command](https://pkg.go.dev/cmd/go#hdr-Remote_import_paths), Anubis will allow it through to the target.
## Variables exposed to Anubis expressions
Anubis exposes the following variables to expressions:
| Name | Type | Explanation | Example |
| :-------------- | :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- |
| `headers` | `map[string, string]` | The [headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers) of the request being processed. | `{"User-Agent": "Mozilla/5.0 Gecko/20100101 Firefox/137.0"}` |
| `host` | `string` | The [HTTP hostname](https://web.dev/articles/url-parts#host) the request is targeted to. | `anubis.techaro.lol` |
| `method` | `string` | The [HTTP method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Methods) in the request being processed. | `GET`, `POST`, `DELETE`, etc. |
| `path` | `string` | The [path](https://web.dev/articles/url-parts#pathname) of the request being processed. | `/`, `/api/memes/create` |
| `query` | `map[string, string]` | The [query parameters](https://web.dev/articles/url-parts#query) of the request being processed. | `?foo=bar` -> `{"foo": "bar"}` |
| `remoteAddress` | `string` | The IP address of the client. | `1.1.1.1` |
| `userAgent` | `string` | The [`User-Agent`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) string in the request being processed. | `Mozilla/5.0 Gecko/20100101 Firefox/137.0` |
Of note: in many languages when you look up a key in a map and there is nothing there, the language will return some "falsy" value like `undefined` in JavaScript, `None` in Python, or the zero value of the type in Go. In CEL, if you try to look up a value that does not exist, execution of the expression will fail and Anubis will return an error.
In order to avoid this, make sure the header or query parameter you are testing is present in the request with an `all` block like this:
```yaml
- name: challenge-wiki-history-page
action: CHALLENGE
all:
- 'path == "/index.php"'
- '"title" in query'
- '"action" in query'
- 'query["action"] == "history"
```
This rule throws a challenge if and only if all of the following conditions are true:
- The URL path is `/index.php`
- The URL query string contains a `title` value
- The URL query string contains an `action` value
- The URL query string's `action` value is `"history"`
So given an HTTP request like this:
```text
GET /index.php?title=Index&action=history HTTP/1.1
User-Agent: Mozilla/5.0 Gecko/20100101 Firefox/137.0
Host: wiki.int.techaro.lol
X-Real-Ip: 8.8.8.8
```
Anubis would return a challenge because all of those conditions are true.
## Functions exposed to Anubis expressions
There are currently no functions from the Anubis runtime exposed to expressions. This will change in the future.
## Life advice
Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this.

Some files were not shown because too many files have changed in this diff Show More