mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-07 17:28:17 +00:00
Compare commits
7 Commits
v1.14.1
...
Xe/allow-i
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffa67fc46a | ||
|
|
1509b06cb9 | ||
|
|
56cdb2e51b | ||
|
|
15d801be7d | ||
|
|
c66305904b | ||
|
|
5f7942faca | ||
|
|
869e46a4cc |
44
.github/workflows/docker-pr.yml
vendored
Normal file
44
.github/workflows/docker-pr.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Docker image builds (pull requests)
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.24.x'
|
||||
|
||||
- uses: ko-build/setup-ko@v0.8
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
run: |
|
||||
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
|
||||
env:
|
||||
PULL_REQUEST_ID: ${{ github.event.number }}
|
||||
|
||||
- run: |
|
||||
echo "Test this with:"
|
||||
echo "docker pull ${{ steps.build.outputs.docker_image }}"
|
||||
22
.github/workflows/docker.yml
vendored
22
.github/workflows/docker.yml
vendored
@@ -5,8 +5,6 @@ on:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
tags: [ "v*" ]
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -28,12 +26,6 @@ jobs:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.24.x'
|
||||
@@ -57,20 +49,6 @@ jobs:
|
||||
id: build
|
||||
run: |
|
||||
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
|
||||
env:
|
||||
PULL_REQUEST_ID: ${{ github.event.number }}
|
||||
|
||||
# - name: "Comment about where to test this"
|
||||
# uses: thollander/actions-comment-pull-request@v3
|
||||
# if: ${{github.event_name == 'pull_request'}}
|
||||
# with:
|
||||
# message: |
|
||||
# You can try this PR out by using the following docker image:
|
||||
|
||||
# ```
|
||||
# ${{ steps.build.outputs.docker_image }}
|
||||
# ```
|
||||
# comment-tag: ${{ steps.build.outputs.docker_image }}
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@v2
|
||||
|
||||
@@ -6,8 +6,19 @@
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"_comment": "This is based on the BGP routes advertised by AS7941",
|
||||
"name": "internet-archive",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"207.241.224.0/20",
|
||||
"208.70.24.0/21",
|
||||
"2620:0:9c0::/48"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://developers.google.com/static/search/apis/ipranges/googlebot.json",
|
||||
"name": "googlebot",
|
||||
"user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html",
|
||||
"user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"2001:4860:4801:10::/64",
|
||||
@@ -270,8 +281,9 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://www.bing.com/toolbox/bingbot.json",
|
||||
"name": "bingbot",
|
||||
"user_agent_regex": "\\+http\\:\\/\\/www\\.bing\\.com/bingbot\\.htm",
|
||||
"user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"157.55.39.0/24",
|
||||
@@ -305,16 +317,18 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json",
|
||||
"name": "qwantbot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
|
||||
"user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"91.242.162.0/24"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://kagi.com/bot",
|
||||
"name": "kagibot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/kagi\\.com/bot",
|
||||
"user_agent_regex": "\\+https\\://kagi\\.com/bot",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"216.18.205.234/32",
|
||||
@@ -324,6 +338,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Received over email from marginalia operator",
|
||||
"name": "marginalia",
|
||||
"user_agent_regex": "search\\.marginalia\\.nu",
|
||||
"action": "ALLOW",
|
||||
@@ -335,9 +350,18 @@
|
||||
"193.183.0.174/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://www.mojeek.com/bot.html and manual admin confirmation in a GitHub thread: https://github.com/TecharoHQ/anubis/issues/47#issuecomment-2743815019",
|
||||
"name": "mojeekbot",
|
||||
"user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"5.102.173.71/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "us-artificial-intelligence-scraper",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/github\\.com\\/US-Artificial-Intelligence\\/scraper",
|
||||
"user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
@@ -355,11 +379,6 @@
|
||||
"path_regex": "^/robots.txt$",
|
||||
"action": "ALLOW"
|
||||
},
|
||||
{
|
||||
"name": "rss-readers",
|
||||
"path_regex": ".*\\.(rss|xml|atom|json)$",
|
||||
"action": "ALLOW"
|
||||
},
|
||||
{
|
||||
"name": "lightpanda",
|
||||
"user_agent_regex": "^Lightpanda/.*$",
|
||||
@@ -367,12 +386,7 @@
|
||||
},
|
||||
{
|
||||
"name": "headless-chrome",
|
||||
"user_agent_regex": "HeadlessChrome",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"name": "headless-chromium",
|
||||
"user_agent_regex": "HeadlessChromium",
|
||||
"user_agent_regex": "(?i:headlesschrom(e|ium))",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -23,21 +23,8 @@ var (
|
||||
githubEventName = flag.String("github-event-name", "", "GitHub event name")
|
||||
pullRequestID = flag.Int("pull-request-id", -1, "GitHub pull request ID")
|
||||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
|
||||
knownContributors = []string{
|
||||
"Xe",
|
||||
}
|
||||
)
|
||||
|
||||
func inList(needle string, haystack []string) bool {
|
||||
for _, h := range haystack {
|
||||
if h == needle {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func main() {
|
||||
flagenv.Parse()
|
||||
flag.Parse()
|
||||
@@ -46,11 +33,7 @@ func main() {
|
||||
|
||||
koDockerRepo := strings.TrimRight(*dockerRepo, "/"+filepath.Base(*dockerRepo))
|
||||
|
||||
if *githubEventName == "pull_request" && !inList(*githubActor, knownContributors) {
|
||||
if *pullRequestID == -1 {
|
||||
log.Fatal("Must set --pull-request-id when --github-event-name=pull_request")
|
||||
}
|
||||
|
||||
if *githubEventName == "pull_request" && *pullRequestID != -1 {
|
||||
*dockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis", *pullRequestID)
|
||||
*dockerTags = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis:24h", *pullRequestID)
|
||||
koDockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d", *pullRequestID)
|
||||
|
||||
@@ -11,6 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
- Fixed and clarified installation instructions
|
||||
|
||||
## v1.14.2
|
||||
|
||||
Livia sas Junius: Echo 2
|
||||
|
||||
- Remove default RSS reader rule as it may allow for a targeted attack against rails apps
|
||||
[#67](https://github.com/TecharoHQ/anubis/pull/67)
|
||||
- Whitelist MojeekBot in botPolicies [#47](https://github.com/TecharoHQ/anubis/issues/47)
|
||||
- botPolicies regex has been cleaned up [#66](https://github.com/TecharoHQ/anubis/pull/66)
|
||||
|
||||
## v1.14.1
|
||||
|
||||
Livia sas Junius: Echo 1
|
||||
|
||||
@@ -29,7 +29,7 @@ Anubis uses these environment variables for configuration:
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
| `SOCKET_MODE` | `0770` | *Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`.* The socket mode (permissions) for Unix domain sockets. |
|
||||
| `POLICY_FNAME` | `/data/cfg/botPolicy.json` | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. |
|
||||
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
|
||||
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
|
||||
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
|
||||
|
||||
@@ -47,13 +47,15 @@ services:
|
||||
METRICS_BIND: ":9090"
|
||||
SERVE_ROBOTS_TXT: "true"
|
||||
TARGET: "http://nginx"
|
||||
POLICY_FNAME: "/data/cfg/botPolicy.json"
|
||||
ports:
|
||||
- 8080:8080
|
||||
volumes:
|
||||
- "./botPolicy.json:/data/cfg/botPolicy.json:ro"
|
||||
nginx:
|
||||
image: nginx
|
||||
volumes:
|
||||
- "./www:/usr/share/nginx/html"
|
||||
- "./botPolicy.json:/data/cfg/botPolicy.json"
|
||||
```
|
||||
|
||||
## Kubernetes
|
||||
|
||||
4
go.mod
4
go.mod
@@ -5,8 +5,9 @@ go 1.24.1
|
||||
require (
|
||||
github.com/a-h/templ v0.3.833
|
||||
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2
|
||||
github.com/prometheus/client_golang v1.21.1
|
||||
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
|
||||
github.com/yl2chen/cidranger v1.0.2
|
||||
)
|
||||
|
||||
@@ -34,7 +35,6 @@ require (
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.62.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a // indirect
|
||||
golang.org/x/mod v0.24.0 // indirect
|
||||
golang.org/x/net v0.37.0 // indirect
|
||||
golang.org/x/sync v0.12.0 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@@ -31,8 +31,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
|
||||
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
|
||||
|
||||
Reference in New Issue
Block a user