Compare commits

...

3 Commits

Author SHA1 Message Date
Xe Iaso
ffa67fc46a cmd/anubis: allow Internet Archive by default
This is based on the IP ranges advertised by AS7941

Also adds comments about the other IP rangesets and where they come
from.

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-03-22 15:00:38 -04:00
Dennis ten Hoove
1509b06cb9 Cleanup regex (#66)
* Cleanup regex

We were going overkill on the escape characters

* Update docs/docs/CHANGELOG.md

Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Dennis ten Hoove <36002865+dennis1248@users.noreply.github.com>

---------

Signed-off-by: Dennis ten Hoove <36002865+dennis1248@users.noreply.github.com>
Co-authored-by: Xe Iaso <me@xeiaso.net>
2025-03-22 13:52:14 -04:00
Xe Iaso
56cdb2e51b Fix docker image CI for pull requests (#84)
Closes #65

Pull request images will now be `ttl.sh/techaro/pr-{number}/anubis:24h`.
2025-03-22 11:26:49 -04:00
5 changed files with 69 additions and 52 deletions

44
.github/workflows/docker-pr.yml vendored Normal file
View File

@@ -0,0 +1,44 @@
name: Docker image builds (pull requests)
on:
pull_request:
branches: [ "main" ]
env:
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-tags: true
fetch-depth: 0
- uses: actions/setup-go@v5
with:
go-version: '1.24.x'
- uses: ko-build/setup-ko@v0.8
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/techarohq/anubis
- name: Build and push
id: build
run: |
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
env:
PULL_REQUEST_ID: ${{ github.event.number }}
- run: |
echo "Test this with:"
echo "docker pull ${{ steps.build.outputs.docker_image }}"

View File

@@ -5,8 +5,6 @@ on:
push:
branches: [ "main" ]
tags: [ "v*" ]
pull_request:
branches: [ "main" ]
env:
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
@@ -28,12 +26,6 @@ jobs:
fetch-tags: true
fetch-depth: 0
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- uses: actions/setup-go@v5
with:
go-version: '1.24.x'
@@ -57,20 +49,6 @@ jobs:
id: build
run: |
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
env:
PULL_REQUEST_ID: ${{ github.event.number }}
# - name: "Comment about where to test this"
# uses: thollander/actions-comment-pull-request@v3
# if: ${{github.event_name == 'pull_request'}}
# with:
# message: |
# You can try this PR out by using the following docker image:
# ```
# ${{ steps.build.outputs.docker_image }}
# ```
# comment-tag: ${{ steps.build.outputs.docker_image }}
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v2

View File

@@ -6,8 +6,19 @@
"action": "DENY"
},
{
"_comment": "This is based on the BGP routes advertised by AS7941",
"name": "internet-archive",
"action": "ALLOW",
"remote_addresses": [
"207.241.224.0/20",
"208.70.24.0/21",
"2620:0:9c0::/48"
]
},
{
"_comment": "Based on: https://developers.google.com/static/search/apis/ipranges/googlebot.json",
"name": "googlebot",
"user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html",
"user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
"action": "ALLOW",
"remote_addresses": [
"2001:4860:4801:10::/64",
@@ -270,8 +281,9 @@
]
},
{
"_comment": "Based on: https://www.bing.com/toolbox/bingbot.json",
"name": "bingbot",
"user_agent_regex": "\\+http\\:\\/\\/www\\.bing\\.com/bingbot\\.htm",
"user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
"action": "ALLOW",
"remote_addresses": [
"157.55.39.0/24",
@@ -305,16 +317,18 @@
]
},
{
"_comment": "Based on: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json",
"name": "qwantbot",
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
"user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
"action": "ALLOW",
"remote_addresses": [
"91.242.162.0/24"
]
},
{
"_comment": "Based on: https://kagi.com/bot",
"name": "kagibot",
"user_agent_regex": "\\+https\\:\\/\\/kagi\\.com/bot",
"user_agent_regex": "\\+https\\://kagi\\.com/bot",
"action": "ALLOW",
"remote_addresses": [
"216.18.205.234/32",
@@ -324,6 +338,7 @@
]
},
{
"_comment": "Received over email from marginalia operator",
"name": "marginalia",
"user_agent_regex": "search\\.marginalia\\.nu",
"action": "ALLOW",
@@ -336,6 +351,7 @@
]
},
{
"_comment": "Based on: https://www.mojeek.com/bot.html and manual admin confirmation in a GitHub thread: https://github.com/TecharoHQ/anubis/issues/47#issuecomment-2743815019",
"name": "mojeekbot",
"user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
"action": "ALLOW",
@@ -345,7 +361,7 @@
},
{
"name": "us-artificial-intelligence-scraper",
"user_agent_regex": "\\+https\\:\\/\\/github\\.com\\/US-Artificial-Intelligence\\/scraper",
"user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
"action": "DENY"
},
{
@@ -370,12 +386,7 @@
},
{
"name": "headless-chrome",
"user_agent_regex": "HeadlessChrome",
"action": "DENY"
},
{
"name": "headless-chromium",
"user_agent_regex": "HeadlessChromium",
"user_agent_regex": "(?i:headlesschrom(e|ium))",
"action": "DENY"
},
{
@@ -395,4 +406,4 @@
}
],
"dnsbl": true
}
}

View File

@@ -23,21 +23,8 @@ var (
githubEventName = flag.String("github-event-name", "", "GitHub event name")
pullRequestID = flag.Int("pull-request-id", -1, "GitHub pull request ID")
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
knownContributors = []string{
"Xe",
}
)
// inList reports whether needle appears in haystack.
//
// Elements are compared with exact (case-sensitive) string equality. A nil or
// empty haystack always yields false.
func inList(needle string, haystack []string) bool {
	found := false
	// Stop scanning as soon as a match has been seen.
	for idx := 0; idx < len(haystack) && !found; idx++ {
		found = haystack[idx] == needle
	}
	return found
}
func main() {
flagenv.Parse()
flag.Parse()
@@ -46,11 +33,7 @@ func main() {
koDockerRepo := strings.TrimRight(*dockerRepo, "/"+filepath.Base(*dockerRepo))
if *githubEventName == "pull_request" && !inList(*githubActor, knownContributors) {
if *pullRequestID == -1 {
log.Fatal("Must set --pull-request-id when --github-event-name=pull_request")
}
if *githubEventName == "pull_request" && *pullRequestID != -1 {
*dockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis", *pullRequestID)
*dockerTags = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis:24h", *pullRequestID)
koDockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d", *pullRequestID)

View File

@@ -20,6 +20,7 @@ Livia sas Junius: Echo 2
- Remove default RSS reader rule as it may allow for a targeted attack against rails apps
[#67](https://github.com/TecharoHQ/anubis/pull/67)
- Whitelist MojeekBot in botPolicies [#47](https://github.com/TecharoHQ/anubis/issues/47)
- botPolicies regex has been cleaned up [#66](https://github.com/TecharoHQ/anubis/pull/66)
## v1.14.1