Compare commits

...

3 Commits

Author SHA1 Message Date
Xe Iaso
cb7f9d3a8a chore(default-config): remove Tencent Cloud block rule
Tencent Cloud's abuse team reached out to me recently and asked for this
rule to be removed. Prior attempts to reach out to them to report
abusive traffic have failed, thus leading to this IP space block as a
last resort to try and maintain uptime for systems administrators.

Unfortunately, it's difficult for Tencent's abuse team to take action if
there is a blanket block like this. Let's see if this doesn't cause too
much grief.
2025-10-31 09:35:39 -04:00
Xe Iaso
59f1e36167 fix: SERVE_ROBOTS_TXT works again (#1229)
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-10-31 09:08:33 -04:00
Xe Iaso
62c1b80189 chore: tag v1.23.0
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-10-29 20:38:34 -04:00
11 changed files with 78 additions and 5 deletions

View File

@@ -23,6 +23,7 @@ jobs:
- i18n
- palemoon/amd64
#- palemoon/i386
- robots_txt
runs-on: ubuntu-latest
steps:
- name: Checkout code

View File

@@ -1 +1 @@
1.23.0-pre2
1.23.0

View File

@@ -4,4 +4,3 @@
- import: (data)/bots/custom-async-http-client.yaml
- import: (data)/crawlers/alibaba-cloud.yaml
- import: (data)/crawlers/huawei-cloud.yaml
- import: (data)/crawlers/tencent-cloud.yaml

View File

@@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- This changes the project to: -->
- Fix `SERVE_ROBOTS_TXT` setting file after the double slash fix broke it.
- Remove the default configuration rule to block Tencent cloud. If users see abuse from Tencent cloud IP ranges, please contact abuse@tencent.com and mention that you are using Anubis to protect your services. Please include source IP address, source port, timestamp, target IP address, target port, request headers (including the User-Agent header), and target endpoints/patterns.
## v1.23.0: Lyse Hext
- Add default tencent cloud DENY rule.

View File

@@ -345,6 +345,15 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}
// Forward robots.txt requests to mux when ServeRobotsTXT is enabled
if s.opts.ServeRobotsTXT {
path := strings.TrimPrefix(r.URL.Path, anubis.BasePrefix)
if path == "/robots.txt" || path == "/.well-known/robots.txt" {
s.mux.ServeHTTP(w, r)
return
}
}
s.maybeReverseProxyOrPage(w, r)
}

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "@techaro/anubis",
"version": "1.23.0-pre2",
"version": "1.23.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@techaro/anubis",
"version": "1.23.0-pre2",
"version": "1.23.0",
"license": "ISC",
"dependencies": {
"@aws-crypto/sha256-js": "^5.2.0",

View File

@@ -1,6 +1,6 @@
{
"name": "@techaro/anubis",
"version": "1.23.0-pre2",
"version": "1.23.0",
"description": "",
"main": "index.js",
"scripts": {

View File

@@ -0,0 +1,8 @@
bots:
- name: challenge
user_agent_regex: CHALLENGE
action: CHALLENGE
status_codes:
CHALLENGE: 200
DENY: 403

27
test/robots_txt/test.mjs Normal file
View File

@@ -0,0 +1,27 @@
async function getRobotsTxt() {
return fetch("http://localhost:8923/robots.txt", {
headers: {
"Accept-Language": "en",
"User-Agent": "Mozilla/5.0",
}
})
.then(resp => {
if (resp.status !== 200) {
throw new Error(`wanted status 200, got status: ${resp.status}`);
}
return resp;
})
.then(resp => resp.text());
}
(async () => {
const page = await getRobotsTxt();
if (page.includes(`<html>`)) {
console.log(page)
throw new Error("serve robots.txt smoke test failed");
}
console.log("serve-robots-txt serves robots.txt");
process.exit(0);
})();

24
test/robots_txt/test.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -euo pipefail
function cleanup() {
pkill -P $$
}
trap cleanup EXIT SIGINT
# Build static assets
(cd ../.. && npm ci && npm run assets)
go tool anubis --help 2>/dev/null || :
go run ../cmd/unixhttpd &
go tool anubis \
--policy-fname ./anubis.yaml \
--use-remote-address \
--serve-robots-txt \
--target=unix://$(pwd)/unixhttpd.sock &
backoff-retry node ./test.mjs

2
test/robots_txt/var/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*
!.gitignore