Files
anubis-mirror/internal/honeypot/naive/naive.go
T
Xe Iaso caa4d1273e fix(honeypot/naive): apply robot9001 style delays
Currently the honeypotting feature has no limits or delays anywhere and
uses that to feed an internal greylist of IP networks. This can cause
issues such as in #1613 where Claude's crawler seemed to pick up on it
and egress data at over one megabit per second until the administrator
noticed and blocked the address range.

This takes a different approach by inspiration of how the classic #xkcd
IRC bot Robot9000 works. The first time a given IPv4 /24 or IPv6 /48
visits a honeypot page, Anubis sleeps for 1 millisecond. The second time it
sleeps for two milliseconds. The third is four milliseconds and so on.
The goal of this is to make the scraping inherently self-limiting such
that the scrapers go off in their own corner where they won't really
hurt anyone.

Let's see if this works out according to keikaku.

Ref: https://github.com/TecharoHQ/anubis/issues/1613
Signed-off-by: Xe Iaso <me@xeiaso.net>
2026-05-15 17:39:28 -04:00

201 lines
4.8 KiB
Go

package naive
import (
"context"
_ "embed"
"fmt"
"log/slog"
"math"
"math/rand/v2"
"net/http"
"net/netip"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/honeypot"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/a-h/templ"
"github.com/google/uuid"
"github.com/nikandfor/spintax"
)
//go:generate go tool github.com/a-h/templ/cmd/templ generate
// XXX(Xe): All of this was generated by ChatGPT, GLM 4.6, and GPT-OSS 120b. This is pseudoprofound bullshit in spintax[1] format so that the bullshit generator can emit plausibly human-authored text while being very computationally cheap.
//
// It feels somewhat poetic to use spammer technology in Anubis.
//
// [1]: https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/
//
//go:embed spintext.txt
var spintext string

// titles holds the embedded contents of titles.txt. New parses it with
// spintax.Parse and the result is spun by makeTitle.
//
//go:embed titles.txt
var titles string

// affirmations holds the embedded contents of affirmations.txt. New parses it
// with spintax.Parse and the result is spun by makeAffirmations.
//
//go:embed affirmations.txt
var affirmations string
// New builds the naive honeypot implementation on top of st.
//
// The three embedded spintax sources (affirmations, bodies, titles) are parsed
// up front, in that order; the first parse failure is wrapped and returned.
// The supplied logger is used once to report how many variants each source
// yields, and a derived logger tagged with component "honeypot/naive" is
// stored on the returned value.
func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
	parsedAffirmations, err := spintax.Parse(affirmations)
	if err != nil {
		return nil, fmt.Errorf("can't parse affirmations: %w", err)
	}

	parsedBodies, err := spintax.Parse(spintext)
	if err != nil {
		return nil, fmt.Errorf("can't parse bodies: %w", err)
	}

	parsedTitles, err := spintax.Parse(titles)
	if err != nil {
		return nil, fmt.Errorf("can't parse titles: %w", err)
	}

	lg.Debug(
		"initialized basic bullshit generator",
		"affirmations", parsedAffirmations.Count(),
		"bodies", parsedBodies.Count(),
		"titles", parsedTitles.Count(),
	)

	impl := &Impl{
		st:            st,
		infos:         store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
		uaWeight:      store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
		networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},
		affirmation:   parsedAffirmations,
		body:          parsedBodies,
		title:         parsedTitles,
		lg:            lg.With("component", "honeypot/naive"),
	}
	return impl, nil
}
// Impl is the naive honeypot: it serves generated filler pages and keeps
// per-network hit counters in the backing store.
type Impl struct {
	// st is the raw store handed to New; the typed views below wrap it.
	st store.Interface

	// Typed, key-prefixed views over st.
	infos         store.JSON[honeypot.Info] // prefix "honeypot:info"
	uaWeight      store.JSON[int]           // prefix "honeypot:user-agent"
	networkWeight store.JSON[int]           // prefix "honeypot:network"

	// lg is the logger tagged with component "honeypot/naive".
	lg *slog.Logger

	// Parsed spintax generators for link text, page bodies and page titles.
	affirmation spintax.Spintax
	body        spintax.Spintax
	title       spintax.Spintax
}
// incrementNetwork bumps the hit counter stored for network and returns the
// new value. The counter is keyed by the SHA-256 sum of the network string and
// is written back with a one hour expiry.
//
// Errors from both the read and the write are discarded; presumably a failed
// or missing read yields zero so the first hit counts as 1 (verify against the
// store implementation). The read-modify-write is not atomic.
func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
	key := internal.SHA256sum(network)

	previous, _ := i.networkWeight.Get(ctx, key)
	current := previous + 1

	i.networkWeight.Set(ctx, key, current, time.Hour)
	return current
}
// CheckNetwork returns a checker that matches requests whose clamped client
// network has accumulated a honeypot weight of at least 25.
//
// The client address comes from internal.RealIP, falling back to the
// X-Real-Ip header. If neither yields a usable address, or the address cannot
// be clamped to a network, the checker reports no match instead of failing the
// request. The returned error is always nil.
func (i *Impl) CheckNetwork() checker.Impl {
	// Number of honeypot hits after which a network is considered matched.
	const matchThreshold = 25

	return checker.Func(func(r *http.Request) (bool, error) {
		realIP, _ := internal.RealIP(r)
		if !realIP.IsValid() {
			// ParseAddr rather than MustParseAddr: the header is
			// client-influenced and may be absent or malformed, which must
			// not panic inside a policy check.
			fallback, err := netip.ParseAddr(r.Header.Get("X-Real-Ip"))
			if err != nil {
				return false, nil
			}
			realIP = fallback
		}

		network, ok := internal.ClampIP(realIP)
		if !ok {
			return false, nil
		}

		// A lookup error is treated the same as "no hits recorded".
		weight, _ := i.networkWeight.Get(r.Context(), internal.SHA256sum(network.String()))
		return weight >= matchThreshold, nil
	})
}
// Hash returns a fixed identifier for this implementation: the SHA-256 sum of
// the constant string "naive honeypot".
func (i *Impl) Hash() string {
	const identity = "naive honeypot"
	return internal.SHA256sum(identity)
}
// makeAffirmations spins between one and five affirmation strings, the count
// being chosen at random on every call.
func (i *Impl) makeAffirmations() []string {
	out := make([]string, rand.IntN(5)+1)
	for idx := range out {
		out[idx] = i.affirmation.Spin()
	}
	return out
}
// makeSpins spins between one and five body paragraphs, the count being chosen
// at random on every call.
func (i *Impl) makeSpins() []string {
	out := make([]string, rand.IntN(5)+1)
	for idx := range out {
		out[idx] = i.body.Spin()
	}
	return out
}
// makeTitle spins a single page title from the parsed title spintax.
func (i *Impl) makeTitle() string {
	spun := i.title.Spin()
	return spun
}
// maxTarpitDelay is the longest time ServeHTTP stalls a single honeypot
// request. Without a bound the doubling delay would overflow time.Duration
// and pin a goroutine for hours.
const maxTarpitDelay = time.Minute

// tarpitDelay returns how long to stall the visits-th honeypot hit from one
// network: 1ms for the first hit, doubling on every further hit (1ms, 2ms,
// 4ms, ...), capped at maxTarpitDelay. Counts below one are treated as one.
func tarpitDelay(visits int) time.Duration {
	if visits < 1 {
		visits = 1
	}

	// Compare in floating point before converting so that very large counts
	// (where the power is huge or +Inf) hit the cap instead of overflowing.
	ms := math.Pow(2, float64(visits-1))
	if ms >= float64(maxTarpitDelay/time.Millisecond) {
		return maxTarpitDelay
	}
	return time.Duration(ms) * time.Millisecond
}

// ServeHTTP renders one generated honeypot page.
//
// Every request increments the hit counter of the client's clamped network and
// is then stalled for a delay that doubles with that counter (see
// tarpitDelay), so a crawler that keeps following honeypot links slows itself
// down. The stall is abandoned, and nothing is written, if the request context
// is cancelled while waiting.
//
// Requests whose client address cannot be determined or clamped are answered
// with 418 and the body "The cake is a lie". The time spent on a rendered page,
// including the stall, is recorded in honeypot.Timings under the label "naive".
func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	t0 := time.Now()
	lg := internal.GetRequestLogger(i.lg, r)

	id := r.PathValue("id")
	if id == "" {
		id = uuid.NewString()
	}

	realIP, _ := internal.RealIP(r)
	if !realIP.IsValid() {
		// ParseAddr rather than MustParseAddr: a missing or malformed header
		// must produce an error response, not a handler panic.
		fallback, err := netip.ParseAddr(r.Header.Get("X-Real-Ip"))
		if err != nil {
			lg.Error("can't determine client IP", "err", err)
			http.Error(w, "The cake is a lie", http.StatusTeapot)
			return
		}
		realIP = fallback
	}

	network, ok := internal.ClampIP(realIP)
	if !ok {
		lg.Error("clampIP failed", "output", network, "ok", ok)
		http.Error(w, "The cake is a lie", http.StatusTeapot)
		return
	}

	networkCount := i.incrementNetwork(r.Context(), network.String())

	stage := r.PathValue("stage")
	if stage == "init" {
		lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
	} else if networkCount%256 == 0 {
		lg.Warn("found possible crawler", "id", id, "network", network, "userAgent", r.UserAgent())
	}

	// Stall before answering, but stop waiting as soon as the client goes
	// away so abandoned requests do not keep goroutines parked.
	timer := time.NewTimer(tarpitDelay(networkCount))
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-r.Context().Done():
		return
	}

	spins := i.makeSpins()
	linkTexts := i.makeAffirmations()
	title := i.makeTitle()

	// Each affirmation becomes a link to a fresh random ID.
	links := make([]link, 0, len(linkTexts))
	for _, text := range linkTexts {
		links = append(links, link{
			href: uuid.NewString(),
			body: text,
		})
	}

	templ.Handler(
		base(title, i.maze(spins, links)),
		templ.WithStreaming(),
		templ.WithStatus(http.StatusOK),
	).ServeHTTP(w, r)

	t1 := time.Since(t0)
	honeypot.Timings.WithLabelValues("naive").Observe(float64(t1.Milliseconds()))
}
// link is one generated hyperlink on a honeypot page: href is the link target
// (ServeHTTP fills it with a fresh UUID) and body is the visible link text.
type link struct {
	href, body string
}