feat(honeypot/naive): attempt to automatically filter out based on crawling

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso
2025-12-16 01:22:13 -05:00
parent e0f4468b03
commit 958daba4a1
5 changed files with 407 additions and 59 deletions

33
internal/clampip.go Normal file
View File

@@ -0,0 +1,33 @@
package internal
import "net/netip"
func ClampIP(addr netip.Addr) (netip.Prefix, bool) {
switch {
case addr.Is4():
result, err := addr.Prefix(24)
if err != nil {
return netip.Prefix{}, false
}
return result, true
case addr.Is4In6():
// Extract the IPv4 address from IPv4-mapped IPv6 and clamp it
ipv4 := addr.Unmap()
result, err := ipv4.Prefix(24)
if err != nil {
return netip.Prefix{}, false
}
return result, true
case addr.Is6():
result, err := addr.Prefix(48)
if err != nil {
return netip.Prefix{}, false
}
return result, true
default:
return netip.Prefix{}, false
}
}

274
internal/clampip_test.go Normal file
View File

@@ -0,0 +1,274 @@
package internal
import (
"net/netip"
"testing"
)
func TestClampIP(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
// IPv4 addresses
{
name: "IPv4 normal address",
input: "192.168.1.100",
expected: "192.168.1.0/24",
},
{
name: "IPv4 boundary - network address",
input: "192.168.1.0",
expected: "192.168.1.0/24",
},
{
name: "IPv4 boundary - broadcast address",
input: "192.168.1.255",
expected: "192.168.1.0/24",
},
{
name: "IPv4 class A address",
input: "10.0.0.1",
expected: "10.0.0.0/24",
},
{
name: "IPv4 loopback",
input: "127.0.0.1",
expected: "127.0.0.0/24",
},
{
name: "IPv4 link-local",
input: "169.254.0.1",
expected: "169.254.0.0/24",
},
{
name: "IPv4 public address",
input: "203.0.113.1",
expected: "203.0.113.0/24",
},
// IPv6 addresses
{
name: "IPv6 normal address",
input: "2001:db8::1",
expected: "2001:db8::/48",
},
{
name: "IPv6 with full expansion",
input: "2001:0db8:0000:0000:0000:0000:0000:0001",
expected: "2001:db8::/48",
},
{
name: "IPv6 loopback",
input: "::1",
expected: "::/48",
},
{
name: "IPv6 unspecified address",
input: "::",
expected: "::/48",
},
{
name: "IPv6 link-local",
input: "fe80::1",
expected: "fe80::/48",
},
{
name: "IPv6 unique local",
input: "fc00::1",
expected: "fc00::/48",
},
{
name: "IPv6 documentation prefix",
input: "2001:db8:abcd:ef01::1234",
expected: "2001:db8:abcd::/48",
},
{
name: "IPv6 global unicast",
input: "2606:4700:4700::1111",
expected: "2606:4700:4700::/48",
},
{
name: "IPv6 multicast",
input: "ff02::1",
expected: "ff02::/48",
},
// IPv4-mapped IPv6 addresses
{
name: "IPv4-mapped IPv6 address",
input: "::ffff:192.168.1.100",
expected: "192.168.1.0/24",
},
{
name: "IPv4-mapped IPv6 with different format",
input: "::ffff:10.0.0.1",
expected: "10.0.0.0/24",
},
{
name: "IPv4-mapped IPv6 loopback",
input: "::ffff:127.0.0.1",
expected: "127.0.0.0/24",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
addr := netip.MustParseAddr(tt.input)
result, ok := ClampIP(addr)
if !ok {
t.Fatalf("ClampIP(%s) returned false, want true", tt.input)
}
if result.String() != tt.expected {
t.Errorf("ClampIP(%s) = %s, want %s", tt.input, result.String(), tt.expected)
}
})
}
}
func TestClampIPSuccess(t *testing.T) {
// Test that valid inputs return success
tests := []struct {
name string
input string
}{
{
name: "IPv4 address",
input: "192.168.1.100",
},
{
name: "IPv6 address",
input: "2001:db8::1",
},
{
name: "IPv4-mapped IPv6",
input: "::ffff:192.168.1.100",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
addr := netip.MustParseAddr(tt.input)
result, ok := ClampIP(addr)
if !ok {
t.Fatalf("ClampIP(%s) returned false, want true", tt.input)
}
// For valid inputs, we should get the clamped prefix
if addr.Is4() || addr.Is4In6() {
if result.Bits() != 24 {
t.Errorf("Expected 24 bits for IPv4, got %d", result.Bits())
}
} else if addr.Is6() {
if result.Bits() != 48 {
t.Errorf("Expected 48 bits for IPv6, got %d", result.Bits())
}
}
})
}
}
func TestClampIPZeroValue(t *testing.T) {
// Test that when ClampIP fails, it returns zero value
// Note: It's hard to make addr.Prefix() fail with valid inputs,
// so this test demonstrates the expected behavior
addr := netip.MustParseAddr("192.168.1.100")
// Manually create a zero value for comparison
zeroPrefix := netip.Prefix{}
// Call ClampIP - it should succeed with valid input
result, ok := ClampIP(addr)
// Verify the function succeeded
if !ok {
t.Error("ClampIP should succeed with valid input")
}
// Verify that the result is not a zero value
if result == zeroPrefix {
t.Error("Result should not be zero value for successful operation")
}
}
func TestClampIPSpecialCases(t *testing.T) {
tests := []struct {
name string
input string
expectedPrefix int
expectedNetwork string
}{
{
name: "Minimum IPv4",
input: "0.0.0.0",
expectedPrefix: 24,
expectedNetwork: "0.0.0.0",
},
{
name: "Maximum IPv4",
input: "255.255.255.255",
expectedPrefix: 24,
expectedNetwork: "255.255.255.0",
},
{
name: "Minimum IPv6",
input: "::",
expectedPrefix: 48,
expectedNetwork: "::",
},
{
name: "Maximum IPv6 prefix part",
input: "ffff:ffff:ffff::",
expectedPrefix: 48,
expectedNetwork: "ffff:ffff:ffff::",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
addr := netip.MustParseAddr(tt.input)
result, ok := ClampIP(addr)
if !ok {
t.Fatalf("ClampIP(%s) returned false, want true", tt.input)
}
if result.Bits() != tt.expectedPrefix {
t.Errorf("ClampIP(%s) bits = %d, want %d", tt.input, result.Bits(), tt.expectedPrefix)
}
if result.Addr().String() != tt.expectedNetwork {
t.Errorf("ClampIP(%s) network = %s, want %s", tt.input, result.Addr().String(), tt.expectedNetwork)
}
})
}
}
// Benchmark to ensure the function is performant
func BenchmarkClampIP(b *testing.B) {
ipv4 := netip.MustParseAddr("192.168.1.100")
ipv6 := netip.MustParseAddr("2001:db8::1")
ipv4mapped := netip.MustParseAddr("::ffff:192.168.1.100")
b.Run("IPv4", func(b *testing.B) {
for i := 0; i < b.N; i++ {
ClampIP(ipv4)
}
})
b.Run("IPv6", func(b *testing.B) {
for i := 0; i < b.N; i++ {
ClampIP(ipv6)
}
})
b.Run("IPv4-mapped", func(b *testing.B) {
for i := 0; i < b.N; i++ {
ClampIP(ipv4mapped)
}
})
}

View File

@@ -1,16 +1,17 @@
package naive
import (
"context"
_ "embed"
"fmt"
"log/slog"
"math/rand/v2"
"net/http"
"net/netip"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/honeypot"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/a-h/templ"
"github.com/google/uuid"
@@ -53,23 +54,67 @@ func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
lg.Debug("initialized basic bullshit generator", "affirmations", affirmation.Count(), "bodies", body.Count(), "titles", title.Count())
return &Impl{
st: st,
infos: store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot-infos"},
affirmation: affirmation,
body: body,
title: title,
lg: lg.With("component", "honeypot/naive"),
st: st,
infos: store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
uaWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},
affirmation: affirmation,
body: body,
title: title,
lg: lg.With("component", "honeypot/naive"),
}, nil
}
type Impl struct {
st store.Interface
infos store.JSON[honeypot.Info]
lg *slog.Logger
st store.Interface
infos store.JSON[honeypot.Info]
uaWeight store.JSON[int]
networkWeight store.JSON[int]
lg *slog.Logger
affirmation, body, title spintax.Spintax
}
func (i *Impl) incrementUA(ctx context.Context, userAgent string) int {
result, _ := i.uaWeight.Get(ctx, internal.SHA256sum(userAgent))
result++
i.uaWeight.Set(ctx, internal.SHA256sum(userAgent), result, time.Hour)
return result
}
func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
result, _ := i.networkWeight.Get(ctx, internal.SHA256sum(network))
result++
i.networkWeight.Set(ctx, internal.SHA256sum(network), result, time.Hour)
return result
}
func (i *Impl) CheckUA() checker.Impl {
return checker.Func(func(r *http.Request) (bool, error) {
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
if result >= 25 {
return true, nil
}
return false, nil
})
}
func (i *Impl) CheckNetwork() checker.Impl {
return checker.Func(func(r *http.Request) (bool, error) {
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
if result >= 25 {
return true, nil
}
return false, nil
})
}
func (i *Impl) Hash() string {
return internal.SHA256sum("naive honeypot")
}
func (i *Impl) makeAffirmations() []string {
count := rand.IntN(5) + 1
@@ -96,30 +141,9 @@ func (i *Impl) makeTitle() string {
return i.title.Spin()
}
func (i *Impl) clampIP(addr netip.Addr) netip.Prefix {
fallback := netip.MustParsePrefix(addr.String() + "/32")
switch {
case addr.Is4() || addr.Is4In6():
result, err := addr.Prefix(24)
if err != nil {
return fallback
}
return result
case addr.Is6():
result, err := addr.Prefix(48)
if err != nil {
return fallback
}
return result
default:
return fallback
}
}
func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
t0 := time.Now()
lg := internal.GetRequestLogger(i.lg, r)
id := r.PathValue("id")
if id == "" {
@@ -128,42 +152,31 @@ func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
realIP, _ := internal.RealIP(r)
if !realIP.IsValid() {
i.lg.Error("the real IP is somehow invalid, bad middleware stack?")
lg.Error("the real IP is somehow invalid, bad middleware stack?")
http.Error(w, "The cake is a lie", http.StatusTeapot)
return
}
network := i.clampIP(realIP)
network, ok := internal.ClampIP(realIP)
if !ok {
lg.Error("clampIP failed", "output", network, "ok", ok)
http.Error(w, "The cake is a lie", http.StatusTeapot)
return
}
networkCount := i.incrementNetwork(r.Context(), network.String())
uaCount := i.incrementUA(r.Context(), r.UserAgent())
stage := r.PathValue("stage")
var info honeypot.Info
var err error
if stage == "init" {
i.lg.Debug("found new entrance point", "id", id, "userAgent", r.UserAgent(), "clampedIP", network)
info = honeypot.Info{
CreatedAt: time.Now(),
UserAgent: r.UserAgent(),
IPAddress: realIP.String(),
HitCount: 1,
}
i.infos.Set(r.Context(), network.String(), info, time.Hour)
lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
} else {
info, err = i.infos.Get(r.Context(), network.String())
if err != nil {
info = honeypot.Info{
CreatedAt: time.Now(),
UserAgent: r.UserAgent(),
IPAddress: realIP.String(),
HitCount: 1,
}
i.infos.Set(r.Context(), network.String(), info, time.Hour)
} else {
info.HitCount++
i.infos.Set(r.Context(), network.String(), info, time.Hour)
if networkCount >= 50 && networkCount%256 == 0 {
lg.Warn("found possible crawler", "id", id)
}
if uaCount >= 50 && uaCount%256 == 0 {
lg.Warn("found possible crawler", "id", id)
}
}

View File

@@ -179,6 +179,26 @@ func New(opts Options) (*Server, error) {
bsgen, err := naive.New(result.store, result.logger)
if err == nil {
registerWithPrefix(anubis.APIPrefix+"honeypot/{id}/{stage}", bsgen, http.MethodGet)
opts.Policy.Bots = append(
opts.Policy.Bots,
policy.Bot{
Rules: bsgen.CheckNetwork(),
Action: config.RuleWeigh,
Weight: &config.Weight{
Adjust: 30,
},
Name: "honeypot/network",
},
policy.Bot{
Rules: bsgen.CheckUA(),
Action: config.RuleWeigh,
Weight: &config.Weight{
Adjust: 30,
},
Name: "honeypot/user-agent",
},
)
} else {
result.logger.Error("can't init honeypot subsystem", "err", err)
}

View File

@@ -14,6 +14,14 @@ type Impl interface {
Hash() string
}
type Func func(*http.Request) (bool, error)
func (f Func) Check(r *http.Request) (bool, error) {
return f(r)
}
func (f Func) Hash() string { return internal.FastHash(fmt.Sprintf("%#v", f)) }
type List []Impl
// Check runs each checker in the list against the request.