feat: move metrics server config to the policy file (#1572)

* feat(config): add metrics bind config to policy file with flag hack

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(internal): move SetupListener from main

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(main): use internal.SetupListener

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(config): add metrics socket mode

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat: move metrics server to a dedicated package

Signed-off-by: Xe Iaso <me@xeiaso.net>

* doc: add metrics server configuration docs

Signed-off-by: Xe Iaso <me@xeiaso.net>

* doc(default-config): add vague references to metrics server

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso
2026-04-21 15:36:11 -04:00
committed by GitHub
parent 3a6e368179
commit d5ccf9c670
45 changed files with 703 additions and 143 deletions
+9
View File
@@ -334,6 +334,7 @@ type fileConfig struct {
DNSBL bool `json:"dnsbl"`
DNSTTL DnsTTL `json:"dns_ttl"`
Logging *Logging `json:"logging"`
Metrics *Metrics `json:"metrics,omitempty"`
}
func (c *fileConfig) Valid() error {
@@ -375,6 +376,12 @@ func (c *fileConfig) Valid() error {
}
}
if c.Metrics != nil {
if err := c.Metrics.Valid(); err != nil {
errs = append(errs, err)
}
}
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
@@ -417,6 +424,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {
StatusCodes: c.StatusCodes,
Store: c.Store,
Logging: c.Logging,
Metrics: c.Metrics,
}
if c.OpenGraph.TimeToLive != "" {
@@ -508,6 +516,7 @@ type Config struct {
Logging *Logging
DNSBL bool
DNSTTL DnsTTL
Metrics *Metrics
}
func (c Config) Valid() error {
+54
View File
@@ -0,0 +1,54 @@
package config
import (
"errors"
"fmt"
"strconv"
)
var (
ErrInvalidMetricsConfig = errors.New("config: invalid metrics configuration")
ErrNoMetricsBind = errors.New("config.Metrics: must define bind")
ErrNoMetricsNetwork = errors.New("config.Metrics: must define network")
ErrNoMetricsSocketMode = errors.New("config.Metrics: must define socket mode when using unix sockets")
ErrInvalidMetricsSocketMode = errors.New("config.Metrics: invalid unix socket mode")
ErrInvalidMetricsNetwork = errors.New("config.Metrics: invalid metrics network")
)
type Metrics struct {
Bind string `json:"bind" yaml:"bind"`
Network string `json:"network" yaml:"network"`
SocketMode string `json:"socketMode" yaml:"socketMode"`
}
func (m *Metrics) Valid() error {
var errs []error
if m.Bind == "" {
errs = append(errs, ErrNoMetricsBind)
}
if m.Network == "" {
errs = append(errs, ErrNoMetricsNetwork)
}
switch m.Network {
case "tcp", "tcp4", "tcp6": // https://pkg.go.dev/net#Listen
case "unix":
if m.SocketMode == "" {
errs = append(errs, ErrNoMetricsSocketMode)
}
if _, err := strconv.ParseUint(m.SocketMode, 8, 0); err != nil {
errs = append(errs, fmt.Errorf("%w: %w", ErrInvalidMetricsSocketMode, err))
}
default:
errs = append(errs, ErrInvalidMetricsNetwork)
}
if len(errs) != 0 {
return errors.Join(ErrInvalidMetricsConfig, errors.Join(errs...))
}
return nil
}
+87
View File
@@ -0,0 +1,87 @@
package config
import (
"errors"
"testing"
)
func TestMetricsValid(t *testing.T) {
for _, tt := range []struct {
name string
input *Metrics
err error
}{
{
name: "basic TCP",
input: &Metrics{
Bind: ":9090",
Network: "tcp",
},
},
{
name: "basic TCP4",
input: &Metrics{
Bind: ":9090",
Network: "tcp4",
},
},
{
name: "basic TCP6",
input: &Metrics{
Bind: ":9090",
Network: "tcp6",
},
},
{
name: "basic unix",
input: &Metrics{
Bind: "/tmp/anubis-metrics.sock",
Network: "unix",
SocketMode: "0770",
},
},
{
name: "no bind",
input: &Metrics{},
err: ErrNoMetricsBind,
},
{
name: "no network",
input: &Metrics{},
err: ErrNoMetricsNetwork,
},
{
name: "no unix socket mode",
input: &Metrics{
Bind: "/tmp/anubis-metrics.sock",
Network: "unix",
},
err: ErrNoMetricsSocketMode,
},
{
name: "invalid unix socket mode",
input: &Metrics{
Bind: "/tmp/anubis-metrics.sock",
Network: "unix",
SocketMode: "taco bell",
},
err: ErrInvalidMetricsSocketMode,
},
{
name: "invalid network",
input: &Metrics{
Bind: ":9090",
Network: "taco",
},
err: ErrInvalidMetricsNetwork,
},
} {
t.Run(tt.name, func(t *testing.T) {
if err := tt.input.Valid(); !errors.Is(err, tt.err) {
t.Logf("wanted error: %v", tt.err)
t.Logf("got error: %v", err)
t.Error("validation failed")
}
})
}
}
+3
View File
@@ -0,0 +1,3 @@
metrics:
bind: ":9090"
network: taco
+5 -1
View File
@@ -5,5 +5,9 @@
"remote_addresses": ["0.0.0.0/0", "::/0"],
"action": "ALLOW"
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -4,3 +4,7 @@ bots:
- "0.0.0.0/0"
- "::/0"
action: ALLOW
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -8,5 +8,9 @@
"action": "DENY"
}
],
"dnsbl": false
"dnsbl": false,
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -3,3 +3,7 @@ bots:
headers_regex:
CF-Worker: .*
action: DENY
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -4,3 +4,7 @@ bots:
asns:
match:
- 13335 # Cloudflare
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -5,5 +5,9 @@
"user_agent_regex": "Mozilla",
"action": "CHALLENGE"
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -2,3 +2,7 @@ bots:
- name: generic-browser
user_agent_regex: Mozilla
action: CHALLENGE
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -6,3 +6,7 @@ bots:
- name: "test"
user_agent_regex: ".*"
action: "DENY"
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -6,3 +6,7 @@ bots:
- '"Accept" in headers'
- headers["Accept"].contains("text/html")
- randInt(1) == 0
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -6,5 +6,9 @@
"action": "DENY"
}
],
"dnsbl": false
"dnsbl": false,
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -2,3 +2,7 @@ bots:
- name: everything
user_agent_regex: .*
action: DENY
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -4,3 +4,7 @@ bots:
geoip:
countries:
- US
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -10,5 +10,9 @@
]
}
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -6,3 +6,7 @@ bots:
- userAgent.startsWith("git/") || userAgent.contains("libgit")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -3,5 +3,9 @@
{
"import": "./testdata/hack-test.json"
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -1,2 +1,6 @@
bots:
- import: ./testdata/hack-test.yaml
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -3,5 +3,9 @@
{
"import": "(data)/common/keep-internet-working.yaml"
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
@@ -1,2 +1,6 @@
bots:
- import: (data)/common/keep-internet-working.yaml
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -8,3 +8,7 @@ impressum:
page:
title: Test
body: <p>This is a test</p>
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -13,3 +13,7 @@ logs:
oldFileTimeFormat: 2006-01-02T15-04-05 # RFC 3339-ish
compress: true
useLocalTime: false # timezone for rotated files is UTC
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -5,3 +5,7 @@ bots:
logging:
sink: "stdio"
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -6,3 +6,7 @@ bots:
adjust: 5
thresholds: []
metrics:
bind: ":9090"
network: "tcp"
+5 -1
View File
@@ -75,5 +75,9 @@
"user_agent_regex": "Mozilla",
"action": "CHALLENGE"
}
]
],
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -10,3 +10,7 @@ openGraph:
default:
"og:title": "Xe's magic land of fun"
"og:description": "We're no strangers to love, you know the rules and so do I"
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -4,3 +4,7 @@ bots:
user_agent_regex: Mozilla
weight:
adjust: 5
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -9,5 +9,9 @@
"status_codes": {
"CHALLENGE": 200,
"DENY": 200
},
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -6,3 +6,7 @@ bots:
status_codes:
CHALLENGE: 200
DENY: 200
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -9,5 +9,9 @@
"status_codes": {
"CHALLENGE": 403,
"DENY": 403
},
"metrics": {
"bind": ":9090",
"network": "tcp"
}
}
+4
View File
@@ -6,3 +6,7 @@ bots:
status_codes:
CHALLENGE: 403
DENY: 403
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -33,3 +33,7 @@ thresholds:
challenge:
algorithm: fast
difficulty: 4
metrics:
bind: ":9090"
network: "tcp"
+4
View File
@@ -2,3 +2,7 @@ bots:
- name: weight
action: WEIGH
user_agent_regex: Mozilla
metrics:
bind: ":9090"
network: "tcp"
+81
View File
@@ -0,0 +1,81 @@
package metrics
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"net/http/pprof"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/config"
"github.com/prometheus/client_golang/prometheus/promhttp"
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
type Server struct {
Config *config.Metrics
Log *slog.Logger
}
func (s *Server) Run(ctx context.Context, done func()) error {
defer done()
lg := s.Log.With("subsystem", "metrics")
mux := http.NewServeMux()
mux.HandleFunc("GET /debug/pprof/", pprof.Index)
mux.HandleFunc("GET /debug/pprof/cmdline", pprof.Cmdline)
mux.HandleFunc("GET /debug/pprof/profile", pprof.Profile)
mux.HandleFunc("GET /debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("GET /debug/pprof/trace", pprof.Trace)
mux.Handle("/metrics", promhttp.Handler())
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
st, ok := internal.GetHealth("anubis")
if !ok {
slog.Error("health service anubis does not exist, file a bug")
}
switch st {
case healthv1.HealthCheckResponse_NOT_SERVING:
http.Error(w, "NOT OK", http.StatusInternalServerError)
return
case healthv1.HealthCheckResponse_SERVING:
fmt.Fprintln(w, "OK")
return
default:
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
return
}
})
srv := http.Server{
Handler: mux,
ErrorLog: internal.GetFilteredHTTPLogger(),
}
ln, metricsURL, err := internal.SetupListener(s.Config.Bind, s.Config.Network, s.Config.SocketMode)
if err != nil {
return fmt.Errorf("can't setup listener: %w", err)
}
defer ln.Close()
lg.Debug("listening for metrics", "url", metricsURL)
go func() {
<-ctx.Done()
c, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := srv.Shutdown(c); err != nil {
lg.Error("can't shut down metrics server", "err", err)
}
}()
if err := srv.Serve(ln); !errors.Is(err, http.ErrServerClosed) {
return fmt.Errorf("can't serve metrics server: %w", err)
}
return nil
}
+2
View File
@@ -46,6 +46,7 @@ type ParsedConfig struct {
DnsCache *dns.DnsCache
Dns *dns.Dns
Logger *slog.Logger
Metrics *config.Metrics
}
func newParsedConfig(orig *config.Config) *ParsedConfig {
@@ -53,6 +54,7 @@ func newParsedConfig(orig *config.Config) *ParsedConfig {
orig: orig,
OpenGraph: orig.OpenGraph,
StatusCodes: orig.StatusCodes,
Metrics: orig.Metrics,
}
}