feat(lib): add log filtering rules

Closes #942

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso
2025-08-18 10:51:14 +00:00
parent 53516738c1
commit a7a5e0d5c7
13 changed files with 128 additions and 142 deletions

View File

@@ -31,6 +31,7 @@ import (
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/internal"
libanubis "github.com/TecharoHQ/anubis/lib"
"github.com/TecharoHQ/anubis/lib/logging"
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/thoth"
@@ -250,7 +251,7 @@ func main() {
return
}
internal.InitSlog(*slogLevel)
slog.SetDefault(slog.New(logging.Init(*slogLevel)))
internal.SetHealth("anubis", healthv1.HealthCheckResponse_NOT_SERVING)
if *healthcheck {
@@ -447,7 +448,10 @@ func main() {
h = internal.XForwardedForUpdate(*xffStripPrivate, h)
h = internal.JA4H(h)
srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()}
srv := http.Server{
Handler: h,
ErrorLog: logging.StdlibLogger(s.GetLogger("http-server").Handler(), slog.LevelDebug),
}
listener, listenerUrl := setupListener(*bindNetwork, *bind)
slog.Info(
"listening",
@@ -507,7 +511,10 @@ func metricsServer(ctx context.Context, done func()) {
}
})
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
srv := http.Server{
Handler: mux,
ErrorLog: logging.StdlibLogger(slog.With("subsystem", "metrics-server").Handler(), slog.LevelDebug),
}
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
slog.Debug("listening for metrics", "url", metricsUrl)

View File

@@ -10,7 +10,7 @@ import (
"path/filepath"
"strings"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/logging"
"github.com/facebookgo/flagenv"
)
@@ -28,7 +28,7 @@ func main() {
flagenv.Parse()
flag.Parse()
internal.InitSlog(*slogLevel)
slog.SetDefault(slog.New(logging.Init(*slogLevel)))
koDockerRepo := strings.TrimSuffix(*dockerRepo, "/"+filepath.Base(*dockerRepo))

View File

@@ -132,6 +132,23 @@ dnsbl: false
# <!-- ... -->
# Logging settings for Anubis
logging:
# CEL log filters. Note that this is a very powerful feature and it is very easy to get
# yourself into trouble with this. Avoid using log filters unless you are running into
# circumstances like https://github.com/TecharoHQ/anubis/issues/942. This has a nonzero
# impact on logging, which spirals out into a more than zero impact on Anubis'
# performance and memory usage.
filters:
# Every filter must have a name and an expression. You can use the same expression
# syntax as you can with bots or thresholds.
- name: "http-stdlib"
expression: msg.startsWith("http:")
- name: "context-canceled"
expression: msg.contains("context canceled")
- name: "http-pipelining"
expression: msg.contains("Unsolicited response received on idle HTTP channel")
# Open Graph passthrough configuration, see here for more information:
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
openGraph:

View File

@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- This changes the project to: -->
- Fix lock convoy problem in decaymap ([#1103](https://github.com/TecharoHQ/anubis/issues/1103))
- [Log filtering](./admin/configuration/expressions.mdx#log-filtering) rules have been added. This allows users to write custom log filtering logic.
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
- Add validation warning when persistent storage is used without setting signing keys
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))

View File

@@ -99,6 +99,10 @@ For this rule, if a request comes in matching [the signature of the `go get` com
Anubis exposes the following variables to expressions:
### Bot expressions
Bot expressions are used for evaluating [bot rules](../policies.mdx#bot-policies).
| Name | Type | Explanation | Example |
| :-------------- | :-------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- |
| `headers` | `map[string, string]` | The [headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers) of the request being processed. | `{"User-Agent": "Mozilla/5.0 Gecko/20100101 Firefox/137.0"}` |
@@ -182,6 +186,17 @@ Something to keep in mind about system load average is that it is not aware of t
Also keep in mind that this does not account for other kinds of latency like I/O latency. A system can have its web applications unresponsive due to high latency from a MySQL server but still have that web application server report a load near or at zero.
### Log filtering
Log filters are run on every time Anubis logs data. These are high throughput filters and should be written with care.
| Name | Type | Explanation | Example |
| :------ | :-------------------- | :----------------------------------------------------------------------------------------------------- | --------------------------------------- |
| `time` | Timestamp | The time that the log line was emitted. | `2025-08-18T06:45:38-04:00` |
| `msg` | `string` | The text-based message for the given log line. | `"invalid response"` |
| `level` | `string` | The [log level](https://pkg.go.dev/log/slog#Level) for the log message. | `"INFO"` |
| `attrs` | `map[string, string]` | The key -> value attributes for the given log line. Note that this is an expensive variable to access. | `{"err": "internal: the sun exploded"}` |
## Functions exposed to Anubis expressions
Anubis expressions can be augmented with the following functions:

View File

@@ -123,6 +123,10 @@ remote_addresses:
Anubis has support for showing imprint / impressum information. This is defined in the `impressum` block of your configuration. See [Imprint / Impressum configuration](./configuration/impressum.mdx) for more information.
## Logging
Anubis has support for configuring log filtering using expressions. See the [log filters](./configuration/expressions.mdx#log-filters) of the [expression](./configuration/expressions.mdx) documentation for more information.
## Storage backends
Anubis needs to store temporary data in order to determine if a user is legitimate or not. Administrators should choose a storage backend based on their infrastructure needs. Each backend has its own advantages and disadvantages.

View File

@@ -1,31 +1,10 @@
package internal
import (
"fmt"
"log"
"log/slog"
"net/http"
"os"
"strings"
)
func InitSlog(level string) {
var programLevel slog.Level
if err := (&programLevel).UnmarshalText([]byte(level)); err != nil {
fmt.Fprintf(os.Stderr, "invalid log level %s: %v, using info\n", level, err)
programLevel = slog.LevelInfo
}
leveler := &slog.LevelVar{}
leveler.Set(programLevel)
h := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
AddSource: true,
Level: leveler,
})
slog.SetDefault(slog.New(h))
}
func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger {
host := r.Host
if host == "" {
@@ -44,27 +23,3 @@ func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger {
"x-real-ip", r.Header.Get("X-Real-Ip"),
)
}
// ErrorLogFilter is used to suppress "context canceled" logs from the http server when a request is canceled (e.g., when a client disconnects).
type ErrorLogFilter struct {
Unwrap *log.Logger
}
func (elf *ErrorLogFilter) Write(p []byte) (n int, err error) {
logMessage := string(p)
if strings.Contains(logMessage, "context canceled") {
return len(p), nil // Suppress the log by doing nothing
}
if strings.Contains(logMessage, "Unsolicited response received on idle HTTP channel") {
return len(p), nil
}
if elf.Unwrap != nil {
return elf.Unwrap.Writer().Write(p)
}
return len(p), nil
}
func GetFilteredHTTPLogger() *log.Logger {
stdErrLogger := log.New(os.Stderr, "", log.LstdFlags) // essentially what the default logger is.
return log.New(&ErrorLogFilter{Unwrap: stdErrLogger}, "", 0)
}

View File

@@ -1,82 +0,0 @@
package internal
import (
"bytes"
"log"
"log/slog"
"net/http"
"strings"
"testing"
)
func TestErrorLogFilter(t *testing.T) {
var buf bytes.Buffer
destLogger := log.New(&buf, "", 0)
errorFilterWriter := &ErrorLogFilter{Unwrap: destLogger}
testErrorLogger := log.New(errorFilterWriter, "", 0)
// Test Case 1: Suppressed message
suppressedMessage := "http: proxy error: context canceled"
testErrorLogger.Println(suppressedMessage)
if buf.Len() != 0 {
t.Errorf("Suppressed message was written to output. Output: %q", buf.String())
}
buf.Reset()
// Test Case 2: Allowed message
allowedMessage := "http: another error occurred"
testErrorLogger.Println(allowedMessage)
output := buf.String()
if !strings.Contains(output, allowedMessage) {
t.Errorf("Allowed message was not written to output. Output: %q", output)
}
if !strings.HasSuffix(output, "\n") {
t.Errorf("Allowed message output is missing newline. Output: %q", output)
}
buf.Reset()
// Test Case 3: Partially matching message (should be suppressed)
partiallyMatchingMessage := "Some other log before http: proxy error: context canceled and after"
testErrorLogger.Println(partiallyMatchingMessage)
if buf.Len() != 0 {
t.Errorf("Partially matching message was written to output. Output: %q", buf.String())
}
buf.Reset()
}
func TestGetRequestLogger(t *testing.T) {
// Test case 1: Normal request with Host header
req1, _ := http.NewRequest("GET", "http://example.com/test", nil)
req1.Host = "example.com"
logger := slog.Default()
reqLogger := GetRequestLogger(logger, req1)
// We can't easily test the actual log output without setting up a test handler,
// but we can verify the function doesn't panic and returns a logger
if reqLogger == nil {
t.Error("GetRequestLogger returned nil")
}
// Test case 2: Subrequest auth mode with X-Forwarded-Host
req2, _ := http.NewRequest("GET", "http://test.com/auth", nil)
req2.Host = ""
req2.Header.Set("X-Forwarded-Host", "original-site.com")
reqLogger2 := GetRequestLogger(logger, req2)
if reqLogger2 == nil {
t.Error("GetRequestLogger returned nil for X-Forwarded-Host case")
}
// Test case 3: No host information available
req3, _ := http.NewRequest("GET", "http://test.com/nohost", nil)
req3.Host = ""
reqLogger3 := GetRequestLogger(logger, req3)
if reqLogger3 == nil {
t.Error("GetRequestLogger returned nil for no host case")
}
}

View File

@@ -78,6 +78,10 @@ type Server struct {
logger *slog.Logger
}
func (s *Server) GetLogger(subsystem string) *slog.Logger {
return s.logger.With("subsystem", subsystem)
}
func (s *Server) getTokenKeyfunc() jwt.Keyfunc {
// return ED25519 key if HS512 is not set
if len(s.hs512Secret) == 0 {

View File

@@ -120,6 +120,14 @@ func New(opts Options) (*Server, error) {
logger: opts.Logger,
}
if opts.Policy.Logging != nil {
var err error
result.logger, err = opts.Policy.ApplyLogFilters(opts.Logger)
if err != nil {
return nil, fmt.Errorf("can't create log filters: %w", err)
}
}
mux := http.NewServeMux()
xess.Mount(mux)

36
lib/logging/stdlib.go Normal file
View File

@@ -0,0 +1,36 @@
package logging
import (
"bytes"
"context"
"log"
"log/slog"
"time"
)
// handlerWriter is an io.Writer that calls a Handler.
// It is used to link the default log.Logger to the default slog.Logger.
//
// Adapted from https://cs.opensource.google/go/go/+/refs/tags/go1.24.5:src/log/slog/logger.go;l=62
type handlerWriter struct {
h slog.Handler
level slog.Leveler
}
func (w *handlerWriter) Write(buf []byte) (int, error) {
level := w.level.Level()
if !w.h.Enabled(context.Background(), level) {
return 0, nil
}
var pc uintptr
// Remove final newline.
origLen := len(buf) // Report that the entire buf was written.
buf = bytes.TrimSuffix(buf, []byte{'\n'})
r := slog.NewRecord(time.Now(), level, string(buf), pc)
return origLen, w.h.Handle(context.Background(), r)
}
func StdlibLogger(next slog.Handler, level slog.Level) *log.Logger {
return log.New(&handlerWriter{h: next, level: level}, "", log.LstdFlags)
}

View File

@@ -14,16 +14,11 @@ import (
)
var (
filterInvocations = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "techaro",
Subsystem: "anubis",
Name: "slog_filter_invocations",
}, []string{"name"})
filterExecutionTime = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "techaro",
Subsystem: "anubis",
Name: "slog_filter_execution_time_nanoseconds",
Namespace: "anubis",
Subsystem: "slog",
Name: "filter_execution_time_nanoseconds",
Help: "How long each log filter took to execute (nanoseconds)",
Buckets: []float64{10, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 200000, 500000, 1000000, 2000000, 5000000, 10000000}, // 10 nanoseconds to 10 milliseconds
}, []string{"name"})
)
@@ -76,7 +71,6 @@ func (f Filter) Filter(ctx context.Context, r slog.Record) bool {
}
dur := time.Since(t0)
filterExecutionTime.WithLabelValues(f.name).Observe(float64(dur.Nanoseconds()))
filterInvocations.WithLabelValues(f.name).Inc()
//f.log.Debug("filter execution", "dur", dur.Nanoseconds())
if val, ok := result.(types.Bool); ok {

View File

@@ -8,8 +8,10 @@ import (
"log/slog"
"sync/atomic"
"github.com/TecharoHQ/anubis/lib/logging"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/policy/expressions"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/TecharoHQ/anubis/lib/thoth"
"github.com/prometheus/client_golang/prometheus"
@@ -35,17 +37,42 @@ type ParsedConfig struct {
Thresholds []*Threshold
DNSBL bool
Impressum *config.Impressum
Logging *config.Logging
OpenGraph config.OpenGraph
DefaultDifficulty int
StatusCodes config.StatusCodes
Store store.Interface
}
func (pc *ParsedConfig) ApplyLogFilters(base *slog.Logger) (*slog.Logger, error) {
var errs []error
var filters []logging.Filterer
for _, f := range pc.Logging.Filters {
filter, err := expressions.NewFilter(base, f.Name, f.Expression.String())
if err != nil {
errs = append(errs, fmt.Errorf("filter %s invalid: %w", f.Name, err))
continue
}
filters = append(filters, filter)
}
result := slog.New(logging.NewFilterHandler(base.Handler(), filters...))
slog.SetDefault(result)
if len(errs) != 0 {
return nil, errors.Join(errs...)
}
return result, nil
}
func newParsedConfig(orig *config.Config) *ParsedConfig {
return &ParsedConfig{
orig: orig,
OpenGraph: orig.OpenGraph,
StatusCodes: orig.StatusCodes,
Logging: orig.Logging,
}
}