diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go index 14f45609..74951e12 100644 --- a/cmd/anubis/main.go +++ b/cmd/anubis/main.go @@ -31,6 +31,7 @@ import ( "github.com/TecharoHQ/anubis/data" "github.com/TecharoHQ/anubis/internal" libanubis "github.com/TecharoHQ/anubis/lib" + "github.com/TecharoHQ/anubis/lib/logging" botPolicy "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/config" "github.com/TecharoHQ/anubis/lib/thoth" @@ -250,7 +251,7 @@ func main() { return } - internal.InitSlog(*slogLevel) + slog.SetDefault(slog.New(logging.Init(*slogLevel))) internal.SetHealth("anubis", healthv1.HealthCheckResponse_NOT_SERVING) if *healthcheck { @@ -447,7 +448,10 @@ func main() { h = internal.XForwardedForUpdate(*xffStripPrivate, h) h = internal.JA4H(h) - srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()} + srv := http.Server{ + Handler: h, + ErrorLog: logging.StdlibLogger(s.GetLogger("http-server").Handler(), slog.LevelDebug), + } listener, listenerUrl := setupListener(*bindNetwork, *bind) slog.Info( "listening", @@ -507,7 +511,10 @@ func metricsServer(ctx context.Context, done func()) { } }) - srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()} + srv := http.Server{ + Handler: mux, + ErrorLog: logging.StdlibLogger(slog.With("subsystem", "metrics-server").Handler(), slog.LevelDebug), + } listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind) slog.Debug("listening for metrics", "url", metricsUrl) diff --git a/cmd/containerbuild/main.go b/cmd/containerbuild/main.go index ce1995d7..5753a468 100644 --- a/cmd/containerbuild/main.go +++ b/cmd/containerbuild/main.go @@ -10,7 +10,7 @@ import ( "path/filepath" "strings" - "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/lib/logging" "github.com/facebookgo/flagenv" ) @@ -28,7 +28,7 @@ func main() { flagenv.Parse() flag.Parse() - internal.InitSlog(*slogLevel) + slog.SetDefault(slog.New(logging.Init(*slogLevel))) koDockerRepo := strings.TrimSuffix(*dockerRepo, "/"+filepath.Base(*dockerRepo)) diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index f62af353..2380e21e 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -132,6 +132,23 @@ dnsbl: false # +# Logging settings for Anubis +logging: + # CEL log filters. Note that this is a very powerful feature and it is very easy to get + # yourself into trouble with this. Avoid using log filters unless you are running into + # circumstances like https://github.com/TecharoHQ/anubis/issues/942. This has a nonzero + # impact on logging, which spirals out into a more than zero impact on Anubis' + # performance and memory usage. + filters: + # Every filter must have a name and an expression. You can use the same expression + # syntax as you can with bots or thresholds. + - name: "http-stdlib" + expression: msg.startsWith("http:") + - name: "context-canceled" + expression: msg.contains("context canceled") + - name: "http-pipelining" + expression: msg.contains("Unsolicited response received on idle HTTP channel") + # Open Graph passthrough configuration, see here for more information: # https://anubis.techaro.lol/docs/admin/configuration/open-graph/ openGraph: diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 8f717c6d..9f13e519 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix lock convoy problem in decaymap ([#1103](https://github.com/TecharoHQ/anubis/issues/1103)) +- [Log filtering](./admin/configuration/expressions.mdx#log-filtering) rules have been added. This allows users to write custom log filtering logic. - Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086)) - Add validation warning when persistent storage is used without setting signing keys - Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925)) diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx index 6cb79546..b6f137ed 100644 --- a/docs/docs/admin/configuration/expressions.mdx +++ b/docs/docs/admin/configuration/expressions.mdx @@ -99,6 +99,10 @@ For this rule, if a request comes in matching [the signature of the `go get` com Anubis exposes the following variables to expressions: +### Bot expressions + +Bot expressions are used for evaluating [bot rules](../policies.mdx#bot-policies). + | Name | Type | Explanation | Example | | :-------------- | :-------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- | | `headers` | `map[string, string]` | The [headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers) of the request being processed. | `{"User-Agent": "Mozilla/5.0 Gecko/20100101 Firefox/137.0"}` | @@ -182,6 +186,17 @@ Something to keep in mind about system load average is that it is not aware of t Also keep in mind that this does not account for other kinds of latency like I/O latency. A system can have its web applications unresponsive due to high latency from a MySQL server but still have that web application server report a load near or at zero. +### Log filtering + +Log filters are run on every time Anubis logs data. These are high throughput filters and should be written with care. + +| Name | Type | Explanation | Example | +| :------ | :-------------------- | :----------------------------------------------------------------------------------------------------- | --------------------------------------- | +| `time` | Timestamp | The time that the log line was emitted. | `2025-08-18T06:45:38-04:00` | +| `msg` | `string` | The text-based message for the given log line. | `"invalid response"` | +| `level` | `string` | The [log level](https://pkg.go.dev/log/slog#Level) for the log message. | `"INFO"` | +| `attrs` | `map[string, string]` | The key -> value attributes for the given log line. Note that this is an expensive variable to access. | `{"err": "internal: the sun exploded"}` | + ## Functions exposed to Anubis expressions Anubis expressions can be augmented with the following functions: diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index f95821df..60309498 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -123,6 +123,10 @@ remote_addresses: Anubis has support for showing imprint / impressum information. This is defined in the `impressum` block of your configuration. See [Imprint / Impressum configuration](./configuration/impressum.mdx) for more information. +## Logging + +Anubis has support for configuring log filtering using expressions. See the [log filters](./configuration/expressions.mdx#log-filters) of the [expression](./configuration/expressions.mdx) documentation for more information. + ## Storage backends Anubis needs to store temporary data in order to determine if a user is legitimate or not. Administrators should choose a storage backend based on their infrastructure needs. Each backend has its own advantages and disadvantages. diff --git a/internal/log.go b/internal/log.go index fb2f2509..a9ba04ef 100644 --- a/internal/log.go +++ b/internal/log.go @@ -1,31 +1,10 @@ package internal import ( - "fmt" - "log" "log/slog" "net/http" - "os" - "strings" ) -func InitSlog(level string) { - var programLevel slog.Level - if err := (&programLevel).UnmarshalText([]byte(level)); err != nil { - fmt.Fprintf(os.Stderr, "invalid log level %s: %v, using info\n", level, err) - programLevel = slog.LevelInfo - } - - leveler := &slog.LevelVar{} - leveler.Set(programLevel) - - h := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{ - AddSource: true, - Level: leveler, - }) - slog.SetDefault(slog.New(h)) -} - func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger { host := r.Host if host == "" { @@ -44,27 +23,3 @@ func GetRequestLogger(base *slog.Logger, r *http.Request) *slog.Logger { "x-real-ip", r.Header.Get("X-Real-Ip"), ) } - -// ErrorLogFilter is used to suppress "context canceled" logs from the http server when a request is canceled (e.g., when a client disconnects). -type ErrorLogFilter struct { - Unwrap *log.Logger -} - -func (elf *ErrorLogFilter) Write(p []byte) (n int, err error) { - logMessage := string(p) - if strings.Contains(logMessage, "context canceled") { - return len(p), nil // Suppress the log by doing nothing - } - if strings.Contains(logMessage, "Unsolicited response received on idle HTTP channel") { - return len(p), nil - } - if elf.Unwrap != nil { - return elf.Unwrap.Writer().Write(p) - } - return len(p), nil -} - -func GetFilteredHTTPLogger() *log.Logger { - stdErrLogger := log.New(os.Stderr, "", log.LstdFlags) // essentially what the default logger is. - return log.New(&ErrorLogFilter{Unwrap: stdErrLogger}, "", 0) -} diff --git a/internal/log_test.go b/internal/log_test.go deleted file mode 100644 index 83c92189..00000000 --- a/internal/log_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package internal - -import ( - "bytes" - "log" - "log/slog" - "net/http" - "strings" - "testing" -) - -func TestErrorLogFilter(t *testing.T) { - var buf bytes.Buffer - destLogger := log.New(&buf, "", 0) - errorFilterWriter := &ErrorLogFilter{Unwrap: destLogger} - testErrorLogger := log.New(errorFilterWriter, "", 0) - - // Test Case 1: Suppressed message - suppressedMessage := "http: proxy error: context canceled" - testErrorLogger.Println(suppressedMessage) - - if buf.Len() != 0 { - t.Errorf("Suppressed message was written to output. Output: %q", buf.String()) - } - buf.Reset() - - // Test Case 2: Allowed message - allowedMessage := "http: another error occurred" - testErrorLogger.Println(allowedMessage) - - output := buf.String() - if !strings.Contains(output, allowedMessage) { - t.Errorf("Allowed message was not written to output. Output: %q", output) - } - if !strings.HasSuffix(output, "\n") { - t.Errorf("Allowed message output is missing newline. Output: %q", output) - } - buf.Reset() - - // Test Case 3: Partially matching message (should be suppressed) - partiallyMatchingMessage := "Some other log before http: proxy error: context canceled and after" - testErrorLogger.Println(partiallyMatchingMessage) - - if buf.Len() != 0 { - t.Errorf("Partially matching message was written to output. Output: %q", buf.String()) - } - buf.Reset() -} - -func TestGetRequestLogger(t *testing.T) { - // Test case 1: Normal request with Host header - req1, _ := http.NewRequest("GET", "http://example.com/test", nil) - req1.Host = "example.com" - - logger := slog.Default() - reqLogger := GetRequestLogger(logger, req1) - - // We can't easily test the actual log output without setting up a test handler, - // but we can verify the function doesn't panic and returns a logger - if reqLogger == nil { - t.Error("GetRequestLogger returned nil") - } - - // Test case 2: Subrequest auth mode with X-Forwarded-Host - req2, _ := http.NewRequest("GET", "http://test.com/auth", nil) - req2.Host = "" - req2.Header.Set("X-Forwarded-Host", "original-site.com") - - reqLogger2 := GetRequestLogger(logger, req2) - if reqLogger2 == nil { - t.Error("GetRequestLogger returned nil for X-Forwarded-Host case") - } - - // Test case 3: No host information available - req3, _ := http.NewRequest("GET", "http://test.com/nohost", nil) - req3.Host = "" - - reqLogger3 := GetRequestLogger(logger, req3) - if reqLogger3 == nil { - t.Error("GetRequestLogger returned nil for no host case") - } -} diff --git a/lib/anubis.go b/lib/anubis.go index 6a44e1fc..a68c3fe9 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -78,6 +78,10 @@ type Server struct { logger *slog.Logger } +func (s *Server) GetLogger(subsystem string) *slog.Logger { + return s.logger.With("subsystem", subsystem) +} + func (s *Server) getTokenKeyfunc() jwt.Keyfunc { // return ED25519 key if HS512 is not set if len(s.hs512Secret) == 0 { diff --git a/lib/config.go b/lib/config.go index c9437e67..af22be50 100644 --- a/lib/config.go +++ b/lib/config.go @@ -120,6 +120,14 @@ func New(opts Options) (*Server, error) { logger: opts.Logger, } + if opts.Policy.Logging != nil { + var err error + result.logger, err = opts.Policy.ApplyLogFilters(opts.Logger) + if err != nil { + return nil, fmt.Errorf("can't create log filters: %w", err) + } + } + mux := http.NewServeMux() xess.Mount(mux) diff --git a/lib/logging/stdlib.go b/lib/logging/stdlib.go new file mode 100644 index 00000000..5304a1b7 --- /dev/null +++ b/lib/logging/stdlib.go @@ -0,0 +1,36 @@ +package logging + +import ( + "bytes" + "context" + "log" + "log/slog" + "time" +) + +// handlerWriter is an io.Writer that calls a Handler. +// It is used to link the default log.Logger to the default slog.Logger. +// +// Adapted from https://cs.opensource.google/go/go/+/refs/tags/go1.24.5:src/log/slog/logger.go;l=62 +type handlerWriter struct { + h slog.Handler + level slog.Leveler +} + +func (w *handlerWriter) Write(buf []byte) (int, error) { + level := w.level.Level() + if !w.h.Enabled(context.Background(), level) { + return 0, nil + } + var pc uintptr + + // Remove final newline. + origLen := len(buf) // Report that the entire buf was written. + buf = bytes.TrimSuffix(buf, []byte{'\n'}) + r := slog.NewRecord(time.Now(), level, string(buf), pc) + return origLen, w.h.Handle(context.Background(), r) +} + +func StdlibLogger(next slog.Handler, level slog.Level) *log.Logger { + return log.New(&handlerWriter{h: next, level: level}, "", log.LstdFlags) +} diff --git a/lib/policy/expressions/logging.go b/lib/policy/expressions/logging.go index ea0e8021..718af4f4 100644 --- a/lib/policy/expressions/logging.go +++ b/lib/policy/expressions/logging.go @@ -14,16 +14,11 @@ import ( ) var ( - filterInvocations = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "techaro", - Subsystem: "anubis", - Name: "slog_filter_invocations", - }, []string{"name"}) - filterExecutionTime = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "techaro", - Subsystem: "anubis", - Name: "slog_filter_execution_time_nanoseconds", + Namespace: "anubis", + Subsystem: "slog", + Name: "filter_execution_time_nanoseconds", + Help: "How long each log filter took to execute (nanoseconds)", Buckets: []float64{10, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 200000, 500000, 1000000, 2000000, 5000000, 10000000}, // 10 nanoseconds to 10 milliseconds }, []string{"name"}) ) @@ -76,7 +71,6 @@ func (f Filter) Filter(ctx context.Context, r slog.Record) bool { } dur := time.Since(t0) filterExecutionTime.WithLabelValues(f.name).Observe(float64(dur.Nanoseconds())) - filterInvocations.WithLabelValues(f.name).Inc() //f.log.Debug("filter execution", "dur", dur.Nanoseconds()) if val, ok := result.(types.Bool); ok { diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 5493d8dd..2953a70d 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -8,8 +8,10 @@ import ( "log/slog" "sync/atomic" + "github.com/TecharoHQ/anubis/lib/logging" "github.com/TecharoHQ/anubis/lib/policy/checker" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/policy/expressions" "github.com/TecharoHQ/anubis/lib/store" "github.com/TecharoHQ/anubis/lib/thoth" "github.com/prometheus/client_golang/prometheus" @@ -35,17 +37,42 @@ type ParsedConfig struct { Thresholds []*Threshold DNSBL bool Impressum *config.Impressum + Logging *config.Logging OpenGraph config.OpenGraph DefaultDifficulty int StatusCodes config.StatusCodes Store store.Interface } +func (pc *ParsedConfig) ApplyLogFilters(base *slog.Logger) (*slog.Logger, error) { + var errs []error + var filters []logging.Filterer + + for _, f := range pc.Logging.Filters { + filter, err := expressions.NewFilter(base, f.Name, f.Expression.String()) + if err != nil { + errs = append(errs, fmt.Errorf("filter %s invalid: %w", f.Name, err)) + continue + } + filters = append(filters, filter) + } + + result := slog.New(logging.NewFilterHandler(base.Handler(), filters...)) + slog.SetDefault(result) + + if len(errs) != 0 { + return nil, errors.Join(errs...) + } + + return result, nil +} + func newParsedConfig(orig *config.Config) *ParsedConfig { return &ParsedConfig{ orig: orig, OpenGraph: orig.OpenGraph, StatusCodes: orig.StatusCodes, + Logging: orig.Logging, } }