Compare commits

..

2 Commits

Author SHA1 Message Date
Xe Iaso
be42c5accf fix(lib/store/bbolt): run cleanup every hour instead of every 5 minutes
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-07-06 01:15:11 +00:00
Xe Iaso
7d0c58d1a8 fix: make ogtags and dnsbl use the Store instead of memory (#760)
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-07-05 16:17:46 -04:00
23 changed files with 221 additions and 192 deletions

View File

@@ -31,7 +31,6 @@ botstopper
BPort
Brightbot
broked
byteslice
Bytespider
cachebuster
cachediptoasn

View File

@@ -231,20 +231,6 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu
return rp, nil
}
func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) {
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for {
select {
case <-ticker.C:
s.CleanupDecayMap()
case <-ctx.Done():
return
}
}
}
func main() {
flagenv.Parse()
flag.Parse()
@@ -421,7 +407,6 @@ func main() {
wg.Add(1)
go metricsServer(ctx, wg.Done)
}
go startDecayMapCleanup(ctx, s)
var h http.Handler
h = s

View File

@@ -24,9 +24,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Remove the "Success" interstitial after a proof of work challenge is concluded.
- Anubis now has the concept of [storage backends](./admin/policies.mdx#storage-backends). These allow you to change how Anubis stores temporary data (in memory, on the disk, or in Valkey). If you run Anubis in an environment where you have a low amount of memory available for Anubis (eg: less than 64 megabytes), be sure to configure the [`bbolt`](./admin/policies.mdx#bbolt) storage backend.
- The challenge issuance and validation process has been rewritten from scratch. Instead of generating challenge strings from request metadata (under the assumption that the values being compared against are stable), Anubis now generates random data for each challenge. This data is stored in the active [storage backend](./admin/policies.mdx#storage-backends) for up to 30 minutes. Fixes [#564](https://github.com/TecharoHQ/anubis/issues/564), [#746](https://github.com/TecharoHQ/anubis/issues/746), and other similar instances of this issue.
- Make the [Open Graph](./admin/configuration/open-graph.mdx) subsystem and DNSBL subsystem use [storage backends](./admin/policies.mdx#storage-backends) instead of storing everything in memory by default.
- Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742))
- Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751))
- Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape
- The [bbolt storage backend](./admin/policies.mdx#bbolt) now runs its cleanup every hour instead of every five minutes.
### Potentially breaking changes

View File

@@ -289,9 +289,10 @@ When Anubis opens a bbolt database, it takes an exclusive lock on that database.
The `bbolt` backend takes the following configuration options:
| Name | Type | Example | Description |
| :----- | :--- | :----------------- | :--------------------------------------------------------------------------------------------------------------------------- |
| `path` | path | `/data/anubis.bdb` | The filesystem path for the Anubis bbolt database. Anubis requires write access to the folder containing the bbolt database. |
| Name | Type | Example | Description |
| :------- | :----- | :----------------- | :-------------------------------------------------------------------------------------------------------------------------------- |
| `bucket` | string | `anubis` | The bbolt bucket that Anubis should place all its data into. If this is not set, then Anubis will default to the bucket `anubis`. |
| `path` | path | `/data/anubis.bdb` | The filesystem path for the Anubis bbolt database. Anubis requires write access to the folder containing the bbolt database. |
Example:

View File

@@ -1,6 +1,7 @@
package ogtags
import (
"context"
"errors"
"log/slog"
"net/url"
@@ -8,7 +9,7 @@ import (
)
// GetOGTags is the main function that retrieves Open Graph tags for a URL
func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) {
func (c *OGTagCache) GetOGTags(ctx context.Context, url *url.URL, originalHost string) (map[string]string, error) {
if url == nil {
return nil, errors.New("nil URL provided, cannot fetch OG tags")
}
@@ -21,12 +22,12 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
cacheKey := c.generateCacheKey(target, originalHost)
// Check cache first
if cachedTags := c.checkCache(cacheKey); cachedTags != nil {
if cachedTags := c.checkCache(ctx, cacheKey); cachedTags != nil {
return cachedTags, nil
}
// Fetch HTML content, passing the original host
doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey)
doc, err := c.fetchHTMLDocumentWithCache(ctx, target, originalHost, cacheKey)
if errors.Is(err, syscall.ECONNREFUSED) {
slog.Debug("Connection refused, returning empty tags")
return nil, nil
@@ -42,7 +43,7 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
ogTags := c.extractOGTags(doc)
// Store in cache
c.cache.Set(cacheKey, ogTags, c.ogTimeToLive)
c.cache.Set(ctx, cacheKey, ogTags, c.ogTimeToLive)
return ogTags, nil
}
@@ -59,8 +60,8 @@ func (c *OGTagCache) generateCacheKey(target string, originalHost string) string
}
// checkCache checks if we have the tags cached and returns them if so
func (c *OGTagCache) checkCache(cacheKey string) map[string]string {
if cachedTags, ok := c.cache.Get(cacheKey); ok {
func (c *OGTagCache) checkCache(ctx context.Context, cacheKey string) map[string]string {
if cachedTags, err := c.cache.Get(ctx, cacheKey); err == nil {
slog.Debug("cache hit", "tags", cachedTags)
return cachedTags
}

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestCacheReturnsDefault(t *testing.T) {
@@ -21,14 +22,14 @@ func TestCacheReturnsDefault(t *testing.T) {
TimeToLive: time.Minute,
ConsiderHost: false,
Override: want,
})
}, memory.New(t.Context()))
u, err := url.Parse("https://anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
result, err := cache.GetOGTags(t.Context(), u, "anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
@@ -49,7 +50,7 @@ func TestCheckCache(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Set up test data
urlStr := "http://example.com/page"
@@ -60,16 +61,16 @@ func TestCheckCache(t *testing.T) {
cacheKey := cache.generateCacheKey(urlStr, "example.com")
// Test cache miss
tags := cache.checkCache(cacheKey)
tags := cache.checkCache(t.Context(), cacheKey)
if tags != nil {
t.Errorf("expected nil tags on cache miss, got %v", tags)
}
// Manually add to cache
cache.cache.Set(cacheKey, expectedTags, time.Minute)
cache.cache.Set(t.Context(), cacheKey, expectedTags, time.Minute)
// Test cache hit
tags = cache.checkCache(cacheKey)
tags = cache.checkCache(t.Context(), cacheKey)
if tags == nil {
t.Fatal("expected non-nil tags on cache hit, got nil")
}
@@ -112,7 +113,7 @@ func TestGetOGTags(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Parse the test server URL
parsedURL, err := url.Parse(ts.URL)
@@ -122,7 +123,7 @@ func TestGetOGTags(t *testing.T) {
// Test fetching OG tags from the test server
// Pass the host from the parsed test server URL
ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags: %v", err)
}
@@ -142,14 +143,14 @@ func TestGetOGTags(t *testing.T) {
// Test fetching OG tags from the cache
// Pass the host from the parsed test server URL
ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err = cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
// Test fetching OG tags from the cache (3rd time)
// Pass the host from the parsed test server URL
newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
newOgTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
@@ -263,10 +264,10 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: tc.ogCacheConsiderHost,
})
}, memory.New(t.Context()))
for i, req := range tc.requests {
ogTags, err := cache.GetOGTags(parsedURL, req.host)
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, req.host)
if err != nil {
t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err)
continue // Skip further checks for this request if error occurred

View File

@@ -20,8 +20,8 @@ var (
// fetchHTMLDocumentWithCache fetches the HTML document from the given URL string,
// preserving the original host header.
func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil)
func (c *OGTagCache) fetchHTMLDocumentWithCache(ctx context.Context, urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
if err != nil {
return nil, fmt.Errorf("failed to create http request: %w", err)
}
@@ -41,7 +41,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
slog.Debug("og: request timed out", "url", urlStr)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
}
return nil, fmt.Errorf("http get failed: %w", err)
}
@@ -56,7 +56,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
if resp.StatusCode != http.StatusOK {
slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
return nil, fmt.Errorf("%w: page not found", ErrOgHandled)
}

View File

@@ -1,6 +1,7 @@
package ogtags
import (
"context"
"fmt"
"io"
"net/http"
@@ -11,6 +12,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@@ -85,8 +87,8 @@ func TestFetchHTMLDocument(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
}, memory.New(t.Context()))
doc, err := cache.fetchHTMLDocument(t.Context(), ts.URL, "anything")
if tt.expectError {
if err == nil {
@@ -116,9 +118,9 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
doc, err := cache.fetchHTMLDocument(t.Context(), "http://invalid.url.that.doesnt.exist.example", "anything")
if err == nil {
t.Error("expected error for invalid URL, got nil")
@@ -130,7 +132,7 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
}
// fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call
func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) {
func (c *OGTagCache) fetchHTMLDocument(ctx context.Context, urlStr string, originalHost string) (*html.Node, error) {
cacheKey := c.generateCacheKey(urlStr, originalHost)
return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey)
return c.fetchHTMLDocumentWithCache(ctx, urlStr, originalHost, cacheKey)
}

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestIntegrationGetOGTags(t *testing.T) {
@@ -110,7 +111,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Create URL for test
testURL, _ := url.Parse(ts.URL)
@@ -119,7 +120,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
// Get OG tags
// Pass the host from the test URL
ogTags, err := cache.GetOGTags(testURL, testURL.Host)
ogTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
// Check error expectation
if tc.expectError {
@@ -147,7 +148,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
// Test cache retrieval
// Pass the host from the test URL
cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host)
cachedOGTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}

View File

@@ -7,6 +7,7 @@ import (
"testing"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@@ -30,7 +31,7 @@ func BenchmarkGetTarget(b *testing.B) {
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
cache := NewOGTagCache(tt.target, config.OpenGraph{})
cache := NewOGTagCache(tt.target, config.OpenGraph{}, memory.New(b.Context()))
urls := make([]*url.URL, len(tt.paths))
for i, path := range tt.paths {
u, _ := url.Parse(path)
@@ -66,7 +67,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
</head><body><div><p>Content</p></div></body></html>`,
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(b.Context()))
docs := make([]*html.Node, len(htmlSamples))
for i, sample := range htmlSamples {
@@ -84,7 +85,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
// Memory usage test
func TestMemoryUsage(t *testing.T) {
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(t.Context()))
// Force GC and wait for it to complete
runtime.GC()

View File

@@ -9,8 +9,8 @@ import (
"strings"
"time"
"github.com/TecharoHQ/anubis/decaymap"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store"
)
const (
@@ -22,7 +22,7 @@ const (
)
type OGTagCache struct {
cache *decaymap.Impl[string, map[string]string]
cache store.JSON[map[string]string]
targetURL *url.URL
client *http.Client
@@ -36,7 +36,7 @@ type OGTagCache struct {
ogOverride map[string]string
}
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
func NewOGTagCache(target string, conf config.OpenGraph, backend store.Interface) *OGTagCache {
// Predefined approved tags and prefixes
defaultApprovedTags := []string{"description", "keywords", "author"}
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
@@ -77,7 +77,10 @@ func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
}
return &OGTagCache{
cache: decaymap.New[string, map[string]string](),
cache: store.JSON[map[string]string]{
Underlying: backend,
Prefix: "ogtags:",
},
targetURL: parsedTargetURL,
ogPassthrough: conf.Enabled,
ogTimeToLive: conf.TimeToLive,
@@ -124,9 +127,3 @@ func (c *OGTagCache) getTarget(u *url.URL) string {
return sb.String()
}
func (c *OGTagCache) Cleanup() {
if c.cache != nil {
c.cache.Cleanup()
}
}

View File

@@ -1,12 +1,14 @@
package ogtags
import (
"context"
"net/url"
"strings"
"testing"
"unicode/utf8"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@@ -46,7 +48,7 @@ func FuzzGetTarget(f *testing.F) {
}
// Create cache - should not panic
cache := NewOGTagCache(target, config.OpenGraph{})
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
// Create URL
u := &url.URL{
@@ -130,7 +132,7 @@ func FuzzExtractOGTags(f *testing.F) {
return
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
// Should not panic
tags := cache.extractOGTags(doc)
@@ -186,7 +188,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
t.Skip()
}
cache := NewOGTagCache(target, config.OpenGraph{})
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
u := &url.URL{Path: path, RawQuery: query}
result := cache.getTarget(u)
@@ -243,7 +245,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
},
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
// Should not panic
property, content := cache.extractMetaTagInfo(node)
@@ -296,7 +298,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
for _, input := range inputs {
b.Run(input.name, func(b *testing.B) {
cache := NewOGTagCache(input.target, config.OpenGraph{})
cache := NewOGTagCache(input.target, config.OpenGraph{}, memory.New(context.Background()))
u := &url.URL{Path: input.path, RawQuery: input.query}
b.ResetTimer()

View File

@@ -15,6 +15,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestNewOGTagCache(t *testing.T) {
@@ -44,7 +45,7 @@ func TestNewOGTagCache(t *testing.T) {
Enabled: tt.ogPassthrough,
TimeToLive: tt.ogTimeToLive,
ConsiderHost: false,
})
}, memory.New(t.Context()))
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@@ -84,7 +85,7 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
Enabled: true,
TimeToLive: 5 * time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@@ -169,7 +170,7 @@ func TestGetTarget(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
u := &url.URL{
Path: tt.path,
@@ -242,14 +243,14 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Create a dummy URL for the request (path and query matter)
testReqURL, _ := url.Parse("/some/page?query=1")
// Get OG tags
// Pass an empty string for host, as it's irrelevant for unix sockets
ogTags, err := cache.GetOGTags(testReqURL, "")
ogTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags failed for unix socket: %v", err)
@@ -265,7 +266,7 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
// Test cache retrieval (should hit cache)
// Pass an empty string for host
cachedTags, err := cache.GetOGTags(testReqURL, "")
cachedTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err)
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@@ -17,7 +18,7 @@ func TestExtractOGTags(t *testing.T) {
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
}, memory.New(t.Context()))
// Manually set approved tags/prefixes based on the user request for clarity
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}
@@ -198,7 +199,7 @@ func TestExtractMetaTagInfo(t *testing.T) {
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
}, memory.New(t.Context()))
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}

View File

@@ -70,7 +70,6 @@ type Server struct {
next http.Handler
mux *http.ServeMux
policy *policy.ParsedConfig
DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse]
OGTags *ogtags.OGTagCache
ed25519Priv ed25519.PrivateKey
hs512Secret []byte
@@ -279,15 +278,16 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
}
func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string, lg *slog.Logger) bool {
db := &store.JSON[dnsbl.DroneBLResponse]{Underlying: s.store, Prefix: "dronebl:"}
if s.policy.DNSBL && ip != "" {
resp, ok := s.DNSBLCache.Get(ip)
if !ok {
resp, err := db.Get(r.Context(), ip)
if err != nil {
lg.Debug("looking up ip in dnsbl")
resp, err := dnsbl.Lookup(ip)
if err != nil {
lg.Error("can't look up ip in dnsbl", "err", err)
}
s.DNSBLCache.Set(ip, resp, 24*time.Hour)
db.Set(r.Context(), ip, resp, 24*time.Hour)
droneBLHits.WithLabelValues(resp.String()).Inc()
}
@@ -551,8 +551,3 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
Rules: &checker.List{},
}, nil
}
func (s *Server) CleanupDecayMap() {
s.DNSBLCache.Cleanup()
s.OGTags.Cleanup()
}

View File

@@ -15,9 +15,7 @@ import (
"github.com/TecharoHQ/anubis"
"github.com/TecharoHQ/anubis/data"
"github.com/TecharoHQ/anubis/decaymap"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/dnsbl"
"github.com/TecharoHQ/anubis/internal/ogtags"
"github.com/TecharoHQ/anubis/lib/challenge"
"github.com/TecharoHQ/anubis/lib/localization"
@@ -108,8 +106,7 @@ func New(opts Options) (*Server, error) {
hs512Secret: opts.HS512Secret,
policy: opts.Policy,
opts: opts,
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph),
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph, opts.Policy.Store),
store: opts.Policy.Store,
}

View File

@@ -138,7 +138,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
var ogTags map[string]string = nil
if s.opts.OpenGraph.Enabled {
var err error
ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host)
ogTags, err = s.OGTags.GetOGTags(r.Context(), r.URL, r.Host)
if err != nil {
lg.Error("failed to get OG tags", "err", err)
}

View File

@@ -2,6 +2,7 @@ package bbolt
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
@@ -11,85 +12,52 @@ import (
"go.etcd.io/bbolt"
)
// Sentinel error values used for testing and in admin-visible error messages.
var (
ErrBucketDoesNotExist = errors.New("bbolt: bucket does not exist")
ErrNotExists = errors.New("bbolt: value does not exist in store")
)
// Store implements store.Interface backed by bbolt[1].
//
// In essence, bbolt is a hierarchical key/value store with a twist: every value
// needs to belong to a bucket. Buckets can contain an infinite number of
// buckets. As such, Anubis nests values in buckets. Each value in the store
// is given its own bucket with two keys:
//
// 1. data - The raw data, usually in JSON
// 2. expiry - The expiry time formatted as a time.RFC3339Nano timestamp string
//
// When Anubis stores a new bit of data, it creates a new bucket for that value.
// This allows the cleanup phase to iterate over every bucket in the database and
// only scan the expiry times without having to decode the entire record.
//
// bbolt is not suitable for environments where multiple instance of Anubis need
// to read from and write to the same backend store. For that, use the valkey
// storage backend.
//
// [1]: https://github.com/etcd-io/bbolt
type Store struct {
bdb *bbolt.DB
type Item struct {
Data []byte `json:"data"`
Expires time.Time `json:"expires"`
}
type Store struct {
bucket []byte
bdb *bbolt.DB
}
// Delete a key from the datastore. If the key does not exist, return an error.
func (s *Store) Delete(ctx context.Context, key string) error {
return s.bdb.Update(func(tx *bbolt.Tx) error {
if tx.Bucket([]byte(key)) == nil {
bkt := tx.Bucket(s.bucket)
if bkt == nil {
return fmt.Errorf("%w: %q", ErrBucketDoesNotExist, string(s.bucket))
}
if bkt.Get([]byte(key)) == nil {
return fmt.Errorf("%w: %q", ErrNotExists, key)
}
return tx.DeleteBucket([]byte(key))
return bkt.Delete([]byte(key))
})
}
// Get a value from the datastore.
//
// Because each value is stored in its own bucket with data and expiry keys,
// two get operations are required:
//
// 1. Get the expiry key, parse as time.RFC3339Nano. If the key has expired, run deletion in the background and return a "key not found" error.
// 2. Get the data key, copy into the result byteslice, return it.
func (s *Store) Get(ctx context.Context, key string) ([]byte, error) {
var result []byte
var i Item
if err := s.bdb.View(func(tx *bbolt.Tx) error {
itemBucket := tx.Bucket([]byte(key))
if itemBucket == nil {
bkt := tx.Bucket(s.bucket)
if bkt == nil {
return fmt.Errorf("%w: %q", ErrBucketDoesNotExist, string(s.bucket))
}
bucketData := bkt.Get([]byte(key))
if bucketData == nil {
return fmt.Errorf("%w: %q", store.ErrNotFound, key)
}
expiryStr := itemBucket.Get([]byte("expiry"))
if expiryStr == nil {
return fmt.Errorf("[unexpected] %w: %q (expiry is nil)", store.ErrNotFound, key)
}
expiry, err := time.Parse(time.RFC3339Nano, string(expiryStr))
if err != nil {
return fmt.Errorf("[unexpected] %w: %w", store.ErrCantDecode, err)
}
if time.Now().After(expiry) {
go s.Delete(context.Background(), key)
return fmt.Errorf("%w: %q", store.ErrNotFound, key)
}
dataStr := itemBucket.Get([]byte("data"))
if dataStr == nil {
return fmt.Errorf("[unexpected] %w: %q (data is nil)", store.ErrNotFound, key)
}
result = make([]byte, len(dataStr))
if n := copy(result, dataStr); n != len(dataStr) {
return fmt.Errorf("[unexpected] %w: %d bytes copied of %d", store.ErrCantDecode, n, len(dataStr))
if err := json.Unmarshal(bucketData, &i); err != nil {
return fmt.Errorf("%w: %w", store.ErrCantDecode, err)
}
return nil
@@ -97,28 +65,32 @@ func (s *Store) Get(ctx context.Context, key string) ([]byte, error) {
return nil, err
}
return result, nil
if time.Now().After(i.Expires) {
go s.Delete(context.Background(), key)
return nil, fmt.Errorf("%w: %q", store.ErrNotFound, key)
}
return i.Data, nil
}
// Set a value into the store with a given expiry.
func (s *Store) Set(ctx context.Context, key string, value []byte, expiry time.Duration) error {
expires := time.Now().Add(expiry)
i := Item{
Data: value,
Expires: time.Now().Add(expiry),
}
data, err := json.Marshal(i)
if err != nil {
return fmt.Errorf("%w: %w", store.ErrCantEncode, err)
}
return s.bdb.Update(func(tx *bbolt.Tx) error {
valueBkt, err := tx.CreateBucketIfNotExists([]byte(key))
if err != nil {
return fmt.Errorf("%w: %w: %q (create bucket)", store.ErrCantEncode, err, key)
bkt := tx.Bucket(s.bucket)
if bkt == nil {
return fmt.Errorf("%w: %q", ErrBucketDoesNotExist, string(s.bucket))
}
if err := valueBkt.Put([]byte("expiry"), []byte(expires.Format(time.RFC3339Nano))); err != nil {
return fmt.Errorf("%w: %q (expiry)", store.ErrCantEncode, key)
}
if err := valueBkt.Put([]byte("data"), value); err != nil {
return fmt.Errorf("%w: %q (data)", store.ErrCantEncode, key)
}
return nil
return bkt.Put([]byte(key), data)
})
}
@@ -126,32 +98,35 @@ func (s *Store) cleanup(ctx context.Context) error {
now := time.Now()
return s.bdb.Update(func(tx *bbolt.Tx) error {
return tx.ForEach(func(key []byte, valueBkt *bbolt.Bucket) error {
var expiry time.Time
var err error
bkt := tx.Bucket(s.bucket)
if bkt == nil {
return fmt.Errorf("cache bucket %q does not exist", string(s.bucket))
}
expiryStr := valueBkt.Get([]byte("expiry"))
if expiryStr == nil {
slog.Warn("while running cleanup, expiry is not set somehow, file a bug?", "key", string(key))
return nil
return bkt.ForEach(func(k, v []byte) error {
var i Item
data := bkt.Get(k)
if data == nil {
return fmt.Errorf("%s in Cache bucket does not exist???", string(k))
}
expiry, err = time.Parse(time.RFC3339Nano, string(expiryStr))
if err != nil {
return fmt.Errorf("[unexpected] %w in bucket %q: %w", store.ErrCantDecode, string(key), err)
if err := json.Unmarshal(data, &i); err != nil {
return fmt.Errorf("can't unmarshal data at key %s: %w", string(k), err)
}
if now.After(expiry) {
return valueBkt.DeleteBucket(key)
if now.After(i.Expires) {
return bkt.Delete(k)
}
return nil
})
})
}
func (s *Store) cleanupThread(ctx context.Context) {
t := time.NewTicker(5 * time.Minute)
t := time.NewTicker(time.Hour)
defer t.Stop()
for {

View File

@@ -12,7 +12,8 @@ func TestImpl(t *testing.T) {
path := filepath.Join(t.TempDir(), "db")
t.Log(path)
data, err := json.Marshal(Config{
Path: path,
Path: path,
Bucket: "anubis",
})
if err != nil {
t.Fatal(err)

View File

@@ -21,12 +21,8 @@ func init() {
store.Register("bbolt", Factory{})
}
// Factory builds new instances of the bbolt storage backend according to
// configuration passed via a json.RawMessage.
type Factory struct{}
// Build parses and validates the bbolt storage backend Config and creates
// a new instance of it.
func (Factory) Build(ctx context.Context, data json.RawMessage) (store.Interface, error) {
var config Config
if err := json.Unmarshal([]byte(data), &config); err != nil {
@@ -37,13 +33,28 @@ func (Factory) Build(ctx context.Context, data json.RawMessage) (store.Interface
return nil, fmt.Errorf("%w: %w", store.ErrBadConfig, err)
}
if config.Bucket == "" {
config.Bucket = "anubis"
}
bdb, err := bbolt.Open(config.Path, 0600, nil)
if err != nil {
return nil, fmt.Errorf("can't open bbolt database %s: %w", config.Path, err)
}
if err := bdb.Update(func(tx *bbolt.Tx) error {
if _, err := tx.CreateBucketIfNotExists([]byte(config.Bucket)); err != nil {
return err
}
return nil
}); err != nil {
return nil, fmt.Errorf("can't create bbolt bucket %q: %w", config.Bucket, err)
}
result := &Store{
bdb: bdb,
bdb: bdb,
bucket: []byte(config.Bucket),
}
go result.cleanupThread(ctx)
@@ -51,8 +62,6 @@ func (Factory) Build(ctx context.Context, data json.RawMessage) (store.Interface
return result, nil
}
// Valid parses and validates the bbolt store Config or returns
// an error.
func (Factory) Valid(data json.RawMessage) error {
var config Config
if err := json.Unmarshal([]byte(data), &config); err != nil {
@@ -66,13 +75,11 @@ func (Factory) Valid(data json.RawMessage) error {
return nil
}
// Config is the bbolt storage backend configuration.
type Config struct {
// Path is the filesystem path of the database. The folder must be writable to Anubis.
Path string `json:"path"`
Path string `json:"path"`
Bucket string `json:"bucket,omitempty"`
}
// Valid validates the configuration including checking if its containing folder is writable.
func (c Config) Valid() error {
var errs []error
@@ -83,7 +90,6 @@ func (c Config) Valid() error {
if err := os.WriteFile(filepath.Join(dir, ".test-file"), []byte(""), 0600); err != nil {
errs = append(errs, ErrCantWriteToPath)
}
os.Remove(filepath.Join(dir, ".test-file"))
}
if len(errs) != 0 {

View File

@@ -43,13 +43,22 @@ func z[T any]() T { return *new(T) }
type JSON[T any] struct {
Underlying Interface
Prefix string
}
func (j *JSON[T]) Delete(ctx context.Context, key string) error {
if j.Prefix != "" {
key = j.Prefix + key
}
return j.Underlying.Delete(ctx, key)
}
func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) {
if j.Prefix != "" {
key = j.Prefix + key
}
data, err := j.Underlying.Get(ctx, key)
if err != nil {
return z[T](), err
@@ -64,6 +73,10 @@ func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) {
}
func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Duration) error {
if j.Prefix != "" {
key = j.Prefix + key
}
data, err := json.Marshal(value)
if err != nil {
return fmt.Errorf("%w: %w", ErrCantEncode, err)

50
lib/store/json_test.go Normal file
View File

@@ -0,0 +1,50 @@
package store_test
import (
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestJSON(t *testing.T) {
type data struct {
ID string `json:"id"`
}
st := memory.New(t.Context())
db := store.JSON[data]{
Underlying: st,
Prefix: "foo:",
}
if err := db.Set(t.Context(), "test", data{ID: t.Name()}, time.Minute); err != nil {
t.Fatal(err)
}
got, err := db.Get(t.Context(), "test")
if err != nil {
t.Fatal(err)
}
if got.ID != t.Name() {
t.Fatalf("got wrong data for key \"test\", wanted %q but got: %q", t.Name(), got.ID)
}
if err := db.Delete(t.Context(), "test"); err != nil {
t.Fatal(err)
}
if _, err := db.Get(t.Context(), "test"); err == nil {
t.Fatal("wanted invalid get to fail, it did not")
}
if err := st.Set(t.Context(), "foo:test", []byte("}"), time.Minute); err != nil {
t.Fatal(err)
}
if _, err := db.Get(t.Context(), "test"); err == nil {
t.Fatal("wanted invalid get to fail, it did not")
}
}

View File

@@ -38,9 +38,7 @@ func Common(t *testing.T, f store.Factory, config json.RawMessage) {
val, err := s.Get(t.Context(), t.Name())
if errors.Is(err, store.ErrNotFound) {
t.Errorf("wanted %s to exist in store but it does not: %v", t.Name(), err)
} else if err != nil {
t.Error(err)
t.Errorf("wanted %s to exist in store but it does not", t.Name())
}
if !bytes.Equal(val, []byte(t.Name())) {