mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-09 10:08:45 +00:00
Compare commits
4 Commits
Xe/optimiz
...
Xe/docs-re
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
617812a039 | ||
|
|
9ddbc69c0e | ||
|
|
edb7ab1ae9 | ||
|
|
7d0c58d1a8 |
@@ -7,7 +7,8 @@
|
||||
"workspaceFolder": "/workspace/anubis",
|
||||
"postStartCommand": "npm ci && go mod download",
|
||||
"features": {
|
||||
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {}
|
||||
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {},
|
||||
"ghcr.io/devcontainers/features/github-cli:1": {}
|
||||
},
|
||||
"initializeCommand": "mkdir -p ${localEnv:HOME}${localEnv:USERPROFILE}/.local/share/atuin",
|
||||
"customizations": {
|
||||
|
||||
2
.github/actions/spelling/expect.txt
vendored
2
.github/actions/spelling/expect.txt
vendored
@@ -22,7 +22,6 @@ berr
|
||||
bingbot
|
||||
Bitcoin
|
||||
bitrate
|
||||
blogging
|
||||
Bluesky
|
||||
blueskybot
|
||||
boi
|
||||
@@ -70,6 +69,7 @@ DDOS
|
||||
Debian
|
||||
debrpm
|
||||
decaymap
|
||||
devcontainers
|
||||
Diffbot
|
||||
discordapp
|
||||
discordbot
|
||||
|
||||
@@ -231,20 +231,6 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu
|
||||
return rp, nil
|
||||
}
|
||||
|
||||
func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) {
|
||||
ticker := time.NewTicker(1 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
s.CleanupDecayMap()
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flagenv.Parse()
|
||||
flag.Parse()
|
||||
@@ -421,7 +407,6 @@ func main() {
|
||||
wg.Add(1)
|
||||
go metricsServer(ctx, wg.Done)
|
||||
}
|
||||
go startDecayMapCleanup(ctx, s)
|
||||
|
||||
var h http.Handler
|
||||
h = s
|
||||
|
||||
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Remove the "Success" interstitial after a proof of work challenge is concluded.
|
||||
- Anubis now has the concept of [storage backends](./admin/policies.mdx#storage-backends). These allow you to change how Anubis stores temporary data (in memory, on the disk, or in Valkey). If you run Anubis in an environment where you have a low amount of memory available for Anubis (eg: less than 64 megabytes), be sure to configure the [`bbolt`](./admin/policies.mdx#bbolt) storage backend.
|
||||
- The challenge issuance and validation process has been rewritten from scratch. Instead of generating challenge strings from request metadata (under the assumption that the values being compared against are stable), Anubis now generates random data for each challenge. This data is stored in the active [storage backend](./admin/policies.mdx#storage-backends) for up to 30 minutes. Fixes [#564](https://github.com/TecharoHQ/anubis/issues/564), [#746](https://github.com/TecharoHQ/anubis/issues/746), and other similar instances of this issue.
|
||||
- Make the [Open Graph](./admin/configuration/open-graph.mdx) subsystem and DNSBL subsystem use [storage backends](./admin/policies.mdx#storage-backends) instead of storing everything in memory by default.
|
||||
- Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742))
|
||||
- Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751))
|
||||
- Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape
|
||||
|
||||
@@ -6,7 +6,7 @@ import type * as Preset from '@docusaurus/preset-classic';
|
||||
|
||||
const config: Config = {
|
||||
title: 'Anubis',
|
||||
tagline: 'Weigh the soul of incoming HTTP requests using proof-of-work to stop AI crawlers',
|
||||
tagline: 'Weigh the soul of incoming HTTP requests to protect your website!',
|
||||
favicon: 'img/favicon.ico',
|
||||
|
||||
// Set the production url of your site here
|
||||
@@ -40,27 +40,20 @@ const config: Config = {
|
||||
[
|
||||
'classic',
|
||||
{
|
||||
docs: {
|
||||
sidebarPath: './sidebars.ts',
|
||||
// Please change this to your repo.
|
||||
// Remove this to remove the "edit this page" links.
|
||||
editUrl:
|
||||
'https://github.com/TecharoHQ/anubis/tree/main/docs/',
|
||||
},
|
||||
blog: {
|
||||
showReadingTime: true,
|
||||
feedOptions: {
|
||||
type: ['rss', 'atom', "json"],
|
||||
xslt: true,
|
||||
},
|
||||
// Please change this to your repo.
|
||||
// Remove this to remove the "edit this page" links.
|
||||
editUrl:
|
||||
'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
|
||||
// Useful options to enforce blogging best practices
|
||||
editUrl: 'https://github.com/TecharoHQ/anubis/tree/main/docs/',
|
||||
onInlineTags: 'warn',
|
||||
onInlineAuthors: 'warn',
|
||||
onUntruncatedBlogPosts: 'warn',
|
||||
onUntruncatedBlogPosts: 'throw',
|
||||
},
|
||||
docs: {
|
||||
sidebarPath: './sidebars.ts',
|
||||
editUrl: 'https://github.com/TecharoHQ/anubis/tree/main/docs/',
|
||||
},
|
||||
theme: {
|
||||
customCss: './src/css/custom.css',
|
||||
@@ -74,7 +67,7 @@ const config: Config = {
|
||||
respectPrefersColorScheme: true,
|
||||
},
|
||||
// Replace with your project's social card
|
||||
image: 'img/docusaurus-social-card.jpg',
|
||||
image: 'img/social-card.jpg',
|
||||
navbar: {
|
||||
title: 'Anubis',
|
||||
logo: {
|
||||
@@ -82,23 +75,28 @@ const config: Config = {
|
||||
src: 'img/favicon.webp',
|
||||
},
|
||||
items: [
|
||||
{ to: '/blog', label: 'Blog', position: 'left' },
|
||||
{
|
||||
type: 'docSidebar',
|
||||
sidebarId: 'tutorialSidebar',
|
||||
position: 'left',
|
||||
label: 'Docs',
|
||||
},
|
||||
{ to: '/blog', label: 'Blog', position: 'left' },
|
||||
{
|
||||
href: 'https://github.com/sponsors/Xe',
|
||||
label: "Sponsorship",
|
||||
position: 'left'
|
||||
to: '/docs/admin/botstopper',
|
||||
label: "Unbranded Version",
|
||||
position: "left"
|
||||
},
|
||||
{
|
||||
href: 'https://github.com/TecharoHQ/anubis',
|
||||
label: 'GitHub',
|
||||
position: 'right',
|
||||
},
|
||||
{
|
||||
href: 'https://github.com/sponsors/Xe',
|
||||
label: "Sponsor the Project",
|
||||
position: 'right'
|
||||
},
|
||||
],
|
||||
},
|
||||
footer: {
|
||||
|
||||
@@ -5,49 +5,50 @@ import styles from "./styles.module.css";
|
||||
|
||||
type FeatureItem = {
|
||||
title: string;
|
||||
Svg: React.ComponentType<React.ComponentProps<"svg">>;
|
||||
imageURL: string;
|
||||
description: ReactNode;
|
||||
};
|
||||
|
||||
const FeatureList: FeatureItem[] = [
|
||||
{
|
||||
title: "Easy to Use",
|
||||
Svg: require("@site/static/img/undraw_docusaurus_mountain.svg").default,
|
||||
imageURL: require("@site/static/img/anubis/happy.webp").default,
|
||||
description: (
|
||||
<>
|
||||
Anubis is easy to set up, lightweight, and helps get rid of the lowest
|
||||
hanging fruit so you can sleep at night.
|
||||
Anubis sits in the background and weighs the risk of incoming requests.
|
||||
If it asks a client to complete a challenge, no user interaction is
|
||||
required.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "Lightweight",
|
||||
Svg: require("@site/static/img/undraw_docusaurus_tree.svg").default,
|
||||
imageURL: require("@site/static/img/anubis/pensive.webp").default,
|
||||
description: (
|
||||
<>
|
||||
Anubis is efficient and as lightweight as possible, blocking the worst
|
||||
of the bots on the internet and makes it easy to protect what you host
|
||||
online.
|
||||
Anubis is so lightweight you'll forget it's there until you look at your
|
||||
hosting bill. On average it uses less than 128 MB of ram.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "Multi-threaded",
|
||||
Svg: require("@site/static/img/undraw_docusaurus_react.svg").default,
|
||||
title: "Block the scrapers",
|
||||
imageURL: require("@site/static/img/anubis/reject.webp").default,
|
||||
description: (
|
||||
<>
|
||||
Anubis uses a multi-threaded proof of work check to ensure that users
|
||||
browsers are up to date and support modern standards.
|
||||
Anubis uses a combination of heuristics to identify and block bots
|
||||
before they take your website down. You can customize the rules with{" "}
|
||||
<a href="/docs/admin/policies">your own policies</a>.
|
||||
</>
|
||||
),
|
||||
},
|
||||
];
|
||||
|
||||
function Feature({ title, Svg, description }: FeatureItem) {
|
||||
function Feature({ title, description, imageURL }: FeatureItem) {
|
||||
return (
|
||||
<div className={clsx("col col--4")}>
|
||||
<div className="text--center">
|
||||
<Svg className={styles.featureSvg} role="img" />
|
||||
<img src={imageURL} className={styles.featureSvg} role="img" />
|
||||
</div>
|
||||
<div className="text--center padding-horiz--md">
|
||||
<Heading as="h3">{title}</Heading>
|
||||
|
||||
@@ -31,19 +31,12 @@ export default function Home(): ReactNode {
|
||||
const { siteConfig } = useDocusaurusContext();
|
||||
return (
|
||||
<Layout
|
||||
title={`Anubis: self hostable scraper defense software`}
|
||||
description="Weigh the soul of incoming HTTP requests using proof-of-work to stop AI crawlers"
|
||||
title={`Anubis: Web AI Firewall Utility`}
|
||||
description="Weigh the soul of incoming HTTP requests to protect your website!"
|
||||
>
|
||||
<HomepageHeader />
|
||||
<main>
|
||||
<HomepageFeatures />
|
||||
|
||||
<center>
|
||||
<p>
|
||||
This is all placeholder text. It will be fixed. Give me time. I am
|
||||
one person and my project has unexpectedly gone viral.
|
||||
</p>
|
||||
</center>
|
||||
</main>
|
||||
</Layout>
|
||||
);
|
||||
|
||||
BIN
docs/static/img/anubis/happy.webp
vendored
Normal file
BIN
docs/static/img/anubis/happy.webp
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 30 KiB |
BIN
docs/static/img/anubis/pensive.webp
vendored
Normal file
BIN
docs/static/img/anubis/pensive.webp
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
BIN
docs/static/img/anubis/reject.webp
vendored
Normal file
BIN
docs/static/img/anubis/reject.webp
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 26 KiB |
BIN
docs/static/img/docusaurus-social-card.jpg
vendored
BIN
docs/static/img/docusaurus-social-card.jpg
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 54 KiB |
BIN
docs/static/img/social-card.jpg
vendored
Normal file
BIN
docs/static/img/social-card.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 881 KiB |
@@ -1,6 +1,7 @@
|
||||
package ogtags
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/url"
|
||||
@@ -8,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
// GetOGTags is the main function that retrieves Open Graph tags for a URL
|
||||
func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) {
|
||||
func (c *OGTagCache) GetOGTags(ctx context.Context, url *url.URL, originalHost string) (map[string]string, error) {
|
||||
if url == nil {
|
||||
return nil, errors.New("nil URL provided, cannot fetch OG tags")
|
||||
}
|
||||
@@ -21,12 +22,12 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
|
||||
cacheKey := c.generateCacheKey(target, originalHost)
|
||||
|
||||
// Check cache first
|
||||
if cachedTags := c.checkCache(cacheKey); cachedTags != nil {
|
||||
if cachedTags := c.checkCache(ctx, cacheKey); cachedTags != nil {
|
||||
return cachedTags, nil
|
||||
}
|
||||
|
||||
// Fetch HTML content, passing the original host
|
||||
doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey)
|
||||
doc, err := c.fetchHTMLDocumentWithCache(ctx, target, originalHost, cacheKey)
|
||||
if errors.Is(err, syscall.ECONNREFUSED) {
|
||||
slog.Debug("Connection refused, returning empty tags")
|
||||
return nil, nil
|
||||
@@ -42,7 +43,7 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
|
||||
ogTags := c.extractOGTags(doc)
|
||||
|
||||
// Store in cache
|
||||
c.cache.Set(cacheKey, ogTags, c.ogTimeToLive)
|
||||
c.cache.Set(ctx, cacheKey, ogTags, c.ogTimeToLive)
|
||||
|
||||
return ogTags, nil
|
||||
}
|
||||
@@ -59,8 +60,8 @@ func (c *OGTagCache) generateCacheKey(target string, originalHost string) string
|
||||
}
|
||||
|
||||
// checkCache checks if we have the tags cached and returns them if so
|
||||
func (c *OGTagCache) checkCache(cacheKey string) map[string]string {
|
||||
if cachedTags, ok := c.cache.Get(cacheKey); ok {
|
||||
func (c *OGTagCache) checkCache(ctx context.Context, cacheKey string) map[string]string {
|
||||
if cachedTags, err := c.cache.Get(ctx, cacheKey); err == nil {
|
||||
slog.Debug("cache hit", "tags", cachedTags)
|
||||
return cachedTags
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
)
|
||||
|
||||
func TestCacheReturnsDefault(t *testing.T) {
|
||||
@@ -21,14 +22,14 @@ func TestCacheReturnsDefault(t *testing.T) {
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
Override: want,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
u, err := url.Parse("https://anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
|
||||
result, err := cache.GetOGTags(t.Context(), u, "anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -49,7 +50,7 @@ func TestCheckCache(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
// Set up test data
|
||||
urlStr := "http://example.com/page"
|
||||
@@ -60,16 +61,16 @@ func TestCheckCache(t *testing.T) {
|
||||
cacheKey := cache.generateCacheKey(urlStr, "example.com")
|
||||
|
||||
// Test cache miss
|
||||
tags := cache.checkCache(cacheKey)
|
||||
tags := cache.checkCache(t.Context(), cacheKey)
|
||||
if tags != nil {
|
||||
t.Errorf("expected nil tags on cache miss, got %v", tags)
|
||||
}
|
||||
|
||||
// Manually add to cache
|
||||
cache.cache.Set(cacheKey, expectedTags, time.Minute)
|
||||
cache.cache.Set(t.Context(), cacheKey, expectedTags, time.Minute)
|
||||
|
||||
// Test cache hit
|
||||
tags = cache.checkCache(cacheKey)
|
||||
tags = cache.checkCache(t.Context(), cacheKey)
|
||||
if tags == nil {
|
||||
t.Fatal("expected non-nil tags on cache hit, got nil")
|
||||
}
|
||||
@@ -112,7 +113,7 @@ func TestGetOGTags(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
// Parse the test server URL
|
||||
parsedURL, err := url.Parse(ts.URL)
|
||||
@@ -122,7 +123,7 @@ func TestGetOGTags(t *testing.T) {
|
||||
|
||||
// Test fetching OG tags from the test server
|
||||
// Pass the host from the parsed test server URL
|
||||
ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
|
||||
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get OG tags: %v", err)
|
||||
}
|
||||
@@ -142,14 +143,14 @@ func TestGetOGTags(t *testing.T) {
|
||||
|
||||
// Test fetching OG tags from the cache
|
||||
// Pass the host from the parsed test server URL
|
||||
ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host)
|
||||
ogTags, err = cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get OG tags from cache: %v", err)
|
||||
}
|
||||
|
||||
// Test fetching OG tags from the cache (3rd time)
|
||||
// Pass the host from the parsed test server URL
|
||||
newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
|
||||
newOgTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get OG tags from cache: %v", err)
|
||||
}
|
||||
@@ -263,10 +264,10 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: tc.ogCacheConsiderHost,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
for i, req := range tc.requests {
|
||||
ogTags, err := cache.GetOGTags(parsedURL, req.host)
|
||||
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, req.host)
|
||||
if err != nil {
|
||||
t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err)
|
||||
continue // Skip further checks for this request if error occurred
|
||||
|
||||
@@ -20,8 +20,8 @@ var (
|
||||
|
||||
// fetchHTMLDocumentWithCache fetches the HTML document from the given URL string,
|
||||
// preserving the original host header.
|
||||
func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
|
||||
req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil)
|
||||
func (c *OGTagCache) fetchHTMLDocumentWithCache(ctx context.Context, urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create http request: %w", err)
|
||||
}
|
||||
@@ -41,7 +41,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) && netErr.Timeout() {
|
||||
slog.Debug("og: request timed out", "url", urlStr)
|
||||
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
|
||||
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
|
||||
}
|
||||
return nil, fmt.Errorf("http get failed: %w", err)
|
||||
}
|
||||
@@ -56,7 +56,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode)
|
||||
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
|
||||
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
|
||||
return nil, fmt.Errorf("%w: page not found", ErrOgHandled)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package ogtags
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -11,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -85,8 +87,8 @@ func TestFetchHTMLDocument(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
|
||||
}, memory.New(t.Context()))
|
||||
doc, err := cache.fetchHTMLDocument(t.Context(), ts.URL, "anything")
|
||||
|
||||
if tt.expectError {
|
||||
if err == nil {
|
||||
@@ -116,9 +118,9 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
|
||||
doc, err := cache.fetchHTMLDocument(t.Context(), "http://invalid.url.that.doesnt.exist.example", "anything")
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid URL, got nil")
|
||||
@@ -130,7 +132,7 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
|
||||
}
|
||||
|
||||
// fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call
|
||||
func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) {
|
||||
func (c *OGTagCache) fetchHTMLDocument(ctx context.Context, urlStr string, originalHost string) (*html.Node, error) {
|
||||
cacheKey := c.generateCacheKey(urlStr, originalHost)
|
||||
return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey)
|
||||
return c.fetchHTMLDocumentWithCache(ctx, urlStr, originalHost, cacheKey)
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
)
|
||||
|
||||
func TestIntegrationGetOGTags(t *testing.T) {
|
||||
@@ -110,7 +111,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
// Create URL for test
|
||||
testURL, _ := url.Parse(ts.URL)
|
||||
@@ -119,7 +120,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
||||
|
||||
// Get OG tags
|
||||
// Pass the host from the test URL
|
||||
ogTags, err := cache.GetOGTags(testURL, testURL.Host)
|
||||
ogTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
|
||||
|
||||
// Check error expectation
|
||||
if tc.expectError {
|
||||
@@ -147,7 +148,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
||||
|
||||
// Test cache retrieval
|
||||
// Pass the host from the test URL
|
||||
cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host)
|
||||
cachedOGTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get OG tags from cache: %v", err)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -30,7 +31,7 @@ func BenchmarkGetTarget(b *testing.B) {
|
||||
|
||||
for _, tt := range tests {
|
||||
b.Run(tt.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{})
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{}, memory.New(b.Context()))
|
||||
urls := make([]*url.URL, len(tt.paths))
|
||||
for i, path := range tt.paths {
|
||||
u, _ := url.Parse(path)
|
||||
@@ -66,7 +67,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
||||
</head><body><div><p>Content</p></div></body></html>`,
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(b.Context()))
|
||||
docs := make([]*html.Node, len(htmlSamples))
|
||||
|
||||
for i, sample := range htmlSamples {
|
||||
@@ -84,7 +85,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
||||
|
||||
// Memory usage test
|
||||
func TestMemoryUsage(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(t.Context()))
|
||||
|
||||
// Force GC and wait for it to complete
|
||||
runtime.GC()
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/decaymap"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -22,7 +22,7 @@ const (
|
||||
)
|
||||
|
||||
type OGTagCache struct {
|
||||
cache *decaymap.Impl[string, map[string]string]
|
||||
cache store.JSON[map[string]string]
|
||||
targetURL *url.URL
|
||||
client *http.Client
|
||||
|
||||
@@ -36,7 +36,7 @@ type OGTagCache struct {
|
||||
ogOverride map[string]string
|
||||
}
|
||||
|
||||
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
func NewOGTagCache(target string, conf config.OpenGraph, backend store.Interface) *OGTagCache {
|
||||
// Predefined approved tags and prefixes
|
||||
defaultApprovedTags := []string{"description", "keywords", "author"}
|
||||
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
|
||||
@@ -77,7 +77,10 @@ func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
}
|
||||
|
||||
return &OGTagCache{
|
||||
cache: decaymap.New[string, map[string]string](),
|
||||
cache: store.JSON[map[string]string]{
|
||||
Underlying: backend,
|
||||
Prefix: "ogtags:",
|
||||
},
|
||||
targetURL: parsedTargetURL,
|
||||
ogPassthrough: conf.Enabled,
|
||||
ogTimeToLive: conf.TimeToLive,
|
||||
@@ -124,9 +127,3 @@ func (c *OGTagCache) getTarget(u *url.URL) string {
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func (c *OGTagCache) Cleanup() {
|
||||
if c.cache != nil {
|
||||
c.cache.Cleanup()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
package ogtags
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -46,7 +48,7 @@ func FuzzGetTarget(f *testing.F) {
|
||||
}
|
||||
|
||||
// Create cache - should not panic
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
|
||||
|
||||
// Create URL
|
||||
u := &url.URL{
|
||||
@@ -130,7 +132,7 @@ func FuzzExtractOGTags(f *testing.F) {
|
||||
return
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
|
||||
|
||||
// Should not panic
|
||||
tags := cache.extractOGTags(doc)
|
||||
@@ -186,7 +188,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
|
||||
u := &url.URL{Path: path, RawQuery: query}
|
||||
|
||||
result := cache.getTarget(u)
|
||||
@@ -243,7 +245,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
|
||||
},
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
|
||||
|
||||
// Should not panic
|
||||
property, content := cache.extractMetaTagInfo(node)
|
||||
@@ -296,7 +298,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
|
||||
|
||||
for _, input := range inputs {
|
||||
b.Run(input.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(input.target, config.OpenGraph{})
|
||||
cache := NewOGTagCache(input.target, config.OpenGraph{}, memory.New(context.Background()))
|
||||
u := &url.URL{Path: input.path, RawQuery: input.query}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
)
|
||||
|
||||
func TestNewOGTagCache(t *testing.T) {
|
||||
@@ -44,7 +45,7 @@ func TestNewOGTagCache(t *testing.T) {
|
||||
Enabled: tt.ogPassthrough,
|
||||
TimeToLive: tt.ogTimeToLive,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -84,7 +85,7 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: 5 * time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
@@ -169,7 +170,7 @@ func TestGetTarget(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
u := &url.URL{
|
||||
Path: tt.path,
|
||||
@@ -242,14 +243,14 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
|
||||
// Create a dummy URL for the request (path and query matter)
|
||||
testReqURL, _ := url.Parse("/some/page?query=1")
|
||||
|
||||
// Get OG tags
|
||||
// Pass an empty string for host, as it's irrelevant for unix sockets
|
||||
ogTags, err := cache.GetOGTags(testReqURL, "")
|
||||
ogTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("GetOGTags failed for unix socket: %v", err)
|
||||
@@ -265,7 +266,7 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
|
||||
// Test cache retrieval (should hit cache)
|
||||
// Pass an empty string for host
|
||||
cachedTags, err := cache.GetOGTags(testReqURL, "")
|
||||
cachedTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
|
||||
if err != nil {
|
||||
t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -17,7 +18,7 @@ func TestExtractOGTags(t *testing.T) {
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
// Manually set approved tags/prefixes based on the user request for clarity
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
@@ -198,7 +199,7 @@ func TestExtractMetaTagInfo(t *testing.T) {
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
}, memory.New(t.Context()))
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
|
||||
|
||||
@@ -70,7 +70,6 @@ type Server struct {
|
||||
next http.Handler
|
||||
mux *http.ServeMux
|
||||
policy *policy.ParsedConfig
|
||||
DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse]
|
||||
OGTags *ogtags.OGTagCache
|
||||
ed25519Priv ed25519.PrivateKey
|
||||
hs512Secret []byte
|
||||
@@ -279,15 +278,16 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch
|
||||
}
|
||||
|
||||
func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string, lg *slog.Logger) bool {
|
||||
db := &store.JSON[dnsbl.DroneBLResponse]{Underlying: s.store, Prefix: "dronebl:"}
|
||||
if s.policy.DNSBL && ip != "" {
|
||||
resp, ok := s.DNSBLCache.Get(ip)
|
||||
if !ok {
|
||||
resp, err := db.Get(r.Context(), ip)
|
||||
if err != nil {
|
||||
lg.Debug("looking up ip in dnsbl")
|
||||
resp, err := dnsbl.Lookup(ip)
|
||||
if err != nil {
|
||||
lg.Error("can't look up ip in dnsbl", "err", err)
|
||||
}
|
||||
s.DNSBLCache.Set(ip, resp, 24*time.Hour)
|
||||
db.Set(r.Context(), ip, resp, 24*time.Hour)
|
||||
droneBLHits.WithLabelValues(resp.String()).Inc()
|
||||
}
|
||||
|
||||
@@ -551,8 +551,3 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error)
|
||||
Rules: &checker.List{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *Server) CleanupDecayMap() {
|
||||
s.DNSBLCache.Cleanup()
|
||||
s.OGTags.Cleanup()
|
||||
}
|
||||
|
||||
@@ -15,9 +15,7 @@ import (
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
"github.com/TecharoHQ/anubis/decaymap"
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/dnsbl"
|
||||
"github.com/TecharoHQ/anubis/internal/ogtags"
|
||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||
"github.com/TecharoHQ/anubis/lib/localization"
|
||||
@@ -108,8 +106,7 @@ func New(opts Options) (*Server, error) {
|
||||
hs512Secret: opts.HS512Secret,
|
||||
policy: opts.Policy,
|
||||
opts: opts,
|
||||
DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph),
|
||||
OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph, opts.Policy.Store),
|
||||
store: opts.Policy.Store,
|
||||
}
|
||||
|
||||
|
||||
@@ -138,7 +138,7 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *polic
|
||||
var ogTags map[string]string = nil
|
||||
if s.opts.OpenGraph.Enabled {
|
||||
var err error
|
||||
ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host)
|
||||
ogTags, err = s.OGTags.GetOGTags(r.Context(), r.URL, r.Host)
|
||||
if err != nil {
|
||||
lg.Error("failed to get OG tags", "err", err)
|
||||
}
|
||||
|
||||
@@ -43,13 +43,22 @@ func z[T any]() T { return *new(T) }
|
||||
|
||||
type JSON[T any] struct {
|
||||
Underlying Interface
|
||||
Prefix string
|
||||
}
|
||||
|
||||
func (j *JSON[T]) Delete(ctx context.Context, key string) error {
|
||||
if j.Prefix != "" {
|
||||
key = j.Prefix + key
|
||||
}
|
||||
|
||||
return j.Underlying.Delete(ctx, key)
|
||||
}
|
||||
|
||||
func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) {
|
||||
if j.Prefix != "" {
|
||||
key = j.Prefix + key
|
||||
}
|
||||
|
||||
data, err := j.Underlying.Get(ctx, key)
|
||||
if err != nil {
|
||||
return z[T](), err
|
||||
@@ -64,6 +73,10 @@ func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) {
|
||||
}
|
||||
|
||||
func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Duration) error {
|
||||
if j.Prefix != "" {
|
||||
key = j.Prefix + key
|
||||
}
|
||||
|
||||
data, err := json.Marshal(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: %w", ErrCantEncode, err)
|
||||
|
||||
50
lib/store/json_test.go
Normal file
50
lib/store/json_test.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package store_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/store"
|
||||
"github.com/TecharoHQ/anubis/lib/store/memory"
|
||||
)
|
||||
|
||||
func TestJSON(t *testing.T) {
|
||||
type data struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
|
||||
st := memory.New(t.Context())
|
||||
db := store.JSON[data]{
|
||||
Underlying: st,
|
||||
Prefix: "foo:",
|
||||
}
|
||||
|
||||
if err := db.Set(t.Context(), "test", data{ID: t.Name()}, time.Minute); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got, err := db.Get(t.Context(), "test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if got.ID != t.Name() {
|
||||
t.Fatalf("got wrong data for key \"test\", wanted %q but got: %q", t.Name(), got.ID)
|
||||
}
|
||||
|
||||
if err := db.Delete(t.Context(), "test"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := db.Get(t.Context(), "test"); err == nil {
|
||||
t.Fatal("wanted invalid get to fail, it did not")
|
||||
}
|
||||
|
||||
if err := st.Set(t.Context(), "foo:test", []byte("}"), time.Minute); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := db.Get(t.Context(), "test"); err == nil {
|
||||
t.Fatal("wanted invalid get to fail, it did not")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user