mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-12 19:48:44 +00:00
Compare commits
3 Commits
revert-134
...
v1.15.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3c026977f | ||
|
|
7d4be0dcec | ||
|
|
d1d63d9c18 |
7
.github/workflows/go.yml
vendored
7
.github/workflows/go.yml
vendored
@@ -64,8 +64,13 @@ jobs:
|
||||
~/.cache/ms-playwright
|
||||
key: ${{ runner.os }}-playwright-${{ hashFiles('**/go.sum') }}
|
||||
|
||||
- name: install playwright browsers
|
||||
run: |
|
||||
npx --yes playwright@1.50.1 install --with-deps
|
||||
npx --yes playwright@1.50.1 run-server --port 3000 &
|
||||
|
||||
- name: Build
|
||||
run: go build ./...
|
||||
|
||||
- name: Test
|
||||
run: go test ./...
|
||||
run: go test -v ./...
|
||||
|
||||
@@ -33,10 +33,9 @@ import (
|
||||
var (
|
||||
bind = flag.String("bind", ":8923", "network address to bind HTTP to")
|
||||
bindNetwork = flag.String("bind-network", "tcp", "network family to bind HTTP to, e.g. unix, tcp")
|
||||
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
|
||||
cookieName = flag.String("cookie-name", anubis.CookieName, "the name of the cookie that Anubis stores challenge pass records in")
|
||||
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
|
||||
challengeDifficulty = flag.Int("difficulty", anubis.DefaultDifficulty, "difficulty of the challenge")
|
||||
cookieDomain = flag.String("cookie-domain", "", "if set, the top-level domain that the Anubis cookie will be valid for")
|
||||
cookiePartitioned = flag.Bool("cookie-partitioned", false, "if true, sets the partitioned flag on Anubis cookies, enabling CHIPS support")
|
||||
ed25519PrivateKeyHex = flag.String("ed25519-private-key-hex", "", "private key used to sign JWTs, if not set a random one will be assigned")
|
||||
metricsBind = flag.String("metrics-bind", ":9090", "network address to bind metrics to")
|
||||
metricsBindNetwork = flag.String("metrics-bind-network", "tcp", "network family for the metrics server to bind to")
|
||||
@@ -192,10 +191,12 @@ func main() {
|
||||
}
|
||||
|
||||
s, err := libanubis.New(libanubis.Options{
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
Next: rp,
|
||||
Policy: policy,
|
||||
ServeRobotsTXT: *robotsTxt,
|
||||
PrivateKey: priv,
|
||||
CookieDomain: *cookieDomain,
|
||||
CookiePartitioned: *cookiePartitioned,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("can't construct libanubis.Server: %v", err)
|
||||
|
||||
@@ -11,15 +11,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## v1.15.0
|
||||
|
||||
Zenos yae Galvus
|
||||
|
||||
> Yes...the coming days promise to be most interesting. Most interesting.
|
||||
|
||||
Headline changes:
|
||||
|
||||
- ed25519 signing keys for Anubis can be stored in the flag `--ed25519-private-key-hex` or envvar `ED25519_PRIVATE_KEY_HEX`; if one is not provided when Anubis starts, a new one is generated and logged
|
||||
- Add the ability to set the cookie domain with the envvar `COOKIE_DOMAIN=techaro.lol` for all domains under `techaro.lol`
|
||||
- Add the ability to set the cookie partitioned flag with the envvar `COOKIE_PARTITIONED=true`
|
||||
|
||||
Many other small changes were made, including but not limited to:
|
||||
|
||||
- Fixed and clarified installation instructions
|
||||
- Introduced integration tests using Playwright
|
||||
- Refactor & Split up Anubis into cmd and lib.go
|
||||
- Fixed bot check to only apply if address range matches
|
||||
- Fix default difficulty setting that was broken in a refactor
|
||||
- Linting fixes
|
||||
- Extra cookie options can be set such as the name, domain, and partitioned flag [#73](https://github.com/TecharoHQ/anubis/issues/73)
|
||||
- Make dark mode diff lines readable in the documentation
|
||||
- Fix CI based browser smoke test
|
||||
|
||||
Users running Anubis' test suite may run into issues with the integration tests on Windows hosts. This is a known issue and will be fixed at some point in the future. In the meantime, use the Windows Subsystem for Linux (WSL).
|
||||
|
||||
## v1.14.2
|
||||
|
||||
|
||||
@@ -41,21 +41,20 @@ Anubis has very minimal system requirements. I suspect that 128Mi of ram may be
|
||||
|
||||
Anubis uses these environment variables for configuration:
|
||||
|
||||
| Environment Variable | Default value | Explanation |
|
||||
| :------------------------ | :--------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
|
||||
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
|
||||
| `COOKIE_DOMAIN` | unset | The domain the Anubis challenge pass cookie should be set to. This should be set to the domain you bought from your registrar (EG: `techaro.lol` if your webapp is running on `anubis.techaro.lol`). See [here](https://stackoverflow.com/a/1063760) for more information. |
|
||||
| `COOKIE_NAME` | `within.website-x-cmd-anubis-auth` | The cookie that Anubis uses to determine if users have passed a challenge. This should not be changed without a good reason. |
|
||||
| `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. |
|
||||
| `DIFFICULTY` | `5` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
|
||||
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. See below for details. |
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. |
|
||||
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
|
||||
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
|
||||
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
|
||||
| Environment Variable | Default value | Explanation |
|
||||
| :------------------------ | :---------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
|
||||
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
|
||||
| `COOKIE_DOMAIN` | unset | The domain the Anubis challenge pass cookie should be set to. This should be set to the domain you bought from your registrar (EG: `techaro.lol` if your webapp is running on `anubis.techaro.lol`). See [here](https://stackoverflow.com/a/1063760) for more information. |
|
||||
| `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. |
|
||||
| `DIFFICULTY` | `5` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
|
||||
| `ED25519_PRIVATE_KEY_HEX` | | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. See below for details. |
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. |
|
||||
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
|
||||
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
|
||||
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
|
||||
|
||||
### Key generation
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ Here is a minimal policy file that will protect against most scraper bots:
|
||||
}
|
||||
```
|
||||
|
||||
This allows requests to [`/.well-known`](https://en.wikipedia.org/wiki/Well-known_URI), `/favicon.ico`, `/robots.txt`, and challenges any request that has the word `Mozilla` in its User-Agent string. The [default policy file](https://github.com/TecharoHQ/anubis/blob/main/cmd/anubis/botPolicies.json) is a bit more cohesive, but this should be more than enough for most users.
|
||||
This allows requests to [`/.well-known`](https://en.wikipedia.org/wiki/Well-known_URI), `/favicon.ico`, `/robots.txt`, and challenges any request that has the word `Mozilla` in its User-Agent string. The [default policy file](https://github.com/TecharoHQ/anubis/blob/main/data/botPolicies.json) is a bit more cohesive, but this should be more than enough for most users.
|
||||
|
||||
If no rules match the request, it is allowed through.
|
||||
|
||||
|
||||
@@ -166,10 +166,6 @@ func startPlaywright(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestPlaywrightBrowser(t *testing.T) {
|
||||
if os.Getenv("CI") == "true" {
|
||||
t.Skip("XXX(Xe): This is broken in CI, will fix later")
|
||||
}
|
||||
|
||||
if os.Getenv("DONT_USE_NETWORK") != "" {
|
||||
t.Skip("test requires network egress")
|
||||
return
|
||||
@@ -225,12 +221,20 @@ func TestPlaywrightBrowser(t *testing.T) {
|
||||
t.Skip("skipping hard challenge with deadline")
|
||||
}
|
||||
|
||||
perfomedAction := executeTestCase(t, tc, typ, anubisURL)
|
||||
|
||||
var perfomedAction action
|
||||
var err error
|
||||
for i := 0; i < 5; i++ {
|
||||
perfomedAction, err = executeTestCase(t, tc, typ, anubisURL)
|
||||
if perfomedAction == tc.action {
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Duration(i+1) * 250 * time.Millisecond)
|
||||
}
|
||||
if perfomedAction != tc.action {
|
||||
t.Errorf("unexpected test result, expected %s, got %s", tc.action, perfomedAction)
|
||||
} else {
|
||||
t.Logf("test passed")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("test error: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -247,14 +251,14 @@ func buildBrowserConnect(name string) string {
|
||||
return u.String()
|
||||
}
|
||||
|
||||
func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anubisURL string) action {
|
||||
func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anubisURL string) (action, error) {
|
||||
deadline, _ := t.Deadline()
|
||||
|
||||
browser, err := typ.Connect(buildBrowserConnect(typ.Name()), playwright.BrowserTypeConnectOptions{
|
||||
ExposeNetwork: playwright.String("<loopback>"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("could not connect to remote browser: %v", err)
|
||||
return "", fmt.Errorf("could not connect to remote browser: %w", err)
|
||||
}
|
||||
defer browser.Close()
|
||||
|
||||
@@ -266,13 +270,13 @@ func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anub
|
||||
UserAgent: playwright.String(tc.userAgent),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("could not create context: %v", err)
|
||||
return "", fmt.Errorf("could not create context: %w", err)
|
||||
}
|
||||
defer ctx.Close()
|
||||
|
||||
page, err := ctx.NewPage()
|
||||
if err != nil {
|
||||
t.Fatalf("could not create page: %v", err)
|
||||
return "", fmt.Errorf("could not create page: %w", err)
|
||||
}
|
||||
defer page.Close()
|
||||
|
||||
@@ -283,7 +287,7 @@ func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anub
|
||||
Timeout: pwTimeout(tc, deadline),
|
||||
})
|
||||
if err != nil {
|
||||
pwFail(t, page, "could not navigate to test server: %v", err)
|
||||
return "", pwFail(t, page, "could not navigate to test server: %v", err)
|
||||
}
|
||||
|
||||
hadChallenge := false
|
||||
@@ -294,7 +298,7 @@ func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anub
|
||||
hadChallenge = true
|
||||
case actionDeny:
|
||||
checkImage(t, tc, deadline, page, "#image[src*=sad]")
|
||||
return actionDeny
|
||||
return actionDeny, nil
|
||||
}
|
||||
|
||||
// Ensure protected resource was provided.
|
||||
@@ -317,9 +321,9 @@ func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anub
|
||||
}
|
||||
|
||||
if hadChallenge {
|
||||
return actionChallenge
|
||||
return actionChallenge, nil
|
||||
} else {
|
||||
return actionAllow
|
||||
return actionAllow, nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -342,11 +346,11 @@ func checkImage(t *testing.T, tc testCase, deadline time.Time, page playwright.P
|
||||
}
|
||||
}
|
||||
|
||||
func pwFail(t *testing.T, page playwright.Page, format string, args ...any) {
|
||||
func pwFail(t *testing.T, page playwright.Page, format string, args ...any) error {
|
||||
t.Helper()
|
||||
|
||||
saveScreenshot(t, page)
|
||||
t.Fatalf(format, args...)
|
||||
return fmt.Errorf(format, args...)
|
||||
}
|
||||
|
||||
func pwTimeout(tc testCase, deadline time.Time) *float64 {
|
||||
|
||||
3
internal/test/var/.gitignore
vendored
Normal file
3
internal/test/var/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*.png
|
||||
*.txt
|
||||
*.html
|
||||
@@ -247,7 +247,7 @@ func (s *Server) MaybeReverseProxy(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
ckie, err := r.Cookie(s.opts.CookieName)
|
||||
ckie, err := r.Cookie(anubis.CookieName)
|
||||
if err != nil {
|
||||
lg.Debug("cookie not found", "path", r.URL.Path)
|
||||
s.ClearCookie(w)
|
||||
@@ -454,12 +454,13 @@ func (s *Server) PassChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
http.SetCookie(w, &http.Cookie{
|
||||
Name: s.opts.CookieName,
|
||||
Name: anubis.CookieName,
|
||||
Value: tokenString,
|
||||
Expires: time.Now().Add(24 * 7 * time.Hour),
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
Domain: s.opts.CookieDomain,
|
||||
Partitioned: s.opts.CookiePartitioned,
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
Path: "/",
|
||||
})
|
||||
|
||||
challengesValidated.Inc()
|
||||
|
||||
@@ -96,20 +96,25 @@ func TestCookieSettings(t *testing.T) {
|
||||
t.Errorf("wanted %d, got: %d", http.StatusFound, resp.StatusCode)
|
||||
}
|
||||
|
||||
found := false
|
||||
var ckie *http.Cookie
|
||||
for _, cookie := range resp.Cookies() {
|
||||
t.Logf("%#v", cookie)
|
||||
if cookie.Name == t.Name() {
|
||||
found = true
|
||||
}
|
||||
|
||||
if found && cookie.Domain != "local.cetacean.club" {
|
||||
t.Errorf("cookie domain is wrong, wanted local.cetacean.club, got: %s", cookie.Domain)
|
||||
if cookie.Name == anubis.CookieName {
|
||||
ckie = cookie
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Errorf("Cookie %q not found", t.Name())
|
||||
if ckie.Domain != "local.cetacean.club" {
|
||||
t.Errorf("cookie domain is wrong, wanted local.cetacean.club, got: %s", ckie.Domain)
|
||||
}
|
||||
|
||||
if ckie.Partitioned != srv.opts.CookiePartitioned {
|
||||
t.Errorf("wanted partitioned flag %v, got: %v", srv.opts.CookiePartitioned, ckie.Partitioned)
|
||||
}
|
||||
|
||||
if ckie == nil {
|
||||
t.Errorf("Cookie %q not found", anubis.CookieName)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
15
lib/http.go
15
lib/http.go
@@ -3,17 +3,18 @@ package lib
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
)
|
||||
|
||||
func (s *Server) ClearCookie(w http.ResponseWriter) {
|
||||
http.SetCookie(w, &http.Cookie{
|
||||
Name: s.opts.CookieName,
|
||||
Value: "",
|
||||
Expires: time.Now().Add(-1 * time.Hour),
|
||||
MaxAge: -1,
|
||||
Domain: s.opts.CookieDomain,
|
||||
Partitioned: s.opts.CookiePartitioned,
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
Name: anubis.CookieName,
|
||||
Value: "",
|
||||
Expires: time.Now().Add(-1 * time.Hour),
|
||||
MaxAge: -1,
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
Domain: s.opts.CookieDomain,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user