mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-06 00:38:18 +00:00
Compare commits
1 Commits
json/requi
...
json/docs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8393df522 |
6
.github/workflows/docker-pr.yml
vendored
6
.github/workflows/docker-pr.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository }}
|
||||
|
||||
|
||||
10
.github/workflows/docker.yml
vendored
10
.github/workflows/docker.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ${{ env.IMAGE }}
|
||||
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
|
||||
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
|
||||
with:
|
||||
subject-name: ${{ env.IMAGE }}
|
||||
subject-digest: ${{ steps.build.outputs.digest }}
|
||||
|
||||
10
.github/workflows/docs-deploy.yml
vendored
10
.github/workflows/docs-deploy.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: techarohq
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
@@ -53,14 +53,14 @@ jobs:
|
||||
push: true
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
args: apply -k docs/manifest
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
|
||||
4
.github/workflows/docs-test.yml
vendored
4
.github/workflows/docs-test.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -22,7 +22,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
|
||||
8
.github/workflows/go.yml
vendored
8
.github/workflows/go.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
@@ -59,7 +59,7 @@ jobs:
|
||||
${{ runner.os }}-golang-
|
||||
|
||||
- name: Cache playwright binaries
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
id: playwright-cache
|
||||
with:
|
||||
path: |
|
||||
|
||||
6
.github/workflows/package-builds-stable.yml
vendored
6
.github/workflows/package-builds-stable.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
|
||||
2
.github/workflows/smoke-tests.yml
vendored
2
.github/workflows/smoke-tests.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
@@ -18,13 +18,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
|
||||
2
.github/workflows/ssh-ci.yml
vendored
2
.github/workflows/ssh-ci.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
- ci@ppc64le.techaro.lol
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
6
.github/workflows/zizmor.yml
vendored
6
.github/workflows/zizmor.yml
vendored
@@ -16,12 +16,12 @@ jobs:
|
||||
security-events: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
|
||||
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3
|
||||
|
||||
- name: Run zizmor 🌈
|
||||
run: uvx zizmor --format sarif . > results.sarif
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11
|
||||
uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
|
||||
@@ -317,16 +317,6 @@ func main() {
|
||||
log.Fatalf("can't parse policy file: %v", err)
|
||||
}
|
||||
|
||||
// Warn if persistent storage is used without a configured signing key
|
||||
if policy.Store.IsPersistent() {
|
||||
if *hs512Secret == "" && *ed25519PrivateKeyHex == "" && *ed25519PrivateKeyHexFile == "" {
|
||||
slog.Warn("[misconfiguration] persistent storage backend is configured, but no private key is set. " +
|
||||
"Challenges will be invalidated when Anubis restarts. " +
|
||||
"Set HS512_SECRET, ED25519_PRIVATE_KEY_HEX, or ED25519_PRIVATE_KEY_HEX_FILE to ensure challenges survive service restarts. " +
|
||||
"See: https://anubis.techaro.lol/docs/admin/installation#key-generation")
|
||||
}
|
||||
}
|
||||
|
||||
ruleErrorIDs := make(map[string]string)
|
||||
for _, rule := range policy.Bots {
|
||||
if rule.Action != config.RuleDeny {
|
||||
|
||||
@@ -29,7 +29,7 @@ var (
|
||||
)
|
||||
|
||||
type RobotsRule struct {
|
||||
UserAgents []string
|
||||
UserAgent string
|
||||
Disallows []string
|
||||
Allows []string
|
||||
CrawlDelay int
|
||||
@@ -130,26 +130,10 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
|
||||
rule := RobotsRule{
|
||||
UserAgents: make([]string, len(userAgents)),
|
||||
Disallows: make([]string, len(disallows)),
|
||||
Allows: make([]string, len(allows)),
|
||||
CrawlDelay: crawlDelay,
|
||||
}
|
||||
copy(rule.UserAgents, userAgents)
|
||||
copy(rule.Disallows, disallows)
|
||||
copy(rule.Allows, allows)
|
||||
return rule
|
||||
}
|
||||
|
||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
scanner := bufio.NewScanner(input)
|
||||
var rules []RobotsRule
|
||||
var currentUserAgents []string
|
||||
var currentDisallows []string
|
||||
var currentAllows []string
|
||||
var currentCrawlDelay int
|
||||
var currentRule *RobotsRule
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
@@ -170,42 +154,38 @@ func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
|
||||
switch directive {
|
||||
case "user-agent":
|
||||
// If we have accumulated rules with directives and encounter a new user-agent,
|
||||
// flush the current rules
|
||||
if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
// Reset for next group
|
||||
currentUserAgents = nil
|
||||
currentDisallows = nil
|
||||
currentAllows = nil
|
||||
currentCrawlDelay = 0
|
||||
// Start a new rule section
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
currentRule = &RobotsRule{
|
||||
UserAgent: value,
|
||||
Disallows: make([]string, 0),
|
||||
Allows: make([]string, 0),
|
||||
}
|
||||
currentUserAgents = append(currentUserAgents, value)
|
||||
|
||||
case "disallow":
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentDisallows = append(currentDisallows, value)
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Disallows = append(currentRule.Disallows, value)
|
||||
}
|
||||
|
||||
case "allow":
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentAllows = append(currentAllows, value)
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Allows = append(currentRule.Allows, value)
|
||||
}
|
||||
|
||||
case "crawl-delay":
|
||||
if len(currentUserAgents) > 0 {
|
||||
if currentRule != nil {
|
||||
if delay, err := parseIntSafe(value); err == nil {
|
||||
currentCrawlDelay = delay
|
||||
currentRule.CrawlDelay = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last group of rules
|
||||
if len(currentUserAgents) > 0 {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
// Don't forget the last rule
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
|
||||
// Mark blacklisted user agents (those with "Disallow: /")
|
||||
@@ -231,11 +211,10 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
ruleCounter := 0
|
||||
|
||||
// Process each robots rule individually
|
||||
for _, robotsRule := range robotsRules {
|
||||
userAgents := robotsRule.UserAgents
|
||||
userAgent := robotsRule.UserAgent
|
||||
|
||||
// Handle crawl delay
|
||||
// Handle crawl delay as weight adjustment (do this first before any continues)
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
@@ -244,32 +223,20 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
}
|
||||
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
if userAgent == "*" {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else if len(userAgents) == 1 {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
Any: expressions,
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents
|
||||
// Handle blacklisted user agents (complete deny/challenge)
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
@@ -277,36 +244,21 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
||||
if len(userAgents) == 1 {
|
||||
userAgent := userAgents[0]
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
Any: expressions,
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
@@ -324,33 +276,9 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
// Build CEL expression
|
||||
var conditions []string
|
||||
|
||||
// Add user agent conditions
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
// Wildcard user agent - no user agent condition needed
|
||||
} else if len(userAgents) == 1 {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
|
||||
} else {
|
||||
// For multiple user agents, we need to use a more complex expression
|
||||
// This is a limitation - we can't easily combine any for user agents with all for path
|
||||
// So we'll create separate rules for each user agent
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
continue // Skip wildcard as it's handled separately
|
||||
}
|
||||
ruleCounter++
|
||||
subRule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
Expression: &config.ExpressionOrList{
|
||||
All: []string{
|
||||
fmt.Sprintf("userAgent.contains(%q)", ua),
|
||||
buildPathCondition(disallow),
|
||||
},
|
||||
},
|
||||
}
|
||||
anubisRules = append(anubisRules, subRule)
|
||||
}
|
||||
continue
|
||||
// Add user agent condition if not wildcard
|
||||
if userAgent != "*" {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
|
||||
}
|
||||
|
||||
// Add path condition
|
||||
@@ -363,6 +291,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
|
||||
@@ -78,12 +78,6 @@ func TestDataFileConversion(t *testing.T) {
|
||||
expectedFile: "complex.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||
},
|
||||
{
|
||||
name: "consecutive_user_agents",
|
||||
robotsFile: "consecutive.robots.txt",
|
||||
expectedFile: "consecutive.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
|
||||
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
@@ -25,6 +25,6 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
24
cmd/robots2policy/testdata/complex.yaml
vendored
24
cmd/robots2policy/testdata/complex.yaml
vendored
@@ -20,8 +20,8 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-6
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Bingbot")
|
||||
@@ -31,14 +31,14 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-9
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
@@ -54,18 +54,18 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
name: robots-txt-policy-disallow-13
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
name: robots-txt-policy-disallow-14
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
name: robots-txt-policy-disallow-15
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
# Test consecutive user agents that should be grouped into any: blocks
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
# Multiple consecutive user agents - should be grouped
|
||||
User-agent: BadBot
|
||||
User-agent: SpamBot
|
||||
User-agent: EvilBot
|
||||
Disallow: /
|
||||
|
||||
# Single user agent - should be separate
|
||||
User-agent: GoodBot
|
||||
Disallow: /private
|
||||
|
||||
# Multiple consecutive user agents with crawl delay
|
||||
User-agent: SlowBot1
|
||||
User-agent: SlowBot2
|
||||
Crawl-delay: 5
|
||||
|
||||
# Multiple consecutive user agents with specific path
|
||||
User-agent: SearchBot1
|
||||
User-agent: SearchBot2
|
||||
User-agent: SearchBot3
|
||||
Disallow: /search
|
||||
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
@@ -1,47 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("BadBot")
|
||||
- userAgent.contains("SpamBot")
|
||||
- userAgent.contains("EvilBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("GoodBot")
|
||||
- path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("SlowBot1")
|
||||
- userAgent.contains("SlowBot2")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot1")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot2")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot3")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-9
|
||||
8
cmd/robots2policy/testdata/simple.json
vendored
8
cmd/robots2policy/testdata/simple.json
vendored
@@ -1,12 +1,12 @@
|
||||
[
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/admin/\")",
|
||||
"name": "robots-txt-policy-disallow-1",
|
||||
"action": "CHALLENGE"
|
||||
"name": "robots-txt-policy-disallow-1"
|
||||
},
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/private\")",
|
||||
"name": "robots-txt-policy-disallow-2",
|
||||
"action": "CHALLENGE"
|
||||
"name": "robots-txt-policy-disallow-2"
|
||||
}
|
||||
]
|
||||
@@ -12,8 +12,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
## [Unreleased]
|
||||
|
||||
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
|
||||
- Add validation warning when persistent storage is used without setting signing keys
|
||||
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))
|
||||
|
||||
<!-- This changes the project to: -->
|
||||
|
||||
|
||||
@@ -70,8 +70,8 @@ Anubis uses these environment variables for configuration:
|
||||
| `COOKIE_PREFIX` | `anubis-cookie` | The prefix used for browser cookies created by Anubis. Useful for customization or avoiding conflicts with other applications. |
|
||||
| `COOKIE_SECURE` | `true` | If set to `true`, enables the [Secure flag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Cookies#block_access_to_your_cookies), meaning that the cookies will only be transmitted over HTTPS. If Anubis is used in an unsecure context (plain HTTP), this will need to be set to `false`. |
|
||||
| `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
|
||||
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. |
|
||||
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. |
|
||||
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. |
|
||||
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. |
|
||||
| `JWT_RESTRICTION_HEADER` | `X-Real-IP` | If set, the JWT is only valid if the current value of this header matches the value when the JWT was created. You can use it e.g. to restrict a JWT to the source IP of the user using `X-Real-IP`. |
|
||||
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
|
||||
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
|
||||
@@ -103,7 +103,7 @@ If you don't know or understand what these settings mean, ignore them. These are
|
||||
| Environment Variable | Default value | Explanation |
|
||||
| :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `FORCED_LANGUAGE` | unset | If set, forces Anubis to display challenge pages in the specified language instead of using the browser's Accept-Language header. Use ISO 639-1 language codes (e.g., `de` for German, `fr` for French). |
|
||||
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. |
|
||||
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. |
|
||||
| `TARGET_DISABLE_KEEPALIVE` | `false` | If `true`, disables HTTP keep-alive for connections to the target backend. Useful for backends that don't handle keep-alive properly. |
|
||||
| `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. |
|
||||
| `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. |
|
||||
|
||||
@@ -11,9 +11,10 @@ import (
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// Sentinel error value used for testing and in admin-visible error messages.
|
||||
// Sentinel error values used for testing and in admin-visible error messages.
|
||||
var (
|
||||
ErrNotExists = errors.New("bbolt: value does not exist in store")
|
||||
ErrBucketDoesNotExist = errors.New("bbolt: bucket does not exist")
|
||||
ErrNotExists = errors.New("bbolt: value does not exist in store")
|
||||
)
|
||||
|
||||
// Store implements store.Interface backed by bbolt[1].
|
||||
@@ -149,10 +150,6 @@ func (s *Store) cleanup(ctx context.Context) error {
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) IsPersistent() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *Store) cleanupThread(ctx context.Context) {
|
||||
t := time.NewTicker(time.Hour)
|
||||
defer t.Stop()
|
||||
|
||||
@@ -37,11 +37,6 @@ type Interface interface {
|
||||
|
||||
// Set puts a value into the store that expires according to its expiry.
|
||||
Set(ctx context.Context, key string, value []byte, expiry time.Duration) error
|
||||
|
||||
// IsPersistent returns true if this storage backend persists data across
|
||||
// service restarts (e.g., bbolt, valkey). Returns false for volatile storage
|
||||
// like in-memory backends.
|
||||
IsPersistent() bool
|
||||
}
|
||||
|
||||
func z[T any]() T { return *new(T) }
|
||||
@@ -93,7 +88,3 @@ func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Dura
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (j *JSON[T]) IsPersistent() bool {
|
||||
return j.Underlying.IsPersistent()
|
||||
}
|
||||
|
||||
@@ -48,10 +48,6 @@ func (i *impl) Set(_ context.Context, key string, value []byte, expiry time.Dura
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *impl) IsPersistent() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (i *impl) cleanupThread(ctx context.Context) {
|
||||
t := time.NewTicker(5 * time.Minute)
|
||||
defer t.Stop()
|
||||
|
||||
@@ -47,7 +47,3 @@ func (s *Store) Set(ctx context.Context, key string, value []byte, expiry time.D
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) IsPersistent() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user