Compare commits

..

1 Commits

Author SHA1 Message Date
Jason Cameron
a8393df522 Merge branch 'main' into json/docs
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
2025-09-06 21:27:10 -04:00
25 changed files with 95 additions and 276 deletions

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
@@ -25,7 +25,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -47,7 +47,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with: with:
images: ghcr.io/${{ github.repository }} images: ghcr.io/${{ github.repository }}

View File

@@ -21,7 +21,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
@@ -35,7 +35,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -56,7 +56,7 @@ jobs:
brew bundle brew bundle
- name: Log into registry - name: Log into registry
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}
@@ -64,7 +64,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with: with:
images: ${{ env.IMAGE }} images: ${{ env.IMAGE }}
@@ -78,7 +78,7 @@ jobs:
SLOG_LEVEL: debug SLOG_LEVEL: debug
- name: Generate artifact attestation - name: Generate artifact attestation
uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0 uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
with: with:
subject-name: ${{ env.IMAGE }} subject-name: ${{ env.IMAGE }}
subject-digest: ${{ steps.build.outputs.digest }} subject-digest: ${{ steps.build.outputs.digest }}

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
@@ -25,7 +25,7 @@ jobs:
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
- name: Log into registry - name: Log into registry
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with: with:
registry: ghcr.io registry: ghcr.io
username: techarohq username: techarohq
@@ -33,7 +33,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with: with:
images: ghcr.io/techarohq/anubis/docs images: ghcr.io/techarohq/anubis/docs
tags: | tags: |
@@ -53,14 +53,14 @@ jobs:
push: true push: true
- name: Apply k8s manifests to limsa lominsa - name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0 uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
env: env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }} KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with: with:
args: apply -k docs/manifest args: apply -k docs/manifest
- name: Apply k8s manifests to limsa lominsa - name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0 uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
env: env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }} KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with: with:

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
@@ -22,7 +22,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with: with:
images: ghcr.io/techarohq/anubis/docs images: ghcr.io/techarohq/anubis/docs
tags: | tags: |

View File

@@ -15,7 +15,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
@@ -28,7 +28,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -49,7 +49,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build
@@ -59,7 +59,7 @@ jobs:
${{ runner.os }}-golang- ${{ runner.os }}-golang-
- name: Cache playwright binaries - name: Cache playwright binaries
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
id: playwright-cache id: playwright-cache
with: with:
path: | path: |

View File

@@ -14,7 +14,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
fetch-tags: true fetch-tags: true
@@ -29,7 +29,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -50,7 +50,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build

View File

@@ -15,7 +15,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
fetch-tags: true fetch-tags: true
@@ -30,7 +30,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -51,7 +51,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build

View File

@@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false

View File

@@ -18,13 +18,13 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
persist-credentials: false persist-credentials: false
- name: Log into registry - name: Log into registry
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}

View File

@@ -20,7 +20,7 @@ jobs:
- ci@ppc64le.techaro.lol - ci@ppc64le.techaro.lol
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0

View File

@@ -16,12 +16,12 @@ jobs:
security-events: write security-events: write
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with: with:
persist-credentials: false persist-credentials: false
- name: Install the latest version of uv - name: Install the latest version of uv
uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0 uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3
- name: Run zizmor 🌈 - name: Run zizmor 🌈
run: uvx zizmor --format sarif . > results.sarif run: uvx zizmor --format sarif . > results.sarif
@@ -29,7 +29,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file - name: Upload SARIF file
uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11 uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4
with: with:
sarif_file: results.sarif sarif_file: results.sarif
category: zizmor category: zizmor

View File

@@ -317,16 +317,6 @@ func main() {
log.Fatalf("can't parse policy file: %v", err) log.Fatalf("can't parse policy file: %v", err)
} }
// Warn if persistent storage is used without a configured signing key
if policy.Store.IsPersistent() {
if *hs512Secret == "" && *ed25519PrivateKeyHex == "" && *ed25519PrivateKeyHexFile == "" {
slog.Warn("[misconfiguration] persistent storage backend is configured, but no private key is set. " +
"Challenges will be invalidated when Anubis restarts. " +
"Set HS512_SECRET, ED25519_PRIVATE_KEY_HEX, or ED25519_PRIVATE_KEY_HEX_FILE to ensure challenges survive service restarts. " +
"See: https://anubis.techaro.lol/docs/admin/installation#key-generation")
}
}
ruleErrorIDs := make(map[string]string) ruleErrorIDs := make(map[string]string)
for _, rule := range policy.Bots { for _, rule := range policy.Bots {
if rule.Action != config.RuleDeny { if rule.Action != config.RuleDeny {

View File

@@ -29,7 +29,7 @@ var (
) )
type RobotsRule struct { type RobotsRule struct {
UserAgents []string UserAgent string
Disallows []string Disallows []string
Allows []string Allows []string
CrawlDelay int CrawlDelay int
@@ -130,26 +130,10 @@ func main() {
} }
} }
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
rule := RobotsRule{
UserAgents: make([]string, len(userAgents)),
Disallows: make([]string, len(disallows)),
Allows: make([]string, len(allows)),
CrawlDelay: crawlDelay,
}
copy(rule.UserAgents, userAgents)
copy(rule.Disallows, disallows)
copy(rule.Allows, allows)
return rule
}
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) { func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
scanner := bufio.NewScanner(input) scanner := bufio.NewScanner(input)
var rules []RobotsRule var rules []RobotsRule
var currentUserAgents []string var currentRule *RobotsRule
var currentDisallows []string
var currentAllows []string
var currentCrawlDelay int
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := strings.TrimSpace(scanner.Text())
@@ -170,42 +154,38 @@ func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
switch directive { switch directive {
case "user-agent": case "user-agent":
// If we have accumulated rules with directives and encounter a new user-agent, // Start a new rule section
// flush the current rules if currentRule != nil {
if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) { rules = append(rules, *currentRule)
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay) }
rules = append(rules, rule) currentRule = &RobotsRule{
// Reset for next group UserAgent: value,
currentUserAgents = nil Disallows: make([]string, 0),
currentDisallows = nil Allows: make([]string, 0),
currentAllows = nil
currentCrawlDelay = 0
} }
currentUserAgents = append(currentUserAgents, value)
case "disallow": case "disallow":
if len(currentUserAgents) > 0 && value != "" { if currentRule != nil && value != "" {
currentDisallows = append(currentDisallows, value) currentRule.Disallows = append(currentRule.Disallows, value)
} }
case "allow": case "allow":
if len(currentUserAgents) > 0 && value != "" { if currentRule != nil && value != "" {
currentAllows = append(currentAllows, value) currentRule.Allows = append(currentRule.Allows, value)
} }
case "crawl-delay": case "crawl-delay":
if len(currentUserAgents) > 0 { if currentRule != nil {
if delay, err := parseIntSafe(value); err == nil { if delay, err := parseIntSafe(value); err == nil {
currentCrawlDelay = delay currentRule.CrawlDelay = delay
} }
} }
} }
} }
// Don't forget the last group of rules // Don't forget the last rule
if len(currentUserAgents) > 0 { if currentRule != nil {
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay) rules = append(rules, *currentRule)
rules = append(rules, rule)
} }
// Mark blacklisted user agents (those with "Disallow: /") // Mark blacklisted user agents (those with "Disallow: /")
@@ -231,11 +211,10 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
var anubisRules []AnubisRule var anubisRules []AnubisRule
ruleCounter := 0 ruleCounter := 0
// Process each robots rule individually
for _, robotsRule := range robotsRules { for _, robotsRule := range robotsRules {
userAgents := robotsRule.UserAgents userAgent := robotsRule.UserAgent
// Handle crawl delay // Handle crawl delay as weight adjustment (do this first before any continues)
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 { if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
ruleCounter++ ruleCounter++
rule := AnubisRule{ rule := AnubisRule{
@@ -244,32 +223,20 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Weight: &config.Weight{Adjust: *crawlDelay}, Weight: &config.Weight{Adjust: *crawlDelay},
} }
if len(userAgents) == 1 && userAgents[0] == "*" { if userAgent == "*" {
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies All: []string{"true"}, // Always applies
} }
} else if len(userAgents) == 1 {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
}
} else { } else {
// Multiple user agents - use any block
var expressions []string
for _, ua := range userAgents {
if ua == "*" {
expressions = append(expressions, "true")
} else {
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
}
}
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
Any: expressions, All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
} }
} }
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
} }
// Handle blacklisted user agents // Handle blacklisted user agents (complete deny/challenge)
if robotsRule.IsBlacklist { if robotsRule.IsBlacklist {
ruleCounter++ ruleCounter++
rule := AnubisRule{ rule := AnubisRule{
@@ -277,36 +244,21 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Action: *userAgentDeny, Action: *userAgentDeny,
} }
if len(userAgents) == 1 { if userAgent == "*" {
userAgent := userAgents[0] // This would block everything - convert to a weight adjustment instead
if userAgent == "*" { rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
// This would block everything - convert to a weight adjustment instead rule.Action = "WEIGH"
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter) rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
rule.Action = "WEIGH" rule.Expression = &config.ExpressionOrList{
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly All: []string{"true"}, // Always applies
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
} }
} else { } else {
// Multiple user agents - use any block
var expressions []string
for _, ua := range userAgents {
if ua == "*" {
expressions = append(expressions, "true")
} else {
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
}
}
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
Any: expressions, All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
} }
} }
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
continue
} }
// Handle specific disallow rules // Handle specific disallow rules
@@ -324,33 +276,9 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
// Build CEL expression // Build CEL expression
var conditions []string var conditions []string
// Add user agent conditions // Add user agent condition if not wildcard
if len(userAgents) == 1 && userAgents[0] == "*" { if userAgent != "*" {
// Wildcard user agent - no user agent condition needed conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
} else if len(userAgents) == 1 {
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
} else {
// For multiple user agents, we need to use a more complex expression
// This is a limitation - we can't easily combine any for user agents with all for path
// So we'll create separate rules for each user agent
for _, ua := range userAgents {
if ua == "*" {
continue // Skip wildcard as it's handled separately
}
ruleCounter++
subRule := AnubisRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
Expression: &config.ExpressionOrList{
All: []string{
fmt.Sprintf("userAgent.contains(%q)", ua),
buildPathCondition(disallow),
},
},
}
anubisRules = append(anubisRules, subRule)
}
continue
} }
// Add path condition // Add path condition
@@ -363,6 +291,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
} }
} }
return anubisRules return anubisRules

View File

@@ -78,12 +78,6 @@ func TestDataFileConversion(t *testing.T) {
expectedFile: "complex.yaml", expectedFile: "complex.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 5}, options: TestOptions{format: "yaml", crawlDelayWeight: 5},
}, },
{
name: "consecutive_user_agents",
robotsFile: "consecutive.robots.txt",
expectedFile: "consecutive.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
},
} }
for _, tc := range testCases { for _, tc := range testCases {

View File

@@ -25,6 +25,6 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Googlebot") - userAgent.contains("Googlebot")
- path.startsWith("/search") - path.startsWith("/search")
name: robots-txt-policy-disallow-7 name: robots-txt-policy-disallow-7

View File

@@ -20,8 +20,8 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Googlebot") - userAgent.contains("Googlebot")
- path.startsWith("/search/") - path.startsWith("/search/")
name: robots-txt-policy-disallow-6 name: robots-txt-policy-disallow-6
- action: WEIGH - action: WEIGH
expression: userAgent.contains("Bingbot") expression: userAgent.contains("Bingbot")
@@ -31,14 +31,14 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Bingbot") - userAgent.contains("Bingbot")
- path.startsWith("/search/") - path.startsWith("/search/")
name: robots-txt-policy-disallow-8 name: robots-txt-policy-disallow-8
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Bingbot") - userAgent.contains("Bingbot")
- path.startsWith("/admin/") - path.startsWith("/admin/")
name: robots-txt-policy-disallow-9 name: robots-txt-policy-disallow-9
- action: DENY - action: DENY
expression: userAgent.contains("BadBot") expression: userAgent.contains("BadBot")
@@ -54,18 +54,18 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/.*/admin") - path.matches("^/.*/admin")
name: robots-txt-policy-disallow-13 name: robots-txt-policy-disallow-13
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html") - path.matches("^/temp.*\\.html")
name: robots-txt-policy-disallow-14 name: robots-txt-policy-disallow-14
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/file.\\.log") - path.matches("^/file.\\.log")
name: robots-txt-policy-disallow-15 name: robots-txt-policy-disallow-15

View File

@@ -1,25 +0,0 @@
# Test consecutive user agents that should be grouped into any: blocks
User-agent: *
Disallow: /admin
Crawl-delay: 10
# Multiple consecutive user agents - should be grouped
User-agent: BadBot
User-agent: SpamBot
User-agent: EvilBot
Disallow: /
# Single user agent - should be separate
User-agent: GoodBot
Disallow: /private
# Multiple consecutive user agents with crawl delay
User-agent: SlowBot1
User-agent: SlowBot2
Crawl-delay: 5
# Multiple consecutive user agents with specific path
User-agent: SearchBot1
User-agent: SearchBot2
User-agent: SearchBot3
Disallow: /search

View File

@@ -1,47 +0,0 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 3
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-2
- action: DENY
expression:
any:
- userAgent.contains("BadBot")
- userAgent.contains("SpamBot")
- userAgent.contains("EvilBot")
name: robots-txt-policy-blacklist-3
- action: CHALLENGE
expression:
all:
- userAgent.contains("GoodBot")
- path.startsWith("/private")
name: robots-txt-policy-disallow-4
- action: WEIGH
expression:
any:
- userAgent.contains("SlowBot1")
- userAgent.contains("SlowBot2")
name: robots-txt-policy-crawl-delay-5
weight:
adjust: 3
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot1")
- path.startsWith("/search")
name: robots-txt-policy-disallow-7
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot2")
- path.startsWith("/search")
name: robots-txt-policy-disallow-8
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot3")
- path.startsWith("/search")
name: robots-txt-policy-disallow-9

View File

@@ -1,12 +1,12 @@
[ [
{ {
"action": "CHALLENGE",
"expression": "path.startsWith(\"/admin/\")", "expression": "path.startsWith(\"/admin/\")",
"name": "robots-txt-policy-disallow-1", "name": "robots-txt-policy-disallow-1"
"action": "CHALLENGE"
}, },
{ {
"action": "CHALLENGE",
"expression": "path.startsWith(\"/private\")", "expression": "path.startsWith(\"/private\")",
"name": "robots-txt-policy-disallow-2", "name": "robots-txt-policy-disallow-2"
"action": "CHALLENGE"
} }
] ]

View File

@@ -12,8 +12,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086)) - Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
- Add validation warning when persistent storage is used without setting signing keys
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))
<!-- This changes the project to: --> <!-- This changes the project to: -->

View File

@@ -70,8 +70,8 @@ Anubis uses these environment variables for configuration:
| `COOKIE_PREFIX` | `anubis-cookie` | The prefix used for browser cookies created by Anubis. Useful for customization or avoiding conflicts with other applications. | | `COOKIE_PREFIX` | `anubis-cookie` | The prefix used for browser cookies created by Anubis. Useful for customization or avoiding conflicts with other applications. |
| `COOKIE_SECURE` | `true` | If set to `true`, enables the [Secure flag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Cookies#block_access_to_your_cookies), meaning that the cookies will only be transmitted over HTTPS. If Anubis is used in an unsecure context (plain HTTP), this will be need to be set to false | | `COOKIE_SECURE` | `true` | If set to `true`, enables the [Secure flag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Cookies#block_access_to_your_cookies), meaning that the cookies will only be transmitted over HTTPS. If Anubis is used in an unsecure context (plain HTTP), this will be need to be set to false |
| `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. | | `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. | | `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. |
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. | | `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. |
| `JWT_RESTRICTION_HEADER` | `X-Real-IP` | If set, the JWT is only valid if the current value of this header matches the value when the JWT was created. You can use it e.g. to restrict a JWT to the source IP of the user using `X-Real-IP`. | | `JWT_RESTRICTION_HEADER` | `X-Real-IP` | If set, the JWT is only valid if the current value of this header matches the value when the JWT was created. You can use it e.g. to restrict a JWT to the source IP of the user using `X-Real-IP`. |
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. | | `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. | | `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
@@ -103,7 +103,7 @@ If you don't know or understand what these settings mean, ignore them. These are
| Environment Variable | Default value | Explanation | | Environment Variable | Default value | Explanation |
| :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `FORCED_LANGUAGE` | unset | If set, forces Anubis to display challenge pages in the specified language instead of using the browser's Accept-Language header. Use ISO 639-1 language codes (e.g., `de` for German, `fr` for French). | | `FORCED_LANGUAGE` | unset | If set, forces Anubis to display challenge pages in the specified language instead of using the browser's Accept-Language header. Use ISO 639-1 language codes (e.g., `de` for German, `fr` for French). |
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. | | `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. |
| `TARGET_DISABLE_KEEPALIVE` | `false` | If `true`, disables HTTP keep-alive for connections to the target backend. Useful for backends that don't handle keep-alive properly. | | `TARGET_DISABLE_KEEPALIVE` | `false` | If `true`, disables HTTP keep-alive for connections to the target backend. Useful for backends that don't handle keep-alive properly. |
| `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. | | `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. |
| `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. | | `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. |

View File

@@ -11,9 +11,10 @@ import (
"go.etcd.io/bbolt" "go.etcd.io/bbolt"
) )
// Sentinel error value used for testing and in admin-visible error messages. // Sentinel error values used for testing and in admin-visible error messages.
var ( var (
ErrNotExists = errors.New("bbolt: value does not exist in store") ErrBucketDoesNotExist = errors.New("bbolt: bucket does not exist")
ErrNotExists = errors.New("bbolt: value does not exist in store")
) )
// Store implements store.Interface backed by bbolt[1]. // Store implements store.Interface backed by bbolt[1].
@@ -149,10 +150,6 @@ func (s *Store) cleanup(ctx context.Context) error {
}) })
} }
func (s *Store) IsPersistent() bool {
return true
}
func (s *Store) cleanupThread(ctx context.Context) { func (s *Store) cleanupThread(ctx context.Context) {
t := time.NewTicker(time.Hour) t := time.NewTicker(time.Hour)
defer t.Stop() defer t.Stop()

View File

@@ -37,11 +37,6 @@ type Interface interface {
// Set puts a value into the store that expires according to its expiry. // Set puts a value into the store that expires according to its expiry.
Set(ctx context.Context, key string, value []byte, expiry time.Duration) error Set(ctx context.Context, key string, value []byte, expiry time.Duration) error
// IsPersistent returns true if this storage backend persists data across
// service restarts (e.g., bbolt, valkey). Returns false for volatile storage
// like in-memory backends.
IsPersistent() bool
} }
func z[T any]() T { return *new(T) } func z[T any]() T { return *new(T) }
@@ -93,7 +88,3 @@ func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Dura
return nil return nil
} }
func (j *JSON[T]) IsPersistent() bool {
return j.Underlying.IsPersistent()
}

View File

@@ -48,10 +48,6 @@ func (i *impl) Set(_ context.Context, key string, value []byte, expiry time.Dura
return nil return nil
} }
func (i *impl) IsPersistent() bool {
return false
}
func (i *impl) cleanupThread(ctx context.Context) { func (i *impl) cleanupThread(ctx context.Context) {
t := time.NewTicker(5 * time.Minute) t := time.NewTicker(5 * time.Minute)
defer t.Stop() defer t.Stop()

View File

@@ -47,7 +47,3 @@ func (s *Store) Set(ctx context.Context, key string, value []byte, expiry time.D
return nil return nil
} }
func (s *Store) IsPersistent() bool {
return true
}