Compare commits

..

7 Commits

Author SHA1 Message Date
Xe Iaso
a173b8f484 fix(data): add services folder to embedded filesystem
Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-09-07 16:14:37 +00:00
Xe Iaso
7e1b5d9951 fix: demote temporal assurance checks
* fix(challenge): demote temporal assurance to 80% instead of 95%

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(challenge/preact): wait a little longer to be extra safe

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(challenge/metarefresh): wait a little longer to be extra safe

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(CHANGELOG): add fix notes

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-09-07 16:10:54 +00:00
Xe Iaso
98945fb56f feat(lib/store): add s3api storage backend (#1089)
* feat(lib/store): add s3api storage backend

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(store/s3api): replace fake S3 API keys with the bee movie script

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(store/s3api): fix spelling sin

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(store/s3api): remove vestigal experiment

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(store/s3api): support IsPersistent call

Ref #1088

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(test): go mod tidy

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-09-07 09:24:14 -04:00
Jason Cameron
82099d9e05 fix(robots2policy): handle multiple user agents under one block (#925) 2025-09-06 22:35:19 -04:00
dependabot[bot]
87c2f1e0e6 build(deps): bump the github-actions group across 1 directory with 8 updates (#1071)
Co-authored-by: Jason Cameron <git@jasoncameron.dev>
2025-09-06 22:30:43 -04:00
Jason Cameron
f0199d014f docs: document some missing env vars (#1087) 2025-09-07 01:34:42 +00:00
Jason Cameron
75109f6b73 docs(installation): add SLOG_LEVEL environment variable to configuration (#1086)
* docs(installation): add SLOG_LEVEL environment variable to configuration

* docs(installation): add SLOG_LEVEL environment variable to configuration
2025-09-06 20:59:02 -04:00
25 changed files with 258 additions and 98 deletions

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
@@ -25,7 +25,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -47,7 +47,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
with: with:
images: ghcr.io/${{ github.repository }} images: ghcr.io/${{ github.repository }}

View File

@@ -21,7 +21,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
@@ -35,7 +35,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -56,7 +56,7 @@ jobs:
brew bundle brew bundle
- name: Log into registry - name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}
@@ -64,7 +64,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
with: with:
images: ${{ env.IMAGE }} images: ${{ env.IMAGE }}
@@ -78,7 +78,7 @@ jobs:
SLOG_LEVEL: debug SLOG_LEVEL: debug
- name: Generate artifact attestation - name: Generate artifact attestation
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0 uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
with: with:
subject-name: ${{ env.IMAGE }} subject-name: ${{ env.IMAGE }}
subject-digest: ${{ steps.build.outputs.digest }} subject-digest: ${{ steps.build.outputs.digest }}

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
@@ -25,7 +25,7 @@ jobs:
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
- name: Log into registry - name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
with: with:
registry: ghcr.io registry: ghcr.io
username: techarohq username: techarohq
@@ -33,7 +33,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
with: with:
images: ghcr.io/techarohq/anubis/docs images: ghcr.io/techarohq/anubis/docs
tags: | tags: |
@@ -53,14 +53,14 @@ jobs:
push: true push: true
- name: Apply k8s manifests to limsa lominsa - name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3 uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
env: env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }} KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with: with:
args: apply -k docs/manifest args: apply -k docs/manifest
- name: Apply k8s manifests to limsa lominsa - name: Apply k8s manifests to limsa lominsa
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3 uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
env: env:
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }} KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
with: with:

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
@@ -22,7 +22,7 @@ jobs:
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
with: with:
images: ghcr.io/techarohq/anubis/docs images: ghcr.io/techarohq/anubis/docs
tags: | tags: |

View File

@@ -15,7 +15,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
@@ -28,7 +28,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -49,7 +49,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build
@@ -59,7 +59,7 @@ jobs:
${{ runner.os }}-golang- ${{ runner.os }}-golang-
- name: Cache playwright binaries - name: Cache playwright binaries
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
id: playwright-cache id: playwright-cache
with: with:
path: | path: |

View File

@@ -14,7 +14,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
fetch-tags: true fetch-tags: true
@@ -29,7 +29,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -50,7 +50,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build

View File

@@ -15,7 +15,7 @@ jobs:
#runs-on: alrest-techarohq #runs-on: alrest-techarohq
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
fetch-tags: true fetch-tags: true
@@ -30,7 +30,7 @@ jobs:
uses: Homebrew/actions/setup-homebrew@main uses: Homebrew/actions/setup-homebrew@main
- name: Setup Homebrew cellar cache - name: Setup Homebrew cellar cache
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
/home/linuxbrew/.linuxbrew/Cellar /home/linuxbrew/.linuxbrew/Cellar
@@ -51,7 +51,7 @@ jobs:
brew bundle brew bundle
- name: Setup Golang caches - name: Setup Golang caches
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build

View File

@@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false

View File

@@ -18,13 +18,13 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0
persist-credentials: false persist-credentials: false
- name: Log into registry - name: Log into registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}

View File

@@ -20,7 +20,7 @@ jobs:
- ci@ppc64le.techaro.lol - ci@ppc64le.techaro.lol
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
fetch-tags: true fetch-tags: true
fetch-depth: 0 fetch-depth: 0

View File

@@ -16,12 +16,12 @@ jobs:
security-events: write security-events: write
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with: with:
persist-credentials: false persist-credentials: false
- name: Install the latest version of uv - name: Install the latest version of uv
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
- name: Run zizmor 🌈 - name: Run zizmor 🌈
run: uvx zizmor --format sarif . > results.sarif run: uvx zizmor --format sarif . > results.sarif
@@ -29,7 +29,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file - name: Upload SARIF file
uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4 uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11
with: with:
sarif_file: results.sarif sarif_file: results.sarif
category: zizmor category: zizmor

View File

@@ -29,7 +29,7 @@ var (
) )
type RobotsRule struct { type RobotsRule struct {
UserAgent string UserAgents []string
Disallows []string Disallows []string
Allows []string Allows []string
CrawlDelay int CrawlDelay int
@@ -130,10 +130,26 @@ func main() {
} }
} }
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
rule := RobotsRule{
UserAgents: make([]string, len(userAgents)),
Disallows: make([]string, len(disallows)),
Allows: make([]string, len(allows)),
CrawlDelay: crawlDelay,
}
copy(rule.UserAgents, userAgents)
copy(rule.Disallows, disallows)
copy(rule.Allows, allows)
return rule
}
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) { func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
scanner := bufio.NewScanner(input) scanner := bufio.NewScanner(input)
var rules []RobotsRule var rules []RobotsRule
var currentRule *RobotsRule var currentUserAgents []string
var currentDisallows []string
var currentAllows []string
var currentCrawlDelay int
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := strings.TrimSpace(scanner.Text())
@@ -154,38 +170,42 @@ func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
switch directive { switch directive {
case "user-agent": case "user-agent":
// Start a new rule section // If we have accumulated rules with directives and encounter a new user-agent,
if currentRule != nil { // flush the current rules
rules = append(rules, *currentRule) if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) {
} rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
currentRule = &RobotsRule{ rules = append(rules, rule)
UserAgent: value, // Reset for next group
Disallows: make([]string, 0), currentUserAgents = nil
Allows: make([]string, 0), currentDisallows = nil
currentAllows = nil
currentCrawlDelay = 0
} }
currentUserAgents = append(currentUserAgents, value)
case "disallow": case "disallow":
if currentRule != nil && value != "" { if len(currentUserAgents) > 0 && value != "" {
currentRule.Disallows = append(currentRule.Disallows, value) currentDisallows = append(currentDisallows, value)
} }
case "allow": case "allow":
if currentRule != nil && value != "" { if len(currentUserAgents) > 0 && value != "" {
currentRule.Allows = append(currentRule.Allows, value) currentAllows = append(currentAllows, value)
} }
case "crawl-delay": case "crawl-delay":
if currentRule != nil { if len(currentUserAgents) > 0 {
if delay, err := parseIntSafe(value); err == nil { if delay, err := parseIntSafe(value); err == nil {
currentRule.CrawlDelay = delay currentCrawlDelay = delay
} }
} }
} }
} }
// Don't forget the last rule // Don't forget the last group of rules
if currentRule != nil { if len(currentUserAgents) > 0 {
rules = append(rules, *currentRule) rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
rules = append(rules, rule)
} }
// Mark blacklisted user agents (those with "Disallow: /") // Mark blacklisted user agents (those with "Disallow: /")
@@ -211,10 +231,11 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
var anubisRules []AnubisRule var anubisRules []AnubisRule
ruleCounter := 0 ruleCounter := 0
// Process each robots rule individually
for _, robotsRule := range robotsRules { for _, robotsRule := range robotsRules {
userAgent := robotsRule.UserAgent userAgents := robotsRule.UserAgents
// Handle crawl delay as weight adjustment (do this first before any continues) // Handle crawl delay
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 { if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
ruleCounter++ ruleCounter++
rule := AnubisRule{ rule := AnubisRule{
@@ -223,20 +244,32 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Weight: &config.Weight{Adjust: *crawlDelay}, Weight: &config.Weight{Adjust: *crawlDelay},
} }
if userAgent == "*" { if len(userAgents) == 1 && userAgents[0] == "*" {
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies All: []string{"true"}, // Always applies
} }
} else { } else if len(userAgents) == 1 {
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)}, All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
}
} else {
// Multiple user agents - use any block
var expressions []string
for _, ua := range userAgents {
if ua == "*" {
expressions = append(expressions, "true")
} else {
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
}
}
rule.Expression = &config.ExpressionOrList{
Any: expressions,
} }
} }
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
} }
// Handle blacklisted user agents (complete deny/challenge) // Handle blacklisted user agents
if robotsRule.IsBlacklist { if robotsRule.IsBlacklist {
ruleCounter++ ruleCounter++
rule := AnubisRule{ rule := AnubisRule{
@@ -244,21 +277,36 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Action: *userAgentDeny, Action: *userAgentDeny,
} }
if userAgent == "*" { if len(userAgents) == 1 {
// This would block everything - convert to a weight adjustment instead userAgent := userAgents[0]
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter) if userAgent == "*" {
rule.Action = "WEIGH" // This would block everything - convert to a weight adjustment instead
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
rule.Expression = &config.ExpressionOrList{ rule.Action = "WEIGH"
All: []string{"true"}, // Always applies rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
rule.Expression = &config.ExpressionOrList{
All: []string{"true"}, // Always applies
}
} else {
rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
}
} }
} else { } else {
// Multiple user agents - use any block
var expressions []string
for _, ua := range userAgents {
if ua == "*" {
expressions = append(expressions, "true")
} else {
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
}
}
rule.Expression = &config.ExpressionOrList{ rule.Expression = &config.ExpressionOrList{
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)}, Any: expressions,
} }
} }
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
continue
} }
// Handle specific disallow rules // Handle specific disallow rules
@@ -276,9 +324,33 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
// Build CEL expression // Build CEL expression
var conditions []string var conditions []string
// Add user agent condition if not wildcard // Add user agent conditions
if userAgent != "*" { if len(userAgents) == 1 && userAgents[0] == "*" {
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent)) // Wildcard user agent - no user agent condition needed
} else if len(userAgents) == 1 {
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
} else {
// For multiple user agents, we need to use a more complex expression
// This is a limitation - we can't easily combine any for user agents with all for path
// So we'll create separate rules for each user agent
for _, ua := range userAgents {
if ua == "*" {
continue // Skip wildcard as it's handled separately
}
ruleCounter++
subRule := AnubisRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
Expression: &config.ExpressionOrList{
All: []string{
fmt.Sprintf("userAgent.contains(%q)", ua),
buildPathCondition(disallow),
},
},
}
anubisRules = append(anubisRules, subRule)
}
continue
} }
// Add path condition // Add path condition
@@ -291,7 +363,6 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
anubisRules = append(anubisRules, rule) anubisRules = append(anubisRules, rule)
} }
} }
return anubisRules return anubisRules

View File

@@ -78,6 +78,12 @@ func TestDataFileConversion(t *testing.T) {
expectedFile: "complex.yaml", expectedFile: "complex.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 5}, options: TestOptions{format: "yaml", crawlDelayWeight: 5},
}, },
{
name: "consecutive_user_agents",
robotsFile: "consecutive.robots.txt",
expectedFile: "consecutive.yaml",
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
},
} }
for _, tc := range testCases { for _, tc := range testCases {

View File

@@ -25,6 +25,6 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Googlebot") - userAgent.contains("Googlebot")
- path.startsWith("/search") - path.startsWith("/search")
name: robots-txt-policy-disallow-7 name: robots-txt-policy-disallow-7

View File

@@ -20,8 +20,8 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Googlebot") - userAgent.contains("Googlebot")
- path.startsWith("/search/") - path.startsWith("/search/")
name: robots-txt-policy-disallow-6 name: robots-txt-policy-disallow-6
- action: WEIGH - action: WEIGH
expression: userAgent.contains("Bingbot") expression: userAgent.contains("Bingbot")
@@ -31,14 +31,14 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Bingbot") - userAgent.contains("Bingbot")
- path.startsWith("/search/") - path.startsWith("/search/")
name: robots-txt-policy-disallow-8 name: robots-txt-policy-disallow-8
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("Bingbot") - userAgent.contains("Bingbot")
- path.startsWith("/admin/") - path.startsWith("/admin/")
name: robots-txt-policy-disallow-9 name: robots-txt-policy-disallow-9
- action: DENY - action: DENY
expression: userAgent.contains("BadBot") expression: userAgent.contains("BadBot")
@@ -54,18 +54,18 @@
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/.*/admin") - path.matches("^/.*/admin")
name: robots-txt-policy-disallow-13 name: robots-txt-policy-disallow-13
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/temp.*\\.html") - path.matches("^/temp.*\\.html")
name: robots-txt-policy-disallow-14 name: robots-txt-policy-disallow-14
- action: CHALLENGE - action: CHALLENGE
expression: expression:
all: all:
- userAgent.contains("TestBot") - userAgent.contains("TestBot")
- path.matches("^/file.\\.log") - path.matches("^/file.\\.log")
name: robots-txt-policy-disallow-15 name: robots-txt-policy-disallow-15

View File

@@ -0,0 +1,25 @@
# Test consecutive user agents that should be grouped into any: blocks
User-agent: *
Disallow: /admin
Crawl-delay: 10
# Multiple consecutive user agents - should be grouped
User-agent: BadBot
User-agent: SpamBot
User-agent: EvilBot
Disallow: /
# Single user agent - should be separate
User-agent: GoodBot
Disallow: /private
# Multiple consecutive user agents with crawl delay
User-agent: SlowBot1
User-agent: SlowBot2
Crawl-delay: 5
# Multiple consecutive user agents with specific path
User-agent: SearchBot1
User-agent: SearchBot2
User-agent: SearchBot3
Disallow: /search

View File

@@ -0,0 +1,47 @@
- action: WEIGH
expression: "true"
name: robots-txt-policy-crawl-delay-1
weight:
adjust: 3
- action: CHALLENGE
expression: path.startsWith("/admin")
name: robots-txt-policy-disallow-2
- action: DENY
expression:
any:
- userAgent.contains("BadBot")
- userAgent.contains("SpamBot")
- userAgent.contains("EvilBot")
name: robots-txt-policy-blacklist-3
- action: CHALLENGE
expression:
all:
- userAgent.contains("GoodBot")
- path.startsWith("/private")
name: robots-txt-policy-disallow-4
- action: WEIGH
expression:
any:
- userAgent.contains("SlowBot1")
- userAgent.contains("SlowBot2")
name: robots-txt-policy-crawl-delay-5
weight:
adjust: 3
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot1")
- path.startsWith("/search")
name: robots-txt-policy-disallow-7
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot2")
- path.startsWith("/search")
name: robots-txt-policy-disallow-8
- action: CHALLENGE
expression:
all:
- userAgent.contains("SearchBot3")
- path.startsWith("/search")
name: robots-txt-policy-disallow-9

View File

@@ -1,12 +1,12 @@
[ [
{ {
"action": "CHALLENGE",
"expression": "path.startsWith(\"/admin/\")", "expression": "path.startsWith(\"/admin/\")",
"name": "robots-txt-policy-disallow-1" "name": "robots-txt-policy-disallow-1",
"action": "CHALLENGE"
}, },
{ {
"action": "CHALLENGE",
"expression": "path.startsWith(\"/private\")", "expression": "path.startsWith(\"/private\")",
"name": "robots-txt-policy-disallow-2" "name": "robots-txt-policy-disallow-2",
"action": "CHALLENGE"
} }
] ]

View File

@@ -3,6 +3,6 @@ package data
import "embed" import "embed"
var ( var (
//go:embed botPolicies.yaml all:apps all:bots all:clients all:common all:crawlers all:meta //go:embed botPolicies.yaml all:apps all:bots all:clients all:common all:crawlers all:meta all:services
BotPolicies embed.FS BotPolicies embed.FS
) )

View File

@@ -13,7 +13,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- This changes the project to: --> <!-- This changes the project to: -->
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))
- Add the [`s3api` storage backend](./admin/policies.mdx#s3api) to allow Anubis to use S3 API compatible object storage as its storage backend. - Add the [`s3api` storage backend](./admin/policies.mdx#s3api) to allow Anubis to use S3 API compatible object storage as its storage backend.
- Add the `services` folder to the embedded data filesystem.
### Bug Fixes
Sometimes the enhanced temporal assurance in [#1038](https://github.com/TecharoHQ/anubis/pull/1038) and [#1068](https://github.com/TecharoHQ/anubis/pull/1068) could backfire because Chromium and its ilk randomize the amount of time they wait in order to avoid a timing side channel attack. This has been fixed by both increasing the amount of time a client has to wait for the metarefresh and preact challenges as well as making the server side logic more permissive.
## v1.22.0: Yda Hext ## v1.22.0: Yda Hext

View File

@@ -59,7 +59,7 @@ Currently the following settings are configurable via the policy file:
Anubis uses these environment variables for configuration: Anubis uses these environment variables for configuration:
| Environment Variable | Default value | Explanation | | Environment Variable | Default value | Explanation |
| :----------------------------- | :---------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | |:-------------------------------|:------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `BASE_PREFIX` | unset | If set, adds a global prefix to all Anubis endpoints (everything starting with `/.within.website/x/anubis/`). For example, setting this to `/myapp` would make Anubis accessible at `/myapp/` instead of `/`. This is useful when running Anubis behind a reverse proxy that routes based on path prefixes. | | `BASE_PREFIX` | unset | If set, adds a global prefix to all Anubis endpoints (everything starting with `/.within.website/x/anubis/`). For example, setting this to `/myapp` would make Anubis accessible at `/myapp/` instead of `/`. This is useful when running Anubis behind a reverse proxy that routes based on path prefixes. |
| `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` | | `BIND` | `:8923` | The network address that Anubis listens on. For `unix`, set this to a path: `/run/anubis/instance.sock` |
| `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. | | `BIND_NETWORK` | `tcp` | The address family that Anubis listens on. Accepts `tcp`, `unix` and anything Go's [`net.Listen`](https://pkg.go.dev/net#Listen) supports. |
@@ -67,6 +67,7 @@ Anubis uses these environment variables for configuration:
| `COOKIE_DYNAMIC_DOMAIN` | false | If set to true, automatically set cookie domain fields based on the hostname of the request. EG: if you are making a request to `anubis.techaro.lol`, the Anubis cookie will be valid for any subdomain of `techaro.lol`. | | `COOKIE_DYNAMIC_DOMAIN` | false | If set to true, automatically set cookie domain fields based on the hostname of the request. EG: if you are making a request to `anubis.techaro.lol`, the Anubis cookie will be valid for any subdomain of `techaro.lol`. |
| `COOKIE_EXPIRATION_TIME` | `168h` | The amount of time the authorization cookie is valid for. | | `COOKIE_EXPIRATION_TIME` | `168h` | The amount of time the authorization cookie is valid for. |
| `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. | | `COOKIE_PARTITIONED` | `false` | If set to `true`, enables the [partitioned (CHIPS) flag](https://developers.google.com/privacy-sandbox/cookies/chips), meaning that Anubis inside an iframe has a different set of cookies than the domain hosting the iframe. |
| `COOKIE_PREFIX` | `anubis-cookie` | The prefix used for browser cookies created by Anubis. Useful for customization or avoiding conflicts with other applications. |
| `COOKIE_SECURE` | `true` | If set to `true`, enables the [Secure flag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Cookies#block_access_to_your_cookies), meaning that the cookies will only be transmitted over HTTPS. If Anubis is used in an unsecure context (plain HTTP), this will be need to be set to false | | `COOKIE_SECURE` | `true` | If set to `true`, enables the [Secure flag](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Cookies#block_access_to_your_cookies), meaning that the cookies will only be transmitted over HTTPS. If Anubis is used in an unsecure context (plain HTTP), this will be need to be set to false |
| `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. | | `DIFFICULTY` | `4` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. |
| `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. | | `ED25519_PRIVATE_KEY_HEX` | unset | The hex-encoded ed25519 private key used to sign Anubis responses. If this is not set, Anubis will generate one for you. This should be exactly 64 characters long. When running multiple instances on the same base domain, the key must be the same across all instances. See below for details. |
@@ -81,6 +82,7 @@ Anubis uses these environment variables for configuration:
| `PUBLIC_URL` | unset | The externally accessible URL for this Anubis instance, used for constructing redirect URLs (e.g., for Traefik forwardAuth). | | `PUBLIC_URL` | unset | The externally accessible URL for this Anubis instance, used for constructing redirect URLs (e.g., for Traefik forwardAuth). |
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example:com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. | | `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example:com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. | | `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
| `SLOG_LEVEL` | `INFO` | The log level for structured logging. Valid values are `DEBUG`, `INFO`, `WARN`, and `ERROR`. Set to `DEBUG` to see all requests, evaluations, and detailed diagnostic information. |
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. | | `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. |
| `STRIP_BASE_PREFIX` | `false` | If set to `true`, strips the base prefix from request paths when forwarding to the target server. This is useful when your target service expects to receive requests without the base prefix. For example, with `BASE_PREFIX=/foo` and `STRIP_BASE_PREFIX=true`, a request to `/foo/bar` would be forwarded to the target as `/bar`. | | `STRIP_BASE_PREFIX` | `false` | If set to `true`, strips the base prefix from request paths when forwarding to the target server. This is useful when your target service expects to receive requests without the base prefix. For example, with `BASE_PREFIX=/foo` and `STRIP_BASE_PREFIX=true`, a request to `/foo/bar` would be forwarded to the target as `/bar`. |
| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. | | `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. Supports Unix domain sockets, set this to a URI like so: `unix:///path/to/socket.sock`. |
@@ -100,10 +102,12 @@ If you don't know or understand what these settings mean, ignore them. These are
| Environment Variable | Default value | Explanation | | Environment Variable | Default value | Explanation |
| :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | :---------------------------- | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `TARGET_SNI` | unset | If set, overrides the TLS handshake hostname in requests forwarded to `TARGET`. | | `FORCED_LANGUAGE` | unset | If set, forces Anubis to display challenge pages in the specified language instead of using the browser's Accept-Language header. Use ISO 639-1 language codes (e.g., `de` for German, `fr` for French). |
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. |
| `TARGET_DISABLE_KEEPALIVE` | `false` | If `true`, disables HTTP keep-alive for connections to the target backend. Useful for backends that don't handle keep-alive properly. |
| `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. | | `TARGET_HOST` | unset | If set, overrides the Host header in requests forwarded to `TARGET`. |
| `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. | | `TARGET_INSECURE_SKIP_VERIFY` | `false` | If `true`, skip TLS certificate validation for targets that listen over `https`. If your backend does not listen over `https`, ignore this setting. |
| `HS512_SECRET` | unset | Secret string for JWT HS512 algorithm. If this is not set, Anubis will use ED25519 as defined via the variables above. The longer the better; 128 chars should suffice. | | `TARGET_SNI` | unset | If set, overrides the TLS handshake hostname in requests forwarded to `TARGET`. |
</details> </details>

View File

@@ -43,7 +43,7 @@ func (i *Impl) Issue(r *http.Request, lg *slog.Logger, in *challenge.IssueInput)
} }
func (i *Impl) Validate(r *http.Request, lg *slog.Logger, in *challenge.ValidateInput) error { func (i *Impl) Validate(r *http.Request, lg *slog.Logger, in *challenge.ValidateInput) error {
wantTime := in.Challenge.IssuedAt.Add(time.Duration(in.Rule.Challenge.Difficulty) * 950 * time.Millisecond) wantTime := in.Challenge.IssuedAt.Add(time.Duration(in.Rule.Challenge.Difficulty) * 800 * time.Millisecond)
if time.Now().Before(wantTime) { if time.Now().Before(wantTime) {
return challenge.NewError("validate", "insufficent time", fmt.Errorf("%w: wanted user to wait until at least %s", challenge.ErrFailed, wantTime.Format(time.RFC3339))) return challenge.NewError("validate", "insufficent time", fmt.Errorf("%w: wanted user to wait until at least %s", challenge.ErrFailed, wantTime.Format(time.RFC3339)))

View File

@@ -13,6 +13,6 @@ templ page(redir string, difficulty int, loc *localization.SimpleLocalizer) {
<img style="display:none;" style="width:100%;max-width:256px;" src={ anubis.BasePrefix + "/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" + anubis.Version }/> <img style="display:none;" style="width:100%;max-width:256px;" src={ anubis.BasePrefix + "/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" + anubis.Version }/>
<p id="status">{ loc.T("loading") }</p> <p id="status">{ loc.T("loading") }</p>
<p>{ loc.T("connection_security") }</p> <p>{ loc.T("connection_security") }</p>
<meta http-equiv="refresh" content={ fmt.Sprintf("%d; url=%s", difficulty, redir) }/> <meta http-equiv="refresh" content={ fmt.Sprintf("%d; url=%s", difficulty+1, redir) }/>
</div> </div>
} }

View File

@@ -32,7 +32,7 @@ const App = () => {
useEffect(() => { useEffect(() => {
const timer = setTimeout(() => { const timer = setTimeout(() => {
setPassed(true); setPassed(true);
}, state.difficulty * 100); }, state.difficulty * 125);
return () => clearTimeout(timer); return () => clearTimeout(timer);
}, [challenge]); }, [challenge]);

View File

@@ -57,7 +57,7 @@ func (i *impl) Issue(r *http.Request, lg *slog.Logger, in *challenge.IssueInput)
} }
func (i *impl) Validate(r *http.Request, lg *slog.Logger, in *challenge.ValidateInput) error { func (i *impl) Validate(r *http.Request, lg *slog.Logger, in *challenge.ValidateInput) error {
wantTime := in.Challenge.IssuedAt.Add(time.Duration(in.Rule.Challenge.Difficulty) * 95 * time.Millisecond) wantTime := in.Challenge.IssuedAt.Add(time.Duration(in.Rule.Challenge.Difficulty) * 80 * time.Millisecond)
if time.Now().Before(wantTime) { if time.Now().Before(wantTime) {
return challenge.NewError("validate", "insufficent time", fmt.Errorf("%w: wanted user to wait until at least %s", challenge.ErrFailed, wantTime.Format(time.RFC3339))) return challenge.NewError("validate", "insufficent time", fmt.Errorf("%w: wanted user to wait until at least %s", challenge.ErrFailed, wantTime.Format(time.RFC3339)))