mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-12 03:28:45 +00:00
Merge branch 'main' into json/requireED25519
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
This commit is contained in:
6
.github/workflows/docker-pr.yml
vendored
6
.github/workflows/docker-pr.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
@@ -25,7 +25,7 @@ jobs:
|
|||||||
uses: Homebrew/actions/setup-homebrew@main
|
uses: Homebrew/actions/setup-homebrew@main
|
||||||
|
|
||||||
- name: Setup Homebrew cellar cache
|
- name: Setup Homebrew cellar cache
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
/home/linuxbrew/.linuxbrew/Cellar
|
/home/linuxbrew/.linuxbrew/Cellar
|
||||||
@@ -47,7 +47,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||||
with:
|
with:
|
||||||
images: ghcr.io/${{ github.repository }}
|
images: ghcr.io/${{ github.repository }}
|
||||||
|
|
||||||
|
|||||||
10
.github/workflows/docker.yml
vendored
10
.github/workflows/docker.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
@@ -35,7 +35,7 @@ jobs:
|
|||||||
uses: Homebrew/actions/setup-homebrew@main
|
uses: Homebrew/actions/setup-homebrew@main
|
||||||
|
|
||||||
- name: Setup Homebrew cellar cache
|
- name: Setup Homebrew cellar cache
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
/home/linuxbrew/.linuxbrew/Cellar
|
/home/linuxbrew/.linuxbrew/Cellar
|
||||||
@@ -56,7 +56,7 @@ jobs:
|
|||||||
brew bundle
|
brew bundle
|
||||||
|
|
||||||
- name: Log into registry
|
- name: Log into registry
|
||||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
@@ -64,7 +64,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||||
with:
|
with:
|
||||||
images: ${{ env.IMAGE }}
|
images: ${{ env.IMAGE }}
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ jobs:
|
|||||||
SLOG_LEVEL: debug
|
SLOG_LEVEL: debug
|
||||||
|
|
||||||
- name: Generate artifact attestation
|
- name: Generate artifact attestation
|
||||||
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
|
uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
|
||||||
with:
|
with:
|
||||||
subject-name: ${{ env.IMAGE }}
|
subject-name: ${{ env.IMAGE }}
|
||||||
subject-digest: ${{ steps.build.outputs.digest }}
|
subject-digest: ${{ steps.build.outputs.digest }}
|
||||||
|
|||||||
10
.github/workflows/docs-deploy.yml
vendored
10
.github/workflows/docs-deploy.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
|||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||||
|
|
||||||
- name: Log into registry
|
- name: Log into registry
|
||||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: techarohq
|
username: techarohq
|
||||||
@@ -33,7 +33,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||||
with:
|
with:
|
||||||
images: ghcr.io/techarohq/anubis/docs
|
images: ghcr.io/techarohq/anubis/docs
|
||||||
tags: |
|
tags: |
|
||||||
@@ -53,14 +53,14 @@ jobs:
|
|||||||
push: true
|
push: true
|
||||||
|
|
||||||
- name: Apply k8s manifests to limsa lominsa
|
- name: Apply k8s manifests to limsa lominsa
|
||||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||||
env:
|
env:
|
||||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||||
with:
|
with:
|
||||||
args: apply -k docs/manifest
|
args: apply -k docs/manifest
|
||||||
|
|
||||||
- name: Apply k8s manifests to limsa lominsa
|
- name: Apply k8s manifests to limsa lominsa
|
||||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||||
env:
|
env:
|
||||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||||
with:
|
with:
|
||||||
|
|||||||
4
.github/workflows/docs-test.yml
vendored
4
.github/workflows/docs-test.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
|||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||||
with:
|
with:
|
||||||
images: ghcr.io/techarohq/anubis/docs
|
images: ghcr.io/techarohq/anubis/docs
|
||||||
tags: |
|
tags: |
|
||||||
|
|||||||
8
.github/workflows/go.yml
vendored
8
.github/workflows/go.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
#runs-on: alrest-techarohq
|
#runs-on: alrest-techarohq
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
@@ -28,7 +28,7 @@ jobs:
|
|||||||
uses: Homebrew/actions/setup-homebrew@main
|
uses: Homebrew/actions/setup-homebrew@main
|
||||||
|
|
||||||
- name: Setup Homebrew cellar cache
|
- name: Setup Homebrew cellar cache
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
/home/linuxbrew/.linuxbrew/Cellar
|
/home/linuxbrew/.linuxbrew/Cellar
|
||||||
@@ -49,7 +49,7 @@ jobs:
|
|||||||
brew bundle
|
brew bundle
|
||||||
|
|
||||||
- name: Setup Golang caches
|
- name: Setup Golang caches
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cache/go-build
|
~/.cache/go-build
|
||||||
@@ -59,7 +59,7 @@ jobs:
|
|||||||
${{ runner.os }}-golang-
|
${{ runner.os }}-golang-
|
||||||
|
|
||||||
- name: Cache playwright binaries
|
- name: Cache playwright binaries
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
id: playwright-cache
|
id: playwright-cache
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
|
|||||||
6
.github/workflows/package-builds-stable.yml
vendored
6
.github/workflows/package-builds-stable.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
#runs-on: alrest-techarohq
|
#runs-on: alrest-techarohq
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
@@ -29,7 +29,7 @@ jobs:
|
|||||||
uses: Homebrew/actions/setup-homebrew@main
|
uses: Homebrew/actions/setup-homebrew@main
|
||||||
|
|
||||||
- name: Setup Homebrew cellar cache
|
- name: Setup Homebrew cellar cache
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
/home/linuxbrew/.linuxbrew/Cellar
|
/home/linuxbrew/.linuxbrew/Cellar
|
||||||
@@ -50,7 +50,7 @@ jobs:
|
|||||||
brew bundle
|
brew bundle
|
||||||
|
|
||||||
- name: Setup Golang caches
|
- name: Setup Golang caches
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cache/go-build
|
~/.cache/go-build
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ jobs:
|
|||||||
#runs-on: alrest-techarohq
|
#runs-on: alrest-techarohq
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
@@ -30,7 +30,7 @@ jobs:
|
|||||||
uses: Homebrew/actions/setup-homebrew@main
|
uses: Homebrew/actions/setup-homebrew@main
|
||||||
|
|
||||||
- name: Setup Homebrew cellar cache
|
- name: Setup Homebrew cellar cache
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
/home/linuxbrew/.linuxbrew/Cellar
|
/home/linuxbrew/.linuxbrew/Cellar
|
||||||
@@ -51,7 +51,7 @@ jobs:
|
|||||||
brew bundle
|
brew bundle
|
||||||
|
|
||||||
- name: Setup Golang caches
|
- name: Setup Golang caches
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cache/go-build
|
~/.cache/go-build
|
||||||
|
|||||||
2
.github/workflows/smoke-tests.yml
vendored
2
.github/workflows/smoke-tests.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
|
|||||||
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
@@ -18,13 +18,13 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
- name: Log into registry
|
- name: Log into registry
|
||||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
|
|||||||
2
.github/workflows/ssh-ci.yml
vendored
2
.github/workflows/ssh-ci.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
|||||||
- ci@ppc64le.techaro.lol
|
- ci@ppc64le.techaro.lol
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
fetch-tags: true
|
fetch-tags: true
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|||||||
6
.github/workflows/zizmor.yml
vendored
6
.github/workflows/zizmor.yml
vendored
@@ -16,12 +16,12 @@ jobs:
|
|||||||
security-events: write
|
security-events: write
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
- name: Install the latest version of uv
|
- name: Install the latest version of uv
|
||||||
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3
|
uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
|
||||||
|
|
||||||
- name: Run zizmor 🌈
|
- name: Run zizmor 🌈
|
||||||
run: uvx zizmor --format sarif . > results.sarif
|
run: uvx zizmor --format sarif . > results.sarif
|
||||||
@@ -29,7 +29,7 @@ jobs:
|
|||||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Upload SARIF file
|
- name: Upload SARIF file
|
||||||
uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4
|
uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11
|
||||||
with:
|
with:
|
||||||
sarif_file: results.sarif
|
sarif_file: results.sarif
|
||||||
category: zizmor
|
category: zizmor
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type RobotsRule struct {
|
type RobotsRule struct {
|
||||||
UserAgent string
|
UserAgents []string
|
||||||
Disallows []string
|
Disallows []string
|
||||||
Allows []string
|
Allows []string
|
||||||
CrawlDelay int
|
CrawlDelay int
|
||||||
@@ -130,10 +130,26 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
|
||||||
|
rule := RobotsRule{
|
||||||
|
UserAgents: make([]string, len(userAgents)),
|
||||||
|
Disallows: make([]string, len(disallows)),
|
||||||
|
Allows: make([]string, len(allows)),
|
||||||
|
CrawlDelay: crawlDelay,
|
||||||
|
}
|
||||||
|
copy(rule.UserAgents, userAgents)
|
||||||
|
copy(rule.Disallows, disallows)
|
||||||
|
copy(rule.Allows, allows)
|
||||||
|
return rule
|
||||||
|
}
|
||||||
|
|
||||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||||
scanner := bufio.NewScanner(input)
|
scanner := bufio.NewScanner(input)
|
||||||
var rules []RobotsRule
|
var rules []RobotsRule
|
||||||
var currentRule *RobotsRule
|
var currentUserAgents []string
|
||||||
|
var currentDisallows []string
|
||||||
|
var currentAllows []string
|
||||||
|
var currentCrawlDelay int
|
||||||
|
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := strings.TrimSpace(scanner.Text())
|
line := strings.TrimSpace(scanner.Text())
|
||||||
@@ -154,38 +170,42 @@ func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
|||||||
|
|
||||||
switch directive {
|
switch directive {
|
||||||
case "user-agent":
|
case "user-agent":
|
||||||
// Start a new rule section
|
// If we have accumulated rules with directives and encounter a new user-agent,
|
||||||
if currentRule != nil {
|
// flush the current rules
|
||||||
rules = append(rules, *currentRule)
|
if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) {
|
||||||
}
|
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||||
currentRule = &RobotsRule{
|
rules = append(rules, rule)
|
||||||
UserAgent: value,
|
// Reset for next group
|
||||||
Disallows: make([]string, 0),
|
currentUserAgents = nil
|
||||||
Allows: make([]string, 0),
|
currentDisallows = nil
|
||||||
|
currentAllows = nil
|
||||||
|
currentCrawlDelay = 0
|
||||||
}
|
}
|
||||||
|
currentUserAgents = append(currentUserAgents, value)
|
||||||
|
|
||||||
case "disallow":
|
case "disallow":
|
||||||
if currentRule != nil && value != "" {
|
if len(currentUserAgents) > 0 && value != "" {
|
||||||
currentRule.Disallows = append(currentRule.Disallows, value)
|
currentDisallows = append(currentDisallows, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
case "allow":
|
case "allow":
|
||||||
if currentRule != nil && value != "" {
|
if len(currentUserAgents) > 0 && value != "" {
|
||||||
currentRule.Allows = append(currentRule.Allows, value)
|
currentAllows = append(currentAllows, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
case "crawl-delay":
|
case "crawl-delay":
|
||||||
if currentRule != nil {
|
if len(currentUserAgents) > 0 {
|
||||||
if delay, err := parseIntSafe(value); err == nil {
|
if delay, err := parseIntSafe(value); err == nil {
|
||||||
currentRule.CrawlDelay = delay
|
currentCrawlDelay = delay
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't forget the last rule
|
// Don't forget the last group of rules
|
||||||
if currentRule != nil {
|
if len(currentUserAgents) > 0 {
|
||||||
rules = append(rules, *currentRule)
|
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||||
|
rules = append(rules, rule)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark blacklisted user agents (those with "Disallow: /")
|
// Mark blacklisted user agents (those with "Disallow: /")
|
||||||
@@ -211,10 +231,11 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||||||
var anubisRules []AnubisRule
|
var anubisRules []AnubisRule
|
||||||
ruleCounter := 0
|
ruleCounter := 0
|
||||||
|
|
||||||
|
// Process each robots rule individually
|
||||||
for _, robotsRule := range robotsRules {
|
for _, robotsRule := range robotsRules {
|
||||||
userAgent := robotsRule.UserAgent
|
userAgents := robotsRule.UserAgents
|
||||||
|
|
||||||
// Handle crawl delay as weight adjustment (do this first before any continues)
|
// Handle crawl delay
|
||||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||||
ruleCounter++
|
ruleCounter++
|
||||||
rule := AnubisRule{
|
rule := AnubisRule{
|
||||||
@@ -223,20 +244,32 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||||
}
|
}
|
||||||
|
|
||||||
if userAgent == "*" {
|
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||||
rule.Expression = &config.ExpressionOrList{
|
rule.Expression = &config.ExpressionOrList{
|
||||||
All: []string{"true"}, // Always applies
|
All: []string{"true"}, // Always applies
|
||||||
}
|
}
|
||||||
} else {
|
} else if len(userAgents) == 1 {
|
||||||
rule.Expression = &config.ExpressionOrList{
|
rule.Expression = &config.ExpressionOrList{
|
||||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Multiple user agents - use any block
|
||||||
|
var expressions []string
|
||||||
|
for _, ua := range userAgents {
|
||||||
|
if ua == "*" {
|
||||||
|
expressions = append(expressions, "true")
|
||||||
|
} else {
|
||||||
|
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rule.Expression = &config.ExpressionOrList{
|
||||||
|
Any: expressions,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
anubisRules = append(anubisRules, rule)
|
anubisRules = append(anubisRules, rule)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle blacklisted user agents (complete deny/challenge)
|
// Handle blacklisted user agents
|
||||||
if robotsRule.IsBlacklist {
|
if robotsRule.IsBlacklist {
|
||||||
ruleCounter++
|
ruleCounter++
|
||||||
rule := AnubisRule{
|
rule := AnubisRule{
|
||||||
@@ -244,21 +277,36 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||||||
Action: *userAgentDeny,
|
Action: *userAgentDeny,
|
||||||
}
|
}
|
||||||
|
|
||||||
if userAgent == "*" {
|
if len(userAgents) == 1 {
|
||||||
// This would block everything - convert to a weight adjustment instead
|
userAgent := userAgents[0]
|
||||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
if userAgent == "*" {
|
||||||
rule.Action = "WEIGH"
|
// This would block everything - convert to a weight adjustment instead
|
||||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||||
rule.Expression = &config.ExpressionOrList{
|
rule.Action = "WEIGH"
|
||||||
All: []string{"true"}, // Always applies
|
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||||
|
rule.Expression = &config.ExpressionOrList{
|
||||||
|
All: []string{"true"}, // Always applies
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rule.Expression = &config.ExpressionOrList{
|
||||||
|
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// Multiple user agents - use any block
|
||||||
|
var expressions []string
|
||||||
|
for _, ua := range userAgents {
|
||||||
|
if ua == "*" {
|
||||||
|
expressions = append(expressions, "true")
|
||||||
|
} else {
|
||||||
|
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||||
|
}
|
||||||
|
}
|
||||||
rule.Expression = &config.ExpressionOrList{
|
rule.Expression = &config.ExpressionOrList{
|
||||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
Any: expressions,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
anubisRules = append(anubisRules, rule)
|
anubisRules = append(anubisRules, rule)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle specific disallow rules
|
// Handle specific disallow rules
|
||||||
@@ -276,9 +324,33 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||||||
// Build CEL expression
|
// Build CEL expression
|
||||||
var conditions []string
|
var conditions []string
|
||||||
|
|
||||||
// Add user agent condition if not wildcard
|
// Add user agent conditions
|
||||||
if userAgent != "*" {
|
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
|
// Wildcard user agent - no user agent condition needed
|
||||||
|
} else if len(userAgents) == 1 {
|
||||||
|
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
|
||||||
|
} else {
|
||||||
|
// For multiple user agents, we need to use a more complex expression
|
||||||
|
// This is a limitation - we can't easily combine any for user agents with all for path
|
||||||
|
// So we'll create separate rules for each user agent
|
||||||
|
for _, ua := range userAgents {
|
||||||
|
if ua == "*" {
|
||||||
|
continue // Skip wildcard as it's handled separately
|
||||||
|
}
|
||||||
|
ruleCounter++
|
||||||
|
subRule := AnubisRule{
|
||||||
|
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||||
|
Action: *baseAction,
|
||||||
|
Expression: &config.ExpressionOrList{
|
||||||
|
All: []string{
|
||||||
|
fmt.Sprintf("userAgent.contains(%q)", ua),
|
||||||
|
buildPathCondition(disallow),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
anubisRules = append(anubisRules, subRule)
|
||||||
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add path condition
|
// Add path condition
|
||||||
@@ -291,7 +363,6 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||||||
|
|
||||||
anubisRules = append(anubisRules, rule)
|
anubisRules = append(anubisRules, rule)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return anubisRules
|
return anubisRules
|
||||||
|
|||||||
@@ -78,6 +78,12 @@ func TestDataFileConversion(t *testing.T) {
|
|||||||
expectedFile: "complex.yaml",
|
expectedFile: "complex.yaml",
|
||||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "consecutive_user_agents",
|
||||||
|
robotsFile: "consecutive.robots.txt",
|
||||||
|
expectedFile: "consecutive.yaml",
|
||||||
|
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
|
|||||||
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
@@ -25,6 +25,6 @@
|
|||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("Googlebot")
|
- userAgent.contains("Googlebot")
|
||||||
- path.startsWith("/search")
|
- path.startsWith("/search")
|
||||||
name: robots-txt-policy-disallow-7
|
name: robots-txt-policy-disallow-7
|
||||||
|
|||||||
24
cmd/robots2policy/testdata/complex.yaml
vendored
24
cmd/robots2policy/testdata/complex.yaml
vendored
@@ -20,8 +20,8 @@
|
|||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("Googlebot")
|
- userAgent.contains("Googlebot")
|
||||||
- path.startsWith("/search/")
|
- path.startsWith("/search/")
|
||||||
name: robots-txt-policy-disallow-6
|
name: robots-txt-policy-disallow-6
|
||||||
- action: WEIGH
|
- action: WEIGH
|
||||||
expression: userAgent.contains("Bingbot")
|
expression: userAgent.contains("Bingbot")
|
||||||
@@ -31,14 +31,14 @@
|
|||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("Bingbot")
|
- userAgent.contains("Bingbot")
|
||||||
- path.startsWith("/search/")
|
- path.startsWith("/search/")
|
||||||
name: robots-txt-policy-disallow-8
|
name: robots-txt-policy-disallow-8
|
||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("Bingbot")
|
- userAgent.contains("Bingbot")
|
||||||
- path.startsWith("/admin/")
|
- path.startsWith("/admin/")
|
||||||
name: robots-txt-policy-disallow-9
|
name: robots-txt-policy-disallow-9
|
||||||
- action: DENY
|
- action: DENY
|
||||||
expression: userAgent.contains("BadBot")
|
expression: userAgent.contains("BadBot")
|
||||||
@@ -54,18 +54,18 @@
|
|||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("TestBot")
|
- userAgent.contains("TestBot")
|
||||||
- path.matches("^/.*/admin")
|
- path.matches("^/.*/admin")
|
||||||
name: robots-txt-policy-disallow-13
|
name: robots-txt-policy-disallow-13
|
||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("TestBot")
|
- userAgent.contains("TestBot")
|
||||||
- path.matches("^/temp.*\\.html")
|
- path.matches("^/temp.*\\.html")
|
||||||
name: robots-txt-policy-disallow-14
|
name: robots-txt-policy-disallow-14
|
||||||
- action: CHALLENGE
|
- action: CHALLENGE
|
||||||
expression:
|
expression:
|
||||||
all:
|
all:
|
||||||
- userAgent.contains("TestBot")
|
- userAgent.contains("TestBot")
|
||||||
- path.matches("^/file.\\.log")
|
- path.matches("^/file.\\.log")
|
||||||
name: robots-txt-policy-disallow-15
|
name: robots-txt-policy-disallow-15
|
||||||
|
|||||||
25
cmd/robots2policy/testdata/consecutive.robots.txt
vendored
Normal file
25
cmd/robots2policy/testdata/consecutive.robots.txt
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Test consecutive user agents that should be grouped into any: blocks
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /admin
|
||||||
|
Crawl-delay: 10
|
||||||
|
|
||||||
|
# Multiple consecutive user agents - should be grouped
|
||||||
|
User-agent: BadBot
|
||||||
|
User-agent: SpamBot
|
||||||
|
User-agent: EvilBot
|
||||||
|
Disallow: /
|
||||||
|
|
||||||
|
# Single user agent - should be separate
|
||||||
|
User-agent: GoodBot
|
||||||
|
Disallow: /private
|
||||||
|
|
||||||
|
# Multiple consecutive user agents with crawl delay
|
||||||
|
User-agent: SlowBot1
|
||||||
|
User-agent: SlowBot2
|
||||||
|
Crawl-delay: 5
|
||||||
|
|
||||||
|
# Multiple consecutive user agents with specific path
|
||||||
|
User-agent: SearchBot1
|
||||||
|
User-agent: SearchBot2
|
||||||
|
User-agent: SearchBot3
|
||||||
|
Disallow: /search
|
||||||
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
Normal file
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
- action: WEIGH
|
||||||
|
expression: "true"
|
||||||
|
name: robots-txt-policy-crawl-delay-1
|
||||||
|
weight:
|
||||||
|
adjust: 3
|
||||||
|
- action: CHALLENGE
|
||||||
|
expression: path.startsWith("/admin")
|
||||||
|
name: robots-txt-policy-disallow-2
|
||||||
|
- action: DENY
|
||||||
|
expression:
|
||||||
|
any:
|
||||||
|
- userAgent.contains("BadBot")
|
||||||
|
- userAgent.contains("SpamBot")
|
||||||
|
- userAgent.contains("EvilBot")
|
||||||
|
name: robots-txt-policy-blacklist-3
|
||||||
|
- action: CHALLENGE
|
||||||
|
expression:
|
||||||
|
all:
|
||||||
|
- userAgent.contains("GoodBot")
|
||||||
|
- path.startsWith("/private")
|
||||||
|
name: robots-txt-policy-disallow-4
|
||||||
|
- action: WEIGH
|
||||||
|
expression:
|
||||||
|
any:
|
||||||
|
- userAgent.contains("SlowBot1")
|
||||||
|
- userAgent.contains("SlowBot2")
|
||||||
|
name: robots-txt-policy-crawl-delay-5
|
||||||
|
weight:
|
||||||
|
adjust: 3
|
||||||
|
- action: CHALLENGE
|
||||||
|
expression:
|
||||||
|
all:
|
||||||
|
- userAgent.contains("SearchBot1")
|
||||||
|
- path.startsWith("/search")
|
||||||
|
name: robots-txt-policy-disallow-7
|
||||||
|
- action: CHALLENGE
|
||||||
|
expression:
|
||||||
|
all:
|
||||||
|
- userAgent.contains("SearchBot2")
|
||||||
|
- path.startsWith("/search")
|
||||||
|
name: robots-txt-policy-disallow-8
|
||||||
|
- action: CHALLENGE
|
||||||
|
expression:
|
||||||
|
all:
|
||||||
|
- userAgent.contains("SearchBot3")
|
||||||
|
- path.startsWith("/search")
|
||||||
|
name: robots-txt-policy-disallow-9
|
||||||
8
cmd/robots2policy/testdata/simple.json
vendored
8
cmd/robots2policy/testdata/simple.json
vendored
@@ -1,12 +1,12 @@
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"action": "CHALLENGE",
|
|
||||||
"expression": "path.startsWith(\"/admin/\")",
|
"expression": "path.startsWith(\"/admin/\")",
|
||||||
"name": "robots-txt-policy-disallow-1"
|
"name": "robots-txt-policy-disallow-1",
|
||||||
|
"action": "CHALLENGE"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"action": "CHALLENGE",
|
|
||||||
"expression": "path.startsWith(\"/private\")",
|
"expression": "path.startsWith(\"/private\")",
|
||||||
"name": "robots-txt-policy-disallow-2"
|
"name": "robots-txt-policy-disallow-2",
|
||||||
|
"action": "CHALLENGE"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
|
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
|
||||||
- Add validation warning when persistent storage is used without setting signing keys
|
- Add validation warning when persistent storage is used without setting signing keys
|
||||||
|
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))
|
||||||
|
|
||||||
<!-- This changes the project to: -->
|
<!-- This changes the project to: -->
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user